polecat 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
+ source "http://rubygems.org"
2
+
3
+ # comment out the gems you don't want
4
+ group :preferred do
5
+ gem "virtus"
6
+ end
7
+
8
+ group :development do
9
+ gem "rspec"
10
+ gem "rdoc"
11
+ gem "yard"
12
+ gem "bundler"
13
+ gem "jeweler"
14
+ gem "reek"
15
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Gibheer
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,23 @@
1
+ = polecat
2
+
3
+ This is my attempt to build a search library like lucene in native ruby.
4
+
5
+ The idea formed when I learned about the Rubinius project and its ability to tune parts of the Ruby code with its JIT. It's a bit sad for MRI that more and more Ruby code gets converted to C, so it would be great if rbx could be as performant as MRI, even if large parts of an application are written in Ruby.
6
+
7
+ More updates will come as the project progresses.
8
+
9
+ == Contributing to polecat
10
+
11
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
12
+ * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
13
+ * Fork the project
14
+ * Start a feature/bugfix branch
15
+ * Commit and push until you are happy with your contribution
16
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
17
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
18
+
19
+ == Copyright
20
+
21
+ Copyright (c) 2011 Gibheer. See LICENSE.txt for
22
+ further details.
23
+
data/Rakefile ADDED
@@ -0,0 +1,51 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "polecat"
18
+ gem.homepage = "http://github.com/Gibheer/polecat"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{library for searching through documents}
21
+ gem.description = %Q{This is a search library for searching terms in documents}
22
+ gem.email = "gibheer@gmail.com"
23
+ gem.authors = ["Gibheer"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rspec/core'
29
+ require 'rspec/core/rake_task'
30
+ RSpec::Core::RakeTask.new(:spec) do |spec|
31
+ spec.pattern = FileList['spec/**/*_spec.rb']
32
+ end
33
+
34
+ require 'reek/rake/task'
35
+ Reek::Rake::Task.new do |t|
36
+ t.fail_on_error = true
37
+ t.verbose = false
38
+ t.source_files = 'lib/**/*.rb'
39
+ end
40
+
41
+ task :default => :spec
42
+
43
+ namespace :spec do
44
+ desc "with all ruby versions"
45
+ task :all do
46
+ puts `rvm 1.8.7@polecat,1.9.2@polecat,rbx@polecat,rbx-hydra@polecat rake spec`
47
+ end
48
+ end
49
+
50
+ require 'yard'
51
+ YARD::Rake::YardocTask.new
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,61 @@
1
+ require 'polecat/document_resource'
2
+
3
class Polecat
  # Mixin that turns a plain class into a polecat document.
  #
  # Including the module extends the target class with DocumentResource and
  # installs an empty attribute registry on it. Every instance works on its
  # own deep copy of that registry, so values never leak between documents.
  module Document
    # Default options merged into every declared field.
    OPTIONS = {
      :index => true,
      :lazy  => false,
      :value => nil
    }.freeze

    # include the document
    #
    # Extends the target class with DocumentResource and gives it an empty
    # attribute registry.
    # @private
    def self.included(klass) #:nodoc:
      klass.extend(DocumentResource)
      klass.instance_variable_set :@attributes, {}
    end

    # creates a new document
    #
    # It is possible to create a new document with a hash, which has all
    # values of the fields.
    # @example initializing a document
    #   class Foo
    #     include Polecat::Document
    #
    #     field :id
    #     field :description
    #   end
    #   f = Foo.new :id => 1, :description => 'foo'
    # @param [Hash] fields field name => value pairs
    # @raise [ArgumentError] when a key is not a declared field
    def initialize(fields = {})
      fields.each { |key, value| attribute_set key, value }
    end

    # get an attribute of the document
    #
    # @param [#to_sym] name the field name
    # @return [Object] the stored value
    # @raise [ArgumentError] when the field does not exist
    def attribute_get(name)
      key = name.to_sym
      registry = attributes
      # fail the same way attribute_set does instead of crashing on nil
      unless registry.has_key? key
        raise ArgumentError, "attribute #{key} does not exist"
      end
      registry[key][:value]
    end

    # set an attribute of the document
    #
    # @param [#to_sym] name the field name
    # @param [Object] value the new value
    # @return [Object] the value that was stored
    # @raise [ArgumentError] when the field does not exist
    def attribute_set(name, value)
      key = name.to_sym
      registry = attributes
      unless registry.has_key? key
        raise ArgumentError, "attribute #{key} does not exist"
      end
      registry[key][:value] = value
    end

    # get all attributes
    #
    # The registry of the class is deep copied (Marshal round trip) on first
    # access, so changing a value here never touches the class level
    # definition or other instances.
    # @return [Hash] field name => options hash (including :value)
    def attributes
      @attributes ||= Marshal.load(
        Marshal.dump(self.class.instance_variable_get(:@attributes))
      )
    end
  end
end
@@ -0,0 +1,37 @@
1
class Polecat
  # Class level macros for documents.
  #
  # The module is meant to be extended into a class that holds a Hash in its
  # @attributes instance variable and whose instances respond to
  # attribute_get and attribute_set (Polecat::Document sets this up).
  module DocumentResource
    # declare a field
    #
    # Registers the field with the default options merged with the given
    # ones and creates a reader and a writer method for it.
    # @param [#to_sym] name the field name
    # @param [Hash] options overrides for Document::OPTIONS
    def field(name, options = {})
      attributes = instance_variable_get :@attributes
      attributes[name.to_sym] = Document::OPTIONS.merge(options)

      create_reader_for name
      create_writer_for name
    end

    # the anonymous module holding all generated accessors
    #
    # The accessors live in an included module instead of the class itself,
    # so the class can override them and still call super.
    # @return [Module]
    def mod
      @mod ||= Module.new.tap { |accessors| include accessors }
    end

    # create the reader method for a field
    #
    # define_method is used instead of evaluating a string, so any field
    # name works and no text is ever interpreted as code.
    # @param [#to_sym] name the field name
    def create_reader_for(name)
      key = name.to_sym
      mod.module_eval do
        define_method(key) { attribute_get key }
      end
    end

    # create the writer method for a field
    # @param [#to_sym] name the field name
    def create_writer_for(name)
      key = name.to_sym
      mod.module_eval do
        define_method("#{key}=") { |value| attribute_set key, value }
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
class Polecat
  # reads an index directory
  #
  # This class reads the content of an index directory and returns the
  # documents stored in it.
  class IndexReader
    # @return [String] the path to the index directory
    attr_reader :path

    # initialize a new reader
    #
    # Create a new reader for the given path. If the directory is empty, you
    # will get an empty list, else all documents stored in that directory.
    # @param [String] path the path to the index directory
    # @raise [ArgumentError] when the path is not a directory
    def initialize(path)
      @path = path
      raise ArgumentError, 'no valid directory' unless File.directory? @path
    end

    # read the content of the directory
    #
    # Loads every file of the directory and concatenates the document lists
    # stored in them. Files are visited in sorted order, so the result is the
    # same on every platform.
    #
    # NOTE: the files are loaded with Marshal.load, which must only be used
    # on trusted data. Do not point a reader at a directory others can write.
    # @raise [IOError] raised when the directory is locked
    # @return [Array] all documents found in the directory
    def read
      raise IOError, 'index is locked' if locked?

      Dir[File.join(@path, '*')].sort.inject([]) do |documents, file|
        # binary mode, the marshalled data is not text
        documents.concat Marshal.load(File.open(file, 'rb') { |io| io.read })
      end
    end

    # checks whether the directory is locked or not
    # @return [Boolean] true when an 'index.lock' file is present
    def locked?
      File.exist? File.join(@path, 'index.lock')
    end
  end
end
@@ -0,0 +1,84 @@
1
class Polecat
  # interface for searching an index
  #
  # Built on top of a Polecat::IndexReader, this class lets you search
  # through all documents stored in an index.
  class IndexSearcher
    # @return [Polecat::IndexReader] the reader used to load the documents
    attr_reader :reader
    # @return [Symbol, nil] the value of the :default_field option
    attr_reader :default_field

    # creates a new Polecat::IndexSearcher
    #
    # Either a path to a directory or a Polecat::IndexReader has to be
    # given, to make this searcher work.
    # @example
    #   # the following has the same meaning
    #   IndexSearcher.new :path => 'index_dir'
    #   IndexSearcher.new :reader => IndexReader.new('index_dir')
    # @param [Hash] options
    # @option options [String] :path path of the index directory
    # @option options [Polecat::IndexReader] :reader used when no :path is set
    # @option options [Symbol] :default_field stored as #default_field
    # @raise [ArgumentError] when neither option yields an IndexReader
    def initialize(options)
      if options.has_key? :path
        @reader = Polecat::IndexReader.new(options[:path])
      elsif options.has_key? :reader
        @reader = options[:reader]
      end
      # also catches the case that neither :path nor :reader was given
      raise ArgumentError, 'no reader' unless @reader.kind_of?(Polecat::IndexReader)

      @default_field = options[:default_field] if options.has_key? :default_field
    end

    # returns the path of the index directory
    # @return [String] path of the index directory
    def path
      @reader.path
    end

    # searches through all documents
    #
    # Every term of the query is compared with the field it names on every
    # document. With the relation :and a document is returned when all terms
    # match (an empty query therefore matches every document); with any
    # other relation it is returned when at least one term matches.
    # @param [Polecat::Query] query object responding to #terms and #relation
    # @return [Array] a list of all matching documents
    def search(query)
      terms = query.terms
      @reader.read.select do |doc|
        if query.relation == :and
          terms.all? { |term| term_matches? doc, term }
        else
          terms.any? { |term| term_matches? doc, term }
        end
      end
    end

    # checks a single term against a document
    # @private
    def term_matches?(doc, term)
      compare doc.send(term.field.to_sym), term.operator, term.value
    end
    private :term_matches?

    # compare the document value with the searched value
    #
    # :eq matches a Regexp against the value or tests for equality,
    # :gt is true when the document value is greater than the searched one,
    # :lt when it is less. Unknown operators never match.
    # @param [Object] ival the value found in the document
    # @param [Symbol] op the operator of the term
    # @param [Object] tval the value of the term
    # @return [Any] trueish for matches or falsey
    # @private
    def compare(ival, op, tval)
      case op
      when :eq
        if tval.kind_of?(Regexp)
          # values without #match (nil, numbers) can not match a pattern
          ival.respond_to?(:match) && ival.match(tval)
        else
          ival == tval
        end
      when :gt then ival > tval
      when :lt then ival < tval
      else false
      end
    end
    private :compare
  end
end
@@ -0,0 +1,123 @@
1
class Polecat
  # handles the writing of new documents to the index.
  #
  # This class is responsible for writing the documents to the index. It takes
  # a path on creation and checks that it is a directory which is not locked.
  #
  # While the documents are written to the filesystem, an 'index.lock' file
  # exists as an extra lock. Each write creates one new file in the
  # directory, which holds all documents added since the last write.
  class IndexWriter
    # @return [String] the path to the index directory
    attr_reader :path

    # create a new IndexWriter
    #
    # This creates a new IndexWriter set to the given path.
    # @param [String] path the path to the index directory
    # @raise [ArgumentError] when the path is not a directory
    # @raise [IOError] when the directory holds an 'index.lock' file
    def initialize(path)
      raise ArgumentError, 'not a directory' unless File.directory? path
      raise IOError, 'index is locked' if File.exist? File.join(path, 'index.lock')

      @path = path
      @documents = []
    end

    # returns the count of elements not flushed
    #
    # This method returns the count of all elements stored in the Writer, but
    # not yet flushed to a file.
    # @return [Integer] count of buffered documents
    def count
      @documents.count
    end

    # add a new document to the writer
    #
    # This adds a Document to the temporary storage. Call #write to write them
    # to the filesystem.
    # @param [Document] doc the document to store, has to respond to #attributes
    # @raise [ArgumentError] when the object does not respond to #attributes
    def add(doc)
      raise ArgumentError, 'missing method attributes' unless doc.respond_to? :attributes

      @documents << doc
    end

    # write all documents to the disc
    #
    # Write all stored documents to the disc and clear the buffer. The lock
    # is released again even when writing the file fails.
    # @return [Boolean] true when the write was a success, false when the
    #   index is locked (the buffer is kept in that case)
    def write
      return false unless set_lock

      begin
        # binary mode, the marshalled data is not text
        File.open(generate_filename, 'wb') do |file|
          file.write Marshal.dump(@documents)
        end
        @documents = []
        true
      ensure
        release_lock
      end
    end

    # creates an index reader with the writers path
    #
    # @return [Polecat::IndexReader] an IndexReader with the same path
    def create_reader
      Polecat::IndexReader.new @path
    end

    private

    # set the lock on the index
    #
    # The lock file is created exclusively, so checking and taking the lock
    # is a single step and two writers can not both succeed.
    # @return [Boolean] false when the lock file already exists
    def set_lock
      File.open(lock_file_name, File::WRONLY | File::CREAT | File::EXCL) { }
      true
    rescue Errno::EEXIST
      false
    end

    # release the index lock
    # @return [Boolean] false when there was no lock file
    def release_lock
      File.delete lock_file_name
      true
    rescue Errno::ENOENT
      false
    end

    # get the full path of the lock file
    def lock_file_name
      File.join(@path, 'index.lock')
    end

    # generates a new file name for an index file
    #
    # The numbers of the existing 'ind<number>.ind' files are compared as
    # integers. Sorting the names as strings would put 'ind9.ind' after
    # 'ind10.ind' and make the next write overwrite 'ind10.ind'.
    # @return [String] path of the next free index file
    def generate_filename
      numbers = Dir[File.join(@path, '*.ind')].map do |file|
        match = File.basename(file).match(/\Aind([0-9]+)\.ind\z/)
        match && match[1].to_i
      end.compact

      next_number = numbers.empty? ? 0 : numbers.max + 1
      File.join(@path, "ind#{next_number}.ind")
    end
  end
end
@@ -0,0 +1,35 @@
1
class Polecat
  # The Query manages a number of terms or queries which are set into a
  # relation. The relation decides which documents shall be returned.
  # With an @and@ relation only the documents matched by every part of the
  # query get returned. With @or@ all documents matched by any part get
  # returned.
  class Query
    # the relations a query can have
    RELATIONS = [:and, :or].freeze

    # returns the relation of the terms
    # @return [Symbol] :and, :or
    attr_reader :relation

    # returns the list of all terms
    # @return [Array] the terms in the order they were added
    attr_reader :terms

    # creates a new query object
    #
    # Create a new query object. As a default, the relation is set to @:and@
    # (@see Query#relation)
    # @param [Symbol] relation :and or :or
    # @raise [ArgumentError] when the relation is not known
    def initialize(relation = :and)
      self.relation = relation
      @terms = []
    end

    # sets the relation of the terms
    #
    # The writer runs the same check as the constructor, so a query can
    # never hold an unknown relation.
    # @param [Symbol] relation :and or :or
    # @raise [ArgumentError] when the relation is not known
    def relation=(relation)
      raise ArgumentError, 'no valid relation' unless RELATIONS.include?(relation)

      @relation = relation
    end

    # add a new term or query
    # @param [Object] term the term to append
    # @return [Polecat::Query] the query itself, to allow chaining
    def add(term)
      @terms << term
      self
    end
  end
end
@@ -0,0 +1,21 @@
1
class Polecat
  # A single condition of a query: a field, an operator and a value.
  class Term
    # the field name which should be found
    attr_reader :field
    # the operator to match the field with the value
    attr_reader :operator
    # the search value which gets matched against the document field
    attr_reader :value

    # create a new Term for a query
    #
    # A String value combined with the operator :eq is converted into a
    # Regexp matching exactly that text. The text is escaped first, so
    # characters like '.' or '(' are taken literally instead of being
    # interpreted as (possibly invalid) pattern syntax.
    #
    # NOTE: the pattern is anchored with ^ and $, which match at line
    # boundaries, so a multi line field matches when one of its lines equals
    # the value.
    # @param [Symbol] field the field name which should be found
    # @param [Symbol] operator the operator to match with
    # @param [Object] value the search value
    def initialize(field, operator, value)
      @field = field
      @operator = operator
      @value = if @operator == :eq && value.class == String
                 /^#{Regexp.escape(value)}$/
               else
                 value
               end
    end
  end
end
data/lib/polecat.rb ADDED
@@ -0,0 +1,7 @@
1
+ class Polecat
2
+ require 'polecat/index_writer'
3
+ require 'polecat/index_reader'
4
+ require 'polecat/index_searcher'
5
+ require 'polecat/query'
6
+ require 'polecat/term'
7
+ end
data/polecat.gemspec ADDED
@@ -0,0 +1,85 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{polecat}
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Gibheer"]
12
+ s.date = %q{2011-06-06}
13
+ s.description = %q{This is a search library for searching terms in documents}
14
+ s.email = %q{gibheer@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".rspec",
22
+ "Gemfile",
23
+ "LICENSE.txt",
24
+ "README.rdoc",
25
+ "Rakefile",
26
+ "VERSION",
27
+ "lib/polecat.rb",
28
+ "lib/polecat/document.rb",
29
+ "lib/polecat/document_resource.rb",
30
+ "lib/polecat/index_reader.rb",
31
+ "lib/polecat/index_searcher.rb",
32
+ "lib/polecat/index_writer.rb",
33
+ "lib/polecat/query.rb",
34
+ "lib/polecat/term.rb",
35
+ "polecat.gemspec",
36
+ "spec/index_reader/locked_spec.rb",
37
+ "spec/index_reader/new_spec.rb",
38
+ "spec/index_reader/read_spec.rb",
39
+ "spec/index_searcher/new_spec.rb",
40
+ "spec/index_searcher/search_spec.rb",
41
+ "spec/index_writer/add_spec.rb",
42
+ "spec/index_writer/count_spec.rb",
43
+ "spec/index_writer/create_reader_spec.rb",
44
+ "spec/index_writer/new_spec.rb",
45
+ "spec/index_writer/write_spec.rb",
46
+ "spec/polecat_spec.rb",
47
+ "spec/query/add_spec.rb",
48
+ "spec/query/new_spec.rb",
49
+ "spec/spec_helper.rb",
50
+ "spec/term/new_spec.rb"
51
+ ]
52
+ s.homepage = %q{http://github.com/Gibheer/polecat}
53
+ s.licenses = ["MIT"]
54
+ s.require_paths = ["lib"]
55
+ s.rubygems_version = %q{1.3.10}
56
+ s.summary = %q{library for searching through documents}
57
+
58
+ if s.respond_to? :specification_version then
59
+ s.specification_version = 3
60
+
61
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
62
+ s.add_development_dependency(%q<rspec>, [">= 0"])
63
+ s.add_development_dependency(%q<rdoc>, [">= 0"])
64
+ s.add_development_dependency(%q<yard>, [">= 0"])
65
+ s.add_development_dependency(%q<bundler>, [">= 0"])
66
+ s.add_development_dependency(%q<jeweler>, [">= 0"])
67
+ s.add_development_dependency(%q<reek>, [">= 0"])
68
+ else
69
+ s.add_dependency(%q<rspec>, [">= 0"])
70
+ s.add_dependency(%q<rdoc>, [">= 0"])
71
+ s.add_dependency(%q<yard>, [">= 0"])
72
+ s.add_dependency(%q<bundler>, [">= 0"])
73
+ s.add_dependency(%q<jeweler>, [">= 0"])
74
+ s.add_dependency(%q<reek>, [">= 0"])
75
+ end
76
+ else
77
+ s.add_dependency(%q<rspec>, [">= 0"])
78
+ s.add_dependency(%q<rdoc>, [">= 0"])
79
+ s.add_dependency(%q<yard>, [">= 0"])
80
+ s.add_dependency(%q<bundler>, [">= 0"])
81
+ s.add_dependency(%q<jeweler>, [">= 0"])
82
+ s.add_dependency(%q<reek>, [">= 0"])
83
+ end
84
+ end
85
+
@@ -0,0 +1,18 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "IndexReader#locked?" do
4
+ before do
5
+ @path = prepare_index_dir
6
+ end
7
+
8
+ it "returns false when the directory is not locked" do
9
+ r = Polecat::IndexReader.new @path
10
+ r.locked?.should == false
11
+ end
12
+
13
+ it "returns true when the directory is locked" do
14
+ FileUtils.touch @path + '/index.lock'
15
+ r = Polecat::IndexReader.new @path
16
+ r.locked?.should == true
17
+ end
18
+ end
@@ -0,0 +1,21 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "IndexReader#new" do
4
+ before do
5
+ @path = prepare_index_dir
6
+ end
7
+
8
+ it "takes a directory path as an argument" do
9
+ r = Polecat::IndexReader.new @path
10
+ r.path.should == @path
11
+ end
12
+
13
+ it "raises an error when no path is given" do
14
+ lambda { Polecat::IndexReader.new }.should raise_error(ArgumentError)
15
+ end
16
+
17
+ it "raises an error when the path is not a directory" do
18
+ lambda { Polecat::IndexReader.new '/dev/null' }.should(
19
+ raise_error(ArgumentError))
20
+ end
21
+ end
@@ -0,0 +1,47 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "IndexReader#read" do
4
+ before do
5
+ @path = prepare_index_dir
6
+ end
7
+
8
+ it "returns a array with all documents" do
9
+ r = Polecat::IndexReader.new @path
10
+ r.read.class.should == Array
11
+ end
12
+
13
+ it "returns an empty hash for a empty directory" do
14
+ r = Polecat::IndexReader.new @path
15
+ r.read.count.should == 0
16
+ end
17
+
18
+ it "returns the document count found in the index directory" do
19
+ w = Polecat::IndexWriter.new @path
20
+ w.add Spec::TestDocument.new(:id => 23)
21
+ w.write
22
+ r = Polecat::IndexReader.new @path
23
+ r.read.count.should == 1
24
+ end
25
+
26
+ it "returns an array of documents" do
27
+ w = Polecat::IndexWriter.new @path
28
+ w.add Spec::TestDocument.new(:id => 23)
29
+ w.write
30
+ w.create_reader.read[0].respond_to?(:attributes).should == true
31
+ end
32
+
33
+ it "merges all documents from different files together" do
34
+ w = Polecat::IndexWriter.new @path
35
+ w.add Spec::TestDocument.new(:id => 23)
36
+ w.write
37
+ w.add Spec::TestDocument.new(:id => 24)
38
+ w.write
39
+ w.create_reader.read.count.should == 2
40
+ end
41
+
42
+ it "raises an error when the directory is locked" do
43
+ FileUtils.touch @path + '/index.lock'
44
+ r = Polecat::IndexReader.new @path
45
+ lambda { r.read }.should raise_error(IOError)
46
+ end
47
+ end
@@ -0,0 +1,27 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "IndexSearcher#new" do
4
+ before do
5
+ @path = prepare_index_dir
6
+ end
7
+
8
+ it "takes a hash with options as an argument" do
9
+ s = Polecat::IndexSearcher.new(
10
+ :path => @path,
11
+ :default_field => :description
12
+ )
13
+ s.path.should == @path
14
+ s.default_field.should == :description
15
+ end
16
+
17
+ it "takes a reader in the options" do
18
+ r = Polecat::IndexReader.new(@path)
19
+ s = Polecat::IndexSearcher.new :reader => r
20
+ s.reader.should == r
21
+ end
22
+
23
+ it "raises an error, when the reader is not an IndexReader" do
24
+ lambda { Polecat::IndexSearcher.new(:reader => "foo") }.should(
25
+ raise_error(ArgumentError))
26
+ end
27
+ end
@@ -0,0 +1,57 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "IndexSearcher#search" do
4
+ let(:path) { prepare_index_dir }
5
+ let(:w) { Polecat::IndexWriter.new(path) }
6
+ let(:s) { Polecat::IndexSearcher.new :path => path }
7
+
8
+ it "returns an empty array when the query is empty" do
9
+ s.search(Polecat::Query.new).should == []
10
+ end
11
+
12
+ context "searching on a filled index" do
13
+ before do
14
+ w.add Spec::TestDocument.new(:id => 0, :name => 'foo')
15
+ w.add Spec::TestDocument.new(:id => 1, :name => 'bar')
16
+ w.add Spec::TestDocument.new(:id => 2, :name => 'baz')
17
+ w.add Spec::TestDocument.new(:id => 3, :name => 'foobar')
18
+ w.write
19
+ end
20
+
21
+ let :s do
22
+ Polecat::IndexSearcher.new(
23
+ :reader => w.create_reader,
24
+ :default_field => :name
25
+ )
26
+ end
27
+
28
+ let (:q1) { Polecat::Query.new.add(Polecat::Term.new(:id, :eq, 1)) }
29
+ it "returns an array of documents, when a document was found" do
30
+ s.search(q1).count.should == 1
31
+ end
32
+
33
+ let (:q2) { Polecat::Query.new.add(Polecat::Term.new(:name, :eq, 'foo')) }
34
+ it "returns only matches for a String query" do
35
+ s.search(q2).count.should == 1
36
+ end
37
+
38
+ let (:q3) { Polecat::Query.new.add(Polecat::Term.new(:name, :eq, /foo/)) }
39
+ it "returns all documents when an regexp is given" do
40
+ s.search(q3).count.should == 2
41
+ end
42
+
43
+ let (:q4) { Polecat::Query.new.add(Polecat::Term.new(:id, :eq, 33)) }
44
+ it "returns an empty array when no document matched" do
45
+ s.search(q4).count.should == 0
46
+ end
47
+
48
+ let (:q5) {
49
+ Polecat::Query.new.
50
+ add(Polecat::Term.new(:id, :eq, 3)).
51
+ add(Polecat::Term.new(:name, :eq, 'foobar'))
52
+ }
53
+ it "returns a document for a query with multiple terms" do
54
+ s.search(q5).count.should == 1
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,24 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "IndexWriter#add" do
4
+ let (:path) { prepare_index_dir }
5
+ let (:doc1) { Spec::TestDocument.new :id => 1 }
6
+ let (:doc2) { Spec::TestDocument.new :id => 2 }
7
+ let (:w) { w = Polecat::IndexWriter.new path }
8
+
9
+ it "adds the object to the list of objects" do
10
+ w.add doc1
11
+ w.count.should == 1
12
+ end
13
+
14
+ it "takes multiple documents and sotres them" do
15
+ w.add doc1
16
+ w.add doc2
17
+ w.count.should == 2
18
+ end
19
+
20
+ it "raises an error, when the object is not a document" do
21
+ lambda { Polecat::IndexWriter.new(path).add "foo" }.should(
22
+ raise_error(ArgumentError))
23
+ end
24
+ end
@@ -0,0 +1,18 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "IndexWriter#count" do
4
+ before :all do
5
+ @path = prepare_index_dir
6
+ end
7
+
8
+ it "returns 0 on an empty writer" do
9
+ w = Polecat::IndexWriter.new @path
10
+ w.count.should == 0
11
+ end
12
+
13
+ it "returns the number of documents stored in the storage" do
14
+ w = Polecat::IndexWriter.new @path
15
+ w.add Spec::TestDocument.new
16
+ w.count.should == 1
17
+ end
18
+ end
@@ -0,0 +1,22 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
# Specs for IndexWriter#create_reader. The group label names the method
# under test, so failures are reported for the right class.
describe "IndexWriter#create_reader" do
  before do
    # every example starts with a fresh, empty index directory
    @path = prepare_index_dir
  end

  it "returns a IndexReader" do
    w = Polecat::IndexWriter.new @path
    w.create_reader.class.should == Polecat::IndexReader
  end

  it "returns a different object everytime it is called" do
    w = Polecat::IndexWriter.new @path
    w.create_reader.should_not == w.create_reader
  end

  it "returns an IndexReader with the same path" do
    w = Polecat::IndexWriter.new @path
    w.create_reader.path.should == w.path
  end
end
@@ -0,0 +1,27 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "IndexWriter#new" do
4
+ before :all do
5
+ @path = prepare_index_dir
6
+ end
7
+
8
+ it "takes a path as an argument" do
9
+ writer = Polecat::IndexWriter.new @path
10
+ writer.path.should == @path
11
+ end
12
+
13
+ it "raises an Argument when no path is given" do
14
+ lambda { Polecat::IndexWriter.new }.should raise_error(ArgumentError)
15
+ end
16
+
17
+ it "raises an error, when the path is not directory" do
18
+ lambda {
19
+ Polecat::IndexWriter.new "/dev/null"
20
+ }.should raise_error(ArgumentError)
21
+ end
22
+
23
+ it "raises an error when a index.lock file is in the directory" do
24
+ FileUtils.touch @path + '/index.lock'
25
+ lambda { Polecat::IndexWriter.new @path }.should raise_error(IOError)
26
+ end
27
+ end
@@ -0,0 +1,50 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "IndexWriter#write" do
4
+ before do
5
+ @path = prepare_index_dir
6
+ end
7
+
8
+ it "sets count to 0" do
9
+ w = Polecat::IndexWriter.new @path
10
+ w.add Spec::TestDocument.new
11
+ w.write
12
+ w.count.should == 0
13
+ end
14
+
15
+ it "returns true when the write was a success" do
16
+ w = Polecat::IndexWriter.new @path
17
+ w.add Spec::TestDocument.new
18
+ w.write.should == true
19
+ end
20
+
21
+ it "removes the lock after a write" do
22
+ w = Polecat::IndexWriter.new @path
23
+ w.add Spec::TestDocument.new
24
+ w.write
25
+ File.exists?(@path + '/index.lock').should == false
26
+ end
27
+
28
+ it "takes a higher number for the index file, if there is already one" do
29
+ FileUtils.touch @path + '/ind0.ind'
30
+ w = Polecat::IndexWriter.new @path
31
+ w.add Spec::TestDocument.new
32
+ w.write
33
+ File.exists?(@path + '/ind1.ind').should == true
34
+ end
35
+
36
+ it "writes a marshalled representation of the document list" do
37
+ w = Polecat::IndexWriter.new @path
38
+ doc = Spec::TestDocument.new
39
+ w.add doc
40
+ w.write
41
+ File.read(@path + '/ind0.ind').should == Marshal.dump([doc])
42
+ end
43
+
44
+ it "returns false when the directory has an 'index.lock' file" do
45
+ w = Polecat::IndexWriter.new @path
46
+ FileUtils.touch @path + '/index.lock'
47
+ w.add Spec::TestDocument.new
48
+ w.write.should == false
49
+ end
50
+ end
@@ -0,0 +1,9 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "Polecat" do
4
+ describe "#new" do
5
+ it "can be initialized" do
6
+ Polecat.new
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,16 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "Query#add" do
4
+ let (:term1) { Polecat::Term.new(:id, :eq, 23) }
5
+ let (:term2) { Polecat::Term.new(:name, :eq, 'foo') }
6
+ let (:term3) { Polecat::Term.new(:lastname, :eq, /foo/) }
7
+ let (:query) { Polecat::Query.new }
8
+
9
+ it "returns the query object for chaining" do
10
+ query.add(term1).should be(query)
11
+ end
12
+
13
+ it "adds the term to the list of terms" do
14
+ query.add(term1).terms.count.should be(1)
15
+ end
16
+ end
@@ -0,0 +1,17 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "Query#new" do
4
+ it "uses 'and' as an default" do
5
+ q = Polecat::Query.new
6
+ q.relation.should be(:and)
7
+ end
8
+
9
+ it "takes a relation operator as an argument" do
10
+ q = Polecat::Query.new :or
11
+ q.relation.should be(:or)
12
+ end
13
+
14
+ it "raises an error, when the relation is not known" do
15
+ lambda { Polecat::Query.new :foo }.should raise_error(ArgumentError)
16
+ end
17
+ end
@@ -0,0 +1,38 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'fileutils'
5
+ require 'polecat'
6
+ require 'virtus'
7
+
8
+ # Requires supporting files with custom matchers and macros, etc,
9
+ # in ./support/ and its subdirectories.
10
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
11
+
12
+ RSpec.configure do |config|
13
+
14
+ end
15
+
16
+ def prepare_index_dir
17
+ path = File.expand_path(File.dirname(__FILE__) + '/index_dir')
18
+ begin
19
+ FileUtils.rm_r path
20
+ rescue SystemCallError
21
+ # the directory structure is not there, so just
22
+ # ignore it and print a hint
23
+ puts "error occured, but was ignored: $!"
24
+ end
25
+ Dir.mkdir path
26
+ return path
27
+ end
28
+
29
+ module Spec
30
+ class TestDocument
31
+ include Virtus
32
+
33
+ attribute :id, Integer
34
+ attribute :name, String
35
+ attribute :lastname, String
36
+ attribute :description, String, :analyze => true, :lazy => true
37
+ end
38
+ end
@@ -0,0 +1,19 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "Term#new" do
4
+ it "takes a field, a operator and a value" do
5
+ t = Polecat::Term.new :id, :eq, 23
6
+ t.field.should be(:id)
7
+ t.operator.should be(:eq)
8
+ t.value.should be(23)
9
+ end
10
+
11
+ it "converts Strings to Regexps, if the operator is :eq" do
12
+ t = Polecat::Term.new :name, :eq, "foo"
13
+ t.value.should == /^foo$/
14
+ end
15
+
16
+ it "raises an error if no argument is given" do
17
+ lambda { Polecat::Term.new }.should raise_error
18
+ end
19
+ end
metadata ADDED
@@ -0,0 +1,181 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: polecat
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Gibheer
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-06-06 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rspec
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ prerelease: false
33
+ type: :development
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: rdoc
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 3
43
+ segments:
44
+ - 0
45
+ version: "0"
46
+ prerelease: false
47
+ type: :development
48
+ version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: yard
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ prerelease: false
61
+ type: :development
62
+ version_requirements: *id003
63
+ - !ruby/object:Gem::Dependency
64
+ name: bundler
65
+ requirement: &id004 !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ prerelease: false
75
+ type: :development
76
+ version_requirements: *id004
77
+ - !ruby/object:Gem::Dependency
78
+ name: jeweler
79
+ requirement: &id005 !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ prerelease: false
89
+ type: :development
90
+ version_requirements: *id005
91
+ - !ruby/object:Gem::Dependency
92
+ name: reek
93
+ requirement: &id006 !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ hash: 3
99
+ segments:
100
+ - 0
101
+ version: "0"
102
+ prerelease: false
103
+ type: :development
104
+ version_requirements: *id006
105
+ description: This is a search library for searching terms in documents
106
+ email: gibheer@gmail.com
107
+ executables: []
108
+
109
+ extensions: []
110
+
111
+ extra_rdoc_files:
112
+ - LICENSE.txt
113
+ - README.rdoc
114
+ files:
115
+ - .document
116
+ - .rspec
117
+ - Gemfile
118
+ - LICENSE.txt
119
+ - README.rdoc
120
+ - Rakefile
121
+ - VERSION
122
+ - lib/polecat.rb
123
+ - lib/polecat/document.rb
124
+ - lib/polecat/document_resource.rb
125
+ - lib/polecat/index_reader.rb
126
+ - lib/polecat/index_searcher.rb
127
+ - lib/polecat/index_writer.rb
128
+ - lib/polecat/query.rb
129
+ - lib/polecat/term.rb
130
+ - polecat.gemspec
131
+ - spec/index_reader/locked_spec.rb
132
+ - spec/index_reader/new_spec.rb
133
+ - spec/index_reader/read_spec.rb
134
+ - spec/index_searcher/new_spec.rb
135
+ - spec/index_searcher/search_spec.rb
136
+ - spec/index_writer/add_spec.rb
137
+ - spec/index_writer/count_spec.rb
138
+ - spec/index_writer/create_reader_spec.rb
139
+ - spec/index_writer/new_spec.rb
140
+ - spec/index_writer/write_spec.rb
141
+ - spec/polecat_spec.rb
142
+ - spec/query/add_spec.rb
143
+ - spec/query/new_spec.rb
144
+ - spec/spec_helper.rb
145
+ - spec/term/new_spec.rb
146
+ has_rdoc: true
147
+ homepage: http://github.com/Gibheer/polecat
148
+ licenses:
149
+ - MIT
150
+ post_install_message:
151
+ rdoc_options: []
152
+
153
+ require_paths:
154
+ - lib
155
+ required_ruby_version: !ruby/object:Gem::Requirement
156
+ none: false
157
+ requirements:
158
+ - - ">="
159
+ - !ruby/object:Gem::Version
160
+ hash: 3
161
+ segments:
162
+ - 0
163
+ version: "0"
164
+ required_rubygems_version: !ruby/object:Gem::Requirement
165
+ none: false
166
+ requirements:
167
+ - - ">="
168
+ - !ruby/object:Gem::Version
169
+ hash: 3
170
+ segments:
171
+ - 0
172
+ version: "0"
173
+ requirements: []
174
+
175
+ rubyforge_project:
176
+ rubygems_version: 1.3.10
177
+ signing_key:
178
+ specification_version: 3
179
+ summary: library for searching through documents
180
+ test_files: []
181
+