mongoid_fulltext 0.3.6
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +13 -0
- data/LICENSE +19 -0
- data/README.md +259 -0
- data/Rakefile +36 -0
- data/VERSION +1 -0
- data/lib/mongoid_fulltext.rb +183 -0
- data/mongoid_fulltext.gemspec +85 -0
- data/spec/models/advanced_artwork.rb +7 -0
- data/spec/models/basic_artwork.rb +8 -0
- data/spec/models/external_artist.rb +6 -0
- data/spec/models/external_artwork.rb +6 -0
- data/spec/models/external_artwork_no_fields_supplied.rb +12 -0
- data/spec/models/filtered_artist.rb +10 -0
- data/spec/models/filtered_artwork.rb +10 -0
- data/spec/models/multi_external_artwork.rb +10 -0
- data/spec/models/multi_field_artist.rb +7 -0
- data/spec/models/multi_field_artwork.rb +7 -0
- data/spec/models/partitioned_artist.rb +15 -0
- data/spec/mongoid/fulltext_spec.rb +348 -0
- data/spec/spec_helper.rb +20 -0
- metadata +127 -0
data/.document
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "mongoid", "~> 2.0.0"
  gem 'database_cleaner', '~> 0.6.0'
  gem 'rspec', '~> 2.5.0'
  gem "jeweler", "~> 1.5.2"
end
|
data/LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2011 by Artsy, Inc.
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in
|
11
|
+
all copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,259 @@
|
|
1
|
+
Mongoid Fulltext Search
|
2
|
+
=======================
|
3
|
+
|
4
|
+
Full-text search using n-gram matching for the Mongoid ODM. Tested on MongoDB 1.6 and above, but
|
5
|
+
probably works on earlier versions as well.
|
6
|
+
|
7
|
+
Some examples:
|
8
|
+
--------------
|
9
|
+
|
10
|
+
Suppose you have an `Artist` model and want to index each artist's name:
|
11
|
+
|
12
|
+
class Artist
|
13
|
+
include Mongoid::Document
|
14
|
+
include Mongoid::FullTextSearch
|
15
|
+
|
16
|
+
field :first_name
|
17
|
+
field :last_name
|
18
|
+
|
19
|
+
def name
|
20
|
+
[first_name, last_name].join(' ')
|
21
|
+
end
|
22
|
+
|
23
|
+
fulltext_search_in :name
|
24
|
+
end
|
25
|
+
|
26
|
+
The `fulltext_search_in` directive will index the full name of the artist, so now
|
27
|
+
you can call:
|
28
|
+
|
29
|
+
Artist.fulltext_search("vince vangogh")
|
30
|
+
|
31
|
+
which will return an array of the Artist instances that best match the search string. Most likely,
|
32
|
+
Vincent van Gogh will be included in the results. You can index multiple fields with the same
|
33
|
+
index, so we can get the same effect of our Artist index above using:
|
34
|
+
|
35
|
+
class Artist
|
36
|
+
include Mongoid::Document
|
37
|
+
include Mongoid::FullTextSearch
|
38
|
+
|
39
|
+
field :first_name
|
40
|
+
field :last_name
|
41
|
+
|
42
|
+
fulltext_search_in :first_name, :last_name
|
43
|
+
end
|
44
|
+
|
45
|
+
To restrict the number of results returned, pass the `:max_results` parameter to `fulltext_search`:
|
46
|
+
|
47
|
+
Artist.fulltext_search("vince vangogh", { :max_results => 5 })
|
48
|
+
|
49
|
+
To return an array of `[ result, score ]` pairs instead of an array of results, pass the `:return_scores` parameter to `fulltext_search`:
|
50
|
+
|
51
|
+
Artist.fulltext_search("vince vangogh", { :return_scores => true })
|
52
|
+
|
53
|
+
If you don't specify a field to index, the default is the result of `to_s` called on the object.
|
54
|
+
The following definition will index the first and last name of an artist:
|
55
|
+
|
56
|
+
class Artist
|
57
|
+
include Mongoid::Document
|
58
|
+
include Mongoid::FullTextSearch
|
59
|
+
|
60
|
+
field :first_name
|
61
|
+
field :last_name
|
62
|
+
|
63
|
+
def to_s
|
64
|
+
'%s %s' % [first_name, last_name]
|
65
|
+
end
|
66
|
+
|
67
|
+
fulltext_search_in
|
68
|
+
end
|
69
|
+
|
70
|
+
The full-text index is stored in a separate MongoDB collection in the same database as the
|
71
|
+
models you're indexing. By default, the name of this collection is generated for you. Above,
|
72
|
+
a collection named something like `mongoid_fulltext.index_artist_0` will be created to
|
73
|
+
hold the index data. You can override this naming and provide your own collection name with
|
74
|
+
the :index_name parameter:
|
75
|
+
|
76
|
+
class Artwork
|
77
|
+
include Mongoid::Document
|
78
|
+
include Mongoid::FullTextSearch
|
79
|
+
|
80
|
+
field :title
|
81
|
+
fulltext_search_in :title, :index_name => 'mongoid_fulltext.foobar'
|
82
|
+
end
|
83
|
+
|
84
|
+
You can also create multiple indexes on a single model, in which case you'll want to
|
85
|
+
provide index names:
|
86
|
+
|
87
|
+
class Artwork
|
88
|
+
include Mongoid::Document
|
89
|
+
include Mongoid::FullTextSearch
|
90
|
+
|
91
|
+
field :title
|
92
|
+
field :artist_name
|
93
|
+
field :gallery_name
|
94
|
+
field :gallery_address
|
95
|
+
|
96
|
+
fulltext_search_in :title, :index_name => 'title_index'
|
97
|
+
fulltext_search_in :artist_name, :index_name => 'artist_name_index'
|
98
|
+
fulltext_search_in :gallery_name, :gallery_address, :index_name => 'gallery_index'
|
99
|
+
end
|
100
|
+
|
101
|
+
The index names are helpful now because you'll have to specify which one you want to use when you
|
102
|
+
call `fulltext_search`:
|
103
|
+
|
104
|
+
Artwork.fulltext_search('warhol', :index => 'artist_name_index')
|
105
|
+
|
106
|
+
If you have multiple indexes specified and you don't supply a name to `fulltext_search`, the
|
107
|
+
method call will raise an exception.
|
108
|
+
|
109
|
+
If you're indexing multiple models, you may find that you need to combine results to create
|
110
|
+
a single result set. For example, if both the `Artist` model and the `Artwork` model are
|
111
|
+
indexed for full-text search, then to get results from both, you'd have to call
|
112
|
+
`Artist.fulltext_search` and `Artwork.fulltext_search` and combine the results yourself. If
|
113
|
+
your intention is instead to get the top k results from both Artists and Artworks, you can
|
114
|
+
merge both into a single index by using the same `:index_name` parameter:
|
115
|
+
|
116
|
+
class Artwork
|
117
|
+
include Mongoid::Document
|
118
|
+
include Mongoid::FullTextSearch
|
119
|
+
|
120
|
+
field :title
|
121
|
+
fulltext_search_in :title, :index_name => 'artwork_and_artists'
|
122
|
+
end
|
123
|
+
|
124
|
+
class Artist
|
125
|
+
include Mongoid::Document
|
126
|
+
include Mongoid::FullTextSearch
|
127
|
+
|
128
|
+
field :name
|
129
|
+
fulltext_search_in :name, :index_name => 'artwork_and_artists'
|
130
|
+
end
|
131
|
+
|
132
|
+
Now that these two models share the same external index collection, we can search them both through
|
133
|
+
either model's `fulltext_search` method:
|
134
|
+
|
135
|
+
Artwork.fulltext_search('picasso') # returns same results as Artist.fulltext_search('picasso')
|
136
|
+
|
137
|
+
If you want to filter the results from full-text search, you set up filters when the indexes are
|
138
|
+
defined. For example, suppose that in addition to wanting to use the `artwork_and_artists` index
|
139
|
+
defined above to search for `Artwork`s or `Artist`s, we want to be able to run full-text searches
|
140
|
+
for artists only and for artworks priced above $10,000. Instead of creating two new indexes or
|
141
|
+
attempting to filter the results after the query is run, we can specify the filter predicates
|
142
|
+
at the time of index definition:
|
143
|
+
|
144
|
+
class Artwork
|
145
|
+
include Mongoid::Document
|
146
|
+
include Mongoid::FullTextSearch
|
147
|
+
|
148
|
+
field :title
|
149
|
+
field :price
|
150
|
+
fulltext_search_in :title, :index_name => 'artwork_and_artists',
|
151
|
+
:filters => { :is_expensive => lambda { |x| x.price > 10000 },
|
152
|
+
:has_long_name => lambda { |x| x.title.length > 20 }}
|
153
|
+
end
|
154
|
+
|
155
|
+
class Artist
|
156
|
+
include Mongoid::Document
|
157
|
+
include Mongoid::FullTextSearch
|
158
|
+
|
159
|
+
field :name
|
160
|
+
field :birth_year
|
161
|
+
fulltext_search_in :name, :index_name => 'artwork_and_artists',
|
162
|
+
:filters => { :born_before_1900 => lambda { |x| x.birth_year < 1900 },
|
163
|
+
:has_long_name => lambda { |x| x.name.length > 20}}
|
164
|
+
end
|
165
|
+
|
166
|
+
After defining filters, you can query for results that match particular values of filters:
|
167
|
+
|
168
|
+
# Only return artists born before 1900 that match 'foobar'
|
169
|
+
Artist.fulltext_search('foobar', :born_before_1900 => true)
|
170
|
+
|
171
|
+
# Return artists or artworks that match 'foobar' and have short names
|
172
|
+
Artist.fulltext_search('foobar', :has_long_name => false)
|
173
|
+
|
174
|
+
# Only return artworks with prices over 10000 that match 'mona lisa'
|
175
|
+
Artwork.fulltext_search('mona lisa', :is_expensive => true)
|
176
|
+
|
177
|
+
# Only return artworks with prices less than 10000 that match 'mona lisa'
|
178
|
+
Artwork.fulltext_search('mona lisa', :is_expensive => false)
|
179
|
+
|
180
|
+
Note that in all of the example queries above, supplying a filter that is defined on exactly
|
181
|
+
one of the models will restrict the search to results from that model only. For example,
|
182
|
+
since `:is_expensive` is defined only on `Artwork`s, a call to `fulltext_search` with either
|
183
|
+
`:is_expensive => true` or `:is_expensive => false` will return only `Artwork` results.
|
184
|
+
|
185
|
+
You can specify multiple filters per index and per model. Each filter is a predicate that will
|
186
|
+
be called on objects as they're inserted into the full-text index (any time the model is saved.)
|
187
|
+
Filters are only called on instances of models they're defined on, so in the example above, the
|
188
|
+
`is_expensive` filter is only applied to instances of `Artwork` and the `born_before_1900` filter
|
189
|
+
is only applied to instances of `Artist`, although both filters can be used when querying from
|
190
|
+
either model. The `has_long_name` filter, on the other hand, will return instances of both
|
191
|
+
`Artwork` and `Artist` since it's defined on each model.
|
192
|
+
|
193
|
+
Filters shouldn't ever throw, but if they do, the filter is just ignored. If you apply filters to
|
194
|
+
indexes that are on multiple fields, the filter is applied to each field and the filter result is
|
195
|
+
the AND of all of the individual results for each of the fields. Finally, if a filter is defined
|
196
|
+
but criteria for that filter aren't passed to `fulltext_search`, the result is as if the filter
|
197
|
+
had never been defined - the results include both models that pass the filter and models that fail it.
|
198
|
+
|
199
|
+
Indexing Options
|
200
|
+
----------------
|
201
|
+
|
202
|
+
Additional indexing/query options can be used as parameters to `fulltext_search_in`.
|
203
|
+
|
204
|
+
* `alphabet`: letters to index, default is `abcdefghijklmnopqrstuvwxyz0123456789 `
|
205
|
+
* `word_separators`: word separators, default is ` `
|
206
|
+
* `ngram_width`: ngram width, default is `3`
|
207
|
+
* `index_full_words`: index full words, which improves exact matches, default is `true`
|
208
|
+
* `apply_prefix_scoring_to_all_words`: score n-grams at beginning of words higher, default is `true`
|
209
|
+
* `max_ngrams_to_search`: maximum number of ngrams to query at any given time, default is `6`
|
210
|
+
|
211
|
+
Array filters
|
212
|
+
-------------
|
213
|
+
|
214
|
+
A filter may also return an Array. Consider the following example.
|
215
|
+
|
216
|
+
class Artist
|
217
|
+
include Mongoid::Document
|
218
|
+
include Mongoid::FullTextSearch
|
219
|
+
|
220
|
+
field :name
|
221
|
+
field :exhibitions, type: Array, default: []
|
222
|
+
|
223
|
+
fulltext_search_in :name, :index_name => 'exhibited_artist',
|
224
|
+
:filters => {
|
225
|
+
:exhibitions => lambda { |artist| artist.exhibitions }
|
226
|
+
}
|
227
|
+
end
|
228
|
+
|
229
|
+
You can now find all artists that are at the Art Basel exhibition or all artists that have exhibited
|
230
|
+
at both the Art Basel and the New York Armory exhibition.
|
231
|
+
|
232
|
+
# All artists
|
233
|
+
Artist.fulltext_search('foobar')
|
234
|
+
|
235
|
+
# Artists at the Art Basel exhibition only
|
236
|
+
Artist.fulltext_search('foobar', :exhibitions => [ "Art Basel" ])
|
237
|
+
|
238
|
+
# Artists at both the Art Basel and the New York Armory exhibition
|
239
|
+
Artist.fulltext_search('foobar', :exhibitions => [ "Art Basel", "New York Armory" ])
|
240
|
+
|
241
|
+
Building the index
|
242
|
+
------------------
|
243
|
+
|
244
|
+
The fulltext index is built and maintained incrementally by hooking into `before_save` and
|
245
|
+
`before_destroy` callbacks on each model that's being indexed. If you want to build an index
|
246
|
+
on existing models, you can call the `update_ngram_index` method on each instance:
|
247
|
+
|
248
|
+
Artwork.all.each { |artwork| artwork.update_ngram_index }
|
249
|
+
|
250
|
+
You can also remove instances in bulk from the index with the `remove_from_ngram_index`
|
251
|
+
method:
|
252
|
+
|
253
|
+
Artwork.all.each { |artwork| artwork.remove_from_ngram_index }
|
254
|
+
|
255
|
+
Running the specs
|
256
|
+
-----------------
|
257
|
+
|
258
|
+
To run the specs, execute `rake spec`. You need a local MongoDB instance to run the specs.
|
259
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'rubygems'
require 'bundler'

# Activate the default and development gem groups. If Bundler cannot set
# them up, report the problem and exit with Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'rake'
require 'rspec/core/rake_task'
require 'jeweler'

# Gem metadata consumed by jeweler.
Jeweler::Tasks.new do |spec|
  # spec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  spec.name        = "mongoid_fulltext"
  spec.homepage    = "http://github.com/aaw/mongoid_fulltext"
  spec.license     = "MIT"
  spec.summary     = "Full-text search for the Mongoid ORM"
  spec.description = "Full-text search for the Mongoid ORM, using n-grams extracted from text"
  spec.email       = "aaron.windsor@gmail.com"
  spec.authors     = ["Aaron Windsor"]
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  #  spec.add_runtime_dependency 'jabber4r', '> 0.1'
  #  spec.add_development_dependency 'rspec', '> 1.2.3'
end
Jeweler::RubygemsDotOrgTasks.new

desc "Run all tests"
RSpec::Core::RakeTask.new(:spec) do |rspec_task|
  rspec_task.pattern = "spec/**/*_spec.rb"
end

# Running plain `rake` runs the specs.
task :default => :spec
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.3.6
|
@@ -0,0 +1,183 @@
|
|
1
|
+
module Mongoid::FullTextSearch
|
2
|
+
extend ActiveSupport::Concern
|
3
|
+
|
4
|
+
included do
|
5
|
+
cattr_accessor :mongoid_fulltext_config
|
6
|
+
end
|
7
|
+
|
8
|
+
class UnspecifiedIndexError < StandardError; end
|
9
|
+
|
10
|
+
module ClassMethods
|
11
|
+
|
12
|
+
# Declares a full-text index on the including model.
#
# args - names of the fields (methods) whose values are indexed, optionally
#        followed by an options Hash. With no names given, +to_s+ is indexed.
#
# The :index_name option picks the collection that stores the index; without
# it a name is generated from the model name and the number of indexes
# already declared. Every other option is merged over the defaults below and
# stored in the index configuration as given.
def fulltext_search_in(*args)
  if self.mongoid_fulltext_config.nil?
    self.mongoid_fulltext_config = {}
  end

  options = args.last.is_a?(Hash) ? args.pop : {}
  index_name = options.fetch(:index_name) do
    'mongoid_fulltext.index_%s_%s' % [self.name.downcase, self.mongoid_fulltext_config.count]
  end

  defaults = {
    :alphabet => 'abcdefghijklmnopqrstuvwxyz0123456789 ',
    :word_separators => ' ',
    :ngram_width => 3,
    :max_ngrams_to_search => 6,
    :apply_prefix_scoring_to_all_words => true,
    :index_full_words => true
  }
  config = defaults.merge(options)
  config[:ngram_fields] = args.empty? ? [:to_s] : args

  # Turn the character lists into char => char lookup tables.
  [:alphabet, :word_separators].each do |setting|
    config[setting] = Hash[config[setting].split('').map { |ch| [ch, ch] }]
  end
  self.mongoid_fulltext_config[index_name] = config

  index_collection = collection.db.collection(index_name)
  %w[ngram document_id].each do |indexed_key|
    index_collection.ensure_index([[indexed_key, Mongo::ASCENDING]])
  end

  # Keep the index in step with the model's lifecycle.
  before_save :update_ngram_index
  before_destroy :remove_from_ngram_index
end
|
45
|
+
|
46
|
+
# Runs a full-text query against one of the model's n-gram indexes.
#
# query_string - the text to search for.
# options      - Hash of search options; the caller's Hash is not modified.
#                :max_results   - maximum number of results (default 10);
#                                 pass nil to return every match.
#                :return_scores - when true, return [model, score] pairs
#                                 instead of bare models (default false).
#                :index         - name of the index to search; required when
#                                 more than one index is declared.
#                Every remaining key is treated as a filter name and matched
#                against the 'filter_values' stored with each index entry.
#
# Returns an Array of model instances (or [model, score] pairs) ordered by
# descending score; an empty Array when the query yields no n-grams.
# Raises UnspecifiedIndexError when several indexes exist and :index is absent.
def fulltext_search(query_string, options={})
  # Work on a copy: the option keys are deleted below and the caller's hash
  # must be left intact.
  options = options.dup
  max_results = options.has_key?(:max_results) ? options.delete(:max_results) : 10
  return_scores = options.has_key?(:return_scores) ? options.delete(:return_scores) : false
  if self.mongoid_fulltext_config.count > 1 && !options.has_key?(:index)
    error_message = '%s is indexed by multiple full-text indexes. You must specify one by passing an :index parameter'
    raise UnspecifiedIndexError, error_message % self.name, caller
  end
  index_name = options.has_key?(:index) ? options.delete(:index) : self.mongoid_fulltext_config.keys.first

  # options hash should only contain filters after this point
  ngrams = all_ngrams(query_string, self.mongoid_fulltext_config[index_name])
  return [] if ngrams.empty?

  query = {'ngram' => {'$in' => ngrams.keys}}
  query.update(Hash[options.map { |key,value| [ 'filter_values.%s' % key, { '$all' => [ value ].flatten } ] }])
  # map: weight each stored n-gram score by the query's score for that n-gram.
  map = <<-EOS
    function() {
      emit(this['document_id'], {'class': this['class'], 'score': this['score']*ngrams[this['ngram']] })
    }
  EOS
  # reduce: sum the weighted scores per document.
  reduce = <<-EOS
    function(key, values) {
      score = 0.0
      for (i in values) {
        score += values[i]['score']
      }
      return({'class': values[0]['class'], 'score': score})
    }
  EOS
  mr_options = {:scope => {:ngrams => ngrams }, :query => query, :raw => true}
  rc_options = { :return_scores => return_scores }
  coll = collection.db.collection(index_name)
  if collection.db.connection.server_version >= '1.7.4'
    # Inline output: results come back in the response and are sorted here.
    mr_options[:out] = {:inline => 1}
    results = coll.map_reduce(map, reduce, mr_options)['results'].sort_by{ |x| -x['value']['score'] }
    max_results = results.count if max_results.nil?
    instantiate_mapreduce_results(results.first(max_results), rc_options)
  else
    # Otherwise the output goes to a result collection that is read back,
    # sorted by the server, and dropped afterwards.
    result_collection = coll.map_reduce(map, reduce, mr_options)['result']
    results = collection.db.collection(result_collection).find.sort(['value.score',-1])
    results = results.limit(max_results) if !max_results.nil?
    models = instantiate_mapreduce_results(results, rc_options)
    collection.db.collection(result_collection).drop
    models
  end
end
|
92
|
+
|
93
|
+
# Loads the model instance referenced by one map/reduce result row.
#
# result - a Hash with the document id under '_id' and the model class name
#          under ['value']['class'].
#
# The class name is resolved one namespace segment at a time so that
# qualified names such as "Gallery::Artwork" work even where const_get does
# not accept them in a single call.
#
# Returns whatever the model's +find+ returns for that id.
def instantiate_mapreduce_result(result)
  segments = result['value']['class'].split('::').reject { |segment| segment.empty? }
  model_class = segments.inject(Object) { |scope, segment| scope.const_get(segment) }
  model_class.find(:first, :conditions => {:id => result['_id']})
end
|
96
|
+
|
97
|
+
# Converts map/reduce result rows into model instances.
#
# results - an enumerable of result rows.
# options - Hash; when options[:return_scores] is truthy each entry of the
#           returned Array is a [model, score] pair, otherwise just the model.
#
# Rows whose model could not be loaded (nil) are left out.
def instantiate_mapreduce_results(results, options)
  if options[:return_scores]
    scored = results.map do |row|
      [ instantiate_mapreduce_result(row), row['value']['score'] ]
    end
    scored.reject { |model, _score| model.nil? }
  else
    loaded = results.map { |row| instantiate_mapreduce_result(row) }
    loaded.reject { |model| model.nil? }
  end
end
|
104
|
+
|
105
|
+
# Splits +str+ into scored n-grams according to +config+.
#
# str    - the text to tokenize; nil, or text shorter than :ngram_width,
#          yields an empty Hash.
# config - an index configuration: :alphabet and :word_separators are Hashes
#          keyed by single characters, :ngram_width and :max_ngrams_to_search
#          are numbers, :apply_prefix_scoring_to_all_words and
#          :index_full_words are flags.
# bound_number_returned - when true, n-grams are sampled with a stride derived
#          from :max_ngrams_to_search instead of taking every position.
#
# Returns a Hash mapping each n-gram (and, with :index_full_words, each whole
# word at least :ngram_width long) to its score. When the same key is produced
# more than once, the highest score is kept.
def all_ngrams(str, config, bound_number_returned = true)
  width = config[:ngram_width]
  return {} if str.nil? || str.length < width

  # Lower-case the input and drop every character outside the alphabet.
  filtered_str = str.downcase.split('').map { |ch| config[:alphabet][ch] }.compact.join('')
  last_start = filtered_str.length - width

  step_size =
    if bound_number_returned
      [(last_start.to_f / config[:max_ngrams_to_search]).ceil, 1].max
    else
      1
    end

  separators = config[:word_separators]

  # array of [ngram, score] pairs
  ngram_ary = (0..last_start).step(step_size).map do |i|
    # N-grams at the start of the string (and, optionally, right after a word
    # separator) get the higher "prefix" score.
    at_word_start = i == 0 ||
      (config[:apply_prefix_scoring_to_all_words] && separators.has_key?(filtered_str[i - 1, 1]))
    score =
      if at_word_start
        Math.sqrt(1 + 1.0 / filtered_str.length)
      else
        Math.sqrt(2.0 / filtered_str.length)
      end
    [filtered_str[i, width], score]
  end

  if config[:index_full_words]
    # Regexp.union escapes each separator and matches any single one of them.
    # Joining the characters into one pattern would only match the separators
    # as a sequence and would treat characters such as '.' as metacharacters.
    word_boundary = Regexp.union(separators.keys)
    filtered_str.split(word_boundary).each do |word|
      ngram_ary << [ word, 1 ] if word.length >= width
    end
  end

  # deduplicate, and keep the highest score
  ngram_hash = {}
  ngram_ary.each do |ngram, score|
    ngram_hash[ngram] = [ngram_hash[ngram] || 0, score].max
  end
  ngram_hash
end
|
144
|
+
|
145
|
+
end
|
146
|
+
|
147
|
+
# Rebuilds this document's entries in every full-text index declared on its
# class: existing entries are removed, then one entry per n-gram is inserted
# carrying the document id, score, class name and (when filters are declared)
# the evaluated filter values.
def update_ngram_index
  self.mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
    # remove existing ngrams from external index
    coll = collection.db.collection(index_name)
    coll.remove({'document_id' => self._id})
    # extract ngrams from fields
    field_values = fulltext_config[:ngram_fields].map { |field| self.send(field) }
    ngrams = field_values.inject({}) do |accum, item|
      accum.update(self.class.all_ngrams(item, fulltext_config, false))
    end
    # Nothing to index here: move on to the next index. A `return` at this
    # point would leave every remaining index without an update.
    next if ngrams.empty?
    # apply filters, if necessary
    filter_values = nil
    if fulltext_config.has_key?(:filters)
      filter_values = {}
      fulltext_config[:filters].each do |filter_name, predicate|
        begin
          filter_values[filter_name] = predicate.call(self)
        rescue StandardError
          # Suppress any exceptions caused by filters; the filter is left out.
        end
      end
    end
    # insert new ngrams in external index
    ngrams.each_pair do |ngram, score|
      index_document = {'ngram' => ngram, 'document_id' => self._id, 'score' => score, 'class' => self.class.name}
      index_document['filter_values'] = filter_values if fulltext_config.has_key?(:filters)
      coll.insert(index_document)
    end
  end
end
|
175
|
+
|
176
|
+
# Deletes this document's entries from every full-text index collection
# declared on its class.
def remove_from_ngram_index
  self.mongoid_fulltext_config.each_key do |index_name|
    index_collection = collection.db.collection(index_name)
    index_collection.remove({'document_id' => self._id})
  end
end
|
182
|
+
|
183
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{mongoid_fulltext}
|
8
|
+
s.version = "0.3.6"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Aaron Windsor"]
|
12
|
+
s.date = %q{2011-05-27}
|
13
|
+
s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
|
14
|
+
s.email = %q{aaron.windsor@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.md"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
"Gemfile",
|
22
|
+
"LICENSE",
|
23
|
+
"README.md",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"lib/mongoid_fulltext.rb",
|
27
|
+
"mongoid_fulltext.gemspec",
|
28
|
+
"spec/models/advanced_artwork.rb",
|
29
|
+
"spec/models/basic_artwork.rb",
|
30
|
+
"spec/models/external_artist.rb",
|
31
|
+
"spec/models/external_artwork.rb",
|
32
|
+
"spec/models/external_artwork_no_fields_supplied.rb",
|
33
|
+
"spec/models/filtered_artist.rb",
|
34
|
+
"spec/models/filtered_artwork.rb",
|
35
|
+
"spec/models/multi_external_artwork.rb",
|
36
|
+
"spec/models/multi_field_artist.rb",
|
37
|
+
"spec/models/multi_field_artwork.rb",
|
38
|
+
"spec/models/partitioned_artist.rb",
|
39
|
+
"spec/mongoid/fulltext_spec.rb",
|
40
|
+
"spec/spec_helper.rb"
|
41
|
+
]
|
42
|
+
s.homepage = %q{http://github.com/aaw/mongoid_fulltext}
|
43
|
+
s.licenses = ["MIT"]
|
44
|
+
s.require_paths = ["lib"]
|
45
|
+
s.rubygems_version = %q{1.3.7}
|
46
|
+
s.summary = %q{Full-text search for the Mongoid ORM}
|
47
|
+
s.test_files = [
|
48
|
+
"spec/models/advanced_artwork.rb",
|
49
|
+
"spec/models/basic_artwork.rb",
|
50
|
+
"spec/models/external_artist.rb",
|
51
|
+
"spec/models/external_artwork.rb",
|
52
|
+
"spec/models/external_artwork_no_fields_supplied.rb",
|
53
|
+
"spec/models/filtered_artist.rb",
|
54
|
+
"spec/models/filtered_artwork.rb",
|
55
|
+
"spec/models/multi_external_artwork.rb",
|
56
|
+
"spec/models/multi_field_artist.rb",
|
57
|
+
"spec/models/multi_field_artwork.rb",
|
58
|
+
"spec/models/partitioned_artist.rb",
|
59
|
+
"spec/mongoid/fulltext_spec.rb",
|
60
|
+
"spec/spec_helper.rb"
|
61
|
+
]
|
62
|
+
|
63
|
+
if s.respond_to? :specification_version then
|
64
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
65
|
+
s.specification_version = 3
|
66
|
+
|
67
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
68
|
+
s.add_development_dependency(%q<mongoid>, ["~> 2.0.0"])
|
69
|
+
s.add_development_dependency(%q<database_cleaner>, ["~> 0.6.0"])
|
70
|
+
s.add_development_dependency(%q<rspec>, ["~> 2.5.0"])
|
71
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
|
72
|
+
else
|
73
|
+
s.add_dependency(%q<mongoid>, ["~> 2.0.0"])
|
74
|
+
s.add_dependency(%q<database_cleaner>, ["~> 0.6.0"])
|
75
|
+
s.add_dependency(%q<rspec>, ["~> 2.5.0"])
|
76
|
+
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
77
|
+
end
|
78
|
+
else
|
79
|
+
s.add_dependency(%q<mongoid>, ["~> 2.0.0"])
|
80
|
+
s.add_dependency(%q<database_cleaner>, ["~> 0.6.0"])
|
81
|
+
s.add_dependency(%q<rspec>, ["~> 2.5.0"])
|
82
|
+
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# Spec model that declares a full-text index without naming any fields, so
# the index is built from #to_s.
class ExternalArtworkNoFieldsSupplied
  include Mongoid::Document
  include Mongoid::FullTextSearch

  [:title, :year, :artist].each { |attribute| field attribute }

  fulltext_search_in :index_name => 'mongoid_fulltext.artworks_and_artists'

  # Text that gets indexed: "<title> (<artist> <year>)".
  def to_s
    "#{title} (#{artist} #{year})"
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Spec model indexed by full_name in the 'mongoid_fulltext.artworks_and_artists'
# index, with three filters: is_foobar, is_artist (always true) and
# is_artwork (always false).
class FilteredArtist
  include Mongoid::Document
  include Mongoid::FullTextSearch

  field :full_name

  artist_filters = {
    :is_foobar  => lambda { |artist| artist.full_name == 'foobar' },
    :is_artist  => lambda { |_artist| true },
    :is_artwork => lambda { |_artist| false }
  }

  fulltext_search_in :full_name,
                     :index_name => 'mongoid_fulltext.artworks_and_artists',
                     :filters => artist_filters
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Spec model indexed by title in the 'mongoid_fulltext.artworks_and_artists'
# index, with three filters: is_foobar, is_artwork (always true) and
# is_artist (always false).
class FilteredArtwork
  include Mongoid::Document
  include Mongoid::FullTextSearch

  field :title

  artwork_filters = {
    :is_foobar  => lambda { |artwork| artwork.title == 'foobar' },
    :is_artwork => lambda { |_artwork| true },
    :is_artist  => lambda { |_artwork| false }
  }

  fulltext_search_in :title,
                     :index_name => 'mongoid_fulltext.artworks_and_artists',
                     :filters => artwork_filters
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Spec model that declares three separately named full-text indexes:
# one on title, one on year, and one over title, year and artist together.
class MultiExternalArtwork
  include Mongoid::Document
  include Mongoid::FullTextSearch

  [:title, :year, :artist].each { |attribute| field attribute }

  # Single-field indexes.
  fulltext_search_in :title, :index_name => 'mongoid_fulltext.titles'
  fulltext_search_in :year, :index_name => 'mongoid_fulltext.years'

  # Combined index over all three fields.
  fulltext_search_in :title, :year, :artist, :index_name => 'mongoid_fulltext.all'
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Spec model indexed by full_name in 'mongoid_fulltext.partitioned_artists'.
# It declares a boolean filter (has_exhibitions) and an Array-valued filter
# (exhibitions) that returns the artist's exhibitions as a flat Array.
class PartitionedArtist
  include Mongoid::Document
  include Mongoid::FullTextSearch

  field :full_name
  field :exhibitions, type: Array, default: []

  exhibition_filters = {
    :has_exhibitions => lambda { |artist| artist.exhibitions.size > 0 },
    :exhibitions     => lambda { |artist| [ artist.exhibitions ].flatten }
  }

  fulltext_search_in :full_name,
                     :index_name => 'mongoid_fulltext.partitioned_artists',
                     :filters => exhibition_filters
end
|
@@ -0,0 +1,348 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Mongoid
|
4
|
+
describe FullTextSearch do
|
5
|
+
|
6
|
+
context "with several config options defined" do
  let!(:abcdef) { AdvancedArtwork.create(:title => 'abcdefg hijklmn') }

  it "should recognize all options" do
    # AdvancedArtwork is defined with an ngram_width of 4 and a different alphabet (abcdefg)
    search = lambda { |query| AdvancedArtwork.fulltext_search(query) }

    search.call('abc').should == []
    search.call('abcd').first.should == abcdef
    search.call('defg').first.should == abcdef
    search.call('hijklmn').should == []
  end
end
|
19
|
+
context "with default settings" do
  let!(:flower_myth) { BasicArtwork.create(:title => 'Flower Myth') }
  let!(:flowers)     { BasicArtwork.create(:title => 'Flowers') }
  let!(:lowered)     { BasicArtwork.create(:title => 'Lowered') }
  let!(:cookies)     { BasicArtwork.create(:title => 'Cookies') }
  let!(:empty)       { BasicArtwork.create(:title => '') }

  it "returns exact matches" do
    [
      ['Flower Myth', flower_myth],
      ['Flowers',     flowers],
      ['Cookies',     cookies],
      ['Lowered',     lowered]
    ].each do |query, expected|
      BasicArtwork.fulltext_search(query, :max_results => 1).first.should == expected
    end
  end

  it "returns exact matches regardless of case" do
    [
      ['fLOWER mYTH', flower_myth],
      ['FLOWERS',     flowers],
      ['cOOkies',     cookies],
      ['lOWERED',     lowered]
    ].each do |query, expected|
      BasicArtwork.fulltext_search(query, :max_results => 1).first.should == expected
    end
  end

  it "returns all relevant results, sorted by relevance" do
    ranked = BasicArtwork.fulltext_search('Flowers')
    ranked.should == [flowers, flower_myth, lowered]
  end

  it "prefers prefix matches" do
    # Either flower title is an acceptable top hit for the mixed query.
    top_hit = BasicArtwork.fulltext_search('Floweockies').first
    [flowers, flower_myth].should include(top_hit)

    [['Lowers', lowered], ['Cookilowers', cookies]].each do |query, expected|
      BasicArtwork.fulltext_search(query).first.should == expected
    end
  end

  it "returns an empty result set for an empty query" do
    results = BasicArtwork.fulltext_search('')
    results.empty?.should be_true
  end

  it "returns an empty result set for a query that doesn't contain any characters in the alphabet" do
    results = BasicArtwork.fulltext_search('_+=--@!##%#$%%')
    results.empty?.should be_true
  end

  it "returns results for a query that contains only a single ngram" do
    ['coo', 'c!!!oo'].each do |query|
      BasicArtwork.fulltext_search(query).first.should == cookies
    end
  end
end
|
65
|
+
|
66
|
+
context "with default settings" do
  let!(:yellow)           { BasicArtwork.create(:title => 'Yellow') }
  let!(:yellow_leaves_2)  { BasicArtwork.create(:title => 'Yellow Leaves 2') }
  let!(:yellow_leaves_3)  { BasicArtwork.create(:title => 'Yellow Leaves 3') }
  let!(:yellow_leaves_20) { BasicArtwork.create(:title => 'Yellow Leaves 20') }
  let!(:yellow_cup)       { BasicArtwork.create(:title => 'Yellow Cup') }

  it "prefers the best prefix that matches a given string" do
    BasicArtwork.fulltext_search('yellow').first.should == yellow

    # Both sides are sorted by title, so only membership of the top three is checked.
    leaves_hits = BasicArtwork.fulltext_search('yellow leaves', :max_results => 3).sort_by(&:title)
    leaves_expected = [yellow_leaves_2, yellow_leaves_3, yellow_leaves_20].sort_by(&:title)
    leaves_hits.should == leaves_expected

    BasicArtwork.fulltext_search('yellow cup').first.should == yellow_cup
  end
end
|
82
|
+
|
83
|
+
context "with default settings" do
  let!(:monet) { BasicArtwork.create(:title => 'claude monet') }
  let!(:one_month_weather_permitting) do
    BasicArtwork.create(:title => 'one month weather permitting monday')
  end

  it "finds better matches within exact strings" do
    best_match = BasicArtwork.fulltext_search('monet').first
    best_match.should == monet
  end
end
|
91
|
+
|
92
|
+
context "with default settings" do
  let!(:abc)      { BasicArtwork.create(:title => 'abc') }
  let!(:abcd)     { BasicArtwork.create(:title => 'abcd') }
  let!(:abcde)    { BasicArtwork.create(:title => 'abcde') }
  let!(:abcdef)   { BasicArtwork.create(:title => 'abcdef') }
  let!(:abcdefg)  { BasicArtwork.create(:title => 'abcdefg') }
  let!(:abcdefgh) { BasicArtwork.create(:title => 'abcdefgh') }

  it "returns exact matches from a list of similar prefixes" do
    # Each document must be the top hit for a query equal to its own title.
    [
      ['abc',      abc],
      ['abcd',     abcd],
      ['abcde',    abcde],
      ['abcdef',   abcdef],
      ['abcdefg',  abcdefg],
      ['abcdefgh', abcdefgh]
    ].each do |query, expected|
      BasicArtwork.fulltext_search(query).first.should == expected
    end
  end
end
|
111
|
+
context "with an index name specified" do
  let!(:pablo_picasso)       { ExternalArtist.create(:full_name => 'Pablo Picasso') }
  let!(:portrait_of_picasso) { ExternalArtwork.create(:title => 'Portrait of Picasso') }
  let!(:andy_warhol)         { ExternalArtist.create(:full_name => 'Andy Warhol') }
  let!(:warhol)              { ExternalArtwork.create(:title => 'Warhol') }
  let!(:empty)               { ExternalArtwork.create(:title => '') }

  it "returns results of different types from the same query" do
    # Searching through either model must surface both the artwork and the artist.
    [ExternalArtwork, ExternalArtist].each do |model|
      results = model.fulltext_search('picasso', :max_results => 2).map { |hit| hit }
      [portrait_of_picasso, pablo_picasso].each do |document|
        results.include?(document).should be_true
      end
    end
  end

  it "returns exact matches" do
    [ExternalArtwork, ExternalArtist].each do |model|
      [
        ['Pablo Picasso',       pablo_picasso],
        ['Portrait of Picasso', portrait_of_picasso],
        ['Andy Warhol',         andy_warhol],
        ['Warhol',              warhol]
      ].each do |query, expected|
        model.fulltext_search(query, :max_results => 1).first.should == expected
      end
    end
  end

  it "returns exact matches regardless of case" do
    [
      [ExternalArtwork, 'pABLO pICASSO',       pablo_picasso],
      [ExternalArtist,  'PORTRAIT OF PICASSO', portrait_of_picasso],
      [ExternalArtwork, 'andy warhol',         andy_warhol],
      [ExternalArtwork, 'wArHoL',              warhol]
    ].each do |model, query, expected|
      model.fulltext_search(query, :max_results => 1).first.should == expected
    end
  end

  it "returns all relevant results, sorted by relevance" do
    [
      ['Pablo Picasso',       [pablo_picasso, portrait_of_picasso]],
      ['Portrait of Picasso', [portrait_of_picasso, pablo_picasso]],
      ['Andy Warhol',         [andy_warhol, warhol]],
      ['Warhol',              [warhol, andy_warhol]]
    ].each do |query, ranked|
      [ExternalArtist, ExternalArtwork].each do |model|
        model.fulltext_search(query).should == ranked
      end
    end
  end

  it "prefers prefix matches" do
    [['PabloWarhol', pablo_picasso], ['AndyPicasso', andy_warhol]].each do |query, expected|
      ExternalArtist.fulltext_search(query).first.should == expected
    end
  end

  it "returns an empty result set for an empty query" do
    results = ExternalArtist.fulltext_search('')
    results.empty?.should be_true
  end

  it "returns an empty result set for a query that doesn't contain any characters in the alphabet" do
    results = ExternalArtwork.fulltext_search('#$%!$#*%*')
    results.empty?.should be_true
  end

  it "returns results for a query that contains only a single ngram" do
    top_hit = ExternalArtist.fulltext_search('and').first
    top_hit.should == andy_warhol
  end
end
|
174
|
+
context "with an index name specified" do
  let!(:andy_warhol) { ExternalArtist.create(:full_name => 'Andy Warhol') }
  let!(:warhol)      { ExternalArtwork.create(:title => 'Warhol') }

  it "doesn't blow up if garbage is in the index collection" do
    ExternalArtist.fulltext_search('warhol').should == [warhol, andy_warhol]

    # Point every index entry for `warhol` at a freshly generated ObjectId.
    index_name = ExternalArtist.mongoid_fulltext_config.keys.first
    index_collection = ExternalArtist.collection.db.collection(index_name)
    selector = { 'document_id' => warhol.id }
    modifier = { '$set' => { 'document_id' => BSON::ObjectId.new } }
    index_collection.update(selector, modifier, :multi => true)

    # We should no longer be able to find warhol, but that shouldn't keep it from returning results
    ExternalArtist.fulltext_search('warhol').should == [andy_warhol]
  end
end
|
188
|
+
context "with an index name specified" do
  let!(:pop)              { ExternalArtwork.create(:title => 'Pop') }
  let!(:pop_culture)      { ExternalArtwork.create(:title => 'Pop Culture') }
  let!(:contemporary_pop) { ExternalArtwork.create(:title => 'Contemporary Pop') }
  let!(:david_poppie)     { ExternalArtist.create(:full_name => 'David Poppie') }
  let!(:kung_fu_lollipop) { ExternalArtwork.create(:title => 'Kung-Fu Lollipop') }

  it "prefers the best prefix that matches a given string" do
    [
      ['pop',      pop],
      ['poppie',   david_poppie],
      ['pop cult', pop_culture]
    ].each do |query, expected|
      ExternalArtwork.fulltext_search(query).first.should == expected
    end

    # 'pop' only appears inside "Lollipop", which must come fifth of the five hits.
    five_hits = ExternalArtwork.fulltext_search('pop', :max_results => 5)
    five_hits[4].should == kung_fu_lollipop
  end
end
|
204
|
+
context "with an index name specified" do
  let!(:abc)      { ExternalArtwork.create(:title => 'abc') }
  let!(:abcd)     { ExternalArtwork.create(:title => 'abcd') }
  let!(:abcde)    { ExternalArtwork.create(:title => 'abcde') }
  let!(:abcdef)   { ExternalArtwork.create(:title => 'abcdef') }
  let!(:abcdefg)  { ExternalArtwork.create(:title => 'abcdefg') }
  let!(:abcdefgh) { ExternalArtwork.create(:title => 'abcdefgh') }

  it "returns exact matches from a list of similar prefixes" do
    # Each document must be the top hit for a query equal to its own title.
    [
      ['abc',      abc],
      ['abcd',     abcd],
      ['abcde',    abcde],
      ['abcdef',   abcdef],
      ['abcdefg',  abcdefg],
      ['abcdefgh', abcdefgh]
    ].each do |query, expected|
      ExternalArtwork.fulltext_search(query).first.should == expected
    end
  end
end
|
223
|
+
context "with an index name specified" do
  it "cleans up item from the index after they're destroyed" do
    foobar = ExternalArtwork.create(:title => 'foobar')
    barfoo = ExternalArtwork.create(:title => 'barfoo')
    # Re-runs the same query after each destroy.
    current_hits = lambda { ExternalArtwork.fulltext_search('foobar') }

    current_hits.call.should == [foobar, barfoo]

    foobar.destroy
    current_hits.call.should == [barfoo]

    barfoo.destroy
    current_hits.call.should == []
  end
end
|
236
|
+
context "with an index name specified and no fields provided to index" do
  let!(:big_bang) do
    ExternalArtworkNoFieldsSupplied.create(:title => 'Big Bang', :artist => 'David Poppie', :year => '2009')
  end

  it "indexes the string returned by to_s" do
    # One query per attribute set on the document: title, artist, year.
    ['big bang', 'poppie', '2009'].each do |query|
      ExternalArtworkNoFieldsSupplied.fulltext_search(query).first.should == big_bang
    end
  end
end
|
247
|
+
context "with multiple indexes defined" do
  let!(:pop) do
    MultiExternalArtwork.create(:title => 'Pop', :year => '1970', :artist => 'Joe Schmoe')
  end
  let!(:pop_culture) do
    MultiExternalArtwork.create(:title => 'Pop Culture', :year => '1977', :artist => 'Jim Schmoe')
  end
  let!(:contemporary_pop) do
    MultiExternalArtwork.create(:title => 'Contemporary Pop', :year => '1800', :artist => 'Bill Schmoe')
  end
  let!(:kung_fu_lollipop) do
    MultiExternalArtwork.create(:title => 'Kung-Fu Lollipop', :year => '2006', :artist => 'Michael Anderson')
  end

  it "allows searches to hit a particular index" do
    # Results and expectations are both sorted by title, so ranking is not asserted here.
    [
      ['pop',      'mongoid_fulltext.titles', [pop, pop_culture, contemporary_pop, kung_fu_lollipop]],
      ['197',      'mongoid_fulltext.years',  [pop, pop_culture]],
      ['1800 and', 'mongoid_fulltext.all',    [contemporary_pop, kung_fu_lollipop]]
    ].each do |query, index_name, expected|
      hits = MultiExternalArtwork.fulltext_search(query, :index => index_name).sort_by(&:title)
      hits.should == expected.sort_by(&:title)
    end
  end

  it "should raise an error if you don't specify which index to search with" do
    search_without_index = lambda { MultiExternalArtwork.fulltext_search('foobar') }
    search_without_index.should raise_error(Mongoid::FullTextSearch::UnspecifiedIndexError)
  end
end
|
268
|
+
context "with multiple fields indexed and the same index used by multiple models" do
  let!(:andy_warhol) do
    MultiFieldArtist.create(:full_name => 'Andy Warhol', :birth_year => '1928')
  end
  let!(:warhol) do
    MultiFieldArtwork.create(:title => 'Warhol', :year => '2010')
  end
  let!(:pablo_picasso) do
    MultiFieldArtist.create(:full_name => 'Pablo Picasso', :birth_year => '1881')
  end
  let!(:portrait_of_picasso) do
    MultiFieldArtwork.create(:title => 'Portrait of Picasso', :year => '1912')
  end

  it "allows searches across all models on both fields indexed" do
    # The same expectations must hold whichever model the search starts from.
    [MultiFieldArtist, MultiFieldArtwork].each do |model|
      [
        ['2010',          warhol],
        ['andy',          andy_warhol],
        ['pablo',         pablo_picasso],
        ['1881',          pablo_picasso],
        ['portrait 1912', portrait_of_picasso]
      ].each do |query, expected|
        model.fulltext_search(query).first.should == expected
      end
    end
  end
end
|
290
|
+
context "with filters applied to multiple models" do
  let!(:foobar_artwork) { FilteredArtwork.create(:title => 'foobar') }
  let!(:barfoo_artwork) { FilteredArtwork.create(:title => 'barfoo') }
  let!(:foobar_artist)  { FilteredArtist.create(:full_name => 'foobar') }
  let!(:barfoo_artist)  { FilteredArtist.create(:full_name => 'barfoo') }

  it "allows filtered searches" do
    models = [FilteredArtwork, FilteredArtist]

    # A single filter narrows the results to artworks, whichever model is queried.
    models.each do |model|
      model.fulltext_search('foobar', :is_artwork => true).should == [foobar_artwork, barfoo_artwork]
    end

    # Each combination of the two filters selects exactly one document.
    models.each do |model|
      [
        [true,  true,  foobar_artwork],
        [true,  false, barfoo_artwork],
        [false, true,  foobar_artist],
        [false, false, barfoo_artist]
      ].each do |is_artwork, is_foobar, expected|
        hits = model.fulltext_search('foobar', :is_artwork => is_artwork, :is_foobar => is_foobar)
        hits.should == [expected]
      end
    end
  end
end
|
313
|
+
|
314
|
+
context "with partitions applied to a model" do
  let!(:artist_2) do
    PartitionedArtist.create(:full_name => 'foobar', :exhibitions => ['Art Basel 2011', 'Armory NY'])
  end
  let!(:artist_1) do
    PartitionedArtist.create(:full_name => 'foobar', :exhibitions => ['Art Basel 2011'])
  end
  let!(:artist_0) do
    PartitionedArtist.create(:full_name => 'foobar', :exhibitions => [])
  end

  it "allows partitioned searches" do
    PartitionedArtist.fulltext_search('foobar').should == [artist_2, artist_1, artist_0]

    # Exhibitions passed as a filter => artists expected back, in order.
    [
      [['Armory NY'],                   [artist_2]],
      [['Art Basel 2011'],              [artist_2, artist_1]],
      [['Art Basel 2011', 'Armory NY'], [artist_2]]
    ].each do |exhibitions, expected|
      PartitionedArtist.fulltext_search('foobar', :exhibitions => exhibitions).should == expected
    end
  end
end
|
328
|
+
|
329
|
+
context "using search options" do
  let!(:patterns) { BasicArtwork.create(:title => 'Flower Patterns') }
  let!(:flowers)  { BasicArtwork.create(:title => 'Flowers') }

  it "returns max_results" do
    limited = BasicArtwork.fulltext_search('flower', { :max_results => 1 })
    limited.length.should == 1
  end

  it "returns scored results" do
    scored = BasicArtwork.fulltext_search('flowers', { :return_scores => true })
    # With :return_scores each entry is expected to be a [document, score] pair.
    top_entry = scored[0]

    top_entry.kind_of?(Array).should be_true
    top_entry.size.should == 2
    top_entry[0].should == flowers
    top_entry[1].kind_of?(Float).should be_true
  end
end
|
346
|
+
|
347
|
+
end
|
348
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'rspec'
|
4
|
+
|
5
|
+
require 'mongoid'
|
6
|
+
require 'database_cleaner'
|
7
|
+
|
8
|
+
# Point Mongoid at the "mongoid_fulltext_test" database on a default
# Mongo::Connection.
Mongoid.configure do |config|
  config.master = Mongo::Connection.new.db("mongoid_fulltext_test")
end
|
12
|
+
|
13
|
+
require File.expand_path("../../lib/mongoid_fulltext", __FILE__)
|
14
|
+
Dir["#{File.dirname(__FILE__)}/models/*.rb"].each { |f| require f }
|
15
|
+
|
16
|
+
# Clean the database with the truncation strategy before every example.
# Uses the canonical `RSpec` constant: the `Rspec` spelling is only a
# deprecated alias under RSpec 2 and is not defined by RSpec 3.
RSpec.configure do |c|
  c.before(:all) { DatabaseCleaner.strategy = :truncation }
  c.before(:each) { DatabaseCleaner.clean }
end
|
20
|
+
|
metadata
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mongoid_fulltext
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.6
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Aaron Windsor
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-05-27 00:00:00.000000000 -04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: mongoid
|
17
|
+
requirement: &82366030 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ~>
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.0.0
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *82366030
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: database_cleaner
|
28
|
+
requirement: &82365790 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.6.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *82365790
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: rspec
|
39
|
+
requirement: &82365550 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ~>
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: 2.5.0
|
45
|
+
type: :development
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *82365550
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: jeweler
|
50
|
+
requirement: &82365310 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ~>
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 1.5.2
|
56
|
+
type: :development
|
57
|
+
prerelease: false
|
58
|
+
version_requirements: *82365310
|
59
|
+
description: Full-text search for the Mongoid ORM, using n-grams extracted from text
|
60
|
+
email: aaron.windsor@gmail.com
|
61
|
+
executables: []
|
62
|
+
extensions: []
|
63
|
+
extra_rdoc_files:
|
64
|
+
- LICENSE
|
65
|
+
- README.md
|
66
|
+
files:
|
67
|
+
- .document
|
68
|
+
- Gemfile
|
69
|
+
- LICENSE
|
70
|
+
- README.md
|
71
|
+
- Rakefile
|
72
|
+
- VERSION
|
73
|
+
- lib/mongoid_fulltext.rb
|
74
|
+
- mongoid_fulltext.gemspec
|
75
|
+
- spec/models/advanced_artwork.rb
|
76
|
+
- spec/models/basic_artwork.rb
|
77
|
+
- spec/models/external_artist.rb
|
78
|
+
- spec/models/external_artwork.rb
|
79
|
+
- spec/models/external_artwork_no_fields_supplied.rb
|
80
|
+
- spec/models/filtered_artist.rb
|
81
|
+
- spec/models/filtered_artwork.rb
|
82
|
+
- spec/models/multi_external_artwork.rb
|
83
|
+
- spec/models/multi_field_artist.rb
|
84
|
+
- spec/models/multi_field_artwork.rb
|
85
|
+
- spec/models/partitioned_artist.rb
|
86
|
+
- spec/mongoid/fulltext_spec.rb
|
87
|
+
- spec/spec_helper.rb
|
88
|
+
has_rdoc: true
|
89
|
+
homepage: http://github.com/aaw/mongoid_fulltext
|
90
|
+
licenses:
|
91
|
+
- MIT
|
92
|
+
post_install_message:
|
93
|
+
rdoc_options: []
|
94
|
+
require_paths:
|
95
|
+
- lib
|
96
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ! '>='
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
requirements: []
|
109
|
+
rubyforge_project:
|
110
|
+
rubygems_version: 1.6.2
|
111
|
+
signing_key:
|
112
|
+
specification_version: 3
|
113
|
+
summary: Full-text search for the Mongoid ORM
|
114
|
+
test_files:
|
115
|
+
- spec/models/advanced_artwork.rb
|
116
|
+
- spec/models/basic_artwork.rb
|
117
|
+
- spec/models/external_artist.rb
|
118
|
+
- spec/models/external_artwork.rb
|
119
|
+
- spec/models/external_artwork_no_fields_supplied.rb
|
120
|
+
- spec/models/filtered_artist.rb
|
121
|
+
- spec/models/filtered_artwork.rb
|
122
|
+
- spec/models/multi_external_artwork.rb
|
123
|
+
- spec/models/multi_field_artist.rb
|
124
|
+
- spec/models/multi_field_artwork.rb
|
125
|
+
- spec/models/partitioned_artist.rb
|
126
|
+
- spec/mongoid/fulltext_spec.rb
|
127
|
+
- spec/spec_helper.rb
|