mongoid_fulltext 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +13 -0
- data/LICENSE +19 -0
- data/README.md +259 -0
- data/Rakefile +36 -0
- data/VERSION +1 -0
- data/lib/mongoid_fulltext.rb +183 -0
- data/mongoid_fulltext.gemspec +85 -0
- data/spec/models/advanced_artwork.rb +7 -0
- data/spec/models/basic_artwork.rb +8 -0
- data/spec/models/external_artist.rb +6 -0
- data/spec/models/external_artwork.rb +6 -0
- data/spec/models/external_artwork_no_fields_supplied.rb +12 -0
- data/spec/models/filtered_artist.rb +10 -0
- data/spec/models/filtered_artwork.rb +10 -0
- data/spec/models/multi_external_artwork.rb +10 -0
- data/spec/models/multi_field_artist.rb +7 -0
- data/spec/models/multi_field_artwork.rb +7 -0
- data/spec/models/partitioned_artist.rb +15 -0
- data/spec/mongoid/fulltext_spec.rb +348 -0
- data/spec/spec_helper.rb +20 -0
- metadata +127 -0
data/.document
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
# Add dependencies required to use your gem here.
|
3
|
+
# Example:
|
4
|
+
# gem "activesupport", ">= 2.3.5"
|
5
|
+
|
6
|
+
# Add dependencies to develop your gem here.
|
7
|
+
# Include everything needed to run rake, tests, features, etc.
|
8
|
+
group :development do
|
9
|
+
gem "mongoid", "~> 2.0.0"
|
10
|
+
gem 'database_cleaner', '~> 0.6.0'
|
11
|
+
gem 'rspec', '~> 2.5.0'
|
12
|
+
gem "jeweler", "~> 1.5.2"
|
13
|
+
end
|
data/LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2011 by Artsy, Inc.
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in
|
11
|
+
all copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,259 @@
|
|
1
|
+
Mongoid Fulltext Search
|
2
|
+
=======================
|
3
|
+
|
4
|
+
Full-text search using n-gram matching for the Mongoid ODM. Tested on MongoDB 1.6 and above, but
|
5
|
+
probably works on earlier versions as well.
|
6
|
+
|
7
|
+
Some examples:
|
8
|
+
--------------
|
9
|
+
|
10
|
+
Suppose you have an `Artist` model and want to index each artist's name:
|
11
|
+
|
12
|
+
class Artist
|
13
|
+
include Mongoid::Document
|
14
|
+
include Mongoid::FullTextSearch
|
15
|
+
|
16
|
+
field :first_name
|
17
|
+
field :last_name
|
18
|
+
|
19
|
+
def name
|
20
|
+
[first_name, last_name].join(' ')
|
21
|
+
end
|
22
|
+
|
23
|
+
fulltext_search_in :name
|
24
|
+
end
|
25
|
+
|
26
|
+
The `fulltext_search_in` directive will index the full name of the artist, so now
|
27
|
+
you can call:
|
28
|
+
|
29
|
+
Artist.fulltext_search("vince vangogh")
|
30
|
+
|
31
|
+
which will return an array of the Artist instances that best match the search string. Most likely,
|
32
|
+
Vincent van Gogh will be included in the results. You can index multiple fields with the same
|
33
|
+
index, so we can get the same effect of our Artist index above using:
|
34
|
+
|
35
|
+
class Artist
|
36
|
+
include Mongoid::Document
|
37
|
+
include Mongoid::FullTextSearch
|
38
|
+
|
39
|
+
field :first_name
|
40
|
+
field :last_name
|
41
|
+
|
42
|
+
fulltext_search_in :first_name, :last_name
|
43
|
+
end
|
44
|
+
|
45
|
+
To restrict the number of results returned, pass the `:max_results` parameter to `fulltext_search`:
|
46
|
+
|
47
|
+
Artist.fulltext_search("vince vangogh", { :max_results => 5 })
|
48
|
+
|
49
|
+
To return an array of `[ result, score ]` pairs instead of an array of results, pass the `:return_scores` parameter to `fulltext_search`:
|
50
|
+
|
51
|
+
Artist.fulltext_search("vince vangogh", { :return_scores => true })
|
52
|
+
|
53
|
+
If you don't specify a field to index, the default is the result of `to_s` called on the object.
|
54
|
+
The following definition will index the first and last name of an artist:
|
55
|
+
|
56
|
+
class Artist
|
57
|
+
include Mongoid::Document
|
58
|
+
include Mongoid::FullTextSearch
|
59
|
+
|
60
|
+
field :first_name
|
61
|
+
field :last_name
|
62
|
+
|
63
|
+
def to_s
|
64
|
+
'%s %s' % [first_name, last_name]
|
65
|
+
end
|
66
|
+
|
67
|
+
fulltext_search_in
|
68
|
+
end
|
69
|
+
|
70
|
+
The full-text index is stored in a separate MongoDB collection in the same database as the
|
71
|
+
models you're indexing. By default, the name of this collection is generated for you. Above,
|
72
|
+
a collection named something like `mongoid_fulltext.index_artist_0` will be created to
|
73
|
+
hold the index data. You can override this naming and provide your own collection name with
|
74
|
+
the :index_name parameter:
|
75
|
+
|
76
|
+
class Artwork
|
77
|
+
include Mongoid::Document
|
78
|
+
include Mongoid::FullTextSearch
|
79
|
+
|
80
|
+
field :title
|
81
|
+
fulltext_search_in :title, :index_name => 'mongoid_fulltext.foobar'
|
82
|
+
end
|
83
|
+
|
84
|
+
You can also create multiple indexes on a single model, in which case you'll want to
|
85
|
+
provide index names:
|
86
|
+
|
87
|
+
class Artwork
|
88
|
+
include Mongoid::Document
|
89
|
+
include Mongoid::FullTextSearch
|
90
|
+
|
91
|
+
field :title
|
92
|
+
field :artist_name
|
93
|
+
field :gallery_name
|
94
|
+
field :gallery_address
|
95
|
+
|
96
|
+
fulltext_search_in :title, :index_name => 'title_index'
|
97
|
+
fulltext_search_in :artist_name, :index_name => 'artist_name_index'
|
98
|
+
fulltext_search_in :gallery_name, :gallery_address, :index_name => 'gallery_index'
|
99
|
+
end
|
100
|
+
|
101
|
+
The index names are helpful now because you'll have to specify which one you want to use when you
|
102
|
+
call `fulltext_search`:
|
103
|
+
|
104
|
+
Artwork.fulltext_search('warhol', :index => 'artist_name_index')
|
105
|
+
|
106
|
+
If you have multiple indexes specified and you don't supply a name to `fulltext_search`, the
|
107
|
+
method call will raise an exception.
|
108
|
+
|
109
|
+
If you're indexing multiple models, you may find that you need to combine results to create
|
110
|
+
a single result set. For example, if both the `Artist` model and the `Artwork` model are
|
111
|
+
indexed for full-text search, then to get results from both, you'd have to call
|
112
|
+
`Artist.fulltext_search` and `Artwork.fulltext_search` and combine the results yourself. If
|
113
|
+
your intention is instead to get the top k results from both Artists and Artworks, you can
|
114
|
+
merge both into a single index by using the same `:index_name` parameter:
|
115
|
+
|
116
|
+
class Artwork
|
117
|
+
include Mongoid::Document
|
118
|
+
include Mongoid::FullTextSearch
|
119
|
+
|
120
|
+
field :title
|
121
|
+
fulltext_search_in :title, :index_name => 'artwork_and_artists'
|
122
|
+
end
|
123
|
+
|
124
|
+
class Artist
|
125
|
+
include Mongoid::Document
|
126
|
+
include Mongoid::FullTextSearch
|
127
|
+
|
128
|
+
field :name
|
129
|
+
fulltext_search_in :name, :index_name => 'artwork_and_artists'
|
130
|
+
end
|
131
|
+
|
132
|
+
Now that these two models share the same external index collection, we can search them both through
|
133
|
+
either model's `fulltext_search` method:
|
134
|
+
|
135
|
+
Artwork.fulltext_search('picasso') # returns same results as Artist.fulltext_search('picasso')
|
136
|
+
|
137
|
+
If you want to filter the results from full-text search, you set up filters when the indexes are
|
138
|
+
defined. For example, suppose that in addition to wanting to use the `artwork_and_artists` index
|
139
|
+
defined above to search for `Artwork`s or `Artist`s, we want to be able to run full-text searches
|
140
|
+
for artists only and for artworks priced above $10,000. Instead of creating two new indexes or
|
141
|
+
attempting to filter the results after the query is run, we can specify the filter predicates
|
142
|
+
at the time of index definition:
|
143
|
+
|
144
|
+
class Artwork
|
145
|
+
include Mongoid::Document
|
146
|
+
include Mongoid::FullTextSearch
|
147
|
+
|
148
|
+
field :title
|
149
|
+
field :price
|
150
|
+
fulltext_search_in :title, :index_name => 'artwork_and_artists',
|
151
|
+
:filters => { :is_expensive => lambda { |x| x.price > 10000 },
|
152
|
+
:has_long_name => lambda { |x| x.title.length > 20 }}
|
153
|
+
end
|
154
|
+
|
155
|
+
class Artist
|
156
|
+
include Mongoid::Document
|
157
|
+
include Mongoid::FullTextSearch
|
158
|
+
|
159
|
+
field :name
|
160
|
+
field :birth_year
|
161
|
+
fulltext_search_in :name, :index_name => 'artwork_and_artists',
|
162
|
+
:filters => { :born_before_1900 => lambda { |x| x.birth_year < 1900 },
|
163
|
+
:has_long_name => lambda { |x| x.name.length > 20}}
|
164
|
+
end
|
165
|
+
|
166
|
+
After defining filters, you can query for results that match particular values of filters:
|
167
|
+
|
168
|
+
# Only return artists born before 1900 that match 'foobar'
|
169
|
+
Artist.fulltext_search('foobar', :born_before_1900 => true)
|
170
|
+
|
171
|
+
# Return artists or artworks that match 'foobar' and have short names
|
172
|
+
Artist.fulltext_search('foobar', :has_long_name => false)
|
173
|
+
|
174
|
+
# Only return artworks with prices over 10000 that match 'mona lisa'
|
175
|
+
Artwork.fulltext_search('mona lisa', :is_expensive => true)
|
176
|
+
|
177
|
+
# Only return artworks with prices of 10000 or less that match 'mona lisa'
|
178
|
+
Artwork.fulltext_search('mona lisa', :is_expensive => false)
|
179
|
+
|
180
|
+
Note that in all of the example queries above, supplying a filter that is defined on exactly
|
181
|
+
one of the models will restrict the search to results from that model only. For example,
|
182
|
+
since `:is_expensive` is defined only on `Artwork`s, a call to `fulltext_search` with either
|
183
|
+
`:is_expensive => true` or `:is_expensive => false` will return only `Artwork` results.
|
184
|
+
|
185
|
+
You can specify multiple filters per index and per model. Each filter is a predicate that will
|
186
|
+
be called on objects as they're inserted into the full-text index (any time the model is saved.)
|
187
|
+
Filters are only called on instances of models they're defined on, so in the example above, the
|
188
|
+
`is_expensive` filter is only applied to instances of `Artwork` and the `born_before_1900` filter
|
189
|
+
is only applied to instances of `Artist`, although both filters can be used when querying from
|
190
|
+
either model. The `has_long_name` filter, on the other hand, will return instances of both
|
191
|
+
`Artwork` and `Artist` since it's defined on each model.
|
192
|
+
|
193
|
+
Filters shouldn't ever throw, but if they do, the filter is just ignored. If you apply filters to
|
194
|
+
indexes that are on multiple fields, the filter is applied to each field and the filter result is
|
195
|
+
the AND of all of the individual results for each of the fields. Finally, if a filter is defined
|
196
|
+
but criteria for that filter aren't passed to `fulltext_search`, the result is as if the filter
|
197
|
+
had never been defined - the results include models that pass the filter as well as models that fail it.
|
198
|
+
|
199
|
+
Indexing Options
|
200
|
+
----------------
|
201
|
+
|
202
|
+
Additional indexing/query options can be used as parameters to `fulltext_search_in`.
|
203
|
+
|
204
|
+
* `alphabet`: letters to index, default is `abcdefghijklmnopqrstuvwxyz0123456789 `
|
205
|
+
* `word_separators`: word separators, default is ` `
|
206
|
+
* `ngram_width`: ngram width, default is `3`
|
207
|
+
* `index_full_words`: index full words, which improves exact matches, default is `true`
|
208
|
+
* `apply_prefix_scoring_to_all_words`: score n-grams at beginning of words higher, default is `true`
|
209
|
+
* `max_ngrams_to_search`: maximum number of ngrams to query at any given time, default is `6`
|
210
|
+
|
211
|
+
Array filters
|
212
|
+
-------------
|
213
|
+
|
214
|
+
A filter may also return an Array. Consider the following example.
|
215
|
+
|
216
|
+
class Artist
|
217
|
+
include Mongoid::Document
|
218
|
+
include Mongoid::FullTextSearch
|
219
|
+
|
220
|
+
field :name
|
221
|
+
field :exhibitions, type: Array, default: []
|
222
|
+
|
223
|
+
fulltext_search_in :name, :index_name => 'exhibited_artist',
|
224
|
+
:filters => {
|
225
|
+
:exhibitions => lambda { |artist| artist.exhibitions }
|
226
|
+
}
|
227
|
+
end
|
228
|
+
|
229
|
+
You can now find all artists that are at the Art Basel exhibition or all artists that have exhibited
|
230
|
+
at both the Art Basel and the New York Armory exhibition.
|
231
|
+
|
232
|
+
# All artists
|
233
|
+
Artist.fulltext_search('foobar')
|
234
|
+
|
235
|
+
# Artists at the Art Basel exhibition only
|
236
|
+
Artist.fulltext_search('foobar', :exhibitions => [ "Art Basel" ])
|
237
|
+
|
238
|
+
# Artists at both the Art Basel and the New York Armory exhibition
|
239
|
+
Artist.fulltext_search('foobar', :exhibitions => [ "Art Basel", "New York Armory" ])
|
240
|
+
|
241
|
+
Building the index
|
242
|
+
------------------
|
243
|
+
|
244
|
+
The fulltext index is built and maintained incrementally by hooking into `before_save` and
|
245
|
+
`before_destroy` callbacks on each model that's being indexed. If you want to build an index
|
246
|
+
on existing models, you can call the `update_ngram_index` method on each instance:
|
247
|
+
|
248
|
+
Artwork.all.each { |artwork| artwork.update_ngram_index }
|
249
|
+
|
250
|
+
You can also remove instances in bulk from the index with the `remove_from_ngram_index`
|
251
|
+
method:
|
252
|
+
|
253
|
+
Artwork.all.each { |artwork| artwork.remove_from_ngram_index }
|
254
|
+
|
255
|
+
Running the specs
|
256
|
+
-----------------
|
257
|
+
|
258
|
+
To run the specs, execute `rake spec`. You need a local MongoDB instance to run the specs.
|
259
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'rake'
|
11
|
+
require 'rspec/core/rake_task'
|
12
|
+
|
13
|
+
require 'jeweler'
|
14
|
+
Jeweler::Tasks.new do |gem|
|
15
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
16
|
+
gem.name = "mongoid_fulltext"
|
17
|
+
gem.homepage = "http://github.com/aaw/mongoid_fulltext"
|
18
|
+
gem.license = "MIT"
|
19
|
+
gem.summary = %Q{Full-text search for the Mongoid ORM}
|
20
|
+
gem.description = %Q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
|
21
|
+
gem.email = "aaron.windsor@gmail.com"
|
22
|
+
gem.authors = ["Aaron Windsor"]
|
23
|
+
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
24
|
+
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
25
|
+
# gem.add_runtime_dependency 'jabber4r', '> 0.1'
|
26
|
+
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
27
|
+
end
|
28
|
+
Jeweler::RubygemsDotOrgTasks.new
|
29
|
+
|
30
|
+
|
31
|
+
desc "Run all tests"
|
32
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
33
|
+
spec.pattern = "spec/**/*_spec.rb"
|
34
|
+
end
|
35
|
+
|
36
|
+
task :default => :spec
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.3.6
|
@@ -0,0 +1,183 @@
|
|
1
|
+
module Mongoid::FullTextSearch
|
2
|
+
extend ActiveSupport::Concern
|
3
|
+
|
4
|
+
included do
|
5
|
+
cattr_accessor :mongoid_fulltext_config
|
6
|
+
end
|
7
|
+
|
8
|
+
class UnspecifiedIndexError < StandardError; end
|
9
|
+
|
10
|
+
module ClassMethods
|
11
|
+
|
12
|
+
# Declares a full-text index over the given fields of the including model.
#
# args - field (method) names whose values are indexed, optionally followed
#        by an options Hash. With no field names, the result of +to_s+ is
#        indexed. :index_name selects the backing collection; without it a
#        name is generated from the class name and the number of indexes
#        already configured. All options are merged over the defaults below
#        and stored in the index configuration.
#
# Stores the configuration under the index name, ensures the 'ngram' and
# 'document_id' indexes on the backing collection, and registers the index
# maintenance methods as before_save / before_destroy callbacks.
def fulltext_search_in(*args)
  self.mongoid_fulltext_config = {} if self.mongoid_fulltext_config.nil?
  options = args.last.is_a?(Hash) ? args.pop : {}
  index_name = options.fetch(:index_name) do
    'mongoid_fulltext.index_%s_%s' % [self.name.downcase, self.mongoid_fulltext_config.count]
  end

  defaults = {
    :alphabet => 'abcdefghijklmnopqrstuvwxyz0123456789 ',
    :word_separators => ' ',
    :ngram_width => 3,
    :max_ngrams_to_search => 6,
    :apply_prefix_scoring_to_all_words => true,
    :index_full_words => true
  }
  config = defaults.merge(options)
  config[:ngram_fields] = args.empty? ? [:to_s] : args

  # Turn the character strings into char => char lookup tables.
  [:alphabet, :word_separators].each do |key|
    config[key] = Hash[config[key].split('').map { |ch| [ch, ch] }]
  end
  self.mongoid_fulltext_config[index_name] = config

  index_collection = collection.db.collection(index_name)
  index_collection.ensure_index([['ngram', Mongo::ASCENDING]])
  index_collection.ensure_index([['document_id', Mongo::ASCENDING]])

  before_save :update_ngram_index
  before_destroy :remove_from_ngram_index
end
|
45
|
+
|
46
|
+
# Runs a full-text query against one of the configured n-gram indexes.
#
# query_string - the text to search for.
# options      - Hash of search options; the caller's Hash is left untouched.
#                :max_results   - maximum number of results (default 10);
#                                 nil returns every match.
#                :return_scores - when true, return [instance, score] pairs
#                                 instead of bare instances (default false).
#                :index         - name of the index to query; required when
#                                 more than one index is configured.
#                Every remaining key is treated as a filter and matched
#                against the 'filter_values' stored with each index entry.
#
# Returns an Array of instances (or [instance, score] pairs), highest score
# first; an empty Array when the query yields no n-grams.
# Raises UnspecifiedIndexError when several indexes are configured and no
# :index option is given.
def fulltext_search(query_string, options={})
  # Work on a copy: the option keys are deleted below to isolate the filters,
  # and that must not leak into the caller's hash.
  options = options.dup
  max_results = options.has_key?(:max_results) ? options.delete(:max_results) : 10
  return_scores = options.has_key?(:return_scores) ? options.delete(:return_scores) : false
  if self.mongoid_fulltext_config.count > 1 && !options.has_key?(:index)
    error_message = '%s is indexed by multiple full-text indexes. You must specify one by passing an :index parameter'
    raise UnspecifiedIndexError, error_message % self.name, caller
  end
  index_name = options.has_key?(:index) ? options.delete(:index) : self.mongoid_fulltext_config.keys.first

  # options hash should only contain filters after this point
  ngrams = all_ngrams(query_string, self.mongoid_fulltext_config[index_name])
  return [] if ngrams.empty?

  query = {'ngram' => {'$in' => ngrams.keys}}
  query.update(Hash[options.map { |key,value| [ 'filter_values.%s' % key, { '$all' => [ value ].flatten } ] }])
  # The map step weights each stored n-gram score by the query-side score
  # passed in through the map/reduce scope; the reduce step sums per document.
  map = <<-EOS
    function() {
      emit(this['document_id'], {'class': this['class'], 'score': this['score']*ngrams[this['ngram']] })
    }
  EOS
  reduce = <<-EOS
    function(key, values) {
      score = 0.0
      for (i in values) {
        score += values[i]['score']
      }
      return({'class': values[0]['class'], 'score': score})
    }
  EOS
  mr_options = {:scope => {:ngrams => ngrams }, :query => query, :raw => true}
  rc_options = { :return_scores => return_scores }
  coll = collection.db.collection(index_name)
  if collection.db.connection.server_version >= '1.7.4'
    # Inline output: sort and truncate the results on the Ruby side.
    mr_options[:out] = {:inline => 1}
    results = coll.map_reduce(map, reduce, mr_options)['results'].sort_by{ |x| -x['value']['score'] }
    max_results = results.count if max_results.nil?
    instantiate_mapreduce_results(results.first(max_results), rc_options)
  else
    # Otherwise read the named result collection, then drop it once the
    # models have been loaded.
    result_collection = coll.map_reduce(map, reduce, mr_options)['result']
    results = collection.db.collection(result_collection).find.sort(['value.score',-1])
    results = results.limit(max_results) if !max_results.nil?
    models = instantiate_mapreduce_results(results, rc_options)
    collection.db.collection(result_collection).drop
    models
  end
end
|
92
|
+
|
93
|
+
# Loads the model instance referenced by one map/reduce result row.
#
# result - Hash holding the document id under '_id' and the model's class
#          name under ['value']['class'].
#
# Returns whatever the model class's +find+ returns for that id.
def instantiate_mapreduce_result(result)
  # Resolve the class name one segment at a time so namespaced models
  # ("Gallery::Artist") are found even on Rubies whose const_get does not
  # accept "::"-qualified names.
  model_class = result['value']['class'].split('::').inject(Object) do |scope, const_name|
    scope.const_get(const_name)
  end
  model_class.find(:first, :conditions => {:id => result['_id']})
end
|
96
|
+
|
97
|
+
# Converts map/reduce result rows into model instances.
#
# results - enumerable of result rows (see instantiate_mapreduce_result).
# options - Hash; when :return_scores is truthy each element of the returned
#           Array is an [instance, score] pair, otherwise a bare instance.
#
# Rows whose instance comes back as nil are dropped.
def instantiate_mapreduce_results(results, options)
  unless options[:return_scores]
    return results.map { |row| instantiate_mapreduce_result(row) }.reject { |model| model.nil? }
  end

  scored = results.map { |row| [ instantiate_mapreduce_result(row), row['value']['score'] ] }
  scored.reject { |pair| pair[0].nil? }
end
|
104
|
+
|
105
|
+
# Breaks a string into scored n-grams.
#
# str                   - the text to break up; nil yields an empty Hash.
# config                - index configuration Hash; reads :ngram_width,
#                         :alphabet and :word_separators (char => char
#                         lookup tables), :max_ngrams_to_search,
#                         :apply_prefix_scoring_to_all_words and
#                         :index_full_words.
# bound_number_returned - when true, n-grams are sampled at a step chosen
#                         from :max_ngrams_to_search instead of taking every
#                         position; when false every position is used.
#
# Returns a Hash mapping each n-gram (and, with :index_full_words, each full
# word at least :ngram_width long) to its score. When the same key occurs
# more than once the highest score is kept.
def all_ngrams(str, config, bound_number_returned = true)
  return {} if str.nil? || str.length < config[:ngram_width]

  width = config[:ngram_width]
  separators = config[:word_separators]
  # Lowercase, then drop every character that is not in the alphabet.
  filtered_str = str.downcase.split('').map { |ch| config[:alphabet][ch] }.compact.join('')

  step_size =
    if bound_number_returned
      [((filtered_str.length - width).to_f / config[:max_ngrams_to_search]).ceil, 1].max
    else
      1
    end

  # array of [ngram, score] pairs
  ngram_ary = (0..filtered_str.length - width).step(step_size).map do |i|
    # N-grams at the start of the string (or, optionally, of any word) score higher.
    at_word_start = i == 0 || (config[:apply_prefix_scoring_to_all_words] &&
                               separators.has_key?(filtered_str[i - 1].chr))
    score = if at_word_start
              Math.sqrt(1 + 1.0 / filtered_str.length)
            else
              Math.sqrt(2.0 / filtered_str.length)
            end
    [filtered_str[i..i + width - 1], score]
  end

  if config[:index_full_words]
    # Regexp.union matches any one of the separators and escapes regexp
    # metacharacters. With no separators at all the whole filtered string
    # counts as a single word.
    filtered_str.split(Regexp.union(separators.keys)).each do |word|
      ngram_ary << [word, 1] if word.length >= width
    end
  end

  # deduplicate, and keep the highest score
  ngram_ary.inject({}) do |best, (ngram, score)|
    best[ngram] = [best[ngram] || 0, score].max
    best
  end
end
|
144
|
+
|
145
|
+
end
|
146
|
+
|
147
|
+
# Rebuilds this document's entries in every configured full-text index.
#
# For each index: removes the entries stored for this document, extracts
# n-grams from the configured fields, evaluates the index's filters against
# the document, and inserts one entry per n-gram. An index for which the
# document yields no n-grams is left without entries for it; the remaining
# indexes are still processed.
def update_ngram_index
  self.mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
    # remove existing ngrams from external index
    coll = collection.db.collection(index_name)
    coll.remove({'document_id' => self._id})
    # extract ngrams from fields
    field_values = fulltext_config[:ngram_fields].map { |field| self.send(field) }
    ngrams = field_values.inject({}) { |accum, item| accum.update(self.class.all_ngrams(item, fulltext_config, false)) }
    # Nothing to store for this index; move on to the next one rather than
    # abandoning the indexes that have not been visited yet.
    next if ngrams.empty?
    # apply filters, if necessary
    filter_values = nil
    if fulltext_config.has_key?(:filters)
      evaluated = fulltext_config[:filters].map do |key, value|
        begin
          [key, value.call(self)]
        rescue
          # Suppress any exceptions caused by filters; a filter that raises
          # is simply left out of the stored filter values.
          nil
        end
      end
      filter_values = Hash[evaluated.compact]
    end
    # insert new ngrams in external index
    ngrams.each_pair do |ngram, score|
      index_document = {'ngram' => ngram, 'document_id' => self._id, 'score' => score, 'class' => self.class.name}
      index_document['filter_values'] = filter_values if fulltext_config.has_key?(:filters)
      coll.insert(index_document)
    end
  end
end
|
175
|
+
|
176
|
+
# Deletes this document's entries from every configured full-text index
# collection.
def remove_from_ngram_index
  self.mongoid_fulltext_config.each_key do |index_name|
    collection.db.collection(index_name).remove({'document_id' => self._id})
  end
end
|
182
|
+
|
183
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{mongoid_fulltext}
|
8
|
+
s.version = "0.3.6"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Aaron Windsor"]
|
12
|
+
s.date = %q{2011-05-27}
|
13
|
+
s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
|
14
|
+
s.email = %q{aaron.windsor@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.md"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
"Gemfile",
|
22
|
+
"LICENSE",
|
23
|
+
"README.md",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"lib/mongoid_fulltext.rb",
|
27
|
+
"mongoid_fulltext.gemspec",
|
28
|
+
"spec/models/advanced_artwork.rb",
|
29
|
+
"spec/models/basic_artwork.rb",
|
30
|
+
"spec/models/external_artist.rb",
|
31
|
+
"spec/models/external_artwork.rb",
|
32
|
+
"spec/models/external_artwork_no_fields_supplied.rb",
|
33
|
+
"spec/models/filtered_artist.rb",
|
34
|
+
"spec/models/filtered_artwork.rb",
|
35
|
+
"spec/models/multi_external_artwork.rb",
|
36
|
+
"spec/models/multi_field_artist.rb",
|
37
|
+
"spec/models/multi_field_artwork.rb",
|
38
|
+
"spec/models/partitioned_artist.rb",
|
39
|
+
"spec/mongoid/fulltext_spec.rb",
|
40
|
+
"spec/spec_helper.rb"
|
41
|
+
]
|
42
|
+
s.homepage = %q{http://github.com/aaw/mongoid_fulltext}
|
43
|
+
s.licenses = ["MIT"]
|
44
|
+
s.require_paths = ["lib"]
|
45
|
+
s.rubygems_version = %q{1.3.7}
|
46
|
+
s.summary = %q{Full-text search for the Mongoid ORM}
|
47
|
+
s.test_files = [
|
48
|
+
"spec/models/advanced_artwork.rb",
|
49
|
+
"spec/models/basic_artwork.rb",
|
50
|
+
"spec/models/external_artist.rb",
|
51
|
+
"spec/models/external_artwork.rb",
|
52
|
+
"spec/models/external_artwork_no_fields_supplied.rb",
|
53
|
+
"spec/models/filtered_artist.rb",
|
54
|
+
"spec/models/filtered_artwork.rb",
|
55
|
+
"spec/models/multi_external_artwork.rb",
|
56
|
+
"spec/models/multi_field_artist.rb",
|
57
|
+
"spec/models/multi_field_artwork.rb",
|
58
|
+
"spec/models/partitioned_artist.rb",
|
59
|
+
"spec/mongoid/fulltext_spec.rb",
|
60
|
+
"spec/spec_helper.rb"
|
61
|
+
]
|
62
|
+
|
63
|
+
if s.respond_to? :specification_version then
|
64
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
65
|
+
s.specification_version = 3
|
66
|
+
|
67
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
68
|
+
s.add_development_dependency(%q<mongoid>, ["~> 2.0.0"])
|
69
|
+
s.add_development_dependency(%q<database_cleaner>, ["~> 0.6.0"])
|
70
|
+
s.add_development_dependency(%q<rspec>, ["~> 2.5.0"])
|
71
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
|
72
|
+
else
|
73
|
+
s.add_dependency(%q<mongoid>, ["~> 2.0.0"])
|
74
|
+
s.add_dependency(%q<database_cleaner>, ["~> 0.6.0"])
|
75
|
+
s.add_dependency(%q<rspec>, ["~> 2.5.0"])
|
76
|
+
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
77
|
+
end
|
78
|
+
else
|
79
|
+
s.add_dependency(%q<mongoid>, ["~> 2.0.0"])
|
80
|
+
s.add_dependency(%q<database_cleaner>, ["~> 0.6.0"])
|
81
|
+
s.add_dependency(%q<rspec>, ["~> 2.5.0"])
|
82
|
+
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
class ExternalArtworkNoFieldsSupplied
|
2
|
+
include Mongoid::Document
|
3
|
+
include Mongoid::FullTextSearch
|
4
|
+
field :title
|
5
|
+
field :year
|
6
|
+
field :artist
|
7
|
+
fulltext_search_in :index_name => 'mongoid_fulltext.artworks_and_artists'
|
8
|
+
|
9
|
+
def to_s
|
10
|
+
'%s (%s %s)' % [title, artist, year]
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
class FilteredArtist
|
2
|
+
include Mongoid::Document
|
3
|
+
include Mongoid::FullTextSearch
|
4
|
+
field :full_name
|
5
|
+
fulltext_search_in :full_name, :index_name => 'mongoid_fulltext.artworks_and_artists',
|
6
|
+
:filters => { :is_foobar => lambda { |x| x.full_name == 'foobar' },
|
7
|
+
:is_artist => lambda { |x| true },
|
8
|
+
:is_artwork => lambda { |x| false }
|
9
|
+
}
|
10
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
class FilteredArtwork
|
2
|
+
include Mongoid::Document
|
3
|
+
include Mongoid::FullTextSearch
|
4
|
+
field :title
|
5
|
+
fulltext_search_in :title, :index_name => 'mongoid_fulltext.artworks_and_artists',
|
6
|
+
:filters => { :is_foobar => lambda { |x| x.title == 'foobar' },
|
7
|
+
:is_artwork => lambda { |x| true },
|
8
|
+
:is_artist => lambda { |x| false }
|
9
|
+
}
|
10
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
class MultiExternalArtwork
|
2
|
+
include Mongoid::Document
|
3
|
+
include Mongoid::FullTextSearch
|
4
|
+
field :title
|
5
|
+
field :year
|
6
|
+
field :artist
|
7
|
+
fulltext_search_in :title, :index_name => 'mongoid_fulltext.titles'
|
8
|
+
fulltext_search_in :year, :index_name => 'mongoid_fulltext.years'
|
9
|
+
fulltext_search_in :title, :year, :artist, :index_name => 'mongoid_fulltext.all'
|
10
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
class PartitionedArtist
|
2
|
+
include Mongoid::Document
|
3
|
+
include Mongoid::FullTextSearch
|
4
|
+
|
5
|
+
field :full_name
|
6
|
+
field :exhibitions, type: Array, default: []
|
7
|
+
|
8
|
+
fulltext_search_in :full_name,
|
9
|
+
:index_name => 'mongoid_fulltext.partitioned_artists',
|
10
|
+
:filters => {
|
11
|
+
:has_exhibitions => lambda { |x| x.exhibitions.size > 0 },
|
12
|
+
:exhibitions => lambda { |x| [ x.exhibitions ].flatten },
|
13
|
+
}
|
14
|
+
|
15
|
+
end
|
@@ -0,0 +1,348 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Mongoid
  # Specs for Mongoid::FullTextSearch, exercised through the models under spec/models.
  # Each context creates its own documents with let! so they exist before every example.
  describe FullTextSearch do

    context "with several config options defined" do

      let!(:abcdef) { AdvancedArtwork.create(:title => 'abcdefg hijklmn') }

      it "should recognize all options" do
        # AdvancedArtwork is defined with an ngram_width of 4 and a different alphabet (abcdefg)
        AdvancedArtwork.fulltext_search('abc').should == []
        AdvancedArtwork.fulltext_search('abcd').first.should == abcdef
        AdvancedArtwork.fulltext_search('defg').first.should == abcdef
        AdvancedArtwork.fulltext_search('hijklmn').should == []
      end

    end

    context "with default settings" do

      let!(:flower_myth) { BasicArtwork.create(:title => 'Flower Myth') }
      let!(:flowers)     { BasicArtwork.create(:title => 'Flowers') }
      let!(:lowered)     { BasicArtwork.create(:title => 'Lowered') }
      let!(:cookies)     { BasicArtwork.create(:title => 'Cookies') }
      let!(:empty)       { BasicArtwork.create(:title => '') }

      it "returns exact matches" do
        BasicArtwork.fulltext_search('Flower Myth', :max_results => 1).first.should == flower_myth
        BasicArtwork.fulltext_search('Flowers', :max_results => 1).first.should == flowers
        BasicArtwork.fulltext_search('Cookies', :max_results => 1).first.should == cookies
        BasicArtwork.fulltext_search('Lowered', :max_results => 1).first.should == lowered
      end

      it "returns exact matches regardless of case" do
        BasicArtwork.fulltext_search('fLOWER mYTH', :max_results => 1).first.should == flower_myth
        BasicArtwork.fulltext_search('FLOWERS', :max_results => 1).first.should == flowers
        BasicArtwork.fulltext_search('cOOkies', :max_results => 1).first.should == cookies
        BasicArtwork.fulltext_search('lOWERED', :max_results => 1).first.should == lowered
      end

      it "returns all relevant results, sorted by relevance" do
        BasicArtwork.fulltext_search('Flowers').should == [flowers, flower_myth, lowered]
      end

      it "prefers prefix matches" do
        [flowers, flower_myth].should include(BasicArtwork.fulltext_search('Floweockies').first)
        BasicArtwork.fulltext_search('Lowers').first.should == lowered
        BasicArtwork.fulltext_search('Cookilowers').first.should == cookies
      end

      it "returns an empty result set for an empty query" do
        BasicArtwork.fulltext_search('').should be_empty
      end

      it "returns an empty result set for a query that doesn't contain any characters in the alphabet" do
        BasicArtwork.fulltext_search('_+=--@!##%#$%%').should be_empty
      end

      it "returns results for a query that contains only a single ngram" do
        BasicArtwork.fulltext_search('coo').first.should == cookies
        BasicArtwork.fulltext_search('c!!!oo').first.should == cookies
      end

    end

    context "with default settings" do

      let!(:yellow)           { BasicArtwork.create(:title => 'Yellow') }
      let!(:yellow_leaves_2)  { BasicArtwork.create(:title => 'Yellow Leaves 2') }
      let!(:yellow_leaves_3)  { BasicArtwork.create(:title => 'Yellow Leaves 3') }
      let!(:yellow_leaves_20) { BasicArtwork.create(:title => 'Yellow Leaves 20') }
      let!(:yellow_cup)       { BasicArtwork.create(:title => 'Yellow Cup') }

      it "prefers the best prefix that matches a given string" do
        BasicArtwork.fulltext_search('yellow').first.should == yellow
        # Both sides are sorted by title so only membership of the top three is compared.
        BasicArtwork.fulltext_search('yellow leaves', :max_results => 3).sort_by{ |x| x.title }.should == \
          [yellow_leaves_2, yellow_leaves_3, yellow_leaves_20].sort_by{ |x| x.title }
        BasicArtwork.fulltext_search('yellow cup').first.should == yellow_cup
      end

    end

    context "with default settings" do
      let!(:monet) { BasicArtwork.create(:title => 'claude monet') }
      let!(:one_month_weather_permitting) { BasicArtwork.create(:title => 'one month weather permitting monday') }

      it "finds better matches within exact strings" do
        BasicArtwork.fulltext_search('monet').first.should == monet
      end
    end

    context "with default settings" do

      let!(:abc)      { BasicArtwork.create(:title => "abc") }
      let!(:abcd)     { BasicArtwork.create(:title => "abcd") }
      let!(:abcde)    { BasicArtwork.create(:title => "abcde") }
      let!(:abcdef)   { BasicArtwork.create(:title => "abcdef") }
      let!(:abcdefg)  { BasicArtwork.create(:title => "abcdefg") }
      let!(:abcdefgh) { BasicArtwork.create(:title => "abcdefgh") }

      it "returns exact matches from a list of similar prefixes" do
        BasicArtwork.fulltext_search('abc').first.should == abc
        BasicArtwork.fulltext_search('abcd').first.should == abcd
        BasicArtwork.fulltext_search('abcde').first.should == abcde
        BasicArtwork.fulltext_search('abcdef').first.should == abcdef
        BasicArtwork.fulltext_search('abcdefg').first.should == abcdefg
        BasicArtwork.fulltext_search('abcdefgh').first.should == abcdefgh
      end

    end

    context "with an index name specified" do
      let!(:pablo_picasso)       { ExternalArtist.create(:full_name => 'Pablo Picasso') }
      let!(:portrait_of_picasso) { ExternalArtwork.create(:title => 'Portrait of Picasso') }
      let!(:andy_warhol)         { ExternalArtist.create(:full_name => 'Andy Warhol') }
      let!(:warhol)              { ExternalArtwork.create(:title => 'Warhol') }
      let!(:empty)               { ExternalArtwork.create(:title => '') }

      it "returns results of different types from the same query" do
        results = ExternalArtwork.fulltext_search('picasso', :max_results => 2)
        results.should include(portrait_of_picasso)
        results.should include(pablo_picasso)
        results = ExternalArtist.fulltext_search('picasso', :max_results => 2)
        results.should include(portrait_of_picasso)
        results.should include(pablo_picasso)
      end

      it "returns exact matches" do
        ExternalArtwork.fulltext_search('Pablo Picasso', :max_results => 1).first.should == pablo_picasso
        ExternalArtwork.fulltext_search('Portrait of Picasso', :max_results => 1).first.should == portrait_of_picasso
        ExternalArtwork.fulltext_search('Andy Warhol', :max_results => 1).first.should == andy_warhol
        ExternalArtwork.fulltext_search('Warhol', :max_results => 1).first.should == warhol
        ExternalArtist.fulltext_search('Pablo Picasso', :max_results => 1).first.should == pablo_picasso
        ExternalArtist.fulltext_search('Portrait of Picasso', :max_results => 1).first.should == portrait_of_picasso
        ExternalArtist.fulltext_search('Andy Warhol', :max_results => 1).first.should == andy_warhol
        ExternalArtist.fulltext_search('Warhol', :max_results => 1).first.should == warhol
      end

      it "returns exact matches regardless of case" do
        ExternalArtwork.fulltext_search('pABLO pICASSO', :max_results => 1).first.should == pablo_picasso
        ExternalArtist.fulltext_search('PORTRAIT OF PICASSO', :max_results => 1).first.should == portrait_of_picasso
        ExternalArtwork.fulltext_search('andy warhol', :max_results => 1).first.should == andy_warhol
        ExternalArtwork.fulltext_search('wArHoL', :max_results => 1).first.should == warhol
      end

      it "returns all relevant results, sorted by relevance" do
        ExternalArtist.fulltext_search('Pablo Picasso').should == [pablo_picasso, portrait_of_picasso]
        ExternalArtwork.fulltext_search('Pablo Picasso').should == [pablo_picasso, portrait_of_picasso]
        ExternalArtist.fulltext_search('Portrait of Picasso').should == [portrait_of_picasso, pablo_picasso]
        ExternalArtwork.fulltext_search('Portrait of Picasso').should == [portrait_of_picasso, pablo_picasso]
        ExternalArtist.fulltext_search('Andy Warhol').should == [andy_warhol, warhol]
        ExternalArtwork.fulltext_search('Andy Warhol').should == [andy_warhol, warhol]
        ExternalArtist.fulltext_search('Warhol').should == [warhol, andy_warhol]
        ExternalArtwork.fulltext_search('Warhol').should == [warhol, andy_warhol]
      end

      it "prefers prefix matches" do
        ExternalArtist.fulltext_search('PabloWarhol').first.should == pablo_picasso
        ExternalArtist.fulltext_search('AndyPicasso').first.should == andy_warhol
      end

      it "returns an empty result set for an empty query" do
        ExternalArtist.fulltext_search('').should be_empty
      end

      it "returns an empty result set for a query that doesn't contain any characters in the alphabet" do
        ExternalArtwork.fulltext_search('#$%!$#*%*').should be_empty
      end

      it "returns results for a query that contains only a single ngram" do
        ExternalArtist.fulltext_search('and').first.should == andy_warhol
      end

    end

    context "with an index name specified" do

      let!(:andy_warhol) { ExternalArtist.create(:full_name => 'Andy Warhol') }
      let!(:warhol)      { ExternalArtwork.create(:title => 'Warhol') }

      it "doesn't blow up if garbage is in the index collection" do
        ExternalArtist.fulltext_search('warhol').should == [warhol, andy_warhol]
        # Re-point warhol's index entries at a fresh id that matches no document.
        index_collection = ExternalArtist.collection.db.collection(ExternalArtist.mongoid_fulltext_config.keys.first)
        index_collection.update({'document_id' => warhol.id}, {'$set' => { 'document_id' => BSON::ObjectId.new }}, :multi => true)
        # We should no longer be able to find warhol, but that shouldn't keep it from returning results
        ExternalArtist.fulltext_search('warhol').should == [andy_warhol]
      end

    end

    context "with an index name specified" do

      let!(:pop)              { ExternalArtwork.create(:title => 'Pop') }
      let!(:pop_culture)      { ExternalArtwork.create(:title => 'Pop Culture') }
      let!(:contemporary_pop) { ExternalArtwork.create(:title => 'Contemporary Pop') }
      let!(:david_poppie)     { ExternalArtist.create(:full_name => 'David Poppie') }
      let!(:kung_fu_lollipop) { ExternalArtwork.create(:title => 'Kung-Fu Lollipop') }

      it "prefers the best prefix that matches a given string" do
        ExternalArtwork.fulltext_search('pop').first.should == pop
        ExternalArtwork.fulltext_search('poppie').first.should == david_poppie
        ExternalArtwork.fulltext_search('pop cult').first.should == pop_culture
        ExternalArtwork.fulltext_search('pop', :max_results => 5)[4].should == kung_fu_lollipop
      end

    end

    context "with an index name specified" do

      let!(:abc)      { ExternalArtwork.create(:title => "abc") }
      let!(:abcd)     { ExternalArtwork.create(:title => "abcd") }
      let!(:abcde)    { ExternalArtwork.create(:title => "abcde") }
      let!(:abcdef)   { ExternalArtwork.create(:title => "abcdef") }
      let!(:abcdefg)  { ExternalArtwork.create(:title => "abcdefg") }
      let!(:abcdefgh) { ExternalArtwork.create(:title => "abcdefgh") }

      it "returns exact matches from a list of similar prefixes" do
        ExternalArtwork.fulltext_search('abc').first.should == abc
        ExternalArtwork.fulltext_search('abcd').first.should == abcd
        ExternalArtwork.fulltext_search('abcde').first.should == abcde
        ExternalArtwork.fulltext_search('abcdef').first.should == abcdef
        ExternalArtwork.fulltext_search('abcdefg').first.should == abcdefg
        ExternalArtwork.fulltext_search('abcdefgh').first.should == abcdefgh
      end

    end

    context "with an index name specified" do

      it "cleans up items from the index after they're destroyed" do
        foobar = ExternalArtwork.create(:title => "foobar")
        barfoo = ExternalArtwork.create(:title => "barfoo")
        ExternalArtwork.fulltext_search('foobar').should == [foobar, barfoo]
        foobar.destroy
        ExternalArtwork.fulltext_search('foobar').should == [barfoo]
        barfoo.destroy
        ExternalArtwork.fulltext_search('foobar').should == []
      end

    end

    context "with an index name specified and no fields provided to index" do

      let!(:big_bang) { ExternalArtworkNoFieldsSupplied.create(:title => 'Big Bang', :artist => 'David Poppie', :year => '2009') }

      it "indexes the string returned by to_s" do
        ExternalArtworkNoFieldsSupplied.fulltext_search('big bang').first.should == big_bang
        ExternalArtworkNoFieldsSupplied.fulltext_search('poppie').first.should == big_bang
        ExternalArtworkNoFieldsSupplied.fulltext_search('2009').first.should == big_bang
      end

    end

    context "with multiple indexes defined" do

      let!(:pop)              { MultiExternalArtwork.create(:title => 'Pop', :year => '1970', :artist => 'Joe Schmoe') }
      let!(:pop_culture)      { MultiExternalArtwork.create(:title => 'Pop Culture', :year => '1977', :artist => 'Jim Schmoe') }
      let!(:contemporary_pop) { MultiExternalArtwork.create(:title => 'Contemporary Pop', :year => '1800', :artist => 'Bill Schmoe') }
      let!(:kung_fu_lollipop) { MultiExternalArtwork.create(:title => 'Kung-Fu Lollipop', :year => '2006', :artist => 'Michael Anderson') }

      it "allows searches to hit a particular index" do
        # Results and expectations are both sorted by title, so only membership is compared.
        title_results = MultiExternalArtwork.fulltext_search('pop', :index => 'mongoid_fulltext.titles').sort_by{ |x| x.title }
        title_results.should == [pop, pop_culture, contemporary_pop, kung_fu_lollipop].sort_by{ |x| x.title }
        year_results = MultiExternalArtwork.fulltext_search('197', :index => 'mongoid_fulltext.years').sort_by{ |x| x.title }
        year_results.should == [pop, pop_culture].sort_by{ |x| x.title }
        all_results = MultiExternalArtwork.fulltext_search('1800 and', :index => 'mongoid_fulltext.all').sort_by{ |x| x.title }
        all_results.should == [contemporary_pop, kung_fu_lollipop].sort_by{ |x| x.title }
      end

      it "should raise an error if you don't specify which index to search with" do
        lambda { MultiExternalArtwork.fulltext_search('foobar') }.should raise_error(Mongoid::FullTextSearch::UnspecifiedIndexError)
      end

    end

    context "with multiple fields indexed and the same index used by multiple models" do

      let!(:andy_warhol)         { MultiFieldArtist.create(:full_name => 'Andy Warhol', :birth_year => '1928') }
      let!(:warhol)              { MultiFieldArtwork.create(:title => 'Warhol', :year => '2010') }
      let!(:pablo_picasso)       { MultiFieldArtist.create(:full_name => 'Pablo Picasso', :birth_year => '1881') }
      let!(:portrait_of_picasso) { MultiFieldArtwork.create(:title => 'Portrait of Picasso', :year => '1912') }

      it "allows searches across all models on both fields indexed" do
        MultiFieldArtist.fulltext_search('2010').first.should == warhol
        MultiFieldArtist.fulltext_search('andy').first.should == andy_warhol
        MultiFieldArtist.fulltext_search('pablo').first.should == pablo_picasso
        MultiFieldArtist.fulltext_search('1881').first.should == pablo_picasso
        MultiFieldArtist.fulltext_search('portrait 1912').first.should == portrait_of_picasso

        MultiFieldArtwork.fulltext_search('2010').first.should == warhol
        MultiFieldArtwork.fulltext_search('andy').first.should == andy_warhol
        MultiFieldArtwork.fulltext_search('pablo').first.should == pablo_picasso
        MultiFieldArtwork.fulltext_search('1881').first.should == pablo_picasso
        MultiFieldArtwork.fulltext_search('portrait 1912').first.should == portrait_of_picasso
      end

    end

    context "with filters applied to multiple models" do

      let!(:foobar_artwork) { FilteredArtwork.create(:title => 'foobar') }
      let!(:barfoo_artwork) { FilteredArtwork.create(:title => 'barfoo') }
      let!(:foobar_artist)  { FilteredArtist.create(:full_name => 'foobar') }
      let!(:barfoo_artist)  { FilteredArtist.create(:full_name => 'barfoo') }

      it "allows filtered searches" do
        FilteredArtwork.fulltext_search('foobar', :is_artwork => true).should == [foobar_artwork, barfoo_artwork]
        FilteredArtist.fulltext_search('foobar', :is_artwork => true).should == [foobar_artwork, barfoo_artwork]

        FilteredArtwork.fulltext_search('foobar', :is_artwork => true, :is_foobar => true).should == [foobar_artwork]
        FilteredArtwork.fulltext_search('foobar', :is_artwork => true, :is_foobar => false).should == [barfoo_artwork]
        FilteredArtwork.fulltext_search('foobar', :is_artwork => false, :is_foobar => true).should == [foobar_artist]
        FilteredArtwork.fulltext_search('foobar', :is_artwork => false, :is_foobar => false).should == [barfoo_artist]

        FilteredArtist.fulltext_search('foobar', :is_artwork => true, :is_foobar => true).should == [foobar_artwork]
        FilteredArtist.fulltext_search('foobar', :is_artwork => true, :is_foobar => false).should == [barfoo_artwork]
        FilteredArtist.fulltext_search('foobar', :is_artwork => false, :is_foobar => true).should == [foobar_artist]
        FilteredArtist.fulltext_search('foobar', :is_artwork => false, :is_foobar => false).should == [barfoo_artist]
      end

    end

    context "with partitions applied to a model" do

      let!(:artist_2) { PartitionedArtist.create(:full_name => 'foobar', :exhibitions => [ "Art Basel 2011", "Armory NY" ]) }
      let!(:artist_1) { PartitionedArtist.create(:full_name => 'foobar', :exhibitions => [ "Art Basel 2011", ]) }
      let!(:artist_0) { PartitionedArtist.create(:full_name => 'foobar', :exhibitions => [ ]) }

      it "allows partitioned searches" do
        PartitionedArtist.fulltext_search('foobar').should == [ artist_2, artist_1, artist_0 ]
        PartitionedArtist.fulltext_search('foobar', :exhibitions => [ "Armory NY" ]).should == [ artist_2 ]
        PartitionedArtist.fulltext_search('foobar', :exhibitions => [ "Art Basel 2011" ]).should == [ artist_2, artist_1 ]
        PartitionedArtist.fulltext_search('foobar', :exhibitions => [ "Art Basel 2011", "Armory NY" ]).should == [ artist_2 ]
      end

    end

    context "using search options" do
      let!(:patterns) { BasicArtwork.create(:title => 'Flower Patterns') }
      let!(:flowers)  { BasicArtwork.create(:title => 'Flowers') }

      it "returns max_results" do
        BasicArtwork.fulltext_search('flower', { :max_results => 1 }).length.should == 1
      end

      it "returns scored results" do
        results = BasicArtwork.fulltext_search('flowers', { :return_scores => true })
        # Each scored result is expected to be a two-element [document, score] array.
        first_result = results[0]
        first_result.should be_an(Array)
        first_result.size.should == 2
        first_result[0].should == flowers
        first_result[1].should be_a(Float)
      end
    end

  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'rspec'
|
4
|
+
|
5
|
+
require 'mongoid'
|
6
|
+
require 'database_cleaner'
|
7
|
+
|
8
|
+
# Run the specs against the "mongoid_fulltext_test" database, reached
# through a Mongo connection created with default settings.
Mongoid.configure do |config|
  config.master = Mongo::Connection.new.db("mongoid_fulltext_test")
end
|
12
|
+
|
13
|
+
require File.expand_path("../../lib/mongoid_fulltext", __FILE__)
|
14
|
+
Dir["#{File.dirname(__FILE__)}/models/*.rb"].each { |f| require f }
|
15
|
+
|
16
|
+
# Global RSpec hooks: select DatabaseCleaner's truncation strategy once per
# example group, then clean the database before every example.
# `RSpec` is the RSpec 2 top-level module; the old `Rspec` spelling only
# resolves through a deprecation shim.
RSpec.configure do |c|
  c.before(:all) { DatabaseCleaner.strategy = :truncation }
  c.before(:each) { DatabaseCleaner.clean }
end
|
20
|
+
|
metadata
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mongoid_fulltext
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.6
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Aaron Windsor
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-05-27 00:00:00.000000000 -04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: mongoid
|
17
|
+
requirement: &82366030 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ~>
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.0.0
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *82366030
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: database_cleaner
|
28
|
+
requirement: &82365790 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.6.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *82365790
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: rspec
|
39
|
+
requirement: &82365550 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ~>
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: 2.5.0
|
45
|
+
type: :development
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *82365550
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: jeweler
|
50
|
+
requirement: &82365310 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ~>
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 1.5.2
|
56
|
+
type: :development
|
57
|
+
prerelease: false
|
58
|
+
version_requirements: *82365310
|
59
|
+
description: Full-text search for the Mongoid ORM, using n-grams extracted from text
|
60
|
+
email: aaron.windsor@gmail.com
|
61
|
+
executables: []
|
62
|
+
extensions: []
|
63
|
+
extra_rdoc_files:
|
64
|
+
- LICENSE
|
65
|
+
- README.md
|
66
|
+
files:
|
67
|
+
- .document
|
68
|
+
- Gemfile
|
69
|
+
- LICENSE
|
70
|
+
- README.md
|
71
|
+
- Rakefile
|
72
|
+
- VERSION
|
73
|
+
- lib/mongoid_fulltext.rb
|
74
|
+
- mongoid_fulltext.gemspec
|
75
|
+
- spec/models/advanced_artwork.rb
|
76
|
+
- spec/models/basic_artwork.rb
|
77
|
+
- spec/models/external_artist.rb
|
78
|
+
- spec/models/external_artwork.rb
|
79
|
+
- spec/models/external_artwork_no_fields_supplied.rb
|
80
|
+
- spec/models/filtered_artist.rb
|
81
|
+
- spec/models/filtered_artwork.rb
|
82
|
+
- spec/models/multi_external_artwork.rb
|
83
|
+
- spec/models/multi_field_artist.rb
|
84
|
+
- spec/models/multi_field_artwork.rb
|
85
|
+
- spec/models/partitioned_artist.rb
|
86
|
+
- spec/mongoid/fulltext_spec.rb
|
87
|
+
- spec/spec_helper.rb
|
88
|
+
has_rdoc: true
|
89
|
+
homepage: http://github.com/aaw/mongoid_fulltext
|
90
|
+
licenses:
|
91
|
+
- MIT
|
92
|
+
post_install_message:
|
93
|
+
rdoc_options: []
|
94
|
+
require_paths:
|
95
|
+
- lib
|
96
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ! '>='
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
requirements: []
|
109
|
+
rubyforge_project:
|
110
|
+
rubygems_version: 1.6.2
|
111
|
+
signing_key:
|
112
|
+
specification_version: 3
|
113
|
+
summary: Full-text search for the Mongoid ORM
|
114
|
+
test_files:
|
115
|
+
- spec/models/advanced_artwork.rb
|
116
|
+
- spec/models/basic_artwork.rb
|
117
|
+
- spec/models/external_artist.rb
|
118
|
+
- spec/models/external_artwork.rb
|
119
|
+
- spec/models/external_artwork_no_fields_supplied.rb
|
120
|
+
- spec/models/filtered_artist.rb
|
121
|
+
- spec/models/filtered_artwork.rb
|
122
|
+
- spec/models/multi_external_artwork.rb
|
123
|
+
- spec/models/multi_field_artist.rb
|
124
|
+
- spec/models/multi_field_artwork.rb
|
125
|
+
- spec/models/partitioned_artist.rb
|
126
|
+
- spec/mongoid/fulltext_spec.rb
|
127
|
+
- spec/spec_helper.rb
|