mongoid_fulltext 0.4.5 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +14 -4
- data/VERSION +1 -1
- data/lib/mongoid_fulltext.rb +9 -13
- data/mongoid_fulltext.gemspec +2 -2
- data/spec/models/external_artist.rb +5 -0
- data/spec/models/external_artwork.rb +5 -0
- data/spec/mongoid/fulltext_spec.rb +41 -1
- metadata +13 -13
data/README.md
CHANGED
@@ -50,6 +50,16 @@ To return a pair of `[ result, score ]` instead of an array of results, pass the
|
|
50
50
|
|
51
51
|
Artist.fulltext_search("vince vangogh", { :return_scores => true })
|
52
52
|
|
53
|
+
The larger a score is, the better mongoid_fulltext thinks the match is. The scores have the following rough
|
54
|
+
interpretation that you can use to make decisions about whether the match is good enough:
|
55
|
+
|
56
|
+
* If a prefix of your query matches something indexed, or if your query matches a prefix of something
|
57
|
+
indexed (for example, searching for "foo" finds "foobar" or searching for "foobar" finds "foo"), you
|
58
|
+
can expect a score of at least 1 for the match.
|
59
|
+
* If an entire word in your query matches an entire word that's indexed and you have the `index_full_words`
|
60
|
+
option turned on (it's turned on by default), you can expect a score of at least 2 for the match.
|
61
|
+
* If neither of the above criteria is met, you can expect a score less than 1.
|
62
|
+
|
53
63
|
If you don't specify a field to index, the default is the result of `to_s` called on the object.
|
54
64
|
The following definition will index the first and last name of an artist:
|
55
65
|
|
@@ -219,10 +229,10 @@ Additional indexing/query options can be used as parameters to `fulltext_search_
|
|
219
229
|
and then finally stripped, as before.
|
220
230
|
* `update_if`: controls whether or not the index will be updated. This can be set to a symbol,
|
221
231
|
string, or proc. If the result of evaluating the value is true, the index will be updated.
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
232
|
+
* When set to a symbol, the symbol is sent to the document.
|
233
|
+
* When set to a string, the string is evaluated within the document's instance.
|
234
|
+
* When set to a proc, the proc is called, and the document is given to the proc as the first arg.
|
235
|
+
* When set to any other type of object, the document's index will not be updated.
|
226
236
|
|
227
237
|
Array filters
|
228
238
|
-------------
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.5
|
1
|
+
0.5.0
|
data/lib/mongoid_fulltext.rb
CHANGED
@@ -153,7 +153,6 @@ module Mongoid::FullTextSearch
|
|
153
153
|
all_scores.concat(scores)
|
154
154
|
end
|
155
155
|
all_scores.sort!{ |document1, document2| -document1[:score] <=> -document2[:score] }
|
156
|
-
|
157
156
|
instantiate_mapreduce_results(all_scores[0..max_results-1], { :return_scores => return_scores })
|
158
157
|
end
|
159
158
|
|
@@ -189,7 +188,7 @@ module Mongoid::FullTextSearch
|
|
189
188
|
step_size = 1
|
190
189
|
end
|
191
190
|
|
192
|
-
# array of ngrams
|
191
|
+
# Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the input string
|
193
192
|
ngram_ary = (0..filtered_str.length - config[:ngram_width]).step(step_size).map do |i|
|
194
193
|
if i == 0 or (config[:apply_prefix_scoring_to_all_words] and \
|
195
194
|
config[:word_separators].has_key?(filtered_str[i-1].chr))
|
@@ -197,25 +196,22 @@ module Mongoid::FullTextSearch
|
|
197
196
|
else
|
198
197
|
score = Math.sqrt(2.0/filtered_str.length)
|
199
198
|
end
|
200
|
-
|
199
|
+
{:ngram => filtered_str[i..i+config[:ngram_width]-1], :score => score}
|
201
200
|
end
|
201
|
+
|
202
|
+
# If an ngram appears multiple times in the query string, keep the max score
|
203
|
+
ngram_ary = ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| {:ngram => key, :score => values.map{ |v| v[:score] }.max} }
|
202
204
|
|
203
205
|
if (config[:index_full_words])
|
204
206
|
filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word|
|
205
207
|
if word.length >= config[:ngram_width]
|
206
|
-
ngram_ary <<
|
208
|
+
ngram_ary << {:ngram => word, :score => 1}
|
207
209
|
end
|
208
210
|
end
|
209
211
|
end
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
# deduplicate, and keep the highest score
|
214
|
-
ngram_ary.each do |ngram, score, position|
|
215
|
-
ngram_hash[ngram] = [ngram_hash[ngram] || 0, score].max
|
216
|
-
end
|
217
|
-
|
218
|
-
ngram_hash
|
212
|
+
|
213
|
+
# If an ngram appears both as a full word and as a regular ngram, keep the sum of the two scores
|
214
|
+
Hash[ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| [key, values.map{ |v| v[:score] }.sum] }]
|
219
215
|
end
|
220
216
|
|
221
217
|
def remove_from_ngram_index
|
data/mongoid_fulltext.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{mongoid_fulltext}
|
8
|
-
s.version = "0.4.5"
|
8
|
+
s.version = "0.5.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Aaron Windsor"]
|
12
|
-
s.date = %q{2011-10-
|
12
|
+
s.date = %q{2011-10-11}
|
13
13
|
s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
|
14
14
|
s.email = %q{aaron.windsor@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -391,7 +391,7 @@ module Mongoid
|
|
391
391
|
end
|
392
392
|
|
393
393
|
end
|
394
|
-
|
394
|
+
|
395
395
|
context "using search options" do
|
396
396
|
let!(:patterns) { BasicArtwork.create(:title => 'Flower Patterns') }
|
397
397
|
let!(:flowers) { BasicArtwork.create(:title => 'Flowers') }
|
@@ -410,6 +410,46 @@ module Mongoid
|
|
410
410
|
end
|
411
411
|
end
|
412
412
|
|
413
|
+
context "returning scores" do
|
414
|
+
# Since we return scores, let's make some weak guarantees about what they actually mean
|
415
|
+
|
416
|
+
let!(:mao_yan) { ExternalArtist.create(:full_name => "Mao Yan") }
|
417
|
+
let!(:mao) { ExternalArtwork.create(:title => "Mao by Andy Warhol") }
|
418
|
+
let!(:maox) { ExternalArtwork.create(:title => "Maox by Randy Morehall") }
|
419
|
+
let!(:somao) { ExternalArtwork.create(:title => "Somao by Randy Morehall") }
|
420
|
+
|
421
|
+
it "returns basic matches that don't match a whole word and aren't prefixes with score < 1" do
|
422
|
+
['paox', 'porehall'].each do |query|
|
423
|
+
results = ExternalArtist.fulltext_search(query, { :return_scores => true })
|
424
|
+
results.length.should > 0
|
425
|
+
results.map{ |result| result[-1] }.inject(true){ |accum, item| accum &= (item < 1) }.should be_true
|
426
|
+
end
|
427
|
+
end
|
428
|
+
|
429
|
+
it "returns prefix matches with a score >= 1 but < 2" do
|
430
|
+
['warho', 'rand'].each do |query|
|
431
|
+
results = ExternalArtist.fulltext_search(query, { :return_scores => true })
|
432
|
+
results.length.should > 0
|
433
|
+
results.map{ |result| result[-1] if result[0].to_s.starts_with?(query)}
|
434
|
+
.compact
|
435
|
+
.inject(true){ |accum, item| accum &= (item >= 1 and item < 2) }
|
436
|
+
.should be_true
|
437
|
+
end
|
438
|
+
end
|
439
|
+
|
440
|
+
it "returns full-word matches with a score >= 2" do
|
441
|
+
['andy', 'warhol', 'mao'].each do |query|
|
442
|
+
results = ExternalArtist.fulltext_search(query, { :return_scores => true })
|
443
|
+
results.length.should > 0
|
444
|
+
results.map{ |result| result[-1] if result[0].to_s.split(' ').member?(query) }
|
445
|
+
.compact
|
446
|
+
.inject(true){ |accum, item| accum &= (item >= 2) }
|
447
|
+
.should be_true
|
448
|
+
end
|
449
|
+
end
|
450
|
+
|
451
|
+
end
|
452
|
+
|
413
453
|
context "remove_from_ngram_index" do
|
414
454
|
let!(:flowers1) { BasicArtwork.create(:title => 'Flowers 1') }
|
415
455
|
let!(:flowers2) { BasicArtwork.create(:title => 'Flowers 1') }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mongoid_fulltext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.5
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-10-
|
12
|
+
date: 2011-10-11 00:00:00.000000000 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: unicode_utils
|
17
|
-
requirement: &
|
17
|
+
requirement: &79126690 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 1.0.0
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *79126690
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: mongoid
|
28
|
-
requirement: &
|
28
|
+
requirement: &79116990 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 2.0.0
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *79116990
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: bson_ext
|
39
|
-
requirement: &
|
39
|
+
requirement: &79116510 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ~>
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 1.3.0
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *79116510
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: rspec
|
50
|
-
requirement: &
|
50
|
+
requirement: &79116050 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ~>
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 2.5.0
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *79116050
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: jeweler
|
61
|
-
requirement: &
|
61
|
+
requirement: &79115620 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ~>
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
version: 1.5.2
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *79115620
|
70
70
|
description: Full-text search for the Mongoid ORM, using n-grams extracted from text
|
71
71
|
email: aaron.windsor@gmail.com
|
72
72
|
executables: []
|
@@ -118,7 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
118
118
|
version: '0'
|
119
119
|
segments:
|
120
120
|
- 0
|
121
|
-
hash:
|
121
|
+
hash: -203766451
|
122
122
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
123
|
none: false
|
124
124
|
requirements:
|