mongoid_fulltext 0.4.5 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +14 -4
- data/VERSION +1 -1
- data/lib/mongoid_fulltext.rb +9 -13
- data/mongoid_fulltext.gemspec +2 -2
- data/spec/models/external_artist.rb +5 -0
- data/spec/models/external_artwork.rb +5 -0
- data/spec/mongoid/fulltext_spec.rb +41 -1
- metadata +13 -13
data/README.md
CHANGED
@@ -50,6 +50,16 @@ To return a pair of `[ result, score ]` instead of an array of results, pass the
|
|
50
50
|
|
51
51
|
Artist.fulltext_search("vince vangogh", { :return_scores => true })
|
52
52
|
|
53
|
+
The larger a score is, the better mongoid_fulltext thinks the match is. The scores have the following rough
|
54
|
+
interpretation that you can use to make decisions about whether the match is good enough:
|
55
|
+
|
56
|
+
* If a prefix of your query matches something indexed, or if your query matches a prefix of something
|
57
|
+
indexed (for example, searching for "foo" finds "foobar" or searching for "foobar" finds "foo"), you
|
58
|
+
can expect a score of at least 1 for the match.
|
59
|
+
* If an entire word in your query matches an entire word that's indexed and you have the `index_full_words`
|
60
|
+
option turned on (it's turned on by default), you can expect a score of at least 2 for the match.
|
61
|
+
* If neither of the above criteria is met, you can expect a score less than 1.
|
62
|
+
|
53
63
|
If you don't specify a field to index, the default is the result of `to_s` called on the object.
|
54
64
|
The following definition will index the first and last name of an artist:
|
55
65
|
|
@@ -219,10 +229,10 @@ Additional indexing/query options can be used as parameters to `fulltext_search_
|
|
219
229
|
and then finally stripped, as before.
|
220
230
|
* `update_if`: controls whether or not the index will be updated. This can be set to a symbol,
|
221
231
|
string, or proc. If the result of evaluating the value is true, the index will be updated.
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
232
|
+
* When set to a symbol, the symbol is sent to the document.
|
233
|
+
* When set to a string, the string is evaluated within the document's instance.
|
234
|
+
* When set to a proc, the proc is called, and the document is given to the proc as the first arg.
|
235
|
+
* When set to any other type of object, the document's index will not be updated.
|
226
236
|
|
227
237
|
Array filters
|
228
238
|
-------------
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.5
|
1
|
+
0.5.0
|
data/lib/mongoid_fulltext.rb
CHANGED
@@ -153,7 +153,6 @@ module Mongoid::FullTextSearch
|
|
153
153
|
all_scores.concat(scores)
|
154
154
|
end
|
155
155
|
all_scores.sort!{ |document1, document2| -document1[:score] <=> -document2[:score] }
|
156
|
-
|
157
156
|
instantiate_mapreduce_results(all_scores[0..max_results-1], { :return_scores => return_scores })
|
158
157
|
end
|
159
158
|
|
@@ -189,7 +188,7 @@ module Mongoid::FullTextSearch
|
|
189
188
|
step_size = 1
|
190
189
|
end
|
191
190
|
|
192
|
-
# array of ngrams
|
191
|
+
# Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the input string
|
193
192
|
ngram_ary = (0..filtered_str.length - config[:ngram_width]).step(step_size).map do |i|
|
194
193
|
if i == 0 or (config[:apply_prefix_scoring_to_all_words] and \
|
195
194
|
config[:word_separators].has_key?(filtered_str[i-1].chr))
|
@@ -197,25 +196,22 @@ module Mongoid::FullTextSearch
|
|
197
196
|
else
|
198
197
|
score = Math.sqrt(2.0/filtered_str.length)
|
199
198
|
end
|
200
|
-
|
199
|
+
{:ngram => filtered_str[i..i+config[:ngram_width]-1], :score => score}
|
201
200
|
end
|
201
|
+
|
202
|
+
# If an ngram appears multiple times in the query string, keep the max score
|
203
|
+
ngram_ary = ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| {:ngram => key, :score => values.map{ |v| v[:score] }.max} }
|
202
204
|
|
203
205
|
if (config[:index_full_words])
|
204
206
|
filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word|
|
205
207
|
if word.length >= config[:ngram_width]
|
206
|
-
ngram_ary <<
|
208
|
+
ngram_ary << {:ngram => word, :score => 1}
|
207
209
|
end
|
208
210
|
end
|
209
211
|
end
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
# deduplicate, and keep the highest score
|
214
|
-
ngram_ary.each do |ngram, score, position|
|
215
|
-
ngram_hash[ngram] = [ngram_hash[ngram] || 0, score].max
|
216
|
-
end
|
217
|
-
|
218
|
-
ngram_hash
|
212
|
+
|
213
|
+
# If an ngram appears as a full word and an ngram, keep the sum of the two scores
|
214
|
+
Hash[ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| [key, values.map{ |v| v[:score] }.sum] }]
|
219
215
|
end
|
220
216
|
|
221
217
|
def remove_from_ngram_index
|
data/mongoid_fulltext.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{mongoid_fulltext}
|
8
|
-
s.version = "0.4.5"
|
8
|
+
s.version = "0.5.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Aaron Windsor"]
|
12
|
-
s.date = %q{2011-10-
|
12
|
+
s.date = %q{2011-10-11}
|
13
13
|
s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
|
14
14
|
s.email = %q{aaron.windsor@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -391,7 +391,7 @@ module Mongoid
|
|
391
391
|
end
|
392
392
|
|
393
393
|
end
|
394
|
-
|
394
|
+
|
395
395
|
context "using search options" do
|
396
396
|
let!(:patterns) { BasicArtwork.create(:title => 'Flower Patterns') }
|
397
397
|
let!(:flowers) { BasicArtwork.create(:title => 'Flowers') }
|
@@ -410,6 +410,46 @@ module Mongoid
|
|
410
410
|
end
|
411
411
|
end
|
412
412
|
|
413
|
+
context "returning scores" do
|
414
|
+
# Since we return scores, let's make some weak guarantees about what they actually mean
|
415
|
+
|
416
|
+
let!(:mao_yan) { ExternalArtist.create(:full_name => "Mao Yan") }
|
417
|
+
let!(:mao) { ExternalArtwork.create(:title => "Mao by Andy Warhol") }
|
418
|
+
let!(:maox) { ExternalArtwork.create(:title => "Maox by Randy Morehall") }
|
419
|
+
let!(:somao) { ExternalArtwork.create(:title => "Somao by Randy Morehall") }
|
420
|
+
|
421
|
+
it "returns basic matches that don't match a whole word and aren't prefixes with score < 1" do
|
422
|
+
['paox', 'porehall'].each do |query|
|
423
|
+
results = ExternalArtist.fulltext_search(query, { :return_scores => true })
|
424
|
+
results.length.should > 0
|
425
|
+
results.map{ |result| result[-1] }.inject(true){ |accum, item| accum &= (item < 1) }.should be_true
|
426
|
+
end
|
427
|
+
end
|
428
|
+
|
429
|
+
it "returns prefix matches with a score >= 1 but < 2" do
|
430
|
+
['warho', 'rand'].each do |query|
|
431
|
+
results = ExternalArtist.fulltext_search(query, { :return_scores => true })
|
432
|
+
results.length.should > 0
|
433
|
+
results.map{ |result| result[-1] if result[0].to_s.starts_with?(query)}
|
434
|
+
.compact
|
435
|
+
.inject(true){ |accum, item| accum &= (item >= 1 and item < 2) }
|
436
|
+
.should be_true
|
437
|
+
end
|
438
|
+
end
|
439
|
+
|
440
|
+
it "returns full-word matches with a score >= 2" do
|
441
|
+
['andy', 'warhol', 'mao'].each do |query|
|
442
|
+
results = ExternalArtist.fulltext_search(query, { :return_scores => true })
|
443
|
+
results.length.should > 0
|
444
|
+
results.map{ |result| result[-1] if result[0].to_s.split(' ').member?(query) }
|
445
|
+
.compact
|
446
|
+
.inject(true){ |accum, item| accum &= (item >= 2) }
|
447
|
+
.should be_true
|
448
|
+
end
|
449
|
+
end
|
450
|
+
|
451
|
+
end
|
452
|
+
|
413
453
|
context "remove_from_ngram_index" do
|
414
454
|
let!(:flowers1) { BasicArtwork.create(:title => 'Flowers 1') }
|
415
455
|
let!(:flowers2) { BasicArtwork.create(:title => 'Flowers 1') }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mongoid_fulltext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.5
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-10-
|
12
|
+
date: 2011-10-11 00:00:00.000000000 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: unicode_utils
|
17
|
-
requirement: &
|
17
|
+
requirement: &79126690 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 1.0.0
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *79126690
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: mongoid
|
28
|
-
requirement: &
|
28
|
+
requirement: &79116990 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 2.0.0
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *79116990
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: bson_ext
|
39
|
-
requirement: &
|
39
|
+
requirement: &79116510 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ~>
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 1.3.0
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *79116510
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: rspec
|
50
|
-
requirement: &
|
50
|
+
requirement: &79116050 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ~>
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 2.5.0
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *79116050
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: jeweler
|
61
|
-
requirement: &
|
61
|
+
requirement: &79115620 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ~>
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
version: 1.5.2
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *79115620
|
70
70
|
description: Full-text search for the Mongoid ORM, using n-grams extracted from text
|
71
71
|
email: aaron.windsor@gmail.com
|
72
72
|
executables: []
|
@@ -118,7 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
118
118
|
version: '0'
|
119
119
|
segments:
|
120
120
|
- 0
|
121
|
-
hash:
|
121
|
+
hash: -203766451
|
122
122
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
123
|
none: false
|
124
124
|
requirements:
|