mongoid_fulltext 0.4.5 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -50,6 +50,16 @@ To return a pair of `[ result, score ]` instead of an array of results, pass the
50
50
 
51
51
  Artist.fulltext_search("vince vangogh", { :return_scores => true })
52
52
 
53
+ The larger a score is, the better mongoid_fulltext thinks the match is. The scores have the following rough
54
+ interpretation that you can use to make decisions about whether the match is good enough:
55
+
56
+ * If a prefix of your query matches something indexed, or if your query matches a prefix of something
57
+ indexed (for example, searching for "foo" finds "foobar" or searching for "foobar" finds "foo"), you
58
+ can expect a score of at least 1 for the match.
59
+ * If an entire word in your query matches an entire word that's indexed and you have the `index_full_words`
60
+ option turned on (it's turned on by default), you can expect a score of at least 2 for the match.
61
+ * If neither of the above criteria is met, you can expect a score of less than 1.
62
+
53
63
  If you don't specify a field to index, the default is the result of `to_s` called on the object.
54
64
  The following definition will index the first and last name of an artist:
55
65
 
@@ -219,10 +229,10 @@ Additional indexing/query options can be used as parameters to `fulltext_search_
219
229
  and then finally stripped, as before.
220
230
  * `update_if`: controls whether or not the index will be updated. This can be set to a symbol,
221
231
  string, or proc. If the result of evaluating the value is true, the index will be updated.
222
- ** When set to a symbol, the symbol is sent to the document.
223
- ** When set to a string, the string is evaluated within the document's instance.
224
- ** When set to a proc, the proc is called, and the document is given to the proc as the first arg.
225
- ** When set to any other type of object, the document's index will not be updated.
232
+ * When set to a symbol, the symbol is sent to the document.
233
+ * When set to a string, the string is evaluated within the document's instance.
234
+ * When set to a proc, the proc is called, and the document is given to the proc as the first arg.
235
+ * When set to any other type of object, the document's index will not be updated.
226
236
 
227
237
  Array filters
228
238
  -------------
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.5
1
+ 0.5.0
@@ -153,7 +153,6 @@ module Mongoid::FullTextSearch
153
153
  all_scores.concat(scores)
154
154
  end
155
155
  all_scores.sort!{ |document1, document2| -document1[:score] <=> -document2[:score] }
156
-
157
156
  instantiate_mapreduce_results(all_scores[0..max_results-1], { :return_scores => return_scores })
158
157
  end
159
158
 
@@ -189,7 +188,7 @@ module Mongoid::FullTextSearch
189
188
  step_size = 1
190
189
  end
191
190
 
192
- # array of ngrams
191
+ # Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the input string
193
192
  ngram_ary = (0..filtered_str.length - config[:ngram_width]).step(step_size).map do |i|
194
193
  if i == 0 or (config[:apply_prefix_scoring_to_all_words] and \
195
194
  config[:word_separators].has_key?(filtered_str[i-1].chr))
@@ -197,25 +196,22 @@ module Mongoid::FullTextSearch
197
196
  else
198
197
  score = Math.sqrt(2.0/filtered_str.length)
199
198
  end
200
- [filtered_str[i..i+config[:ngram_width]-1], score]
199
+ {:ngram => filtered_str[i..i+config[:ngram_width]-1], :score => score}
201
200
  end
201
+
202
+ # If an ngram appears multiple times in the query string, keep the max score
203
+ ngram_ary = ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| {:ngram => key, :score => values.map{ |v| v[:score] }.max} }
202
204
 
203
205
  if (config[:index_full_words])
204
206
  filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word|
205
207
  if word.length >= config[:ngram_width]
206
- ngram_ary << [ word, 1 ]
208
+ ngram_ary << {:ngram => word, :score => 1}
207
209
  end
208
210
  end
209
211
  end
210
-
211
- ngram_hash = {}
212
-
213
- # deduplicate, and keep the highest score
214
- ngram_ary.each do |ngram, score, position|
215
- ngram_hash[ngram] = [ngram_hash[ngram] || 0, score].max
216
- end
217
-
218
- ngram_hash
212
+
213
+ # If an ngram appears as a full word and an ngram, keep the sum of the two scores
214
+ Hash[ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| [key, values.map{ |v| v[:score] }.sum] }]
219
215
  end
220
216
 
221
217
  def remove_from_ngram_index
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{mongoid_fulltext}
8
- s.version = "0.4.5"
8
+ s.version = "0.5.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Aaron Windsor"]
12
- s.date = %q{2011-10-05}
12
+ s.date = %q{2011-10-11}
13
13
  s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
14
14
  s.email = %q{aaron.windsor@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -3,4 +3,9 @@ class ExternalArtist
3
3
  include Mongoid::FullTextSearch
4
4
  field :full_name
5
5
  fulltext_search_in :full_name, :index_name => 'mongoid_fulltext.artworks_and_artists'
6
+
7
+ def to_s
8
+ full_name
9
+ end
10
+
6
11
  end
@@ -3,4 +3,9 @@ class ExternalArtwork
3
3
  include Mongoid::FullTextSearch
4
4
  field :title
5
5
  fulltext_search_in :title, :index_name => 'mongoid_fulltext.artworks_and_artists'
6
+
7
+ def to_s
8
+ title
9
+ end
10
+
6
11
  end
@@ -391,7 +391,7 @@ module Mongoid
391
391
  end
392
392
 
393
393
  end
394
-
394
+
395
395
  context "using search options" do
396
396
  let!(:patterns) { BasicArtwork.create(:title => 'Flower Patterns') }
397
397
  let!(:flowers) { BasicArtwork.create(:title => 'Flowers') }
@@ -410,6 +410,46 @@ module Mongoid
410
410
  end
411
411
  end
412
412
 
413
+ context "returning scores" do
414
+ # Since we return scores, let's make some weak guarantees about what they actually mean
415
+
416
+ let!(:mao_yan) { ExternalArtist.create(:full_name => "Mao Yan") }
417
+ let!(:mao) { ExternalArtwork.create(:title => "Mao by Andy Warhol") }
418
+ let!(:maox) { ExternalArtwork.create(:title => "Maox by Randy Morehall") }
419
+ let!(:somao) { ExternalArtwork.create(:title => "Somao by Randy Morehall") }
420
+
421
+ it "returns basic matches that don't match a whole word and aren't prefixes with score < 1" do
422
+ ['paox', 'porehall'].each do |query|
423
+ results = ExternalArtist.fulltext_search(query, { :return_scores => true })
424
+ results.length.should > 0
425
+ results.map{ |result| result[-1] }.inject(true){ |accum, item| accum &= (item < 1) }.should be_true
426
+ end
427
+ end
428
+
429
+ it "returns prefix matches with a score >= 1 but < 2" do
430
+ ['warho', 'rand'].each do |query|
431
+ results = ExternalArtist.fulltext_search(query, { :return_scores => true })
432
+ results.length.should > 0
433
+ results.map{ |result| result[-1] if result[0].to_s.starts_with?(query)}
434
+ .compact
435
+ .inject(true){ |accum, item| accum &= (item >= 1 and item < 2) }
436
+ .should be_true
437
+ end
438
+ end
439
+
440
+ it "returns full-word matches with a score >= 2" do
441
+ ['andy', 'warhol', 'mao'].each do |query|
442
+ results = ExternalArtist.fulltext_search(query, { :return_scores => true })
443
+ results.length.should > 0
444
+ results.map{ |result| result[-1] if result[0].to_s.split(' ').member?(query) }
445
+ .compact
446
+ .inject(true){ |accum, item| accum &= (item >= 2) }
447
+ .should be_true
448
+ end
449
+ end
450
+
451
+ end
452
+
413
453
  context "remove_from_ngram_index" do
414
454
  let!(:flowers1) { BasicArtwork.create(:title => 'Flowers 1') }
415
455
  let!(:flowers2) { BasicArtwork.create(:title => 'Flowers 1') }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mongoid_fulltext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.5
4
+ version: 0.5.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-05 00:00:00.000000000 -04:00
12
+ date: 2011-10-11 00:00:00.000000000 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: unicode_utils
17
- requirement: &83773400 !ruby/object:Gem::Requirement
17
+ requirement: &79126690 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ~>
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: 1.0.0
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *83773400
25
+ version_requirements: *79126690
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: mongoid
28
- requirement: &83773150 !ruby/object:Gem::Requirement
28
+ requirement: &79116990 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ~>
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 2.0.0
34
34
  type: :development
35
35
  prerelease: false
36
- version_requirements: *83773150
36
+ version_requirements: *79116990
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: bson_ext
39
- requirement: &83772860 !ruby/object:Gem::Requirement
39
+ requirement: &79116510 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ~>
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: 1.3.0
45
45
  type: :development
46
46
  prerelease: false
47
- version_requirements: *83772860
47
+ version_requirements: *79116510
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: rspec
50
- requirement: &83772590 !ruby/object:Gem::Requirement
50
+ requirement: &79116050 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ~>
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 2.5.0
56
56
  type: :development
57
57
  prerelease: false
58
- version_requirements: *83772590
58
+ version_requirements: *79116050
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: jeweler
61
- requirement: &83772310 !ruby/object:Gem::Requirement
61
+ requirement: &79115620 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ~>
@@ -66,7 +66,7 @@ dependencies:
66
66
  version: 1.5.2
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *83772310
69
+ version_requirements: *79115620
70
70
  description: Full-text search for the Mongoid ORM, using n-grams extracted from text
71
71
  email: aaron.windsor@gmail.com
72
72
  executables: []
@@ -118,7 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
118
118
  version: '0'
119
119
  segments:
120
120
  - 0
121
- hash: 700135439
121
+ hash: -203766451
122
122
  required_rubygems_version: !ruby/object:Gem::Requirement
123
123
  none: false
124
124
  requirements: