mongoid_fulltext 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -50,6 +50,16 @@ To return a pair of `[ result, score ]` instead of an array of results, pass the
50
50
 
51
51
  Artist.fulltext_search("vince vangogh", { :return_scores => true })
52
52
 
53
+ The larger a score is, the better mongoid_fulltext thinks the match is. The scores have the following rough
54
+ interpretation that you can use to make decisions about whether the match is good enough:
55
+
56
+ * If a prefix of your query matches something indexed, or if your query matches a prefix of something
57
+ indexed (for example, searching for "foo" finds "foobar" or searching for "foobar" finds "foo"), you
58
+ can expect a score of at least 1 for the match.
59
+ * If an entire word in your query matches an entire word that's indexed and you have the `index_full_words`
60
+ option turned on (it's turned on by default), you can expect a score of at least 2 for the match.
61
+ * If neither of the above criteria is met, you can expect a score of less than 1.
62
+
53
63
  If you don't specify a field to index, the default is the result of `to_s` called on the object.
54
64
  The following definition will index the first and last name of an artist:
55
65
 
@@ -219,10 +229,10 @@ Additional indexing/query options can be used as parameters to `fulltext_search_
219
229
  and then finally stripped, as before.
220
230
  * `update_if`: controls whether or not the index will be updated. This can be set to a symbol,
221
231
  string, or proc. If the result of evaluating the value is true, the index will be updated.
222
- ** When set to a symbol, the symbol is sent to the document.
223
- ** When set to a string, the string is evaluated within the document's instance.
224
- ** When set to a proc, the proc is called, and the document is given to the proc as the first arg.
225
- ** When set to any other type of object, the document's index will not be updated.
232
+ * When set to a symbol, the symbol is sent to the document.
233
+ * When set to a string, the string is evaluated within the document's instance.
234
+ * When set to a proc, the proc is called, and the document is given to the proc as the first arg.
235
+ * When set to any other type of object, the document's index will not be updated.
226
236
 
227
237
  Array filters
228
238
  -------------
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.5
1
+ 0.5.0
@@ -153,7 +153,6 @@ module Mongoid::FullTextSearch
153
153
  all_scores.concat(scores)
154
154
  end
155
155
  all_scores.sort!{ |document1, document2| -document1[:score] <=> -document2[:score] }
156
-
157
156
  instantiate_mapreduce_results(all_scores[0..max_results-1], { :return_scores => return_scores })
158
157
  end
159
158
 
@@ -189,7 +188,7 @@ module Mongoid::FullTextSearch
189
188
  step_size = 1
190
189
  end
191
190
 
192
- # array of ngrams
191
+ # Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the input string
193
192
  ngram_ary = (0..filtered_str.length - config[:ngram_width]).step(step_size).map do |i|
194
193
  if i == 0 or (config[:apply_prefix_scoring_to_all_words] and \
195
194
  config[:word_separators].has_key?(filtered_str[i-1].chr))
@@ -197,25 +196,22 @@ module Mongoid::FullTextSearch
197
196
  else
198
197
  score = Math.sqrt(2.0/filtered_str.length)
199
198
  end
200
- [filtered_str[i..i+config[:ngram_width]-1], score]
199
+ {:ngram => filtered_str[i..i+config[:ngram_width]-1], :score => score}
201
200
  end
201
+
202
+ # If an ngram appears multiple times in the query string, keep the max score
203
+ ngram_ary = ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| {:ngram => key, :score => values.map{ |v| v[:score] }.max} }
202
204
 
203
205
  if (config[:index_full_words])
204
206
  filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word|
205
207
  if word.length >= config[:ngram_width]
206
- ngram_ary << [ word, 1 ]
208
+ ngram_ary << {:ngram => word, :score => 1}
207
209
  end
208
210
  end
209
211
  end
210
-
211
- ngram_hash = {}
212
-
213
- # deduplicate, and keep the highest score
214
- ngram_ary.each do |ngram, score, position|
215
- ngram_hash[ngram] = [ngram_hash[ngram] || 0, score].max
216
- end
217
-
218
- ngram_hash
212
+
213
+ # If an ngram appears both as a full word and as a regular ngram, keep the sum of the two scores
214
+ Hash[ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| [key, values.map{ |v| v[:score] }.sum] }]
219
215
  end
220
216
 
221
217
  def remove_from_ngram_index
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{mongoid_fulltext}
8
- s.version = "0.4.5"
8
+ s.version = "0.5.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Aaron Windsor"]
12
- s.date = %q{2011-10-05}
12
+ s.date = %q{2011-10-11}
13
13
  s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
14
14
  s.email = %q{aaron.windsor@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -3,4 +3,9 @@ class ExternalArtist
3
3
  include Mongoid::FullTextSearch
4
4
  field :full_name
5
5
  fulltext_search_in :full_name, :index_name => 'mongoid_fulltext.artworks_and_artists'
6
+
7
+ def to_s
8
+ full_name
9
+ end
10
+
6
11
  end
@@ -3,4 +3,9 @@ class ExternalArtwork
3
3
  include Mongoid::FullTextSearch
4
4
  field :title
5
5
  fulltext_search_in :title, :index_name => 'mongoid_fulltext.artworks_and_artists'
6
+
7
+ def to_s
8
+ title
9
+ end
10
+
6
11
  end
@@ -391,7 +391,7 @@ module Mongoid
391
391
  end
392
392
 
393
393
  end
394
-
394
+
395
395
  context "using search options" do
396
396
  let!(:patterns) { BasicArtwork.create(:title => 'Flower Patterns') }
397
397
  let!(:flowers) { BasicArtwork.create(:title => 'Flowers') }
@@ -410,6 +410,46 @@ module Mongoid
410
410
  end
411
411
  end
412
412
 
413
+ context "returning scores" do
414
+ # Since we return scores, let's make some weak guarantees about what they actually mean
415
+
416
+ let!(:mao_yan) { ExternalArtist.create(:full_name => "Mao Yan") }
417
+ let!(:mao) { ExternalArtwork.create(:title => "Mao by Andy Warhol") }
418
+ let!(:maox) { ExternalArtwork.create(:title => "Maox by Randy Morehall") }
419
+ let!(:somao) { ExternalArtwork.create(:title => "Somao by Randy Morehall") }
420
+
421
+ it "returns basic matches that don't match a whole word and aren't prefixes with score < 1" do
422
+ ['paox', 'porehall'].each do |query|
423
+ results = ExternalArtist.fulltext_search(query, { :return_scores => true })
424
+ results.length.should > 0
425
+ results.map{ |result| result[-1] }.inject(true){ |accum, item| accum &= (item < 1) }.should be_true
426
+ end
427
+ end
428
+
429
+ it "returns prefix matches with a score >= 1 but < 2" do
430
+ ['warho', 'rand'].each do |query|
431
+ results = ExternalArtist.fulltext_search(query, { :return_scores => true })
432
+ results.length.should > 0
433
+ results.map{ |result| result[-1] if result[0].to_s.starts_with?(query)}
434
+ .compact
435
+ .inject(true){ |accum, item| accum &= (item >= 1 and item < 2) }
436
+ .should be_true
437
+ end
438
+ end
439
+
440
+ it "returns full-word matches with a score >= 2" do
441
+ ['andy', 'warhol', 'mao'].each do |query|
442
+ results = ExternalArtist.fulltext_search(query, { :return_scores => true })
443
+ results.length.should > 0
444
+ results.map{ |result| result[-1] if result[0].to_s.split(' ').member?(query) }
445
+ .compact
446
+ .inject(true){ |accum, item| accum &= (item >= 2) }
447
+ .should be_true
448
+ end
449
+ end
450
+
451
+ end
452
+
413
453
  context "remove_from_ngram_index" do
414
454
  let!(:flowers1) { BasicArtwork.create(:title => 'Flowers 1') }
415
455
  let!(:flowers2) { BasicArtwork.create(:title => 'Flowers 1') }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mongoid_fulltext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.5
4
+ version: 0.5.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-05 00:00:00.000000000 -04:00
12
+ date: 2011-10-11 00:00:00.000000000 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: unicode_utils
17
- requirement: &83773400 !ruby/object:Gem::Requirement
17
+ requirement: &79126690 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ~>
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: 1.0.0
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *83773400
25
+ version_requirements: *79126690
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: mongoid
28
- requirement: &83773150 !ruby/object:Gem::Requirement
28
+ requirement: &79116990 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ~>
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 2.0.0
34
34
  type: :development
35
35
  prerelease: false
36
- version_requirements: *83773150
36
+ version_requirements: *79116990
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: bson_ext
39
- requirement: &83772860 !ruby/object:Gem::Requirement
39
+ requirement: &79116510 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ~>
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: 1.3.0
45
45
  type: :development
46
46
  prerelease: false
47
- version_requirements: *83772860
47
+ version_requirements: *79116510
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: rspec
50
- requirement: &83772590 !ruby/object:Gem::Requirement
50
+ requirement: &79116050 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ~>
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 2.5.0
56
56
  type: :development
57
57
  prerelease: false
58
- version_requirements: *83772590
58
+ version_requirements: *79116050
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: jeweler
61
- requirement: &83772310 !ruby/object:Gem::Requirement
61
+ requirement: &79115620 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ~>
@@ -66,7 +66,7 @@ dependencies:
66
66
  version: 1.5.2
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *83772310
69
+ version_requirements: *79115620
70
70
  description: Full-text search for the Mongoid ORM, using n-grams extracted from text
71
71
  email: aaron.windsor@gmail.com
72
72
  executables: []
@@ -118,7 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
118
118
  version: '0'
119
119
  segments:
120
120
  - 0
121
- hash: 700135439
121
+ hash: -203766451
122
122
  required_rubygems_version: !ruby/object:Gem::Requirement
123
123
  none: false
124
124
  requirements: