mongoid_fulltext 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -214,7 +214,9 @@ Additional indexing/query options can be used as parameters to `fulltext_search_
214
214
  * `alphabet`: letters to index, default is `abcdefghijklmnopqrstuvwxyz0123456789 `
215
215
  * `word_separators`: word separators, default is the space character.
216
216
  * `ngram_width`: ngram width, default is `3`
217
- * `index_full_words`: index full words, which improves exact matches, default is `true`
217
+ * `index_full_words`: index full words, which improves exact matches, default is `true`.
218
+ * `index_short_prefixes`: for each full word, also index its prefix of length `(ngram_width-1)`. Useful if
219
+ you use a larger ngram_width than the default of 3. Default for this option is `false`.
218
220
  * `stop_words`: a hash of words to avoid indexing as full words. Used only if `index_full_words`
219
221
  is set to `true`. Defaults to a hash containing a list of common English stop words.
220
222
  * `apply_prefix_scoring_to_all_words`: score n-grams at beginning of words higher, default is `true`
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.2
1
+ 0.5.3
@@ -29,6 +29,7 @@ module Mongoid::FullTextSearch
29
29
  :max_ngrams_to_search => 6,
30
30
  :apply_prefix_scoring_to_all_words => true,
31
31
  :index_full_words => true,
32
+ :index_short_prefixes => false,
32
33
  :max_candidate_set_size => 1000,
33
34
  :remove_accents => true,
34
35
  :stop_words => Hash[['i', 'a', 's', 't', 'me', 'my', 'we', 'he', 'it', 'am', 'is', 'be', 'do', 'an', 'if',
@@ -218,19 +219,32 @@ module Mongoid::FullTextSearch
218
219
 
219
220
  # If an ngram appears multiple times in the query string, keep the max score
220
221
  ngram_array = ngram_array.group_by{ |h| h[:ngram] }.map{ |key, values| {:ngram => key, :score => values.map{ |v| v[:score] }.max} }
221
-
222
+
223
+ # Add 'short prefix' records to the array: the prefix of length (ngram_width - 1) of each word in the string
224
+ if config[:index_short_prefixes]
225
+ prefixes_seen = {}
226
+ filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word|
227
+ next if word.length < config[:ngram_width]-1
228
+ prefix = word[0...config[:ngram_width]-1]
229
+ if prefixes_seen[prefix].nil? and (config[:stop_words][word].nil? or word == filtered_str)
230
+ ngram_array << {:ngram => prefix, :score => 1}
231
+ prefixes_seen[prefix] = true
232
+ end
233
+ end
234
+ end
235
+
222
236
  # Add records to the array of ngrams for each full word in the string that isn't a stop word
223
- if (config[:index_full_words])
237
+ if config[:index_full_words]
224
238
  full_words_seen = {}
225
239
  filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word|
226
- if word.length > 1 and full_words_seen[word].nil? and config[:stop_words][word].nil?
240
+ if word.length > 1 and full_words_seen[word].nil? and (config[:stop_words][word].nil? or word == filtered_str)
227
241
  ngram_array << {:ngram => word, :score => 1}
228
242
  full_words_seen[word] = true
229
243
  end
230
244
  end
231
245
  end
232
246
 
233
- # If an ngram appears as a full word and an ngram, keep the sum of the two scores
247
+ # If an ngram appears as any combination of full word, short prefix, and ngram, keep the sum of all matching scores
234
248
  Hash[ngram_array.group_by{ |h| h[:ngram] }.map{ |key, values| [key, values.map{ |v| v[:score] }.sum] }]
235
249
  end
236
250
 
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{mongoid_fulltext}
8
- s.version = "0.5.2"
8
+ s.version = "0.5.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Aaron Windsor"]
12
- s.date = %q{2011-11-05}
12
+ s.date = %q{2011-11-08}
13
13
  s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
14
14
  s.email = %q{aaron.windsor@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -42,6 +42,7 @@ Gem::Specification.new do |s|
42
42
  "spec/models/multi_field_artist.rb",
43
43
  "spec/models/multi_field_artwork.rb",
44
44
  "spec/models/partitioned_artist.rb",
45
+ "spec/models/short_prefixes_artwork.rb",
45
46
  "spec/models/stopwords_artwork.rb",
46
47
  "spec/mongoid/fulltext_spec.rb",
47
48
  "spec/spec_helper.rb"
@@ -67,6 +68,7 @@ Gem::Specification.new do |s|
67
68
  "spec/models/multi_field_artist.rb",
68
69
  "spec/models/multi_field_artwork.rb",
69
70
  "spec/models/partitioned_artist.rb",
71
+ "spec/models/short_prefixes_artwork.rb",
70
72
  "spec/models/stopwords_artwork.rb",
71
73
  "spec/mongoid/fulltext_spec.rb",
72
74
  "spec/spec_helper.rb"
@@ -0,0 +1,11 @@
1
+ class ShortPrefixesArtwork
2
+ include Mongoid::Document
3
+ include Mongoid::FullTextSearch
4
+
5
+ field :title
6
+ fulltext_search_in :title,
7
+ :ngram_width => 4,
8
+ :index_short_prefixes => true,
9
+ :index_full_words => false
10
+
11
+ end
@@ -467,6 +467,33 @@ module Mongoid
467
467
 
468
468
  end
469
469
 
470
+ context "indexing short prefixes" do
471
+ let!(:dimethyl_mercury) { ShortPrefixesArtwork.create(:title => "Dimethyl Mercury by Damien Hirst") }
472
+ let!(:volume) { ShortPrefixesArtwork.create(:title => "Volume by Dadamaino") }
473
+ let!(:damaged) { ShortPrefixesArtwork.create(:title => "Damaged: Photographs from the Chicago Daily News 1902-1933 (Governor) by Lisa Oppenheim") }
474
+ let!(:frozen) { ShortPrefixesArtwork.create(:title => "Frozen Fountain XXX by Evelyn Rosenberg") }
475
+ let!(:skull) { ShortPrefixesArtwork.create(:title => "Skull by Andy Warhol") }
476
+
477
+ it "finds the most relevant items with prefix indexing" do
478
+ ShortPrefixesArtwork.fulltext_search("damien").first.should == dimethyl_mercury
479
+ ShortPrefixesArtwork.fulltext_search("dami").first.should == dimethyl_mercury
480
+ ShortPrefixesArtwork.fulltext_search("dama").first.should == damaged
481
+ ShortPrefixesArtwork.fulltext_search("dam").first.should_not == volume
482
+ ShortPrefixesArtwork.fulltext_search("dadamaino").first.should == volume
483
+ ShortPrefixesArtwork.fulltext_search("kull").first.should == skull
484
+ end
485
+
486
+ it "doesn't index prefixes of stopwords" do
487
+ # damaged has the word "from" in it, which shouldn't get indexed.
488
+ ShortPrefixesArtwork.fulltext_search("fro").should == [frozen]
489
+ end
490
+
491
+ it "does index prefixes that would be stopwords taken alone" do
492
+ # skull has the word "andy" in it, which should get indexed as "and" even though "and" is a stopword
493
+ ShortPrefixesArtwork.fulltext_search("and").should == [skull]
494
+ end
495
+ end
496
+
470
497
  context "remove_from_ngram_index" do
471
498
  let!(:flowers1) { BasicArtwork.create(:title => 'Flowers 1') }
472
499
  let!(:flowers2) { BasicArtwork.create(:title => 'Flowers 1') }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mongoid_fulltext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-11-05 00:00:00.000000000 -04:00
12
+ date: 2011-11-08 00:00:00.000000000 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: unicode_utils
17
- requirement: &86209940 !ruby/object:Gem::Requirement
17
+ requirement: &87410860 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ~>
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: 1.0.0
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *86209940
25
+ version_requirements: *87410860
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: mongoid
28
- requirement: &86209400 !ruby/object:Gem::Requirement
28
+ requirement: &87410620 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ~>
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 2.0.0
34
34
  type: :development
35
35
  prerelease: false
36
- version_requirements: *86209400
36
+ version_requirements: *87410620
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: bson_ext
39
- requirement: &86209080 !ruby/object:Gem::Requirement
39
+ requirement: &87410380 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ~>
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: 1.3.0
45
45
  type: :development
46
46
  prerelease: false
47
- version_requirements: *86209080
47
+ version_requirements: *87410380
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: rspec
50
- requirement: &86208570 !ruby/object:Gem::Requirement
50
+ requirement: &87410140 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ~>
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 2.5.0
56
56
  type: :development
57
57
  prerelease: false
58
- version_requirements: *86208570
58
+ version_requirements: *87410140
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: jeweler
61
- requirement: &86208060 !ruby/object:Gem::Requirement
61
+ requirement: &87409900 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ~>
@@ -66,7 +66,7 @@ dependencies:
66
66
  version: 1.5.2
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *86208060
69
+ version_requirements: *87409900
70
70
  description: Full-text search for the Mongoid ORM, using n-grams extracted from text
71
71
  email: aaron.windsor@gmail.com
72
72
  executables: []
@@ -100,6 +100,7 @@ files:
100
100
  - spec/models/multi_field_artist.rb
101
101
  - spec/models/multi_field_artwork.rb
102
102
  - spec/models/partitioned_artist.rb
103
+ - spec/models/short_prefixes_artwork.rb
103
104
  - spec/models/stopwords_artwork.rb
104
105
  - spec/mongoid/fulltext_spec.rb
105
106
  - spec/spec_helper.rb
@@ -119,7 +120,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
119
120
  version: '0'
120
121
  segments:
121
122
  - 0
122
- hash: 444903309
123
+ hash: 591721469
123
124
  required_rubygems_version: !ruby/object:Gem::Requirement
124
125
  none: false
125
126
  requirements:
@@ -148,6 +149,7 @@ test_files:
148
149
  - spec/models/multi_field_artist.rb
149
150
  - spec/models/multi_field_artwork.rb
150
151
  - spec/models/partitioned_artist.rb
152
+ - spec/models/short_prefixes_artwork.rb
151
153
  - spec/models/stopwords_artwork.rb
152
154
  - spec/mongoid/fulltext_spec.rb
153
155
  - spec/spec_helper.rb