mongoid_fulltext 0.5.2 → 0.5.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +3 -1
- data/VERSION +1 -1
- data/lib/mongoid_fulltext.rb +18 -4
- data/mongoid_fulltext.gemspec +4 -2
- data/spec/models/short_prefixes_artwork.rb +11 -0
- data/spec/mongoid/fulltext_spec.rb +27 -0
- metadata +15 -13
data/README.md
CHANGED
@@ -214,7 +214,9 @@ Additional indexing/query options can be used as parameters to `fulltext_search_
|
|
214
214
|
* `alphabet`: letters to index, default is `abcdefghijklmnopqrstuvwxyz0123456789 `
|
215
215
|
* `word_separators`: word separators, default is the space character.
|
216
216
|
* `ngram_width`: ngram width, default is `3`
|
217
|
-
* `index_full_words`: index full words, which improves exact matches, default is `true
|
217
|
+
* `index_full_words`: index full words, which improves exact matches, default is `true`.
|
218
|
+
* `index_short_prefixes`: index a prefix of each full word of length `(ngram_width-1)`. Useful if
|
219
|
+
you use a larger ngram_width than the default of 3. Default for this option is `false`.
|
218
220
|
* `stop_words`: a hash of words to avoid indexing as full words. Used only if `index_full_words`
|
219
221
|
is set to `true`. Defaults to a hash containing a list of common English stop words.
|
220
222
|
* `apply_prefix_scoring_to_all_words`: score n-grams at beginning of words higher, default is `true`
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.2
|
1
|
+
0.5.3
|
data/lib/mongoid_fulltext.rb
CHANGED
@@ -29,6 +29,7 @@ module Mongoid::FullTextSearch
|
|
29
29
|
:max_ngrams_to_search => 6,
|
30
30
|
:apply_prefix_scoring_to_all_words => true,
|
31
31
|
:index_full_words => true,
|
32
|
+
:index_short_prefixes => false,
|
32
33
|
:max_candidate_set_size => 1000,
|
33
34
|
:remove_accents => true,
|
34
35
|
:stop_words => Hash[['i', 'a', 's', 't', 'me', 'my', 'we', 'he', 'it', 'am', 'is', 'be', 'do', 'an', 'if',
|
@@ -218,19 +219,32 @@ module Mongoid::FullTextSearch
|
|
218
219
|
|
219
220
|
# If an ngram appears multiple times in the query string, keep the max score
|
220
221
|
ngram_array = ngram_array.group_by{ |h| h[:ngram] }.map{ |key, values| {:ngram => key, :score => values.map{ |v| v[:score] }.max} }
|
221
|
-
|
222
|
+
|
223
|
+
# Add 'short prefix' records to the array: prefixes of the string that are length (ngram_width - 1)
|
224
|
+
if config[:index_short_prefixes]
|
225
|
+
prefixes_seen = {}
|
226
|
+
filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word|
|
227
|
+
next if word.length < config[:ngram_width]-1
|
228
|
+
prefix = word[0...config[:ngram_width]-1]
|
229
|
+
if prefixes_seen[prefix].nil? and (config[:stop_words][word].nil? or word == filtered_str)
|
230
|
+
ngram_array << {:ngram => prefix, :score => 1}
|
231
|
+
prefixes_seen[prefix] = true
|
232
|
+
end
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
222
236
|
# Add records to the array of ngrams for each full word in the string that isn't a stop word
|
223
|
-
if
|
237
|
+
if config[:index_full_words]
|
224
238
|
full_words_seen = {}
|
225
239
|
filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word|
|
226
|
-
if word.length > 1 and full_words_seen[word].nil? and config[:stop_words][word].nil?
|
240
|
+
if word.length > 1 and full_words_seen[word].nil? and (config[:stop_words][word].nil? or word == filtered_str)
|
227
241
|
ngram_array << {:ngram => word, :score => 1}
|
228
242
|
full_words_seen[word] = true
|
229
243
|
end
|
230
244
|
end
|
231
245
|
end
|
232
246
|
|
233
|
-
# If an ngram appears as
|
247
|
+
# If an ngram appears as any combination of full word, short prefix, and ngram, keep the sum of the two scores
|
234
248
|
Hash[ngram_array.group_by{ |h| h[:ngram] }.map{ |key, values| [key, values.map{ |v| v[:score] }.sum] }]
|
235
249
|
end
|
236
250
|
|
data/mongoid_fulltext.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{mongoid_fulltext}
|
8
|
-
s.version = "0.5.2"
|
8
|
+
s.version = "0.5.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Aaron Windsor"]
|
12
|
-
s.date = %q{2011-11-
|
12
|
+
s.date = %q{2011-11-08}
|
13
13
|
s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
|
14
14
|
s.email = %q{aaron.windsor@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -42,6 +42,7 @@ Gem::Specification.new do |s|
|
|
42
42
|
"spec/models/multi_field_artist.rb",
|
43
43
|
"spec/models/multi_field_artwork.rb",
|
44
44
|
"spec/models/partitioned_artist.rb",
|
45
|
+
"spec/models/short_prefixes_artwork.rb",
|
45
46
|
"spec/models/stopwords_artwork.rb",
|
46
47
|
"spec/mongoid/fulltext_spec.rb",
|
47
48
|
"spec/spec_helper.rb"
|
@@ -67,6 +68,7 @@ Gem::Specification.new do |s|
|
|
67
68
|
"spec/models/multi_field_artist.rb",
|
68
69
|
"spec/models/multi_field_artwork.rb",
|
69
70
|
"spec/models/partitioned_artist.rb",
|
71
|
+
"spec/models/short_prefixes_artwork.rb",
|
70
72
|
"spec/models/stopwords_artwork.rb",
|
71
73
|
"spec/mongoid/fulltext_spec.rb",
|
72
74
|
"spec/spec_helper.rb"
|
@@ -467,6 +467,33 @@ module Mongoid
|
|
467
467
|
|
468
468
|
end
|
469
469
|
|
470
|
+
context "indexing short prefixes" do
|
471
|
+
let!(:dimethyl_mercury) { ShortPrefixesArtwork.create(:title => "Dimethyl Mercury by Damien Hirst") }
|
472
|
+
let!(:volume) { ShortPrefixesArtwork.create(:title => "Volume by Dadamaino") }
|
473
|
+
let!(:damaged) { ShortPrefixesArtwork.create(:title => "Damaged: Photographs from the Chicago Daily News 1902-1933 (Governor) by Lisa Oppenheim") }
|
474
|
+
let!(:frozen) { ShortPrefixesArtwork.create(:title => "Frozen Fountain XXX by Evelyn Rosenberg") }
|
475
|
+
let!(:skull) { ShortPrefixesArtwork.create(:title => "Skull by Andy Warhol") }
|
476
|
+
|
477
|
+
it "finds the most relevant items with prefix indexing" do
|
478
|
+
ShortPrefixesArtwork.fulltext_search("damien").first.should == dimethyl_mercury
|
479
|
+
ShortPrefixesArtwork.fulltext_search("dami").first.should == dimethyl_mercury
|
480
|
+
ShortPrefixesArtwork.fulltext_search("dama").first.should == damaged
|
481
|
+
ShortPrefixesArtwork.fulltext_search("dam").first.should_not == volume
|
482
|
+
ShortPrefixesArtwork.fulltext_search("dadamaino").first.should == volume
|
483
|
+
ShortPrefixesArtwork.fulltext_search("kull").first.should == skull
|
484
|
+
end
|
485
|
+
|
486
|
+
it "doesn't index prefixes of stopwords" do
|
487
|
+
# damaged has the word "from" in it, which shouldn't get indexed.
|
488
|
+
ShortPrefixesArtwork.fulltext_search("fro").should == [frozen]
|
489
|
+
end
|
490
|
+
|
491
|
+
it "does index prefixes that would be stopwords taken alone" do
|
492
|
+
# skull has the word "andy" in it, which should get indexed as "and" even though "and" is a stopword
|
493
|
+
ShortPrefixesArtwork.fulltext_search("and").should == [skull]
|
494
|
+
end
|
495
|
+
end
|
496
|
+
|
470
497
|
context "remove_from_ngram_index" do
|
471
498
|
let!(:flowers1) { BasicArtwork.create(:title => 'Flowers 1') }
|
472
499
|
let!(:flowers2) { BasicArtwork.create(:title => 'Flowers 1') }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mongoid_fulltext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.2
|
4
|
+
version: 0.5.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-11-
|
12
|
+
date: 2011-11-08 00:00:00.000000000 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: unicode_utils
|
17
|
-
requirement: &
|
17
|
+
requirement: &87410860 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 1.0.0
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *87410860
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: mongoid
|
28
|
-
requirement: &
|
28
|
+
requirement: &87410620 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 2.0.0
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *87410620
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: bson_ext
|
39
|
-
requirement: &
|
39
|
+
requirement: &87410380 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ~>
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 1.3.0
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *87410380
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: rspec
|
50
|
-
requirement: &
|
50
|
+
requirement: &87410140 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ~>
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 2.5.0
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *87410140
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: jeweler
|
61
|
-
requirement: &
|
61
|
+
requirement: &87409900 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ~>
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
version: 1.5.2
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *87409900
|
70
70
|
description: Full-text search for the Mongoid ORM, using n-grams extracted from text
|
71
71
|
email: aaron.windsor@gmail.com
|
72
72
|
executables: []
|
@@ -100,6 +100,7 @@ files:
|
|
100
100
|
- spec/models/multi_field_artist.rb
|
101
101
|
- spec/models/multi_field_artwork.rb
|
102
102
|
- spec/models/partitioned_artist.rb
|
103
|
+
- spec/models/short_prefixes_artwork.rb
|
103
104
|
- spec/models/stopwords_artwork.rb
|
104
105
|
- spec/mongoid/fulltext_spec.rb
|
105
106
|
- spec/spec_helper.rb
|
@@ -119,7 +120,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
119
120
|
version: '0'
|
120
121
|
segments:
|
121
122
|
- 0
|
122
|
-
hash:
|
123
|
+
hash: 591721469
|
123
124
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
124
125
|
none: false
|
125
126
|
requirements:
|
@@ -148,6 +149,7 @@ test_files:
|
|
148
149
|
- spec/models/multi_field_artist.rb
|
149
150
|
- spec/models/multi_field_artwork.rb
|
150
151
|
- spec/models/partitioned_artist.rb
|
152
|
+
- spec/models/short_prefixes_artwork.rb
|
151
153
|
- spec/models/stopwords_artwork.rb
|
152
154
|
- spec/mongoid/fulltext_spec.rb
|
153
155
|
- spec/spec_helper.rb
|