mongoid_fulltext 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/mongoid_fulltext.rb +17 -3
- data/mongoid_fulltext.gemspec +4 -2
- data/spec/models/stopwords_artwork.rb +10 -0
- data/spec/mongoid/fulltext_spec.rb +10 -0
- metadata +15 -13
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.0
|
1
|
+
0.5.1
|
data/lib/mongoid_fulltext.rb
CHANGED
@@ -29,7 +29,19 @@ module Mongoid::FullTextSearch
|
|
29
29
|
:apply_prefix_scoring_to_all_words => true,
|
30
30
|
:index_full_words => true,
|
31
31
|
:max_candidate_set_size => 1000,
|
32
|
-
:remove_accents => true
|
32
|
+
:remove_accents => true,
|
33
|
+
:stop_words => Hash[['i', 'a', 's', 't', 'me', 'my', 'we', 'he', 'it', 'am', 'is', 'be', 'do', 'an', 'if',
|
34
|
+
'or', 'as', 'of', 'at', 'by', 'to', 'up', 'in', 'on', 'no', 'so', 'our', 'you', 'him',
|
35
|
+
'his', 'she', 'her', 'its', 'who', 'are', 'was', 'has', 'had', 'did', 'the', 'and',
|
36
|
+
'but', 'for', 'out', 'off', 'why', 'how', 'all', 'any', 'few', 'nor', 'not', 'own',
|
37
|
+
'too', 'can', 'don', 'now', 'ours', 'your', 'hers', 'they', 'them', 'what', 'whom',
|
38
|
+
'this', 'that', 'were', 'been', 'have', 'does', 'with', 'into', 'from', 'down', 'over',
|
39
|
+
'then', 'once', 'here', 'when', 'both', 'each', 'more', 'most', 'some', 'such', 'only',
|
40
|
+
'same', 'than', 'very', 'will', 'just', 'yours', 'their', 'which', 'these', 'those',
|
41
|
+
'being', 'doing', 'until', 'while', 'about', 'after', 'above', 'below', 'under',
|
42
|
+
'again', 'there', 'where', 'other', 'myself', 'itself', 'theirs', 'having', 'during',
|
43
|
+
'before', 'should', 'himself', 'herself', 'because', 'against', 'between', 'through',
|
44
|
+
'further', 'yourself', 'ourselves', 'yourselves', 'themselves'].map{ |x| [x,true] }]
|
33
45
|
}
|
34
46
|
|
35
47
|
config.update(options)
|
@@ -180,7 +192,7 @@ module Mongoid::FullTextSearch
|
|
180
192
|
filtered_str = UnicodeUtils.nfkd(filtered_str).gsub(/[^\x00-\x7F]/,'')
|
181
193
|
end
|
182
194
|
|
183
|
-
filtered_str = filtered_str.downcase.split('').map{ |ch| config[:alphabet][ch] }.compact.join('')
|
195
|
+
filtered_str = filtered_str.mb_chars.downcase.to_s.split('').map{ |ch| config[:alphabet][ch] }.compact.join('')
|
184
196
|
|
185
197
|
if bound_number_returned
|
186
198
|
step_size = [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max
|
@@ -203,9 +215,11 @@ module Mongoid::FullTextSearch
|
|
203
215
|
ngram_ary = ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| {:ngram => key, :score => values.map{ |v| v[:score] }.max} }
|
204
216
|
|
205
217
|
if (config[:index_full_words])
|
218
|
+
full_words_seen = {}
|
206
219
|
filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word|
|
207
|
-
if word.length >= config[:ngram_width]
|
220
|
+
if word.length >= config[:ngram_width] and full_words_seen[word].nil? and config[:stop_words][word].nil?
|
208
221
|
ngram_ary << {:ngram => word, :score => 1}
|
222
|
+
full_words_seen[word] = true
|
209
223
|
end
|
210
224
|
end
|
211
225
|
end
|
data/mongoid_fulltext.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{mongoid_fulltext}
|
8
|
-
s.version = "0.5.0"
|
8
|
+
s.version = "0.5.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Aaron Windsor"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-11-02}
|
13
13
|
s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
|
14
14
|
s.email = %q{aaron.windsor@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -42,6 +42,7 @@ Gem::Specification.new do |s|
|
|
42
42
|
"spec/models/multi_field_artist.rb",
|
43
43
|
"spec/models/multi_field_artwork.rb",
|
44
44
|
"spec/models/partitioned_artist.rb",
|
45
|
+
"spec/models/stopwords_artwork.rb",
|
45
46
|
"spec/mongoid/fulltext_spec.rb",
|
46
47
|
"spec/spec_helper.rb"
|
47
48
|
]
|
@@ -66,6 +67,7 @@ Gem::Specification.new do |s|
|
|
66
67
|
"spec/models/multi_field_artist.rb",
|
67
68
|
"spec/models/multi_field_artwork.rb",
|
68
69
|
"spec/models/partitioned_artist.rb",
|
70
|
+
"spec/models/stopwords_artwork.rb",
|
69
71
|
"spec/mongoid/fulltext_spec.rb",
|
70
72
|
"spec/spec_helper.rb"
|
71
73
|
]
|
@@ -450,6 +450,16 @@ module Mongoid
|
|
450
450
|
|
451
451
|
end
|
452
452
|
|
453
|
+
context "with stop words defined" do
|
454
|
+
let!(:flowers) { StopwordsArtwork.create(:title => "Flowers by Andy Warhol") }
|
455
|
+
let!(:many_ands) { StopwordsArtwork.create(:title => "Foo and bar and baz and foobar") }
|
456
|
+
|
457
|
+
it "doesn't give a full-word score boost to stopwords" do
|
458
|
+
StopwordsArtwork.fulltext_search("andy").map{ |a| a.title }.should == [flowers.title, many_ands.title]
|
459
|
+
StopwordsArtwork.fulltext_search("warhol and other stuff").map{ |a| a.title }.should == [flowers.title, many_ands.title]
|
460
|
+
end
|
461
|
+
end
|
462
|
+
|
453
463
|
context "remove_from_ngram_index" do
|
454
464
|
let!(:flowers1) { BasicArtwork.create(:title => 'Flowers 1') }
|
455
465
|
let!(:flowers2) { BasicArtwork.create(:title => 'Flowers 1') }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mongoid_fulltext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-11-02 00:00:00.000000000 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: unicode_utils
|
17
|
-
requirement: &
|
17
|
+
requirement: &87323870 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 1.0.0
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *87323870
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: mongoid
|
28
|
-
requirement: &
|
28
|
+
requirement: &87323630 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 2.0.0
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *87323630
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: bson_ext
|
39
|
-
requirement: &
|
39
|
+
requirement: &87323390 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ~>
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 1.3.0
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *87323390
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: rspec
|
50
|
-
requirement: &
|
50
|
+
requirement: &87323150 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ~>
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 2.5.0
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *87323150
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: jeweler
|
61
|
-
requirement: &
|
61
|
+
requirement: &87322910 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ~>
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
version: 1.5.2
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *87322910
|
70
70
|
description: Full-text search for the Mongoid ORM, using n-grams extracted from text
|
71
71
|
email: aaron.windsor@gmail.com
|
72
72
|
executables: []
|
@@ -100,6 +100,7 @@ files:
|
|
100
100
|
- spec/models/multi_field_artist.rb
|
101
101
|
- spec/models/multi_field_artwork.rb
|
102
102
|
- spec/models/partitioned_artist.rb
|
103
|
+
- spec/models/stopwords_artwork.rb
|
103
104
|
- spec/mongoid/fulltext_spec.rb
|
104
105
|
- spec/spec_helper.rb
|
105
106
|
has_rdoc: true
|
@@ -118,7 +119,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
118
119
|
version: '0'
|
119
120
|
segments:
|
120
121
|
- 0
|
121
|
-
hash:
|
122
|
+
hash: 136864689
|
122
123
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
124
|
none: false
|
124
125
|
requirements:
|
@@ -147,5 +148,6 @@ test_files:
|
|
147
148
|
- spec/models/multi_field_artist.rb
|
148
149
|
- spec/models/multi_field_artwork.rb
|
149
150
|
- spec/models/partitioned_artist.rb
|
151
|
+
- spec/models/stopwords_artwork.rb
|
150
152
|
- spec/mongoid/fulltext_spec.rb
|
151
153
|
- spec/spec_helper.rb
|