mongoid_fulltext 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/mongoid_fulltext.rb +17 -3
- data/mongoid_fulltext.gemspec +4 -2
- data/spec/models/stopwords_artwork.rb +10 -0
- data/spec/mongoid/fulltext_spec.rb +10 -0
- metadata +15 -13
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.0
|
1
|
+
0.5.1
|
data/lib/mongoid_fulltext.rb
CHANGED
@@ -29,7 +29,19 @@ module Mongoid::FullTextSearch
|
|
29
29
|
:apply_prefix_scoring_to_all_words => true,
|
30
30
|
:index_full_words => true,
|
31
31
|
:max_candidate_set_size => 1000,
|
32
|
-
:remove_accents => true
|
32
|
+
:remove_accents => true,
|
33
|
+
:stop_words => Hash[['i', 'a', 's', 't', 'me', 'my', 'we', 'he', 'it', 'am', 'is', 'be', 'do', 'an', 'if',
|
34
|
+
'or', 'as', 'of', 'at', 'by', 'to', 'up', 'in', 'on', 'no', 'so', 'our', 'you', 'him',
|
35
|
+
'his', 'she', 'her', 'its', 'who', 'are', 'was', 'has', 'had', 'did', 'the', 'and',
|
36
|
+
'but', 'for', 'out', 'off', 'why', 'how', 'all', 'any', 'few', 'nor', 'not', 'own',
|
37
|
+
'too', 'can', 'don', 'now', 'ours', 'your', 'hers', 'they', 'them', 'what', 'whom',
|
38
|
+
'this', 'that', 'were', 'been', 'have', 'does', 'with', 'into', 'from', 'down', 'over',
|
39
|
+
'then', 'once', 'here', 'when', 'both', 'each', 'more', 'most', 'some', 'such', 'only',
|
40
|
+
'same', 'than', 'very', 'will', 'just', 'yours', 'their', 'which', 'these', 'those',
|
41
|
+
'being', 'doing', 'until', 'while', 'about', 'after', 'above', 'below', 'under',
|
42
|
+
'again', 'there', 'where', 'other', 'myself', 'itself', 'theirs', 'having', 'during',
|
43
|
+
'before', 'should', 'himself', 'herself', 'because', 'against', 'between', 'through',
|
44
|
+
'further', 'yourself', 'ourselves', 'yourselves', 'themselves'].map{ |x| [x,true] }]
|
33
45
|
}
|
34
46
|
|
35
47
|
config.update(options)
|
@@ -180,7 +192,7 @@ module Mongoid::FullTextSearch
|
|
180
192
|
filtered_str = UnicodeUtils.nfkd(filtered_str).gsub(/[^\x00-\x7F]/,'')
|
181
193
|
end
|
182
194
|
|
183
|
-
filtered_str = filtered_str.downcase.split('').map{ |ch| config[:alphabet][ch] }.compact.join('')
|
195
|
+
filtered_str = filtered_str.mb_chars.downcase.to_s.split('').map{ |ch| config[:alphabet][ch] }.compact.join('')
|
184
196
|
|
185
197
|
if bound_number_returned
|
186
198
|
step_size = [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max
|
@@ -203,9 +215,11 @@ module Mongoid::FullTextSearch
|
|
203
215
|
ngram_ary = ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| {:ngram => key, :score => values.map{ |v| v[:score] }.max} }
|
204
216
|
|
205
217
|
if (config[:index_full_words])
|
218
|
+
full_words_seen = {}
|
206
219
|
filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word|
|
207
|
-
if word.length >= config[:ngram_width]
|
220
|
+
if word.length >= config[:ngram_width] and full_words_seen[word].nil? and config[:stop_words][word].nil?
|
208
221
|
ngram_ary << {:ngram => word, :score => 1}
|
222
|
+
full_words_seen[word] = true
|
209
223
|
end
|
210
224
|
end
|
211
225
|
end
|
data/mongoid_fulltext.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{mongoid_fulltext}
|
8
|
-
s.version = "0.5.0"
|
8
|
+
s.version = "0.5.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Aaron Windsor"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-11-02}
|
13
13
|
s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
|
14
14
|
s.email = %q{aaron.windsor@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -42,6 +42,7 @@ Gem::Specification.new do |s|
|
|
42
42
|
"spec/models/multi_field_artist.rb",
|
43
43
|
"spec/models/multi_field_artwork.rb",
|
44
44
|
"spec/models/partitioned_artist.rb",
|
45
|
+
"spec/models/stopwords_artwork.rb",
|
45
46
|
"spec/mongoid/fulltext_spec.rb",
|
46
47
|
"spec/spec_helper.rb"
|
47
48
|
]
|
@@ -66,6 +67,7 @@ Gem::Specification.new do |s|
|
|
66
67
|
"spec/models/multi_field_artist.rb",
|
67
68
|
"spec/models/multi_field_artwork.rb",
|
68
69
|
"spec/models/partitioned_artist.rb",
|
70
|
+
"spec/models/stopwords_artwork.rb",
|
69
71
|
"spec/mongoid/fulltext_spec.rb",
|
70
72
|
"spec/spec_helper.rb"
|
71
73
|
]
|
data/spec/mongoid/fulltext_spec.rb
CHANGED
@@ -450,6 +450,16 @@ module Mongoid
|
|
450
450
|
|
451
451
|
end
|
452
452
|
|
453
|
+
context "with stop words defined" do
|
454
|
+
let!(:flowers) { StopwordsArtwork.create(:title => "Flowers by Andy Warhol") }
|
455
|
+
let!(:many_ands) { StopwordsArtwork.create(:title => "Foo and bar and baz and foobar") }
|
456
|
+
|
457
|
+
it "doesn't give a full-word score boost to stopwords" do
|
458
|
+
StopwordsArtwork.fulltext_search("andy").map{ |a| a.title }.should == [flowers.title, many_ands.title]
|
459
|
+
StopwordsArtwork.fulltext_search("warhol and other stuff").map{ |a| a.title }.should == [flowers.title, many_ands.title]
|
460
|
+
end
|
461
|
+
end
|
462
|
+
|
453
463
|
context "remove_from_ngram_index" do
|
454
464
|
let!(:flowers1) { BasicArtwork.create(:title => 'Flowers 1') }
|
455
465
|
let!(:flowers2) { BasicArtwork.create(:title => 'Flowers 1') }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mongoid_fulltext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-11-02 00:00:00.000000000 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: unicode_utils
|
17
|
-
requirement: &
|
17
|
+
requirement: &87323870 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 1.0.0
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *87323870
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: mongoid
|
28
|
-
requirement: &
|
28
|
+
requirement: &87323630 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 2.0.0
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *87323630
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: bson_ext
|
39
|
-
requirement: &
|
39
|
+
requirement: &87323390 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ~>
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 1.3.0
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *87323390
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: rspec
|
50
|
-
requirement: &
|
50
|
+
requirement: &87323150 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ~>
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 2.5.0
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *87323150
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: jeweler
|
61
|
-
requirement: &
|
61
|
+
requirement: &87322910 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ~>
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
version: 1.5.2
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *87322910
|
70
70
|
description: Full-text search for the Mongoid ORM, using n-grams extracted from text
|
71
71
|
email: aaron.windsor@gmail.com
|
72
72
|
executables: []
|
@@ -100,6 +100,7 @@ files:
|
|
100
100
|
- spec/models/multi_field_artist.rb
|
101
101
|
- spec/models/multi_field_artwork.rb
|
102
102
|
- spec/models/partitioned_artist.rb
|
103
|
+
- spec/models/stopwords_artwork.rb
|
103
104
|
- spec/mongoid/fulltext_spec.rb
|
104
105
|
- spec/spec_helper.rb
|
105
106
|
has_rdoc: true
|
@@ -118,7 +119,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
118
119
|
version: '0'
|
119
120
|
segments:
|
120
121
|
- 0
|
121
|
-
hash:
|
122
|
+
hash: 136864689
|
122
123
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
124
|
none: false
|
124
125
|
requirements:
|
@@ -147,5 +148,6 @@ test_files:
|
|
147
148
|
- spec/models/multi_field_artist.rb
|
148
149
|
- spec/models/multi_field_artwork.rb
|
149
150
|
- spec/models/partitioned_artist.rb
|
151
|
+
- spec/models/stopwords_artwork.rb
|
150
152
|
- spec/mongoid/fulltext_spec.rb
|
151
153
|
- spec/spec_helper.rb
|