mongoid_fulltext 0.5.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.0
1
+ 0.5.1
@@ -29,7 +29,19 @@ module Mongoid::FullTextSearch
29
29
  :apply_prefix_scoring_to_all_words => true,
30
30
  :index_full_words => true,
31
31
  :max_candidate_set_size => 1000,
32
- :remove_accents => true
32
+ :remove_accents => true,
33
+ :stop_words => Hash[['i', 'a', 's', 't', 'me', 'my', 'we', 'he', 'it', 'am', 'is', 'be', 'do', 'an', 'if',
34
+ 'or', 'as', 'of', 'at', 'by', 'to', 'up', 'in', 'on', 'no', 'so', 'our', 'you', 'him',
35
+ 'his', 'she', 'her', 'its', 'who', 'are', 'was', 'has', 'had', 'did', 'the', 'and',
36
+ 'but', 'for', 'out', 'off', 'why', 'how', 'all', 'any', 'few', 'nor', 'not', 'own',
37
+ 'too', 'can', 'don', 'now', 'ours', 'your', 'hers', 'they', 'them', 'what', 'whom',
38
+ 'this', 'that', 'were', 'been', 'have', 'does', 'with', 'into', 'from', 'down', 'over',
39
+ 'then', 'once', 'here', 'when', 'both', 'each', 'more', 'most', 'some', 'such', 'only',
40
+ 'same', 'than', 'very', 'will', 'just', 'yours', 'their', 'which', 'these', 'those',
41
+ 'being', 'doing', 'until', 'while', 'about', 'after', 'above', 'below', 'under',
42
+ 'again', 'there', 'where', 'other', 'myself', 'itself', 'theirs', 'having', 'during',
43
+ 'before', 'should', 'himself', 'herself', 'because', 'against', 'between', 'through',
44
+ 'further', 'yourself', 'ourselves', 'yourselves', 'themselves'].map{ |x| [x,true] }]
33
45
  }
34
46
 
35
47
  config.update(options)
@@ -180,7 +192,7 @@ module Mongoid::FullTextSearch
180
192
  filtered_str = UnicodeUtils.nfkd(filtered_str).gsub(/[^\x00-\x7F]/,'')
181
193
  end
182
194
 
183
- filtered_str = filtered_str.downcase.split('').map{ |ch| config[:alphabet][ch] }.compact.join('')
195
+ filtered_str = filtered_str.mb_chars.downcase.to_s.split('').map{ |ch| config[:alphabet][ch] }.compact.join('')
184
196
 
185
197
  if bound_number_returned
186
198
  step_size = [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max
@@ -203,9 +215,11 @@ module Mongoid::FullTextSearch
203
215
  ngram_ary = ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| {:ngram => key, :score => values.map{ |v| v[:score] }.max} }
204
216
 
205
217
  if (config[:index_full_words])
218
+ full_words_seen = {}
206
219
  filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word|
207
- if word.length >= config[:ngram_width]
220
+ if word.length >= config[:ngram_width] and full_words_seen[word].nil? and config[:stop_words][word].nil?
208
221
  ngram_ary << {:ngram => word, :score => 1}
222
+ full_words_seen[word] = true
209
223
  end
210
224
  end
211
225
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{mongoid_fulltext}
8
- s.version = "0.5.0"
8
+ s.version = "0.5.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Aaron Windsor"]
12
- s.date = %q{2011-10-11}
12
+ s.date = %q{2011-11-02}
13
13
  s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
14
14
  s.email = %q{aaron.windsor@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -42,6 +42,7 @@ Gem::Specification.new do |s|
42
42
  "spec/models/multi_field_artist.rb",
43
43
  "spec/models/multi_field_artwork.rb",
44
44
  "spec/models/partitioned_artist.rb",
45
+ "spec/models/stopwords_artwork.rb",
45
46
  "spec/mongoid/fulltext_spec.rb",
46
47
  "spec/spec_helper.rb"
47
48
  ]
@@ -66,6 +67,7 @@ Gem::Specification.new do |s|
66
67
  "spec/models/multi_field_artist.rb",
67
68
  "spec/models/multi_field_artwork.rb",
68
69
  "spec/models/partitioned_artist.rb",
70
+ "spec/models/stopwords_artwork.rb",
69
71
  "spec/mongoid/fulltext_spec.rb",
70
72
  "spec/spec_helper.rb"
71
73
  ]
@@ -0,0 +1,10 @@
1
+ class StopwordsArtwork
2
+ include Mongoid::Document
3
+ include Mongoid::FullTextSearch
4
+
5
+ field :title
6
+ fulltext_search_in :title,
7
+ :index_full_words => true,
8
+ :stop_words => { 'and' => true }
9
+
10
+ end
@@ -450,6 +450,16 @@ module Mongoid
450
450
 
451
451
  end
452
452
 
453
+ context "with stop words defined" do
454
+ let!(:flowers) { StopwordsArtwork.create(:title => "Flowers by Andy Warhol") }
455
+ let!(:many_ands) { StopwordsArtwork.create(:title => "Foo and bar and baz and foobar") }
456
+
457
+ it "doesn't give a full-word score boost to stopwords" do
458
+ StopwordsArtwork.fulltext_search("andy").map{ |a| a.title }.should == [flowers.title, many_ands.title]
459
+ StopwordsArtwork.fulltext_search("warhol and other stuff").map{ |a| a.title }.should == [flowers.title, many_ands.title]
460
+ end
461
+ end
462
+
453
463
  context "remove_from_ngram_index" do
454
464
  let!(:flowers1) { BasicArtwork.create(:title => 'Flowers 1') }
455
465
  let!(:flowers2) { BasicArtwork.create(:title => 'Flowers 1') }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mongoid_fulltext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-11 00:00:00.000000000 -04:00
12
+ date: 2011-11-02 00:00:00.000000000 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: unicode_utils
17
- requirement: &79126690 !ruby/object:Gem::Requirement
17
+ requirement: &87323870 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ~>
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: 1.0.0
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *79126690
25
+ version_requirements: *87323870
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: mongoid
28
- requirement: &79116990 !ruby/object:Gem::Requirement
28
+ requirement: &87323630 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ~>
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 2.0.0
34
34
  type: :development
35
35
  prerelease: false
36
- version_requirements: *79116990
36
+ version_requirements: *87323630
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: bson_ext
39
- requirement: &79116510 !ruby/object:Gem::Requirement
39
+ requirement: &87323390 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ~>
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: 1.3.0
45
45
  type: :development
46
46
  prerelease: false
47
- version_requirements: *79116510
47
+ version_requirements: *87323390
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: rspec
50
- requirement: &79116050 !ruby/object:Gem::Requirement
50
+ requirement: &87323150 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ~>
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 2.5.0
56
56
  type: :development
57
57
  prerelease: false
58
- version_requirements: *79116050
58
+ version_requirements: *87323150
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: jeweler
61
- requirement: &79115620 !ruby/object:Gem::Requirement
61
+ requirement: &87322910 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ~>
@@ -66,7 +66,7 @@ dependencies:
66
66
  version: 1.5.2
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *79115620
69
+ version_requirements: *87322910
70
70
  description: Full-text search for the Mongoid ORM, using n-grams extracted from text
71
71
  email: aaron.windsor@gmail.com
72
72
  executables: []
@@ -100,6 +100,7 @@ files:
100
100
  - spec/models/multi_field_artist.rb
101
101
  - spec/models/multi_field_artwork.rb
102
102
  - spec/models/partitioned_artist.rb
103
+ - spec/models/stopwords_artwork.rb
103
104
  - spec/mongoid/fulltext_spec.rb
104
105
  - spec/spec_helper.rb
105
106
  has_rdoc: true
@@ -118,7 +119,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
118
119
  version: '0'
119
120
  segments:
120
121
  - 0
121
- hash: -203766451
122
+ hash: 136864689
122
123
  required_rubygems_version: !ruby/object:Gem::Requirement
123
124
  none: false
124
125
  requirements:
@@ -147,5 +148,6 @@ test_files:
147
148
  - spec/models/multi_field_artist.rb
148
149
  - spec/models/multi_field_artwork.rb
149
150
  - spec/models/partitioned_artist.rb
151
+ - spec/models/stopwords_artwork.rb
150
152
  - spec/mongoid/fulltext_spec.rb
151
153
  - spec/spec_helper.rb