mongoid_fulltext 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.0
1
+ 0.5.1
@@ -29,7 +29,19 @@ module Mongoid::FullTextSearch
29
29
  :apply_prefix_scoring_to_all_words => true,
30
30
  :index_full_words => true,
31
31
  :max_candidate_set_size => 1000,
32
- :remove_accents => true
32
+ :remove_accents => true,
33
+ :stop_words => Hash[['i', 'a', 's', 't', 'me', 'my', 'we', 'he', 'it', 'am', 'is', 'be', 'do', 'an', 'if',
34
+ 'or', 'as', 'of', 'at', 'by', 'to', 'up', 'in', 'on', 'no', 'so', 'our', 'you', 'him',
35
+ 'his', 'she', 'her', 'its', 'who', 'are', 'was', 'has', 'had', 'did', 'the', 'and',
36
+ 'but', 'for', 'out', 'off', 'why', 'how', 'all', 'any', 'few', 'nor', 'not', 'own',
37
+ 'too', 'can', 'don', 'now', 'ours', 'your', 'hers', 'they', 'them', 'what', 'whom',
38
+ 'this', 'that', 'were', 'been', 'have', 'does', 'with', 'into', 'from', 'down', 'over',
39
+ 'then', 'once', 'here', 'when', 'both', 'each', 'more', 'most', 'some', 'such', 'only',
40
+ 'same', 'than', 'very', 'will', 'just', 'yours', 'their', 'which', 'these', 'those',
41
+ 'being', 'doing', 'until', 'while', 'about', 'after', 'above', 'below', 'under',
42
+ 'again', 'there', 'where', 'other', 'myself', 'itself', 'theirs', 'having', 'during',
43
+ 'before', 'should', 'himself', 'herself', 'because', 'against', 'between', 'through',
44
+ 'further', 'yourself', 'ourselves', 'yourselves', 'themselves'].map{ |x| [x,true] }]
33
45
  }
34
46
 
35
47
  config.update(options)
@@ -180,7 +192,7 @@ module Mongoid::FullTextSearch
180
192
  filtered_str = UnicodeUtils.nfkd(filtered_str).gsub(/[^\x00-\x7F]/,'')
181
193
  end
182
194
 
183
- filtered_str = filtered_str.downcase.split('').map{ |ch| config[:alphabet][ch] }.compact.join('')
195
+ filtered_str = filtered_str.mb_chars.downcase.to_s.split('').map{ |ch| config[:alphabet][ch] }.compact.join('')
184
196
 
185
197
  if bound_number_returned
186
198
  step_size = [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max
@@ -203,9 +215,11 @@ module Mongoid::FullTextSearch
203
215
  ngram_ary = ngram_ary.group_by{ |h| h[:ngram] }.map{ |key, values| {:ngram => key, :score => values.map{ |v| v[:score] }.max} }
204
216
 
205
217
  if (config[:index_full_words])
218
+ full_words_seen = {}
206
219
  filtered_str.split(Regexp.compile(config[:word_separators].keys.join)).each do |word|
207
- if word.length >= config[:ngram_width]
220
+ if word.length >= config[:ngram_width] and full_words_seen[word].nil? and config[:stop_words][word].nil?
208
221
  ngram_ary << {:ngram => word, :score => 1}
222
+ full_words_seen[word] = true
209
223
  end
210
224
  end
211
225
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{mongoid_fulltext}
8
- s.version = "0.5.0"
8
+ s.version = "0.5.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Aaron Windsor"]
12
- s.date = %q{2011-10-11}
12
+ s.date = %q{2011-11-02}
13
13
  s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
14
14
  s.email = %q{aaron.windsor@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -42,6 +42,7 @@ Gem::Specification.new do |s|
42
42
  "spec/models/multi_field_artist.rb",
43
43
  "spec/models/multi_field_artwork.rb",
44
44
  "spec/models/partitioned_artist.rb",
45
+ "spec/models/stopwords_artwork.rb",
45
46
  "spec/mongoid/fulltext_spec.rb",
46
47
  "spec/spec_helper.rb"
47
48
  ]
@@ -66,6 +67,7 @@ Gem::Specification.new do |s|
66
67
  "spec/models/multi_field_artist.rb",
67
68
  "spec/models/multi_field_artwork.rb",
68
69
  "spec/models/partitioned_artist.rb",
70
+ "spec/models/stopwords_artwork.rb",
69
71
  "spec/mongoid/fulltext_spec.rb",
70
72
  "spec/spec_helper.rb"
71
73
  ]
@@ -0,0 +1,10 @@
1
+ class StopwordsArtwork
2
+ include Mongoid::Document
3
+ include Mongoid::FullTextSearch
4
+
5
+ field :title
6
+ fulltext_search_in :title,
7
+ :index_full_words => true,
8
+ :stop_words => { 'and' => true }
9
+
10
+ end
@@ -450,6 +450,16 @@ module Mongoid
450
450
 
451
451
  end
452
452
 
453
+ context "with stop words defined" do
454
+ let!(:flowers) { StopwordsArtwork.create(:title => "Flowers by Andy Warhol") }
455
+ let!(:many_ands) { StopwordsArtwork.create(:title => "Foo and bar and baz and foobar") }
456
+
457
+ it "doesn't give a full-word score boost to stopwords" do
458
+ StopwordsArtwork.fulltext_search("andy").map{ |a| a.title }.should == [flowers.title, many_ands.title]
459
+ StopwordsArtwork.fulltext_search("warhol and other stuff").map{ |a| a.title }.should == [flowers.title, many_ands.title]
460
+ end
461
+ end
462
+
453
463
  context "remove_from_ngram_index" do
454
464
  let!(:flowers1) { BasicArtwork.create(:title => 'Flowers 1') }
455
465
  let!(:flowers2) { BasicArtwork.create(:title => 'Flowers 1') }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mongoid_fulltext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-11 00:00:00.000000000 -04:00
12
+ date: 2011-11-02 00:00:00.000000000 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: unicode_utils
17
- requirement: &79126690 !ruby/object:Gem::Requirement
17
+ requirement: &87323870 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ~>
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: 1.0.0
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *79126690
25
+ version_requirements: *87323870
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: mongoid
28
- requirement: &79116990 !ruby/object:Gem::Requirement
28
+ requirement: &87323630 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ~>
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 2.0.0
34
34
  type: :development
35
35
  prerelease: false
36
- version_requirements: *79116990
36
+ version_requirements: *87323630
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: bson_ext
39
- requirement: &79116510 !ruby/object:Gem::Requirement
39
+ requirement: &87323390 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ~>
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: 1.3.0
45
45
  type: :development
46
46
  prerelease: false
47
- version_requirements: *79116510
47
+ version_requirements: *87323390
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: rspec
50
- requirement: &79116050 !ruby/object:Gem::Requirement
50
+ requirement: &87323150 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ~>
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 2.5.0
56
56
  type: :development
57
57
  prerelease: false
58
- version_requirements: *79116050
58
+ version_requirements: *87323150
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: jeweler
61
- requirement: &79115620 !ruby/object:Gem::Requirement
61
+ requirement: &87322910 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ~>
@@ -66,7 +66,7 @@ dependencies:
66
66
  version: 1.5.2
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *79115620
69
+ version_requirements: *87322910
70
70
  description: Full-text search for the Mongoid ORM, using n-grams extracted from text
71
71
  email: aaron.windsor@gmail.com
72
72
  executables: []
@@ -100,6 +100,7 @@ files:
100
100
  - spec/models/multi_field_artist.rb
101
101
  - spec/models/multi_field_artwork.rb
102
102
  - spec/models/partitioned_artist.rb
103
+ - spec/models/stopwords_artwork.rb
103
104
  - spec/mongoid/fulltext_spec.rb
104
105
  - spec/spec_helper.rb
105
106
  has_rdoc: true
@@ -118,7 +119,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
118
119
  version: '0'
119
120
  segments:
120
121
  - 0
121
- hash: -203766451
122
+ hash: 136864689
122
123
  required_rubygems_version: !ruby/object:Gem::Requirement
123
124
  none: false
124
125
  requirements:
@@ -147,5 +148,6 @@ test_files:
147
148
  - spec/models/multi_field_artist.rb
148
149
  - spec/models/multi_field_artwork.rb
149
150
  - spec/models/partitioned_artist.rb
151
+ - spec/models/stopwords_artwork.rb
150
152
  - spec/mongoid/fulltext_spec.rb
151
153
  - spec/spec_helper.rb