mongoid_fulltext 0.4.3 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -4
- data/README.md +6 -0
- data/VERSION +1 -1
- data/lib/mongoid_fulltext.rb +14 -2
- data/mongoid_fulltext.gemspec +12 -2
- data/spec/models/accentless_artwork.rb +7 -0
- data/spec/models/hidden_dragon.rb +6 -0
- data/spec/mongoid/fulltext_spec.rb +21 -0
- metadata +35 -9
data/Gemfile
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
|
-
|
3
|
-
|
4
|
-
# gem "activesupport", ">= 2.3.5"
|
2
|
+
|
3
|
+
gem "unicode_utils", "~> 1.0.0"
|
5
4
|
|
6
5
|
# Add dependencies to develop your gem here.
|
7
6
|
# Include everything needed to run rake, tests, features, etc.
|
8
7
|
group :development do
|
9
8
|
gem "mongoid", "~> 2.0.0"
|
10
|
-
gem
|
9
|
+
gem "bson_ext", "~> 1.3.0"
|
10
|
+
gem "rspec", "~> 2.5.0"
|
11
11
|
gem "jeweler", "~> 1.5.2"
|
12
12
|
end
|
data/README.md
CHANGED
@@ -211,6 +211,12 @@ Additional indexing/query options can be used as parameters to `fulltext_search_
|
|
211
211
|
Defaults to 1000. If you're seeing poor results, you can try increasing this value to consider
|
212
212
|
more ngrams per query (changing this parameter does not require a re-index.) The amount of time
|
213
213
|
a search takes is directly proportional to this parameter's value.
|
214
|
+
* `remove_accents`: remove accents on accented characters or not. Defaults to true. If a string
|
215
|
+
is encoded in UTF-8, we strip the accents using NFKD normalization (via an external library,
|
216
|
+
`unicode_utils`). If a string is encoded in ASCII-8BIT, we assume it has been passed via a
|
217
|
+
URL; for instance, we might have "%C3%A9", which is how an "e-acute" ("é") gets passed
|
218
|
+
through a web-browser. These are then changed to their UTF-8 equivalents (via the `CGI` gem)
|
219
|
+
and then finally stripped, as before.
|
214
220
|
|
215
221
|
Array filters
|
216
222
|
-------------
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.3
|
1
|
+
0.4.4
|
data/lib/mongoid_fulltext.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'mongoid_indexes'
|
2
|
+
require 'unicode_utils'
|
2
3
|
|
3
4
|
module Mongoid::FullTextSearch
|
4
5
|
extend ActiveSupport::Concern
|
@@ -27,7 +28,8 @@ module Mongoid::FullTextSearch
|
|
27
28
|
:max_ngrams_to_search => 6,
|
28
29
|
:apply_prefix_scoring_to_all_words => true,
|
29
30
|
:index_full_words => true,
|
30
|
-
:max_candidate_set_size => 1000
|
31
|
+
:max_candidate_set_size => 1000,
|
32
|
+
:remove_accents => true
|
31
33
|
}
|
32
34
|
|
33
35
|
config.update(options)
|
@@ -43,6 +45,7 @@ module Mongoid::FullTextSearch
|
|
43
45
|
end
|
44
46
|
|
45
47
|
def create_fulltext_indexes
|
48
|
+
return unless self.mongoid_fulltext_config
|
46
49
|
self.mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
|
47
50
|
fulltext_search_ensure_indexes(index_name, fulltext_config)
|
48
51
|
end
|
@@ -169,7 +172,16 @@ module Mongoid::FullTextSearch
|
|
169
172
|
# returns an [ngram, score] [ngram, position] pair
|
170
173
|
def all_ngrams(str, config, bound_number_returned = true)
|
171
174
|
return {} if str.nil? or str.length < config[:ngram_width]
|
172
|
-
|
175
|
+
|
176
|
+
filtered_str = String.new(str)
|
177
|
+
if config[:remove_accents]
|
178
|
+
if str.encoding.name == "ASCII-8BIT"
|
179
|
+
filtered_str = CGI.unescape(filtered_str)
|
180
|
+
end
|
181
|
+
filtered_str = UnicodeUtils.nfkd(filtered_str).gsub(/[^\x00-\x7F]/,'')
|
182
|
+
end
|
183
|
+
|
184
|
+
filtered_str = filtered_str.downcase.split('').map{ |ch| config[:alphabet][ch] }.compact.join('')
|
173
185
|
|
174
186
|
if bound_number_returned
|
175
187
|
step_size = [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max
|
data/mongoid_fulltext.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{mongoid_fulltext}
|
8
|
-
s.version = "0.4.3"
|
8
|
+
s.version = "0.4.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Aaron Windsor"]
|
12
|
-
s.date = %q{2011-08-
|
12
|
+
s.date = %q{2011-08-31}
|
13
13
|
s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
|
14
14
|
s.email = %q{aaron.windsor@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -27,6 +27,7 @@ Gem::Specification.new do |s|
|
|
27
27
|
"lib/mongoid_fulltext.rb",
|
28
28
|
"lib/mongoid_indexes.rb",
|
29
29
|
"mongoid_fulltext.gemspec",
|
30
|
+
"spec/models/accentless_artwork.rb",
|
30
31
|
"spec/models/advanced_artwork.rb",
|
31
32
|
"spec/models/basic_artwork.rb",
|
32
33
|
"spec/models/external_artist.rb",
|
@@ -36,6 +37,7 @@ Gem::Specification.new do |s|
|
|
36
37
|
"spec/models/filtered_artwork.rb",
|
37
38
|
"spec/models/filtered_other.rb",
|
38
39
|
"spec/models/gallery/basic_artwork.rb",
|
40
|
+
"spec/models/hidden_dragon.rb",
|
39
41
|
"spec/models/multi_external_artwork.rb",
|
40
42
|
"spec/models/multi_field_artist.rb",
|
41
43
|
"spec/models/multi_field_artwork.rb",
|
@@ -49,6 +51,7 @@ Gem::Specification.new do |s|
|
|
49
51
|
s.rubygems_version = %q{1.6.2}
|
50
52
|
s.summary = %q{Full-text search for the Mongoid ORM}
|
51
53
|
s.test_files = [
|
54
|
+
"spec/models/accentless_artwork.rb",
|
52
55
|
"spec/models/advanced_artwork.rb",
|
53
56
|
"spec/models/basic_artwork.rb",
|
54
57
|
"spec/models/external_artist.rb",
|
@@ -58,6 +61,7 @@ Gem::Specification.new do |s|
|
|
58
61
|
"spec/models/filtered_artwork.rb",
|
59
62
|
"spec/models/filtered_other.rb",
|
60
63
|
"spec/models/gallery/basic_artwork.rb",
|
64
|
+
"spec/models/hidden_dragon.rb",
|
61
65
|
"spec/models/multi_external_artwork.rb",
|
62
66
|
"spec/models/multi_field_artist.rb",
|
63
67
|
"spec/models/multi_field_artwork.rb",
|
@@ -70,16 +74,22 @@ Gem::Specification.new do |s|
|
|
70
74
|
s.specification_version = 3
|
71
75
|
|
72
76
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
77
|
+
s.add_runtime_dependency(%q<unicode_utils>, ["~> 1.0.0"])
|
73
78
|
s.add_development_dependency(%q<mongoid>, ["~> 2.0.0"])
|
79
|
+
s.add_development_dependency(%q<bson_ext>, ["~> 1.3.0"])
|
74
80
|
s.add_development_dependency(%q<rspec>, ["~> 2.5.0"])
|
75
81
|
s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
|
76
82
|
else
|
83
|
+
s.add_dependency(%q<unicode_utils>, ["~> 1.0.0"])
|
77
84
|
s.add_dependency(%q<mongoid>, ["~> 2.0.0"])
|
85
|
+
s.add_dependency(%q<bson_ext>, ["~> 1.3.0"])
|
78
86
|
s.add_dependency(%q<rspec>, ["~> 2.5.0"])
|
79
87
|
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
80
88
|
end
|
81
89
|
else
|
90
|
+
s.add_dependency(%q<unicode_utils>, ["~> 1.0.0"])
|
82
91
|
s.add_dependency(%q<mongoid>, ["~> 2.0.0"])
|
92
|
+
s.add_dependency(%q<bson_ext>, ["~> 1.3.0"])
|
83
93
|
s.add_dependency(%q<rspec>, ["~> 2.5.0"])
|
84
94
|
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
85
95
|
end
|
@@ -6,6 +6,8 @@ module Mongoid
|
|
6
6
|
context "with several config options defined" do
|
7
7
|
|
8
8
|
let!(:abcdef) { AdvancedArtwork.create(:title => 'abcdefg hijklmn') }
|
9
|
+
let!(:cesar) { AccentlessArtwork.create(:title => "C\u00e9sar Galicia") }
|
10
|
+
let!(:julio) { AccentlessArtwork.create(:title => "Julio Cesar Morales") }
|
9
11
|
|
10
12
|
it "should recognize all options" do
|
11
13
|
# AdvancedArtwork is defined with an ngram_width of 4 and a different alphabet (abcdefg)
|
@@ -13,6 +15,9 @@ module Mongoid
|
|
13
15
|
AdvancedArtwork.fulltext_search('abcd').first.should == abcdef
|
14
16
|
AdvancedArtwork.fulltext_search('defg').first.should == abcdef
|
15
17
|
AdvancedArtwork.fulltext_search('hijklmn').should == []
|
18
|
+
# AccentlessArtwork is just like BasicArtwork, except that we set :remove_accents to false,
|
19
|
+
# so this behaves like the ``old'' version of fulltext_search
|
20
|
+
AccentlessArtwork.fulltext_search("cesar").first.should == julio
|
16
21
|
end
|
17
22
|
|
18
23
|
end
|
@@ -24,6 +29,17 @@ module Mongoid
|
|
24
29
|
let!(:lowered) { BasicArtwork.create(:title => 'Lowered') }
|
25
30
|
let!(:cookies) { BasicArtwork.create(:title => 'Cookies') }
|
26
31
|
let!(:empty) { BasicArtwork.create(:title => '') }
|
32
|
+
let!(:cesar) { BasicArtwork.create(:title => "C\u00e9sar Galicia") }
|
33
|
+
let!(:julio) { BasicArtwork.create(:title => "Julio Cesar Morales") }
|
34
|
+
let!(:csar) { BasicArtwork.create(:title => "Csar") }
|
35
|
+
|
36
|
+
it "forgets accents" do
|
37
|
+
BasicArtwork.fulltext_search('cesar', :max_results => 1).first.should == cesar
|
38
|
+
BasicArtwork.fulltext_search('cesar g', :max_results => 1).first.should == cesar
|
39
|
+
BasicArtwork.fulltext_search("C\u00e9sar", :max_results => 1).first.should == cesar
|
40
|
+
BasicArtwork.fulltext_search("C\303\251sar", :max_results => 1).first.should == cesar
|
41
|
+
BasicArtwork.fulltext_search("c%C3%A9sar".encode("ASCII-8BIT"), :max_results => 1).first.should == cesar
|
42
|
+
end
|
27
43
|
|
28
44
|
it "returns exact matches" do
|
29
45
|
BasicArtwork.fulltext_search('Flower Myth', :max_results => 1).first.should == flower_myth
|
@@ -443,6 +459,7 @@ module Mongoid
|
|
443
459
|
end
|
444
460
|
|
445
461
|
context "mongoid indexes" do
|
462
|
+
|
446
463
|
it "can re-create dropped indexes" do
|
447
464
|
# there're no indexes by default as Mongoid.autocreate_indexes is set to false
|
448
465
|
# but mongo will automatically attempt to index _id in the background
|
@@ -471,6 +488,10 @@ module Mongoid
|
|
471
488
|
}
|
472
489
|
end
|
473
490
|
|
491
|
+
it "doesn't fail on models that don't have a fulltext index" do
|
492
|
+
lambda { HiddenDragon.create_indexes }.should_not raise_error
|
493
|
+
end
|
494
|
+
|
474
495
|
end
|
475
496
|
|
476
497
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mongoid_fulltext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.3
|
4
|
+
version: 0.4.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,23 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-08-
|
12
|
+
date: 2011-08-31 00:00:00.000000000 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: unicode_utils
|
17
|
+
requirement: &82521100 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ~>
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.0.0
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *82521100
|
15
26
|
- !ruby/object:Gem::Dependency
|
16
27
|
name: mongoid
|
17
|
-
requirement: &
|
28
|
+
requirement: &82510440 !ruby/object:Gem::Requirement
|
18
29
|
none: false
|
19
30
|
requirements:
|
20
31
|
- - ~>
|
@@ -22,10 +33,21 @@ dependencies:
|
|
22
33
|
version: 2.0.0
|
23
34
|
type: :development
|
24
35
|
prerelease: false
|
25
|
-
version_requirements: *
|
36
|
+
version_requirements: *82510440
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: bson_ext
|
39
|
+
requirement: &82510030 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ~>
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: 1.3.0
|
45
|
+
type: :development
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *82510030
|
26
48
|
- !ruby/object:Gem::Dependency
|
27
49
|
name: rspec
|
28
|
-
requirement: &
|
50
|
+
requirement: &82509570 !ruby/object:Gem::Requirement
|
29
51
|
none: false
|
30
52
|
requirements:
|
31
53
|
- - ~>
|
@@ -33,10 +55,10 @@ dependencies:
|
|
33
55
|
version: 2.5.0
|
34
56
|
type: :development
|
35
57
|
prerelease: false
|
36
|
-
version_requirements: *
|
58
|
+
version_requirements: *82509570
|
37
59
|
- !ruby/object:Gem::Dependency
|
38
60
|
name: jeweler
|
39
|
-
requirement: &
|
61
|
+
requirement: &82509170 !ruby/object:Gem::Requirement
|
40
62
|
none: false
|
41
63
|
requirements:
|
42
64
|
- - ~>
|
@@ -44,7 +66,7 @@ dependencies:
|
|
44
66
|
version: 1.5.2
|
45
67
|
type: :development
|
46
68
|
prerelease: false
|
47
|
-
version_requirements: *
|
69
|
+
version_requirements: *82509170
|
48
70
|
description: Full-text search for the Mongoid ORM, using n-grams extracted from text
|
49
71
|
email: aaron.windsor@gmail.com
|
50
72
|
executables: []
|
@@ -63,6 +85,7 @@ files:
|
|
63
85
|
- lib/mongoid_fulltext.rb
|
64
86
|
- lib/mongoid_indexes.rb
|
65
87
|
- mongoid_fulltext.gemspec
|
88
|
+
- spec/models/accentless_artwork.rb
|
66
89
|
- spec/models/advanced_artwork.rb
|
67
90
|
- spec/models/basic_artwork.rb
|
68
91
|
- spec/models/external_artist.rb
|
@@ -72,6 +95,7 @@ files:
|
|
72
95
|
- spec/models/filtered_artwork.rb
|
73
96
|
- spec/models/filtered_other.rb
|
74
97
|
- spec/models/gallery/basic_artwork.rb
|
98
|
+
- spec/models/hidden_dragon.rb
|
75
99
|
- spec/models/multi_external_artwork.rb
|
76
100
|
- spec/models/multi_field_artist.rb
|
77
101
|
- spec/models/multi_field_artwork.rb
|
@@ -94,7 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
94
118
|
version: '0'
|
95
119
|
segments:
|
96
120
|
- 0
|
97
|
-
hash:
|
121
|
+
hash: 518227181
|
98
122
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
123
|
none: false
|
100
124
|
requirements:
|
@@ -108,6 +132,7 @@ signing_key:
|
|
108
132
|
specification_version: 3
|
109
133
|
summary: Full-text search for the Mongoid ORM
|
110
134
|
test_files:
|
135
|
+
- spec/models/accentless_artwork.rb
|
111
136
|
- spec/models/advanced_artwork.rb
|
112
137
|
- spec/models/basic_artwork.rb
|
113
138
|
- spec/models/external_artist.rb
|
@@ -117,6 +142,7 @@ test_files:
|
|
117
142
|
- spec/models/filtered_artwork.rb
|
118
143
|
- spec/models/filtered_other.rb
|
119
144
|
- spec/models/gallery/basic_artwork.rb
|
145
|
+
- spec/models/hidden_dragon.rb
|
120
146
|
- spec/models/multi_external_artwork.rb
|
121
147
|
- spec/models/multi_field_artist.rb
|
122
148
|
- spec/models/multi_field_artwork.rb
|