mongoid_fulltext 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -4
- data/README.md +6 -0
- data/VERSION +1 -1
- data/lib/mongoid_fulltext.rb +14 -2
- data/mongoid_fulltext.gemspec +12 -2
- data/spec/models/accentless_artwork.rb +7 -0
- data/spec/models/hidden_dragon.rb +6 -0
- data/spec/mongoid/fulltext_spec.rb +21 -0
- metadata +35 -9
data/Gemfile
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
|
-
|
3
|
-
|
4
|
-
# gem "activesupport", ">= 2.3.5"
|
2
|
+
|
3
|
+
gem "unicode_utils", "~> 1.0.0"
|
5
4
|
|
6
5
|
# Add dependencies to develop your gem here.
|
7
6
|
# Include everything needed to run rake, tests, features, etc.
|
8
7
|
group :development do
|
9
8
|
gem "mongoid", "~> 2.0.0"
|
10
|
-
gem
|
9
|
+
gem "bson_ext", "~> 1.3.0"
|
10
|
+
gem "rspec", "~> 2.5.0"
|
11
11
|
gem "jeweler", "~> 1.5.2"
|
12
12
|
end
|
data/README.md
CHANGED
@@ -211,6 +211,12 @@ Additional indexing/query options can be used as parameters to `fulltext_search_
|
|
211
211
|
Defaults to 1000. If you're seeing poor results, you can try increasing this value to consider
|
212
212
|
more ngrams per query (changing this parameter does not require a re-index.) The amount of time
|
213
213
|
a search takes is directly proportional to this parameter's value.
|
214
|
+
* `remove_accents`: whether to remove accents from accented characters. Defaults to true. If a string
|
215
|
+
is encoded in UTF-8, we strip the accents using NFKD normalization (via an external library,
|
216
|
+
`unicode_utils`). If a string is encoded in ASCII-8BIT, we assume it has been passed via a
|
217
|
+
URL; for instance, we might have "%C3%A9", which is how an "e-acute" ("é") gets passed
|
218
|
+
through a web browser. These are then changed to their UTF-8 equivalents (via the `CGI` library)
|
219
|
+
and then finally stripped, as before.
|
214
220
|
|
215
221
|
Array filters
|
216
222
|
-------------
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.
|
1
|
+
0.4.4
|
data/lib/mongoid_fulltext.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'mongoid_indexes'
|
2
|
+
require 'unicode_utils'
|
2
3
|
|
3
4
|
module Mongoid::FullTextSearch
|
4
5
|
extend ActiveSupport::Concern
|
@@ -27,7 +28,8 @@ module Mongoid::FullTextSearch
|
|
27
28
|
:max_ngrams_to_search => 6,
|
28
29
|
:apply_prefix_scoring_to_all_words => true,
|
29
30
|
:index_full_words => true,
|
30
|
-
:max_candidate_set_size => 1000
|
31
|
+
:max_candidate_set_size => 1000,
|
32
|
+
:remove_accents => true
|
31
33
|
}
|
32
34
|
|
33
35
|
config.update(options)
|
@@ -43,6 +45,7 @@ module Mongoid::FullTextSearch
|
|
43
45
|
end
|
44
46
|
|
45
47
|
def create_fulltext_indexes
|
48
|
+
return unless self.mongoid_fulltext_config
|
46
49
|
self.mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
|
47
50
|
fulltext_search_ensure_indexes(index_name, fulltext_config)
|
48
51
|
end
|
@@ -169,7 +172,16 @@ module Mongoid::FullTextSearch
|
|
169
172
|
# returns an [ngram, score] [ngram, position] pair
|
170
173
|
def all_ngrams(str, config, bound_number_returned = true)
|
171
174
|
return {} if str.nil? or str.length < config[:ngram_width]
|
172
|
-
|
175
|
+
|
176
|
+
filtered_str = String.new(str)
|
177
|
+
if config[:remove_accents]
|
178
|
+
if str.encoding.name == "ASCII-8BIT"
|
179
|
+
filtered_str = CGI.unescape(filtered_str)
|
180
|
+
end
|
181
|
+
filtered_str = UnicodeUtils.nfkd(filtered_str).gsub(/[^\x00-\x7F]/,'')
|
182
|
+
end
|
183
|
+
|
184
|
+
filtered_str = filtered_str.downcase.split('').map{ |ch| config[:alphabet][ch] }.compact.join('')
|
173
185
|
|
174
186
|
if bound_number_returned
|
175
187
|
step_size = [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max
|
data/mongoid_fulltext.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{mongoid_fulltext}
|
8
|
-
s.version = "0.4.
|
8
|
+
s.version = "0.4.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Aaron Windsor"]
|
12
|
-
s.date = %q{2011-08-
|
12
|
+
s.date = %q{2011-08-31}
|
13
13
|
s.description = %q{Full-text search for the Mongoid ORM, using n-grams extracted from text}
|
14
14
|
s.email = %q{aaron.windsor@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -27,6 +27,7 @@ Gem::Specification.new do |s|
|
|
27
27
|
"lib/mongoid_fulltext.rb",
|
28
28
|
"lib/mongoid_indexes.rb",
|
29
29
|
"mongoid_fulltext.gemspec",
|
30
|
+
"spec/models/accentless_artwork.rb",
|
30
31
|
"spec/models/advanced_artwork.rb",
|
31
32
|
"spec/models/basic_artwork.rb",
|
32
33
|
"spec/models/external_artist.rb",
|
@@ -36,6 +37,7 @@ Gem::Specification.new do |s|
|
|
36
37
|
"spec/models/filtered_artwork.rb",
|
37
38
|
"spec/models/filtered_other.rb",
|
38
39
|
"spec/models/gallery/basic_artwork.rb",
|
40
|
+
"spec/models/hidden_dragon.rb",
|
39
41
|
"spec/models/multi_external_artwork.rb",
|
40
42
|
"spec/models/multi_field_artist.rb",
|
41
43
|
"spec/models/multi_field_artwork.rb",
|
@@ -49,6 +51,7 @@ Gem::Specification.new do |s|
|
|
49
51
|
s.rubygems_version = %q{1.6.2}
|
50
52
|
s.summary = %q{Full-text search for the Mongoid ORM}
|
51
53
|
s.test_files = [
|
54
|
+
"spec/models/accentless_artwork.rb",
|
52
55
|
"spec/models/advanced_artwork.rb",
|
53
56
|
"spec/models/basic_artwork.rb",
|
54
57
|
"spec/models/external_artist.rb",
|
@@ -58,6 +61,7 @@ Gem::Specification.new do |s|
|
|
58
61
|
"spec/models/filtered_artwork.rb",
|
59
62
|
"spec/models/filtered_other.rb",
|
60
63
|
"spec/models/gallery/basic_artwork.rb",
|
64
|
+
"spec/models/hidden_dragon.rb",
|
61
65
|
"spec/models/multi_external_artwork.rb",
|
62
66
|
"spec/models/multi_field_artist.rb",
|
63
67
|
"spec/models/multi_field_artwork.rb",
|
@@ -70,16 +74,22 @@ Gem::Specification.new do |s|
|
|
70
74
|
s.specification_version = 3
|
71
75
|
|
72
76
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
77
|
+
s.add_runtime_dependency(%q<unicode_utils>, ["~> 1.0.0"])
|
73
78
|
s.add_development_dependency(%q<mongoid>, ["~> 2.0.0"])
|
79
|
+
s.add_development_dependency(%q<bson_ext>, ["~> 1.3.0"])
|
74
80
|
s.add_development_dependency(%q<rspec>, ["~> 2.5.0"])
|
75
81
|
s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
|
76
82
|
else
|
83
|
+
s.add_dependency(%q<unicode_utils>, ["~> 1.0.0"])
|
77
84
|
s.add_dependency(%q<mongoid>, ["~> 2.0.0"])
|
85
|
+
s.add_dependency(%q<bson_ext>, ["~> 1.3.0"])
|
78
86
|
s.add_dependency(%q<rspec>, ["~> 2.5.0"])
|
79
87
|
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
80
88
|
end
|
81
89
|
else
|
90
|
+
s.add_dependency(%q<unicode_utils>, ["~> 1.0.0"])
|
82
91
|
s.add_dependency(%q<mongoid>, ["~> 2.0.0"])
|
92
|
+
s.add_dependency(%q<bson_ext>, ["~> 1.3.0"])
|
83
93
|
s.add_dependency(%q<rspec>, ["~> 2.5.0"])
|
84
94
|
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
85
95
|
end
|
@@ -6,6 +6,8 @@ module Mongoid
|
|
6
6
|
context "with several config options defined" do
|
7
7
|
|
8
8
|
let!(:abcdef) { AdvancedArtwork.create(:title => 'abcdefg hijklmn') }
|
9
|
+
let!(:cesar) { AccentlessArtwork.create(:title => "C\u00e9sar Galicia") }
|
10
|
+
let!(:julio) { AccentlessArtwork.create(:title => "Julio Cesar Morales") }
|
9
11
|
|
10
12
|
it "should recognize all options" do
|
11
13
|
# AdvancedArtwork is defined with an ngram_width of 4 and a different alphabet (abcdefg)
|
@@ -13,6 +15,9 @@ module Mongoid
|
|
13
15
|
AdvancedArtwork.fulltext_search('abcd').first.should == abcdef
|
14
16
|
AdvancedArtwork.fulltext_search('defg').first.should == abcdef
|
15
17
|
AdvancedArtwork.fulltext_search('hijklmn').should == []
|
18
|
+
# AccentlessArtwork is just like BasicArtwork, except that we set :remove_accents to false,
|
19
|
+
# so this behaves like the ``old'' version of fulltext_search
|
20
|
+
AccentlessArtwork.fulltext_search("cesar").first.should == julio
|
16
21
|
end
|
17
22
|
|
18
23
|
end
|
@@ -24,6 +29,17 @@ module Mongoid
|
|
24
29
|
let!(:lowered) { BasicArtwork.create(:title => 'Lowered') }
|
25
30
|
let!(:cookies) { BasicArtwork.create(:title => 'Cookies') }
|
26
31
|
let!(:empty) { BasicArtwork.create(:title => '') }
|
32
|
+
let!(:cesar) { BasicArtwork.create(:title => "C\u00e9sar Galicia") }
|
33
|
+
let!(:julio) { BasicArtwork.create(:title => "Julio Cesar Morales") }
|
34
|
+
let!(:csar) { BasicArtwork.create(:title => "Csar") }
|
35
|
+
|
36
|
+
it "forgets accents" do
|
37
|
+
BasicArtwork.fulltext_search('cesar', :max_results => 1).first.should == cesar
|
38
|
+
BasicArtwork.fulltext_search('cesar g', :max_results => 1).first.should == cesar
|
39
|
+
BasicArtwork.fulltext_search("C\u00e9sar", :max_results => 1).first.should == cesar
|
40
|
+
BasicArtwork.fulltext_search("C\303\251sar", :max_results => 1).first.should == cesar
|
41
|
+
BasicArtwork.fulltext_search("c%C3%A9sar".encode("ASCII-8BIT"), :max_results => 1).first.should == cesar
|
42
|
+
end
|
27
43
|
|
28
44
|
it "returns exact matches" do
|
29
45
|
BasicArtwork.fulltext_search('Flower Myth', :max_results => 1).first.should == flower_myth
|
@@ -443,6 +459,7 @@ module Mongoid
|
|
443
459
|
end
|
444
460
|
|
445
461
|
context "mongoid indexes" do
|
462
|
+
|
446
463
|
it "can re-create dropped indexes" do
|
447
464
|
# there're no indexes by default as Mongoid.autocreate_indexes is set to false
|
448
465
|
# but mongo will automatically attempt to index _id in the background
|
@@ -471,6 +488,10 @@ module Mongoid
|
|
471
488
|
}
|
472
489
|
end
|
473
490
|
|
491
|
+
it "doesn't fail on models that don't have a fulltext index" do
|
492
|
+
lambda { HiddenDragon.create_indexes }.should_not raise_error
|
493
|
+
end
|
494
|
+
|
474
495
|
end
|
475
496
|
|
476
497
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mongoid_fulltext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,23 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-08-
|
12
|
+
date: 2011-08-31 00:00:00.000000000 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: unicode_utils
|
17
|
+
requirement: &82521100 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ~>
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.0.0
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *82521100
|
15
26
|
- !ruby/object:Gem::Dependency
|
16
27
|
name: mongoid
|
17
|
-
requirement: &
|
28
|
+
requirement: &82510440 !ruby/object:Gem::Requirement
|
18
29
|
none: false
|
19
30
|
requirements:
|
20
31
|
- - ~>
|
@@ -22,10 +33,21 @@ dependencies:
|
|
22
33
|
version: 2.0.0
|
23
34
|
type: :development
|
24
35
|
prerelease: false
|
25
|
-
version_requirements: *
|
36
|
+
version_requirements: *82510440
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: bson_ext
|
39
|
+
requirement: &82510030 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ~>
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: 1.3.0
|
45
|
+
type: :development
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *82510030
|
26
48
|
- !ruby/object:Gem::Dependency
|
27
49
|
name: rspec
|
28
|
-
requirement: &
|
50
|
+
requirement: &82509570 !ruby/object:Gem::Requirement
|
29
51
|
none: false
|
30
52
|
requirements:
|
31
53
|
- - ~>
|
@@ -33,10 +55,10 @@ dependencies:
|
|
33
55
|
version: 2.5.0
|
34
56
|
type: :development
|
35
57
|
prerelease: false
|
36
|
-
version_requirements: *
|
58
|
+
version_requirements: *82509570
|
37
59
|
- !ruby/object:Gem::Dependency
|
38
60
|
name: jeweler
|
39
|
-
requirement: &
|
61
|
+
requirement: &82509170 !ruby/object:Gem::Requirement
|
40
62
|
none: false
|
41
63
|
requirements:
|
42
64
|
- - ~>
|
@@ -44,7 +66,7 @@ dependencies:
|
|
44
66
|
version: 1.5.2
|
45
67
|
type: :development
|
46
68
|
prerelease: false
|
47
|
-
version_requirements: *
|
69
|
+
version_requirements: *82509170
|
48
70
|
description: Full-text search for the Mongoid ORM, using n-grams extracted from text
|
49
71
|
email: aaron.windsor@gmail.com
|
50
72
|
executables: []
|
@@ -63,6 +85,7 @@ files:
|
|
63
85
|
- lib/mongoid_fulltext.rb
|
64
86
|
- lib/mongoid_indexes.rb
|
65
87
|
- mongoid_fulltext.gemspec
|
88
|
+
- spec/models/accentless_artwork.rb
|
66
89
|
- spec/models/advanced_artwork.rb
|
67
90
|
- spec/models/basic_artwork.rb
|
68
91
|
- spec/models/external_artist.rb
|
@@ -72,6 +95,7 @@ files:
|
|
72
95
|
- spec/models/filtered_artwork.rb
|
73
96
|
- spec/models/filtered_other.rb
|
74
97
|
- spec/models/gallery/basic_artwork.rb
|
98
|
+
- spec/models/hidden_dragon.rb
|
75
99
|
- spec/models/multi_external_artwork.rb
|
76
100
|
- spec/models/multi_field_artist.rb
|
77
101
|
- spec/models/multi_field_artwork.rb
|
@@ -94,7 +118,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
94
118
|
version: '0'
|
95
119
|
segments:
|
96
120
|
- 0
|
97
|
-
hash:
|
121
|
+
hash: 518227181
|
98
122
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
123
|
none: false
|
100
124
|
requirements:
|
@@ -108,6 +132,7 @@ signing_key:
|
|
108
132
|
specification_version: 3
|
109
133
|
summary: Full-text search for the Mongoid ORM
|
110
134
|
test_files:
|
135
|
+
- spec/models/accentless_artwork.rb
|
111
136
|
- spec/models/advanced_artwork.rb
|
112
137
|
- spec/models/basic_artwork.rb
|
113
138
|
- spec/models/external_artist.rb
|
@@ -117,6 +142,7 @@ test_files:
|
|
117
142
|
- spec/models/filtered_artwork.rb
|
118
143
|
- spec/models/filtered_other.rb
|
119
144
|
- spec/models/gallery/basic_artwork.rb
|
145
|
+
- spec/models/hidden_dragon.rb
|
120
146
|
- spec/models/multi_external_artwork.rb
|
121
147
|
- spec/models/multi_field_artist.rb
|
122
148
|
- spec/models/multi_field_artwork.rb
|