mongoid-fts 1.1.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,322 @@
1
+ module Mongoid
2
+ module FTS
3
+ module Util
4
+ #
5
# Lists the model classes that back the full-text index. At present that is
# only Mongoid::FTS::Index.
def fts_models
  index_model = Mongoid::FTS::Index
  [index_model]
end
10
+
11
# Rebuilds the search index from scratch.
#
# Runs Mongoid::FTS.setup! (with warnings on), then for every model in
# fts_models: destroys its documents, drops its indexes, drops its collection
# and re-creates its indexes.
#
# The three drop/create steps are best-effort: a StandardError raised by one
# step is ignored so the remaining steps still run. Only StandardError is
# swallowed, so Interrupt, SystemExit and similar still propagate.
#
# Returns the array produced by fts_models.
def reset!
  Mongoid::FTS.setup!(:warn => true)

  fts_models.each do |model|
    model.destroy_all

    steps = [
      lambda { model.collection.indexes.drop },
      lambda { model.collection.drop },
      lambda { model.create_indexes }
    ]

    steps.each do |step|
      begin
        step.call
      rescue StandardError
        # deliberately ignored: each step is independent and best-effort
      end
    end
  end
end
33
+
34
# Calls create_indexes on every model listed by fts_models.
def create_indexes
  fts_models.each(&:create_indexes)
end
37
+
38
# Calls destroy_all on every model listed by fts_models and returns the
# per-model results as an array.
def destroy_all
  fts_models.map(&:destroy_all)
end
41
+
42
+ #
43
# Loads documents for several models in one call.
#
# queries - Hash mapping a model class (or its name as a String/Symbol,
#           optionally namespaced with '::') to one id or an array of ids.
#
# For each model the whole id list is fetched with a single find. If that
# raises Mongoid::Errors::DocumentNotFound, the ids are retried one at a time
# and the missing ones are skipped.
#
# Class names are resolved by walking constants from Object rather than with
# eval, so a name that is not a plain constant path raises NameError instead
# of being executed as Ruby code.
#
# Returns a flat array of the documents found, with no nils.
def find_in_batches(queries = {})
  found =
    queries.map do |model_class, model_ids|
      unless model_class.is_a?(Class)
        model_class =
          model_class.to_s.split('::').reject(&:empty?).inject(Object) do |scope, name|
            scope.const_get(name)
          end
      end

      model_ids = Array(model_ids)

      begin
        model_class.find(model_ids)
      rescue Mongoid::Errors::DocumentNotFound
        # at least one id is missing: fall back to one lookup per id
        model_ids.map do |model_id|
          begin
            model_class.find(model_id)
          rescue Mongoid::Errors::DocumentNotFound
            nil
          end
        end
      end
    end

  found.flatten.compact
end
69
+
70
# Returns the document produced by finder, creating it with creator when the
# finder comes back empty.
#
# finder  - callable returning the existing document or nil/false.
# creator - callable that creates and returns the document.
#
# If creator raises a StandardError (for example because a concurrent writer
# created the same document first), the method sleeps for a random interval
# below 0.1 seconds, asks finder again and returns its result if present;
# otherwise creator is retried. After the third failed creator call the last
# error is re-raised. Non-StandardError exceptions (Interrupt, SystemExit, ...)
# are never retried.
def find_or_create(finder, creator)
  doc = finder.call
  return doc if doc

  failures = 0
  max_retries = 2

  begin
    creator.call
  rescue StandardError
    failures += 1
    raise if failures > max_retries

    # Kernel#rand(0.1) truncates its argument to 0 and returns a value in
    # [0, 1); scale explicitly to get the intended [0, 0.1) back-off.
    sleep(rand * 0.1)

    found = finder.call
    return found if found

    retry
  end
end
85
+
86
+ #
87
# Turns free text into index terms.
#
# args  - strings to tokenize, optionally followed by an options hash
#         (extracted with Map.options_for!):
#         :list     - array to append terms to (used by the recursive call).
#         :subterms - when truthy, a term containing underscores also
#                     contributes each underscore-separated part as a term.
# block - when given, every term is yielded instead of being collected.
#
# Each word from words_for is downcased, dropped if it is a stopword, and
# stemmed. Every stem and its unidecode form are emitted unless they are
# stopwords themselves.
#
# Returns the de-duplicated term list, or nil when a block was given.
def terms_for(*args, &block)
  options = Map.options_for!(args)

  words = words_for(*args)

  list = options[:list] || []

  words.each do |word|
    word = word.downcase
    next if stopword?(word)

    stems_for(word).each do |stem|
      [stem, unidecode(stem)].uniq.each do |term|
        next if stopword?(term)

        block ? block.call(term) : list.push(term)

        next unless options[:subterms]

        subterms = term.split(/_/)
        next unless subterms.size > 1

        subterms.each do |subterm|
          # forward the block so subterms are yielded in block mode too,
          # instead of landing in a list that is then thrown away
          terms_for(subterm, :list => list, &block)
        end
      end
    end
  end

  list.uniq!

  block ? nil : list
end
121
+
122
# Splits the given strings into cleaned words.
#
# The arguments are joined with single spaces and walked with
# UnicodeUtils.each_word. Each piece is passed through utf8ify and strip, and
# pieces that end up empty are skipped.
#
# Returns the array of words, or nil when a block is given (each word is
# yielded to the block instead).
def words_for(*args, &block)
  # Return value unused here; presumably this pops a trailing options hash
  # off args (Map gem) - confirm.
  Map.options_for!(args)

  collected = []

  UnicodeUtils.each_word(args.join(' ')) do |piece|
    cleaned = strip(utf8ify(piece))

    next if cleaned.empty?

    if block
      block.call(cleaned)
    else
      collected.push(cleaned)
    end
  end

  block ? nil : collected
end
139
+
140
# Stems the given words: coerces the arguments to a list of strings, runs
# each through utf8ify and hands them all to Stemming.stem.
#
# The block parameter is accepted but not used.
def stems_for(*args, &block)
  # Return value unused here; presumably this pops a trailing options hash
  # off args (Map gem) - confirm.
  Map.options_for!(args)

  utf8_words = Coerce.list_of_strings(*args).map { |word| utf8ify(word) }

  Stemming.stem(*utf8_words)
end
147
+
148
# Builds opaque literal tokens: each word from FTS.normalized_array(args) is
# replaced by its MD5 hex digest wrapped in double underscores.
def literals_for(*args)
  FTS.normalized_array(args).map do |word|
    "__#{ Digest::MD5.hexdigest(word) }__"
  end
end
153
+
154
# True when the word (after utf8ify) is empty; otherwise whatever
# Stemming::Stopwords.stopword? returns for it.
def stopword?(word)
  normalized = utf8ify(word)
  return true if normalized.empty?

  Stemming::Stopwords.stopword?(normalized)
end
158
+
159
# Removes leading and trailing punctuation, underscores and whitespace from a
# word; characters inside the word are left alone.
#
# The word is first passed through utf8ify and the result is returned.
#
# A single character class is used for the trailing run. The previous pattern
# nested quantifiers over overlapping alternatives (whitespace matched both
# [^\w] and \s+), which can backtrack exponentially on a long run of spaces
# that is not at the end of the string.
#
# NOTE(review): \w / \W are ASCII-only in Ruby regexps, so non-ASCII
# characters at either edge count as "punctuation" here, exactly as before -
# confirm that is intended for non-Latin input.
def strip(word)
  word = utf8ify(word)
  word.gsub!(/\A[\W_]+/, '') # leading punctuation/spaces
  word.gsub!(/[\W_]+\z/, '') # trailing punctuation/spaces
  word
end
165
+
166
# Builds the n-gram list used for fuzzy matching.
#
# Every input string is passed through utf8ify and expanded with ngrams_for.
# When its unidecode form differs from the string itself, the n-grams of that
# form are added as well. Returns the n-grams with duplicates removed.
def fuzzy(*args)
  texts = Coerce.list_of_strings(args).map { |text| utf8ify(text) }

  grams = texts.inject([]) do |acc, text|
    acc.concat(ngrams_for(text))

    plain = unidecode(text)
    acc.concat(ngrams_for(plain)) unless plain == text

    acc
  end

  grams.uniq
end
alias_method(:fuzzy_for, :fuzzy)
184
+
185
# Produces character n-grams for the given strings.
#
# args - strings, optionally followed by an options hash (extracted with
#        Map.options_for!):
#        :sizes - n-gram lengths to generate, default [2,3].
#
# Each string is passed through utf8ify, wrapped in '_' on both sides and
# split with Util.chars. For every size, every window of that many
# consecutive characters is joined and appended. Duplicates are kept.
def ngrams_for(*args)
  options = Map.options_for!(args)
  sizes = options[:sizes] || [2,3]

  texts = Coerce.list_of_strings(args).map { |text| utf8ify(text) }

  grams = []

  texts.each do |text|
    padded = Util.chars('_' + text + '_')

    sizes.each do |size|
      0.upto(padded.size - size) do |start|
        grams.push(padded[start, size].join)
      end
    end
  end

  grams
end
208
+
209
# Returns the graphemes of string.to_s, as yielded by
# UnicodeUtils.each_grapheme, in an array.
def chars(string)
  [].tap do |graphemes|
    UnicodeUtils.each_grapheme(string.to_s) { |grapheme| graphemes.push(grapheme) }
  end
end
214
+
215
# Passes string.to_s through utf8ify and returns what
# Stringex::Unidecoder.decode makes of it.
def unidecode(string)
  utf8 = utf8ify(string.to_s)
  Stringex::Unidecoder.decode(utf8)
end
218
+
219
# Returns a UTF-8 copy of the input, normalized with UnicodeUtils.nfkd.
#
# string - anything responding to to_s (nil becomes "").
#
# The bytes are reinterpreted as UTF-8 on a copy, so the caller's string is
# never modified and frozen strings are handled like any other. Characters
# whose bytes are not valid UTF-8 are dropped before normalization; valid
# multi-byte characters are kept.
def utf8ify(string)
  utf8 = string.to_s.dup.force_encoding('UTF-8')

  unless utf8.valid_encoding?
    # keep only well-formed characters instead of discarding all non-ASCII
    utf8 = utf8.each_char.select { |char| char.valid_encoding? }.join
    utf8.force_encoding('UTF-8')
  end

  UnicodeUtils.nfkd(utf8)
end
228
+
229
# Flattens the arguments, converts every element to a whitespace-stripped
# string, drops the empty ones and removes duplicates (first occurrence wins).
def normalized_array(*array)
  array.flatten.map { |item| item.to_s.strip }.reject(&:empty?).uniq
end
232
+
233
# Flattens the arguments, discards nils, converts the rest to strings, drops
# empty strings and removes duplicates (first occurrence wins). Strings are
# not whitespace-stripped.
def list_of_strings(*args)
  args.flatten.compact.map(&:to_s).reject(&:empty?).uniq
end
236
+
237
+ #
238
# With no arguments and no block, returns the Index class.
#
# Otherwise adds each argument to the index: for a Class, every document
# from its `all` is added with Index.add; anything else is passed to
# Index.add together with the block. In that case the argument array is
# returned.
def index(*args, &block)
  return Index if args.empty? && block.nil?

  args.each do |target|
    if Class === target
      target.all.each { |document| Index.add(document) }
    else
      Index.add(target, &block)
    end
  end
end
252
+
253
# Forwards all arguments and the block unchanged to Index.remove and returns
# its result.
+ def unindex(*args, &block)
254
+ Index.remove(*args, &block)
255
+ end
256
+
257
# Forwards all arguments and the block unchanged to Index.add! and returns
# its result.
+ def index!(*args, &block)
258
+ Index.add!(*args, &block)
259
+ end
260
+
261
# Forwards all arguments and the block unchanged to Index.remove! and returns
# its result.
+ def unindex!(*args, &block)
262
+ Index.remove!(*args, &block)
263
+ end
264
+
265
+ #
266
# Memoized array held in @models; created empty on first access and the same
# array object is returned on later calls.
def models
  @models = [] unless @models
  @models
end
269
+
270
+ #
271
# Returns the session stored in @session, defaulting to (and remembering)
# Mongoid::Sessions.default when none has been set.
def session
  @session = Mongoid::Sessions.default unless @session
  @session
end
274
+
275
# Stores the given session in @session, replacing any memoized value.
+ def session=(session)
276
+ @session = session
277
+ end
278
+
279
# Asks the server to enable text search by running
# { setParameter: 1, textSearchEnabled: true } against the admin database of
# Mongoid::Sessions.default.
#
# args - optional options hash (extracted with Map.options_for!):
#        :warn - when truthy (the default), a failure is reported with
#                Kernel#warn; when false/nil the failure is silent.
#
# A Mongoid::Errors::NoSessionsConfig error is always ignored silently. Any
# other StandardError is swallowed and only reported via the warning.
#
# Returns the command result on success, nil otherwise.
def enable!(*args)
  options = Map.options_for!(args)

  options[:warn] = true unless options.has_key?(:warn)

  begin
    admin = Mongoid::Sessions.default.with(database: :admin)
    admin.command({ setParameter: 1, textSearchEnabled: true })
  rescue Mongoid::Errors::NoSessionsConfig
    # no session configured: nothing to enable, and nothing worth warning about
    nil
  rescue StandardError => e
    warn "failed to enable search with #{ e.class }(#{ e.message })" if options[:warn]
  end
end
295
+
296
# Runs enable! with the given arguments, then Index.setup!, and returns the
# latter's result.
+ def setup!(*args)
297
+ enable!(*args)
298
+ Index.setup!
299
+ end
300
+
301
# Configures Mongoid to connect to the 'mongoid-fts' database.
def connect!
  Mongoid.configure { |config| config.connect_to('mongoid-fts') }
end
306
+
307
# Builds a space-separated query in which every term is wrapped in double
# quotes. Double quotes inside a term are removed first.
def boolean_and(*strings)
  terms = Coerce.list_of_strings(*strings)

  quoted = terms.map do |term|
    '"%s"' % term.gsub('"', '')
  end

  quoted.join(' ')
end
311
+
312
# Builds a space-separated query from the given terms, unquoted.
def boolean_or(*strings)
  Coerce.list_of_strings(*strings).join(' ')
end
316
+
317
+ extend Util
318
+ end
319
+
320
+ extend Util
321
+ end
322
+ end
@@ -3,7 +3,7 @@
3
3
 
4
4
  Gem::Specification::new do |spec|
5
5
  spec.name = "mongoid-fts"
6
- spec.version = "1.1.1"
6
+ spec.version = "2.0.0"
7
7
  spec.platform = Gem::Platform::RUBY
8
8
  spec.summary = "mongoid-fts"
9
9
  spec.description = "enable mongodb's new fulltext simply and quickly on your mongoid models, including pagination."
@@ -18,9 +18,37 @@ Gem::Specification::new do |spec|
18
18
  "lib/app/mongoid",
19
19
  "lib/app/mongoid/fts",
20
20
  "lib/app/mongoid/fts/index.rb",
21
- "lib/mongoid",
21
+ "lib/mongoid-fts",
22
22
  "lib/mongoid-fts.rb",
23
- "mongoid-fts.gemspec"]
23
+ "lib/mongoid-fts/able.rb",
24
+ "lib/mongoid-fts/error.rb",
25
+ "lib/mongoid-fts/index.rb",
26
+ "lib/mongoid-fts/rails.rb",
27
+ "lib/mongoid-fts/raw.rb",
28
+ "lib/mongoid-fts/results.rb",
29
+ "lib/mongoid-fts/stemming",
30
+ "lib/mongoid-fts/stemming.rb",
31
+ "lib/mongoid-fts/stemming/stopwords",
32
+ "lib/mongoid-fts/stemming/stopwords/english.txt",
33
+ "lib/mongoid-fts/stemming/stopwords/extended_english.txt",
34
+ "lib/mongoid-fts/stemming/stopwords/full_danish.txt",
35
+ "lib/mongoid-fts/stemming/stopwords/full_dutch.txt",
36
+ "lib/mongoid-fts/stemming/stopwords/full_english.txt",
37
+ "lib/mongoid-fts/stemming/stopwords/full_finnish.txt",
38
+ "lib/mongoid-fts/stemming/stopwords/full_french.txt",
39
+ "lib/mongoid-fts/stemming/stopwords/full_german.txt",
40
+ "lib/mongoid-fts/stemming/stopwords/full_italian.txt",
41
+ "lib/mongoid-fts/stemming/stopwords/full_norwegian.txt",
42
+ "lib/mongoid-fts/stemming/stopwords/full_portuguese.txt",
43
+ "lib/mongoid-fts/stemming/stopwords/full_russian.txt",
44
+ "lib/mongoid-fts/stemming/stopwords/full_russiankoi8_r.txt",
45
+ "lib/mongoid-fts/stemming/stopwords/full_spanish.txt",
46
+ "lib/mongoid-fts/util.rb",
47
+ "mongoid-fts.gemspec",
48
+ "test",
49
+ "test/helper.rb",
50
+ "test/mongoid-fts_test.rb",
51
+ "test/testing.rb"]
24
52
 
25
53
  spec.executables = []
26
54
 
@@ -35,6 +63,12 @@ Gem::Specification::new do |spec|
35
63
 
36
64
  spec.add_dependency(*["coerce", "~> 0.0"])
37
65
 
66
+ spec.add_dependency(*["unicode_utils", "~> 1.4"])
67
+
68
+ spec.add_dependency(*["stringex", "~> 2.0"])
69
+
70
+ spec.add_dependency(*["fast-stemmer", "~> 1.0"])
71
+
38
72
 
39
73
  spec.extensions.push(*[])
40
74
 
@@ -0,0 +1,44 @@
1
+ # -*- encoding : utf-8 -*-
2
+
3
+ # this triggers mongoid to load rails...
4
+ # module Rails; end
5
+
6
+ require_relative 'testing'
7
+ require_relative '../lib/mongoid-fts.rb'
8
+
9
# Test bootstrap: configure the Mongoid connection, then reset the FTS index.
+ Mongoid::FTS.connect!
10
+ Mongoid::FTS.reset!
11
+
12
# Test fixture: a Mongoid document with Mongoid::FTS mixed in, a String
# `content` field and untyped fields a, b and c.
class A
  include Mongoid::Document
  include Mongoid::FTS

  field(:content, :type => String)
  field(:a)
  field(:b)
  field(:c)

  # String form of the document is its content. Always returns a String,
  # "" when content is nil, as the to_s contract requires.
  def to_s
    content.to_s
  end
end
22
+
23
# Test fixture: a Mongoid document with Mongoid::FTS mixed in, a String
# `content` field and untyped fields a, b and c.
class B
  include Mongoid::Document
  include Mongoid::FTS

  field(:content, :type => String)
  field(:a)
  field(:b)
  field(:c)

  # String form of the document is its content. Always returns a String,
  # "" when content is nil, as the to_s contract requires.
  def to_s
    content.to_s
  end
end
33
+
34
# Test fixture: a Mongoid document with Mongoid::FTS mixed in, a String
# `content` field and untyped fields a, b and c.
class C
  include Mongoid::Document
  include Mongoid::FTS

  field(:content, :type => String)
  field(:a)
  field(:b)
  field(:c)

  # String form of the document is its content. Always returns a String,
  # "" when content is nil, as the to_s contract requires.
  def to_s
    content.to_s
  end
end
44
+
@@ -0,0 +1,177 @@
1
+ # encoding: utf-8
2
+ #
3
+ require_relative 'helper'
4
+
5
+ Testing Mongoid::FTS do
6
+ #
7
# Expects words_for to split on spaces and hyphens while keeping
# underscore-joined words in one piece.
testing 'wording' do
  assert{ Mongoid::FTS.words_for('dogs cats fishes') == ['dogs', 'cats', 'fishes'] }
  assert{ Mongoid::FTS.words_for('foo-bar baz_bub') == ['foo', 'bar', 'baz_bub'] }
end
11
+
12
+ #
13
# Expects stems_for to reduce plural words to their stems.
testing 'stemming' do
  assert{ Mongoid::FTS.stems_for('dogs cats fishes') == ['dog', 'cat', 'fish'] }
end
16
+
17
+ #
18
# Expects terms_for to drop stopwords, stem the rest, and - with
# :subterms - also emit the underscore-separated parts of a term.
testing 'terming' do
  assert{ Mongoid::FTS.terms_for('dogs and the cats and those fishes') == ['dog', 'cat', 'fish'] }
  assert{ Mongoid::FTS.terms_for('the foo-bar and then baz_bub') == ['foo', 'bar', 'baz_bub'] }
  assert{ Mongoid::FTS.terms_for('the foo-bar and then baz_bub', subterms: true) == ['foo', 'bar', 'baz_bub', 'baz', 'bub'] }
end
23
+
24
+ #
25
# Expects fuzzy("über") to produce the 2- and 3-grams of the padded word,
# followed by the extra n-grams of its unidecode form.
#
# Both lists are passed through utf8ify and compared as whole arrays, so a
# missing, extra or reordered n-gram fails the test. A pairwise zip would
# pass when `actual` is empty or shorter than `expected`.
testing 'fuzzy' do
  assert{
    actual = Mongoid::FTS.fuzzy("über")
    expected = ["_ü", "üb", "be", "er", "r_", "_üb", "übe", "ber", "er_", "_u", "ub", "_ub", "ube"]

    normalize = lambda { |grams| grams.map { |gram| Mongoid::FTS.utf8ify(gram) } }

    normalize[actual] == normalize[expected]
  }
end
37
+
38
+ #
39
# Indexes one document of each of two models and expects stemmed,
# case-insensitive searches to find exactly the matching document.
testing 'that models can, at minimum, be indexed and searched' do
  a = A.create!(content: 'dogs')
  b = B.create!(content: 'cats')

  assert{ Mongoid::FTS.index(a) }
  assert{ Mongoid::FTS.index(b) }

  assert{ Mongoid::FTS.search('dog') == [a] }
  assert{ Mongoid::FTS.search('cat') == [b] }
  assert{ Mongoid::FTS.search('Cat') == [b] }
end
50
+
51
+ #
52
# Expects a document titled 'über' to be found both by its unaccented
# spelling and by a prefix fragment.
testing 'fuzzy search' do
  a = A.create!(title: 'über')

  assert{ Mongoid::FTS.index(a) }

  assert{ Mongoid::FTS.search('uber') == [a] }
  assert{ Mongoid::FTS.search('üb') == [a] }
end
60
+
61
+ #
62
# Four documents contain 'dog' but only one also contains 'cat'; searching
# for both words is expected to return just that document.
testing 'that rare words float to the front of the results' do
  A.create!(content: 'dog')
  A.create!(content: 'dog dog')
  A.create!(content: 'dog dog dog')
  with_cat = A.create!(content: 'dog dog dog cat')

  assert{ Mongoid::FTS.index(A) }
  assert{ Mongoid::FTS.search('cat dog') == [with_cat] }
end
71
+
72
+ #
73
# Expects exact words to rank ahead of stemmed matches, which in turn rank
# ahead of a match inside an e-mail address.
testing 'that word specificity affects the search' do
  email = A.create!(content: 'cat@dog.com')
  dogs  = A.create!(content: 'dogs')
  dog   = A.create!(content: 'dog')
  cats  = A.create!(content: 'cats')
  cat   = A.create!(content: 'cat')

  assert{ Mongoid::FTS.index(A) }

  assert{ Mongoid::FTS.search('cat@dog.com') == [email] }
  assert{ Mongoid::FTS.search('cat') == [cat, cats, email] }
  assert{ Mongoid::FTS.search('dog') == [dog, dogs, email] }
end
86
+
87
+ #
88
# With three documents ('dog', 'dog cat', 'dog cat fish'), an :any search is
# expected to match all three for every query, while an :all search matches
# only the documents containing every query word.
testing 'that set intersection and union are supported via search' do
  A.create!(content: 'dog')
  A.create!(content: 'dog cat')
  A.create!(content: 'dog cat fish')

  assert{ Mongoid::FTS.index(A) }

  assert{ Mongoid::FTS.search(any: 'dog').count == 3 }
  assert{ Mongoid::FTS.search(any: 'dog cat').count == 3 }
  assert{ Mongoid::FTS.search(any: 'dog cat fish').count == 3 }

  assert{ Mongoid::FTS.search(all: 'dog').count == 3 }
  assert{ Mongoid::FTS.search(all: 'dog cat').count == 2 }
  assert{ Mongoid::FTS.search(all: 'dog cat fish').count == 1 }
end
103
+
104
+ #
105
# Expects a word in the title to outrank the same word appearing only in the
# content.
# NOTE(review): this case never calls Mongoid::FTS.index; presumably documents
# are indexed on save - confirm.
testing 'that keywords are considered more highly than fulltext' do
  cats_doc = A.create!(title: 'the cats', content: 'like to meow')
  dogs_doc = A.create!(title: 'the dogs', content: 'do not like to meow, they bark at cats')

  assert{ Mongoid::FTS.search('cat').count == 2 }
  assert{ Mongoid::FTS.search('cat').first == cats_doc }

  assert{ Mongoid::FTS.search('meow').count == 2 }
  assert{ Mongoid::FTS.search('bark').count == 1 }
  assert{ Mongoid::FTS.search('dog').first == dogs_doc }
end
116
+
117
+ #
118
# Creates 11 matching documents and checks page sizes, the page counters
# reported by each page, and that walking all 6 pages of size 2 yields every
# document.
testing 'basic pagination' do
  11.times{|i| A.create!(content: "cats #{ i }") }

  assert{ A.search('cat').paginate(page: 1, size: 2).to_a.size == 2 }
  assert{ A.search('cat').paginate(page: 2, size: 5).to_a.size == 5 }

  page_count = 6
  per_page = 2
  collected = []

  1.upto(page_count) do |page|
    # assert returns the block's value here, i.e. the page of results
    results = assert{ A.search('cat').paginate(page: page, size: per_page) }
    collected.push(*results)
    assert{ results.num_pages == page_count }
    assert{ results.total_pages == page_count }
    assert{ results.current_page == page }
  end

  paged = collected.sort_by{|model| model.content}
  everything = A.all.sort_by{|model| model.content}

  assert{ paged == everything }
end
141
+
142
+ protected
143
+
144
# Defines a fresh throw-away model class K (a subclass of A stored in the
# :ks collection) and returns it.
#
# Any existing K is emptied with destroy_all and its constant removed first.
# The new class includes Mongoid::FTS and, when a block is given, evaluates
# the block in the class body.
def new_klass(&block)
  if Object.const_defined?(:K)
    Object.const_get(:K).destroy_all
    Object.send(:remove_const, :K)
  end

  klass = Class.new(A) do
    self.default_collection_name = :ks
    def self.name() 'K' end
  end

  Object.const_set(:K, klass)

  klass.send(:include, ::Mongoid::FTS)
  klass.class_eval(&block) if block

  klass
end
164
+
165
# Before each test: empty the fixture models and the FTS index.
setup do
  [A, B, C].map(&:destroy_all)
  Mongoid::FTS.destroy_all
end
169
+
170
+ =begin
171
+ H = Mongoid::FTS
172
+ T = Mongoid::FTS::Token
173
+ I = Mongoid::FTS::Index
174
+
175
+ at_exit{ K.destroy_all if defined?(K) }
176
+ =end
177
+ end