mongoid-fts 1.1.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,322 @@
1
+ module Mongoid
2
+ module FTS
3
+ module Util
4
+ #
5
# Model classes that back the full-text index; the bulk helpers in this
# module (reset!, create_indexes, destroy_all) iterate over this list.
def fts_models
  [Mongoid::FTS::Index]
end
10
+
11
# Wipe and rebuild the storage behind every model in fts_models.
#
# Runs Mongoid::FTS.setup!(:warn => true) first, then for each model:
# destroys all documents, drops the collection's indexes, drops the
# collection, and re-creates the indexes.
#
# The drop/create steps are best-effort: an ordinary failure in one step is
# ignored so the remaining steps still run. Only StandardError is swallowed,
# so Interrupt, SystemExit and similar still propagate.
#
# Returns the list of models that were reset.
def reset!
  Mongoid::FTS.setup!(:warn => true)

  fts_models.each do |model|
    model.destroy_all

    steps = [
      lambda { model.collection.indexes.drop },
      lambda { model.collection.drop },
      lambda { model.create_indexes }
    ]

    steps.each do |step|
      begin
        step.call
      rescue StandardError
        # best-effort: a failed step must not prevent the following ones
      end
    end
  end
end
33
+
34
# Ask every model in fts_models to create its indexes.
# Returns the list of models.
def create_indexes
  fts_models.each(&:create_indexes)
end
37
+
38
# Destroy every document of every model in fts_models.
# Returns an Array holding each model's destroy_all result.
def destroy_all
  fts_models.map(&:destroy_all)
end
41
+
42
+ #
43
# Load documents for several model classes in one call.
#
# queries - Hash mapping a model class (or a value whose to_s names one)
#           to a single id or a list of ids.
#
# Each class is first asked for all of its ids at once; if that raises
# Mongoid::Errors::DocumentNotFound the ids are looked up one by one and
# the missing ones are skipped.
#
# Returns a flat Array of the documents that were found.
def find_in_batches(queries = {})
  found = queries.map do |klass, ids|
    # NOTE(review): eval on a stringified key runs arbitrary code if the key
    # can ever be caller-controlled - confirm keys are trusted class names.
    klass = eval(klass.to_s) unless klass.is_a?(Class)
    ids = Array(ids)

    begin
      klass.find(ids)
    rescue Mongoid::Errors::DocumentNotFound
      ids.map do |id|
        begin
          klass.find(id)
        rescue Mongoid::Errors::DocumentNotFound
          nil
        end
      end
    end
  end

  found.flatten.compact
end
69
+
70
# Return the document produced by +finder+, creating it with +creator+ when
# nothing is found.
#
# finder  - callable returning the existing document or nil/false.
# creator - callable that creates and returns the document.
#
# If +creator+ raises, wait a short random time (under 0.1s) and look the
# document up again, in case something else created it in the meantime. When
# it is still missing, +creator+ is retried. After two retries the last
# error is re-raised.
#
# Only StandardError is retried; Interrupt, SystemExit etc. propagate at once.
def find_or_create(finder, creator)
  doc = finder.call
  return doc if doc

  attempts = 0
  max_retries = 2

  begin
    creator.call
  rescue StandardError
    attempts += 1
    raise if attempts > max_retries

    # Kernel#rand(0.1) truncates its argument to 0 and returns a float in
    # [0, 1), so scale an unbounded rand instead to stay below 0.1 seconds.
    sleep(rand * 0.1)

    found = finder.call
    retry unless found
    found
  end
end
85
+
86
+ #
87
# Turn free text into index terms: words are downcased, stopwords dropped,
# each word is stemmed, and every stem is emitted both as-is and in its
# unidecode'd form (when that differs).
#
# args  - strings to analyse, optionally followed by an options hash:
#         :subterms - when truthy, a term containing underscores also
#                     contributes the terms of its underscore-separated parts.
#         :list     - Array to append to (used by the recursive call).
# block - when given, each term is yielded instead of collected; this now
#         also holds for subterms, which previously were silently dropped
#         because the block was not forwarded to the recursive call.
#
# Returns the de-duplicated Array of terms, or nil when a block is given.
def terms_for(*args, &block)
  options = Map.options_for!(args)
  list = options[:list] || []

  words_for(*args).each do |word|
    word = word.downcase
    next if stopword?(word)

    stems_for(word).each do |stem|
      [stem, unidecode(stem)].uniq.each do |term|
        next if stopword?(term)

        block ? block.call(term) : list.push(term)

        next unless options[:subterms]

        parts = term.split(/_/)
        next unless parts.size > 1

        parts.each { |part| terms_for(part, :list => list, &block) }
      end
    end
  end

  list.uniq!

  block ? nil : list
end
121
+
122
# Split the given strings into cleaned-up words.
#
# args  - strings (joined with a space before splitting). Map.options_for!
#         is called on args first; presumably it removes a trailing options
#         hash - no option is read here.
# block - when given, each word is yielded instead of collected.
#
# Every word reported by UnicodeUtils.each_word is passed through utf8ify
# and strip; words that end up empty are skipped.
#
# Returns the Array of words, or nil when a block is given.
def words_for(*args, &block)
  Map.options_for!(args)

  text = args.join(' ')
  collected = []

  UnicodeUtils.each_word(text) do |raw|
    cleaned = strip(utf8ify(raw))
    next if cleaned.empty?

    if block
      block.call(cleaned)
    else
      collected << cleaned
    end
  end

  return nil if block
  collected
end
139
+
140
# Stem the given words.
#
# args are coerced to a list of strings (Coerce.list_of_strings), each one is
# passed through utf8ify, and the lot is handed to Stemming.stem, whose
# result is returned. Map.options_for! is applied to args first; no option
# is read here and the block parameter is not used.
def stems_for(*args, &block)
  Map.options_for!(args)

  normalized = Coerce.list_of_strings(*args).map { |word| utf8ify(word) }
  Stemming.stem(*normalized)
end
147
+
148
# Build literal tokens for the given values: each normalized value
# (FTS.normalized_array) becomes its MD5 hex digest wrapped in double
# underscores, e.g. "__<md5>__".
def literals_for(*args)
  FTS.normalized_array(args).map do |word|
    format('__%s__', Digest::MD5.hexdigest(word))
  end
end
153
+
154
# True when the word (after utf8ify) is empty or is reported as a stopword
# by Stemming::Stopwords.
def stopword?(word)
  normalized = utf8ify(word)
  normalized.empty? || Stemming::Stopwords.stopword?(normalized)
end
158
+
159
# Remove leading and trailing punctuation, whitespace and underscores from a
# word (after utf8ify).
#
# Letters and digits are matched with Unicode properties (\p{L}, \p{N})
# rather than \w, because Ruby's \w only covers [a-zA-Z0-9_]; with \w a
# non-Latin word was stripped down to nothing. Trailing combining marks
# (\p{M}) are kept so a decomposed accent stays attached to its base letter.
# Each pattern uses a single quantifier, avoiding the nested-quantifier
# backtracking of the earlier trailing pattern.
#
# Returns a new String; for plain ASCII input the result is unchanged from
# the previous implementation.
def strip(word)
  word = utf8ify(word)
  word = word.sub(/\A[^\p{L}\p{N}]+/, '')   # leading punctuation/spaces
  word.sub(/[^\p{L}\p{N}\p{M}]+\z/, '')     # trailing punctuation/spaces
end
165
+
166
# Build the n-grams used for fuzzy matching.
#
# For every input string (coerced via Coerce.list_of_strings and utf8ify) the
# n-grams of the string are collected, followed by the n-grams of its
# unidecode'd form when that differs from the string.
#
# Returns the de-duplicated Array of n-grams. Also available as fuzzy_for.
def fuzzy(*args)
  inputs = Coerce.list_of_strings(args).map { |string| utf8ify(string) }

  grams = inputs.inject([]) do |acc, string|
    acc.concat(ngrams_for(string))

    ascii = unidecode(string)
    acc.concat(ngrams_for(ascii)) unless ascii == string

    acc
  end

  grams.uniq
end
alias fuzzy_for fuzzy
184
+
185
# List the character n-grams of each given string.
#
# args - strings, optionally followed by an options hash:
#        :sizes - Array of n-gram lengths (default [2, 3]).
#
# Each string is passed through utf8ify, padded with one underscore on each
# side and split with Util.chars; for every size, every window of that many
# consecutive characters is emitted, in order.
#
# Returns an Array of n-grams (duplicates are kept).
def ngrams_for(*args)
  options = Map.options_for!(args)
  sizes = options[:sizes] || [2, 3]

  Coerce.list_of_strings(args).each_with_object([]) do |string, grams|
    graphemes = Util.chars("_#{ utf8ify(string) }_")

    sizes.each do |size|
      (0..graphemes.size - size).each do |offset|
        grams << graphemes[offset, size].join
      end
    end
  end
end
208
+
209
# Split a value (via to_s) into the units yielded by
# UnicodeUtils.each_grapheme and return them as an Array.
def chars(string)
  [].tap do |graphemes|
    UnicodeUtils.each_grapheme(string.to_s) { |grapheme| graphemes << grapheme }
  end
end
214
+
215
# Run the value (via to_s and utf8ify) through Stringex::Unidecoder.decode
# and return the result.
def unidecode(string)
  normalized = utf8ify(string.to_s)
  Stringex::Unidecoder.decode(normalized)
end
218
+
219
# Return an NFKD-normalized (UnicodeUtils.nfkd) UTF-8 copy of +string+.
#
# The bytes are interpreted as UTF-8. When they are not valid UTF-8, every
# non-ASCII byte is removed (the same fallback transcoding as before).
#
# The argument itself is never modified: working on a dup keeps the caller's
# encoding intact and makes frozen strings work. Previously a frozen string
# made force_encoding raise, and the fallback then stripped all of its
# non-ASCII characters.
def utf8ify(string)
  utf8 = string.dup.force_encoding('UTF-8')

  unless utf8.valid_encoding?
    utf8 = utf8.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
  end

  UnicodeUtils.nfkd(utf8)
end
228
+
229
# Flatten the arguments, convert each item to a whitespace-stripped string,
# drop the empty ones and remove duplicates.
def normalized_array(*array)
  array.flatten.map { |item| item.to_s.strip }.reject(&:empty?).uniq
end
232
+
233
# Flatten the arguments, drop nils, convert the rest to strings, then drop
# empty strings and duplicates. Unlike normalized_array, whitespace is kept.
def list_of_strings(*args)
  args.flatten.compact.map(&:to_s).reject(&:empty?).uniq
end
236
+
237
+ #
238
# With no arguments and no block: return the Index class.
#
# Otherwise add each argument to the index. A Class argument has every
# record from its +all+ added (the block is not used for these); any other
# argument is passed to Index.add together with the block.
#
# Returns the Array of arguments in the second form.
def index(*args, &block)
  return Index if args.empty? && block.nil?

  args.each do |target|
    if target.is_a?(Class)
      target.all.each { |model| Index.add(model) }
    else
      Index.add(target, &block)
    end
  end
end
252
+
253
# Forward all arguments and the block to Index.remove and return its result.
def unindex(*args, &block)
  Index.remove(*args, &block)
end
256
+
257
# Forward all arguments and the block to Index.add! and return its result.
def index!(*args, &block)
  Index.add!(*args, &block)
end
260
+
261
# Forward all arguments and the block to Index.remove! and return its result.
def unindex!(*args, &block)
  Index.remove!(*args, &block)
end
264
+
265
+ #
266
# Memoized, initially empty Array stored in @models; the same object is
# returned on every call.
def models
  @models = [] unless @models
  @models
end
269
+
270
+ #
271
# The session to use: whatever was assigned through session=, otherwise
# Mongoid::Sessions.default (looked up once and remembered in @session).
def session
  @session = Mongoid::Sessions.default unless @session
  @session
end
274
+
275
# Override the session returned by #session.
def session=(session)
  @session = session
end
278
+
279
# Try to switch text search on by sending
# { setParameter: 1, textSearchEnabled: true } to the admin database of
# Mongoid's default session.
#
# args - optional options hash:
#        :warn - whether a failure is reported through +warn+
#                (default true). This option used to be defaulted but never
#                consulted, so warnings could not be turned off.
#
# Failures never raise: Mongoid::Errors::NoSessionsConfig is ignored
# silently, any other StandardError is reported via +warn+ when :warn is set.
# Non-StandardError exceptions (Interrupt, SystemExit, ...) propagate.
def enable!(*args)
  options = Map.options_for!(args)
  options[:warn] = true unless options.has_key?(:warn)

  begin
    session = Mongoid::Sessions.default
    session.with(database: :admin).command({ setParameter: 1, textSearchEnabled: true })
  rescue Mongoid::Errors::NoSessionsConfig
    # no sessions configured: nothing to enable and nothing to report
    nil
  rescue StandardError => e
    warn "failed to enable search with #{ e.class }(#{ e.message })" if options[:warn]
  end
end
295
+
296
# Run enable! with the given arguments, then Index.setup!.
# Returns the result of Index.setup!.
def setup!(*args)
  enable!(*args)

  Index.setup!
end
300
+
301
# Configure Mongoid to connect to the 'mongoid-fts' database.
# Returns whatever Mongoid.configure returns.
def connect!
  Mongoid.configure { |config| config.connect_to('mongoid-fts') }
end
306
+
307
# Build an "all of these" query string: each string has its double quotes
# removed, is wrapped in double quotes, and the results are joined by spaces.
def boolean_and(*strings)
  quoted = Coerce.list_of_strings(*strings).map do |string|
    "\"#{ string.delete('"') }\""
  end

  quoted.join(' ')
end
311
+
312
# Build an "any of these" query string: the strings (via
# Coerce.list_of_strings) joined by single spaces.
def boolean_or(*strings)
  Coerce.list_of_strings(*strings).join(' ')
end
316
+
317
+ extend Util
318
+ end
319
+
320
+ extend Util
321
+ end
322
+ end
@@ -3,7 +3,7 @@
3
3
 
4
4
  Gem::Specification::new do |spec|
5
5
  spec.name = "mongoid-fts"
6
- spec.version = "1.1.1"
6
+ spec.version = "2.0.0"
7
7
  spec.platform = Gem::Platform::RUBY
8
8
  spec.summary = "mongoid-fts"
9
9
  spec.description = "enable mongodb's new fulltext simply and quickly on your mongoid models, including pagination."
@@ -18,9 +18,37 @@ Gem::Specification::new do |spec|
18
18
  "lib/app/mongoid",
19
19
  "lib/app/mongoid/fts",
20
20
  "lib/app/mongoid/fts/index.rb",
21
- "lib/mongoid",
21
+ "lib/mongoid-fts",
22
22
  "lib/mongoid-fts.rb",
23
- "mongoid-fts.gemspec"]
23
+ "lib/mongoid-fts/able.rb",
24
+ "lib/mongoid-fts/error.rb",
25
+ "lib/mongoid-fts/index.rb",
26
+ "lib/mongoid-fts/rails.rb",
27
+ "lib/mongoid-fts/raw.rb",
28
+ "lib/mongoid-fts/results.rb",
29
+ "lib/mongoid-fts/stemming",
30
+ "lib/mongoid-fts/stemming.rb",
31
+ "lib/mongoid-fts/stemming/stopwords",
32
+ "lib/mongoid-fts/stemming/stopwords/english.txt",
33
+ "lib/mongoid-fts/stemming/stopwords/extended_english.txt",
34
+ "lib/mongoid-fts/stemming/stopwords/full_danish.txt",
35
+ "lib/mongoid-fts/stemming/stopwords/full_dutch.txt",
36
+ "lib/mongoid-fts/stemming/stopwords/full_english.txt",
37
+ "lib/mongoid-fts/stemming/stopwords/full_finnish.txt",
38
+ "lib/mongoid-fts/stemming/stopwords/full_french.txt",
39
+ "lib/mongoid-fts/stemming/stopwords/full_german.txt",
40
+ "lib/mongoid-fts/stemming/stopwords/full_italian.txt",
41
+ "lib/mongoid-fts/stemming/stopwords/full_norwegian.txt",
42
+ "lib/mongoid-fts/stemming/stopwords/full_portuguese.txt",
43
+ "lib/mongoid-fts/stemming/stopwords/full_russian.txt",
44
+ "lib/mongoid-fts/stemming/stopwords/full_russiankoi8_r.txt",
45
+ "lib/mongoid-fts/stemming/stopwords/full_spanish.txt",
46
+ "lib/mongoid-fts/util.rb",
47
+ "mongoid-fts.gemspec",
48
+ "test",
49
+ "test/helper.rb",
50
+ "test/mongoid-fts_test.rb",
51
+ "test/testing.rb"]
24
52
 
25
53
  spec.executables = []
26
54
 
@@ -35,6 +63,12 @@ Gem::Specification::new do |spec|
35
63
 
36
64
  spec.add_dependency(*["coerce", "~> 0.0"])
37
65
 
66
+ spec.add_dependency(*["unicode_utils", "~> 1.4"])
67
+
68
+ spec.add_dependency(*["stringex", "~> 2.0"])
69
+
70
+ spec.add_dependency(*["fast-stemmer", "~> 1.0"])
71
+
38
72
 
39
73
  spec.extensions.push(*[])
40
74
 
@@ -0,0 +1,44 @@
1
+ # -*- encoding : utf-8 -*-
2
+
3
+ # this triggers mongoid to load rails...
4
+ # module Rails; end
5
+
6
+ require_relative 'testing'
7
+ require_relative '../lib/mongoid-fts.rb'
8
+
9
+ Mongoid::FTS.connect!
10
+ Mongoid::FTS.reset!
11
+
12
# Test fixture: a Mongoid document with full-text search mixed in.
class A
  include Mongoid::Document
  include Mongoid::FTS

  field :content, :type => String
  field :a
  field :b
  field :c

  # A document renders as its content.
  def to_s
    content
  end
end
22
+
23
# Test fixture: a Mongoid document with full-text search mixed in.
class B
  include Mongoid::Document
  include Mongoid::FTS

  field :content, :type => String
  field :a
  field :b
  field :c

  # A document renders as its content.
  def to_s
    content
  end
end
33
+
34
# Test fixture: a Mongoid document with full-text search mixed in.
class C
  include Mongoid::Document
  include Mongoid::FTS

  field :content, :type => String
  field :a
  field :b
  field :c

  # A document renders as its content.
  def to_s
    content
  end
end
44
+
@@ -0,0 +1,177 @@
1
+ # encoding: utf-8
2
+ #
3
+ require_relative 'helper'
4
+
5
+ Testing Mongoid::FTS do
6
+ #
7
# hyphens separate words; underscores stay inside a word
testing 'wording' do
  plain = Mongoid::FTS.words_for('dogs cats fishes')
  assert { plain == %w[ dogs cats fishes ] }

  punctuated = Mongoid::FTS.words_for('foo-bar baz_bub')
  assert { punctuated == %w[ foo bar baz_bub ] }
end
11
+
12
+ #
13
# plural forms are reduced to their stems
testing 'stemming' do
  stems = Mongoid::FTS.stems_for('dogs cats fishes')
  assert { stems == %w[ dog cat fish ] }
end
16
+
17
+ #
18
# terms are stemmed words minus stopwords; :subterms also splits on underscores
testing 'terming' do
  sentence = 'the foo-bar and then baz_bub'

  assert { Mongoid::FTS.terms_for('dogs and the cats and those fishes') == %w[ dog cat fish ] }
  assert { Mongoid::FTS.terms_for(sentence) == %w[ foo bar baz_bub ] }
  assert { Mongoid::FTS.terms_for(sentence, :subterms => true) == %w[ foo bar baz_bub baz bub ] }
end
23
+
24
+ #
25
# fuzzy() yields the n-grams of the word followed by those of its
# transliteration
testing 'fuzzy' do
  assert{
    actual = Mongoid::FTS.fuzzy("über")
    expected = ["_ü", "üb", "be", "er", "r_", "_üb", "übe", "ber", "er_", "_u", "ub", "_ub", "ube"]

    # Compare the complete lists. The former actual.zip(expected).all? check
    # stopped at the end of +actual+, so an empty or truncated result passed.
    normalized_actual = actual.map{|ngram| Mongoid::FTS.utf8ify(ngram)}
    normalized_expected = expected.map{|ngram| Mongoid::FTS.utf8ify(ngram)}

    normalized_actual == normalized_expected
  }
end
37
+
38
+ #
39
# smoke test: index two documents of different models and find each by stem
testing 'that models can, at minimum, be indexed and searched' do
  dog_doc = A.create!(:content => 'dogs')
  cat_doc = B.create!(:content => 'cats')

  [dog_doc, cat_doc].each do |doc|
    assert { Mongoid::FTS.index(doc) }
  end

  assert { Mongoid::FTS.search('dog') == [dog_doc] }
  assert { Mongoid::FTS.search('cat') == [cat_doc] }
  assert { Mongoid::FTS.search('Cat') == [cat_doc] }
end
50
+
51
+ #
52
# an accented title is found by its transliteration and by a prefix
testing 'fuzzy search' do
  doc = A.create!(:title => 'über')

  assert { Mongoid::FTS.index(doc) }

  ['uber', 'üb'].each do |query|
    assert { Mongoid::FTS.search(query) == [doc] }
  end
end
60
+
61
+ #
62
# only the document containing the rare word is expected back
testing 'that rare words float to the front of the results' do
  ['dog', 'dog dog', 'dog dog dog'].each do |content|
    A.create!(:content => content)
  end
  with_cat = A.create!(:content => 'dog dog dog cat')

  assert { Mongoid::FTS.index(A) }
  assert { Mongoid::FTS.search('cat dog') == [with_cat] }
end
71
+
72
+ #
73
# exact words rank before plurals, which rank before the e-mail address
testing 'that word specificity affects the search' do
  email = A.create!(:content => 'cat@dog.com')
  dogs  = A.create!(:content => 'dogs')
  dog   = A.create!(:content => 'dog')
  cats  = A.create!(:content => 'cats')
  cat   = A.create!(:content => 'cat')

  assert { Mongoid::FTS.index(A) }

  assert { Mongoid::FTS.search('cat@dog.com') == [email] }
  assert { Mongoid::FTS.search('cat') == [cat, cats, email] }
  assert { Mongoid::FTS.search('dog') == [dog, dogs, email] }
end
86
+
87
+ #
88
# :any is a union of the query words, :all an intersection
testing 'that set intersection and union are supported via search' do
  ['dog', 'dog cat', 'dog cat fish'].each do |content|
    A.create!(:content => content)
  end

  assert { Mongoid::FTS.index(A) }

  { 'dog' => 3, 'dog cat' => 3, 'dog cat fish' => 3 }.each do |query, expected|
    assert { Mongoid::FTS.search(:any => query).count == expected }
  end

  { 'dog' => 3, 'dog cat' => 2, 'dog cat fish' => 1 }.each do |query, expected|
    assert { Mongoid::FTS.search(:all => query).count == expected }
  end
end
103
+
104
+ #
105
# a match in the title is expected to outrank a match in the content
testing 'that keywords are considered more highly than fulltext' do
  cats = A.create!(:title => 'the cats', :content => 'like to meow')
  dogs = A.create!(:title => 'the dogs', :content => 'do not like to meow, they bark at cats')

  cat_hits = lambda { Mongoid::FTS.search('cat') }
  assert { cat_hits.call.count == 2 }
  assert { cat_hits.call.first == cats }

  assert { Mongoid::FTS.search('meow').count == 2 }
  assert { Mongoid::FTS.search('bark').count == 1 }
  assert { Mongoid::FTS.search('dog').first == dogs }
end
116
+
117
+ #
118
# eleven matches, two per page: six pages that together cover every document
testing 'basic pagination' do
  11.times { |i| A.create!(:content => "cats #{ i }") }

  assert { A.search('cat').paginate(:page => 1, :size => 2).to_a.size == 2 }
  assert { A.search('cat').paginate(:page => 2, :size => 5).to_a.size == 5 }

  page_count = 6
  per_page = 2
  collected = []

  1.upto(page_count) do |page|
    results = assert { A.search('cat').paginate(:page => page, :size => per_page) }
    collected.push(*results)

    assert { results.num_pages == page_count }
    assert { results.total_pages == page_count }
    assert { results.current_page == page }
  end

  paged = collected.sort_by(&:content)
  everything = A.all.sort_by(&:content)

  assert { paged == everything }
end
141
+
142
+ protected
143
+
144
# Build a fresh subclass of A, registered as the top-level constant K.
#
# A previously defined K has its documents destroyed and the constant
# removed first. The new class stores into the :ks collection, reports 'K'
# as its name, includes Mongoid::FTS and finally evaluates the optional
# block in class scope.
#
# Returns the new class.
def new_klass(&block)
  if Object.const_defined?(:K)
    Object.const_get(:K).destroy_all
    Object.send(:remove_const, :K)
  end

  klass = Class.new(A) do
    self.default_collection_name = :ks
    define_singleton_method(:name) { 'K' }
  end

  Object.const_set(:K, klass)

  klass.send(:include, ::Mongoid::FTS)
  klass.class_eval(&block) if block

  klass
end
164
+
165
# start every test with empty fixture collections and an empty index
setup do
  [A, B, C].each(&:destroy_all)
  Mongoid::FTS.destroy_all
end
169
+
170
+ =begin
171
+ H = Mongoid::FTS
172
+ T = Mongoid::FTS::Token
173
+ I = Mongoid::FTS::Index
174
+
175
+ at_exit{ K.destroy_all if defined?(K) }
176
+ =end
177
+ end