mongoid_fulltext 0.5.8 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +4 -0
- data/CHANGELOG.md +133 -0
- data/Gemfile +5 -7
- data/LICENSE +2 -2
- data/README.md +203 -142
- data/VERSION +1 -1
- data/lib/mongoid_fulltext.rb +57 -52
- data/mongoid_fulltext.gemspec +19 -37
- data/spec/config/mongoid.yml +6 -0
- data/spec/models/filtered_artwork.rb +2 -2
- data/spec/models/russian_artwork.rb +10 -0
- data/spec/mongoid/fulltext_spec.rb +47 -26
- data/spec/spec_helper.rb +8 -9
- metadata +54 -45
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.6.0
|
data/lib/mongoid_fulltext.rb
CHANGED
@@ -23,7 +23,7 @@ module Mongoid::FullTextSearch
|
|
23
23
|
index_name = 'mongoid_fulltext.index_%s_%s' % [self.name.downcase, self.mongoid_fulltext_config.count]
|
24
24
|
end
|
25
25
|
|
26
|
-
config = {
|
26
|
+
config = {
|
27
27
|
:alphabet => 'abcdefghijklmnopqrstuvwxyz0123456789 ',
|
28
28
|
:word_separators => "-_ \n\t",
|
29
29
|
:ngram_width => 3,
|
@@ -34,17 +34,17 @@ module Mongoid::FullTextSearch
|
|
34
34
|
:max_candidate_set_size => 1000,
|
35
35
|
:remove_accents => true,
|
36
36
|
:reindex_immediately => true,
|
37
|
-
:stop_words => Hash[['i', 'a', 's', 't', 'me', 'my', 'we', 'he', 'it', 'am', 'is', 'be', 'do', 'an', 'if',
|
38
|
-
'or', 'as', 'of', 'at', 'by', 'to', 'up', 'in', 'on', 'no', 'so', 'our', 'you', 'him',
|
39
|
-
'his', 'she', 'her', 'its', 'who', 'are', 'was', 'has', 'had', 'did', 'the', 'and',
|
40
|
-
'but', 'for', 'out', 'off', 'why', 'how', 'all', 'any', 'few', 'nor', 'not', 'own',
|
41
|
-
'too', 'can', 'don', 'now', 'ours', 'your', 'hers', 'they', 'them', 'what', 'whom',
|
42
|
-
'this', 'that', 'were', 'been', 'have', 'does', 'with', 'into', 'from', 'down', 'over',
|
43
|
-
'then', 'once', 'here', 'when', 'both', 'each', 'more', 'most', 'some', 'such', 'only',
|
44
|
-
'same', 'than', 'very', 'will', 'just', 'yours', 'their', 'which', 'these', 'those',
|
45
|
-
'being', 'doing', 'until', 'while', 'about', 'after', 'above', 'below', 'under',
|
46
|
-
'again', 'there', 'where', 'other', 'myself', 'itself', 'theirs', 'having', 'during',
|
47
|
-
'before', 'should', 'himself', 'herself', 'because', 'against', 'between', 'through',
|
37
|
+
:stop_words => Hash[['i', 'a', 's', 't', 'me', 'my', 'we', 'he', 'it', 'am', 'is', 'be', 'do', 'an', 'if',
|
38
|
+
'or', 'as', 'of', 'at', 'by', 'to', 'up', 'in', 'on', 'no', 'so', 'our', 'you', 'him',
|
39
|
+
'his', 'she', 'her', 'its', 'who', 'are', 'was', 'has', 'had', 'did', 'the', 'and',
|
40
|
+
'but', 'for', 'out', 'off', 'why', 'how', 'all', 'any', 'few', 'nor', 'not', 'own',
|
41
|
+
'too', 'can', 'don', 'now', 'ours', 'your', 'hers', 'they', 'them', 'what', 'whom',
|
42
|
+
'this', 'that', 'were', 'been', 'have', 'does', 'with', 'into', 'from', 'down', 'over',
|
43
|
+
'then', 'once', 'here', 'when', 'both', 'each', 'more', 'most', 'some', 'such', 'only',
|
44
|
+
'same', 'than', 'very', 'will', 'just', 'yours', 'their', 'which', 'these', 'those',
|
45
|
+
'being', 'doing', 'until', 'while', 'about', 'after', 'above', 'below', 'under',
|
46
|
+
'again', 'there', 'where', 'other', 'myself', 'itself', 'theirs', 'having', 'during',
|
47
|
+
'before', 'should', 'himself', 'herself', 'because', 'against', 'between', 'through',
|
48
48
|
'further', 'yourself', 'ourselves', 'yourselves', 'themselves'].map{ |x| [x,true] }]
|
49
49
|
}
|
50
50
|
|
@@ -68,45 +68,46 @@ module Mongoid::FullTextSearch
|
|
68
68
|
end
|
69
69
|
|
70
70
|
def fulltext_search_ensure_indexes(index_name, config)
|
71
|
-
db = collection.
|
72
|
-
coll = db
|
71
|
+
db = collection.database
|
72
|
+
coll = db[index_name]
|
73
73
|
|
74
74
|
# The order of filters matters when the same index is used from two or more collections.
|
75
75
|
filter_indexes = (config[:filters] || []).map do |key,value|
|
76
|
-
["filter_values.#{key}",
|
76
|
+
["filter_values.#{key}", 1]
|
77
77
|
end.sort_by { |filter_index| filter_index[0] }
|
78
78
|
|
79
|
-
index_definition = [['ngram',
|
79
|
+
index_definition = [['ngram', 1], ['score', -1]].concat(filter_indexes)
|
80
80
|
|
81
81
|
# Since the definition of the index could have changed, we'll clean up by
|
82
82
|
# removing any indexes that aren't on the exact set of expected keys.
|
83
83
|
correct_keys = index_definition.map{ |field_def| field_def[0] }
|
84
84
|
all_filter_keys = filter_indexes.map{ |field_def| field_def[0] }
|
85
|
-
coll.
|
86
|
-
keys =
|
85
|
+
coll.indexes.each do |idef|
|
86
|
+
keys = idef['key'].keys
|
87
87
|
next if !keys.member?('ngram')
|
88
88
|
all_filter_keys |= keys.find_all{ |key| key.starts_with?('filter_values.') }
|
89
89
|
if keys & correct_keys != correct_keys
|
90
|
-
Mongoid.logger.info "Dropping #{name} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger
|
91
|
-
coll.
|
90
|
+
Mongoid.logger.info "Dropping #{idef['name']} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger
|
91
|
+
coll.indexes.drop(idef['key'])
|
92
92
|
end
|
93
93
|
end
|
94
94
|
|
95
95
|
if all_filter_keys.length > filter_indexes.length
|
96
|
-
filter_indexes = all_filter_keys.map {
|
97
|
-
index_definition = [['ngram',
|
96
|
+
filter_indexes = all_filter_keys.map {|key| [key, 1] }.sort_by { |filter_index| filter_index[0] }
|
97
|
+
index_definition = [['ngram', 1], ['score', -1]].concat(filter_indexes)
|
98
98
|
end
|
99
99
|
|
100
100
|
Mongoid.logger.info "Ensuring fts_index on #{coll.name}: #{index_definition}" if Mongoid.logger
|
101
|
-
coll.
|
101
|
+
coll.indexes.create(Hash[index_definition], { :name => 'fts_index' })
|
102
|
+
|
102
103
|
Mongoid.logger.info "Ensuring document_id index on #{coll.name}" if Mongoid.logger
|
103
|
-
coll.
|
104
|
+
coll.indexes.create('document_id' => 1) # to make removes fast
|
104
105
|
end
|
105
106
|
|
106
107
|
def fulltext_search(query_string, options={})
|
107
108
|
max_results = options.has_key?(:max_results) ? options.delete(:max_results) : 10
|
108
109
|
return_scores = options.has_key?(:return_scores) ? options.delete(:return_scores) : false
|
109
|
-
if self.mongoid_fulltext_config.count > 1 and !options.has_key?(:index)
|
110
|
+
if self.mongoid_fulltext_config.count > 1 and !options.has_key?(:index)
|
110
111
|
error_message = '%s is indexed by multiple full-text indexes. You must specify one by passing an :index_name parameter'
|
111
112
|
raise UnspecifiedIndexError, error_message % self.name, caller
|
112
113
|
end
|
@@ -117,12 +118,12 @@ module Mongoid::FullTextSearch
|
|
117
118
|
ngrams = all_ngrams(query_string, self.mongoid_fulltext_config[index_name])
|
118
119
|
return [] if ngrams.empty?
|
119
120
|
|
120
|
-
# For each ngram, construct the query we'll use to pull index documents and
|
121
|
+
# For each ngram, construct the query we'll use to pull index documents and
|
121
122
|
# get a count of the number of index documents containing that n-gram
|
122
|
-
ordering =
|
123
|
+
ordering = {'score' => -1}
|
123
124
|
limit = self.mongoid_fulltext_config[index_name][:max_candidate_set_size]
|
124
|
-
coll = collection.
|
125
|
-
cursors = ngrams.map do |ngram|
|
125
|
+
coll = collection.database[index_name]
|
126
|
+
cursors = ngrams.map do |ngram|
|
126
127
|
query = {'ngram' => ngram[0]}
|
127
128
|
query.update(map_query_filters options)
|
128
129
|
count = coll.find(query).count
|
@@ -130,23 +131,23 @@ module Mongoid::FullTextSearch
|
|
130
131
|
end.sort!{ |record1, record2| record1[:count] <=> record2[:count] }
|
131
132
|
|
132
133
|
# Using the queries we just constructed and the n-gram frequency counts we
|
133
|
-
# just computed, pull in about *:max_candidate_set_size* candidates by
|
134
|
-
# considering the n-grams in order of increasing frequency. When we've
|
135
|
-
# spent all *:max_candidate_set_size* candidates, pull the top-scoring
|
134
|
+
# just computed, pull in about *:max_candidate_set_size* candidates by
|
135
|
+
# considering the n-grams in order of increasing frequency. When we've
|
136
|
+
# spent all *:max_candidate_set_size* candidates, pull the top-scoring
|
136
137
|
# *max_results* candidates for each remaining n-gram.
|
137
138
|
results_so_far = 0
|
138
139
|
candidates_list = cursors.map do |doc|
|
139
140
|
next if doc[:count] == 0
|
140
|
-
|
141
|
+
query_result = coll.find(doc[:query])
|
141
142
|
if results_so_far >= limit
|
142
|
-
|
143
|
+
query_result = query_result.sort(ordering).limit(max_results)
|
143
144
|
elsif doc[:count] > limit - results_so_far
|
144
|
-
|
145
|
+
query_result = query_result.sort(ordering).limit(limit - results_so_far)
|
145
146
|
end
|
146
147
|
results_so_far += doc[:count]
|
147
148
|
ngram_score = ngrams[doc[:ngram][0]]
|
148
|
-
Hash[
|
149
|
-
[candidate['document_id'],
|
149
|
+
Hash[query_result.map do |candidate|
|
150
|
+
[candidate['document_id'],
|
150
151
|
{:clazz => candidate['class'], :score => candidate['score'] * ngram_score}]
|
151
152
|
end]
|
152
153
|
end.compact
|
@@ -161,8 +162,8 @@ module Mongoid::FullTextSearch
|
|
161
162
|
while !candidates_list.empty?
|
162
163
|
candidates = candidates_list.pop
|
163
164
|
scores = candidates.map do |candidate_id, data|
|
164
|
-
{:id => candidate_id,
|
165
|
-
:clazz => data[:clazz],
|
165
|
+
{:id => candidate_id,
|
166
|
+
:clazz => data[:clazz],
|
166
167
|
:score => data[:score] + candidates_list.map{ |others| (others.delete(candidate_id) || {:score => 0})[:score] }.sum
|
167
168
|
}
|
168
169
|
end
|
@@ -173,7 +174,7 @@ module Mongoid::FullTextSearch
|
|
173
174
|
end
|
174
175
|
|
175
176
|
def instantiate_mapreduce_result(result)
|
176
|
-
result[:clazz].constantize.find(
|
177
|
+
result[:clazz].constantize.find(result[:id])
|
177
178
|
end
|
178
179
|
|
179
180
|
def instantiate_mapreduce_results(results, options)
|
@@ -188,11 +189,15 @@ module Mongoid::FullTextSearch
|
|
188
189
|
return {} if str.nil?
|
189
190
|
|
190
191
|
if config[:remove_accents]
|
191
|
-
|
192
|
+
if defined?(UnicodeUtils)
|
193
|
+
str = UnicodeUtils.nfkd(str)
|
194
|
+
elsif defined?(DiacriticsFu)
|
195
|
+
str = DiacriticsFu::escape(str)
|
196
|
+
end
|
192
197
|
end
|
193
198
|
|
194
199
|
# Remove any characters that aren't in the alphabet and aren't word separators
|
195
|
-
filtered_str = str.mb_chars.to_s.
|
200
|
+
filtered_str = str.mb_chars.downcase.to_s.split('').find_all{ |ch| config[:alphabet][ch] or config[:word_separators][ch] }.join('')
|
196
201
|
|
197
202
|
# Figure out how many ngrams to extract from the string. If we can't afford to extract all ngrams,
|
198
203
|
# step over the string in evenly spaced strides to extract ngrams. For example, to extract 3 3-letter
|
@@ -203,7 +208,7 @@ module Mongoid::FullTextSearch
|
|
203
208
|
step_size = 1
|
204
209
|
end
|
205
210
|
|
206
|
-
# Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the
|
211
|
+
# Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the
|
207
212
|
# input string using the step size that we just computed. Let score(x,y) be the score of string x
|
208
213
|
# compared with string y - assigning scores to ngrams with the square root-based scoring function
|
209
214
|
# below and multiplying scores of matching ngrams together yields a score function that has the
|
@@ -258,8 +263,8 @@ module Mongoid::FullTextSearch
|
|
258
263
|
|
259
264
|
def remove_from_ngram_index
|
260
265
|
self.mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
|
261
|
-
coll = collection.
|
262
|
-
coll.
|
266
|
+
coll = collection.database[index_name]
|
267
|
+
coll.find({'class' => self.name}).remove_all
|
263
268
|
end
|
264
269
|
end
|
265
270
|
|
@@ -300,8 +305,8 @@ module Mongoid::FullTextSearch
|
|
300
305
|
end
|
301
306
|
|
302
307
|
# remove existing ngrams from external index
|
303
|
-
coll = collection.
|
304
|
-
coll.
|
308
|
+
coll = collection.database[index_name.to_sym]
|
309
|
+
coll.find({'document_id' => self._id}).remove_all
|
305
310
|
# extract ngrams from fields
|
306
311
|
field_values = fulltext_config[:ngram_fields].map { |field| self.send(field) }
|
307
312
|
ngrams = field_values.inject({}) { |accum, item| accum.update(self.class.all_ngrams(item, fulltext_config, false))}
|
@@ -310,9 +315,9 @@ module Mongoid::FullTextSearch
|
|
310
315
|
filter_values = nil
|
311
316
|
if fulltext_config.has_key?(:filters)
|
312
317
|
filter_values = Hash[fulltext_config[:filters].map do |key,value|
|
313
|
-
begin
|
314
|
-
[key, value.call(self)]
|
315
|
-
rescue
|
318
|
+
begin
|
319
|
+
[key, value.call(self)]
|
320
|
+
rescue
|
316
321
|
# Suppress any exceptions caused by filters
|
317
322
|
end
|
318
323
|
end.compact]
|
@@ -328,8 +333,8 @@ module Mongoid::FullTextSearch
|
|
328
333
|
|
329
334
|
def remove_from_ngram_index
|
330
335
|
self.mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
|
331
|
-
coll = collection.
|
332
|
-
coll.
|
336
|
+
coll = collection.database[index_name]
|
337
|
+
coll.find({'document_id' => self._id}).remove_all
|
333
338
|
end
|
334
339
|
end
|
335
340
|
|
data/mongoid_fulltext.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "mongoid_fulltext"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.6.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Aaron Windsor"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-07-16"
|
13
13
|
s.description = "Full-text search for the Mongoid ORM, using n-grams extracted from text"
|
14
14
|
s.email = "aaron.windsor@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -19,6 +19,8 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.files = [
|
20
20
|
".document",
|
21
21
|
".rspec",
|
22
|
+
".travis.yml",
|
23
|
+
"CHANGELOG.md",
|
22
24
|
"Gemfile",
|
23
25
|
"LICENSE",
|
24
26
|
"README.md",
|
@@ -27,6 +29,7 @@ Gem::Specification.new do |s|
|
|
27
29
|
"lib/mongoid_fulltext.rb",
|
28
30
|
"lib/mongoid_indexes.rb",
|
29
31
|
"mongoid_fulltext.gemspec",
|
32
|
+
"spec/config/mongoid.yml",
|
30
33
|
"spec/models/accentless_artwork.rb",
|
31
34
|
"spec/models/advanced_artwork.rb",
|
32
35
|
"spec/models/basic_artwork.rb",
|
@@ -43,6 +46,7 @@ Gem::Specification.new do |s|
|
|
43
46
|
"spec/models/multi_field_artist.rb",
|
44
47
|
"spec/models/multi_field_artwork.rb",
|
45
48
|
"spec/models/partitioned_artist.rb",
|
49
|
+
"spec/models/russian_artwork.rb",
|
46
50
|
"spec/models/short_prefixes_artwork.rb",
|
47
51
|
"spec/models/stopwords_artwork.rb",
|
48
52
|
"spec/mongoid/fulltext_spec.rb",
|
@@ -51,53 +55,31 @@ Gem::Specification.new do |s|
|
|
51
55
|
s.homepage = "http://github.com/aaw/mongoid_fulltext"
|
52
56
|
s.licenses = ["MIT"]
|
53
57
|
s.require_paths = ["lib"]
|
54
|
-
s.rubygems_version = "1.8.
|
58
|
+
s.rubygems_version = "1.8.24"
|
55
59
|
s.summary = "Full-text search for the Mongoid ORM"
|
56
|
-
s.test_files = [
|
57
|
-
"spec/models/accentless_artwork.rb",
|
58
|
-
"spec/models/advanced_artwork.rb",
|
59
|
-
"spec/models/basic_artwork.rb",
|
60
|
-
"spec/models/delayed_artwork.rb",
|
61
|
-
"spec/models/external_artist.rb",
|
62
|
-
"spec/models/external_artwork.rb",
|
63
|
-
"spec/models/external_artwork_no_fields_supplied.rb",
|
64
|
-
"spec/models/filtered_artist.rb",
|
65
|
-
"spec/models/filtered_artwork.rb",
|
66
|
-
"spec/models/filtered_other.rb",
|
67
|
-
"spec/models/gallery/basic_artwork.rb",
|
68
|
-
"spec/models/hidden_dragon.rb",
|
69
|
-
"spec/models/multi_external_artwork.rb",
|
70
|
-
"spec/models/multi_field_artist.rb",
|
71
|
-
"spec/models/multi_field_artwork.rb",
|
72
|
-
"spec/models/partitioned_artist.rb",
|
73
|
-
"spec/models/short_prefixes_artwork.rb",
|
74
|
-
"spec/models/stopwords_artwork.rb",
|
75
|
-
"spec/mongoid/fulltext_spec.rb",
|
76
|
-
"spec/spec_helper.rb"
|
77
|
-
]
|
78
60
|
|
79
61
|
if s.respond_to? :specification_version then
|
80
62
|
s.specification_version = 3
|
81
63
|
|
82
64
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
65
|
+
s.add_runtime_dependency(%q<mongoid>, ["~> 3.0.1"])
|
83
66
|
s.add_runtime_dependency(%q<unicode_utils>, ["~> 1.0.0"])
|
84
|
-
s.add_development_dependency(%q<
|
85
|
-
s.add_development_dependency(%q<
|
86
|
-
s.add_development_dependency(%q<
|
87
|
-
s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
|
67
|
+
s.add_development_dependency(%q<bundler>, [">= 0"])
|
68
|
+
s.add_development_dependency(%q<rspec>, ["~> 2.10.0"])
|
69
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
|
88
70
|
else
|
71
|
+
s.add_dependency(%q<mongoid>, ["~> 3.0.1"])
|
89
72
|
s.add_dependency(%q<unicode_utils>, ["~> 1.0.0"])
|
90
|
-
s.add_dependency(%q<
|
91
|
-
s.add_dependency(%q<
|
92
|
-
s.add_dependency(%q<
|
93
|
-
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
73
|
+
s.add_dependency(%q<bundler>, [">= 0"])
|
74
|
+
s.add_dependency(%q<rspec>, ["~> 2.10.0"])
|
75
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
|
94
76
|
end
|
95
77
|
else
|
78
|
+
s.add_dependency(%q<mongoid>, ["~> 3.0.1"])
|
96
79
|
s.add_dependency(%q<unicode_utils>, ["~> 1.0.0"])
|
97
|
-
s.add_dependency(%q<
|
98
|
-
s.add_dependency(%q<
|
99
|
-
s.add_dependency(%q<
|
100
|
-
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
80
|
+
s.add_dependency(%q<bundler>, [">= 0"])
|
81
|
+
s.add_dependency(%q<rspec>, ["~> 2.10.0"])
|
82
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
|
101
83
|
end
|
102
84
|
end
|
103
85
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
class FilteredArtwork
|
2
2
|
include Mongoid::Document
|
3
3
|
include Mongoid::FullTextSearch
|
4
|
-
field :title, type
|
5
|
-
field :colors, type
|
4
|
+
field :title, :type => String
|
5
|
+
field :colors, :type => Array, :default => []
|
6
6
|
fulltext_search_in :title, :index_name => 'mongoid_fulltext.artworks_and_artists',
|
7
7
|
:filters => { :is_foobar => lambda { |x| x.title == 'foobar' },
|
8
8
|
:is_artwork => lambda { |x| true },
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
require 'spec_helper'
|
2
3
|
|
3
4
|
module Mongoid
|
@@ -7,7 +8,7 @@ module Mongoid
|
|
7
8
|
|
8
9
|
let!(:abcdef) { AdvancedArtwork.create(:title => 'abcdefg hijklmn') }
|
9
10
|
let!(:cesar) { AccentlessArtwork.create(:title => "C\u00e9sar Galicia") }
|
10
|
-
let!(:julio) { AccentlessArtwork.create(:title => "Julio Cesar Morales") }
|
11
|
+
let!(:julio) { AccentlessArtwork.create(:title => "Julio Cesar Morales") }
|
11
12
|
|
12
13
|
it "should recognize all options" do
|
13
14
|
# AdvancedArtwork is defined with an ngram_width of 4 and a different alphabet (abcdefg)
|
@@ -226,8 +227,10 @@ module Mongoid
|
|
226
227
|
|
227
228
|
it "doesn't blow up if garbage is in the index collection" do
|
228
229
|
ExternalArtist.fulltext_search('warhol').should == [warhol, andy_warhol]
|
229
|
-
index_collection = ExternalArtist.collection.
|
230
|
-
index_collection.
|
230
|
+
index_collection = ExternalArtist.collection.database[ExternalArtist.mongoid_fulltext_config.keys.first]
|
231
|
+
index_collection.find('document_id' => warhol.id).each do |idef|
|
232
|
+
index_collection.find('_id' => idef['_id']).update('document_id' => Moped::BSON::ObjectId.new)
|
233
|
+
end
|
231
234
|
# We should no longer be able to find warhol, but that shouldn't keep it from returning results
|
232
235
|
ExternalArtist.fulltext_search('warhol').should == [andy_warhol]
|
233
236
|
end
|
@@ -374,14 +377,15 @@ module Mongoid
|
|
374
377
|
# fields as well as the union of all the filter fields to allow for efficient lookups.
|
375
378
|
|
376
379
|
it "creates a proper index for searching efficiently" do
|
377
|
-
[ FilteredArtwork, FilteredArtist, FilteredOther].each do |klass|
|
380
|
+
[ FilteredArtwork, FilteredArtist, FilteredOther].each do |klass|
|
378
381
|
klass.create_indexes
|
379
382
|
end
|
380
|
-
index_collection = FilteredArtwork.collection.
|
381
|
-
ngram_indexes =
|
383
|
+
index_collection = FilteredArtwork.collection.database['mongoid_fulltext.artworks_and_artists']
|
384
|
+
ngram_indexes = []
|
385
|
+
index_collection.indexes.each {|idef| ngram_indexes << idef if idef['key'].has_key?('ngram') }
|
382
386
|
ngram_indexes.length.should == 1
|
383
|
-
keys = ngram_indexes.first[
|
384
|
-
expected_keys = ['ngram','score', 'filter_values.is_fuzzy', 'filter_values.is_awesome',
|
387
|
+
keys = ngram_indexes.first['key'].keys
|
388
|
+
expected_keys = ['ngram','score', 'filter_values.is_fuzzy', 'filter_values.is_awesome',
|
385
389
|
'filter_values.is_foobar', 'filter_values.is_artwork', 'filter_values.is_artist', 'filter_values.colors?'].sort
|
386
390
|
keys.sort.should == expected_keys
|
387
391
|
end
|
@@ -455,10 +459,7 @@ module Mongoid
|
|
455
459
|
['warho', 'rand'].each do |query|
|
456
460
|
results = ExternalArtist.fulltext_search(query, { :return_scores => true })
|
457
461
|
results.length.should > 0
|
458
|
-
results.map{ |result| result[-1] if result[0].to_s.starts_with?(query)}
|
459
|
-
.compact
|
460
|
-
.inject(true){ |accum, item| accum &= (item >= 1 and item < 2) }
|
461
|
-
.should be_true
|
462
|
+
results.map{ |result| result[-1] if result[0].to_s.starts_with?(query)}.compact.inject(true){ |accum, item| accum &= (item >= 1 and item < 2) }.should be_true
|
462
463
|
end
|
463
464
|
end
|
464
465
|
|
@@ -466,10 +467,7 @@ module Mongoid
|
|
466
467
|
['andy', 'warhol', 'mao'].each do |query|
|
467
468
|
results = ExternalArtist.fulltext_search(query, { :return_scores => true })
|
468
469
|
results.length.should > 0
|
469
|
-
results.map{ |result| result[-1] if result[0].to_s.split(' ').member?(query) }
|
470
|
-
.compact
|
471
|
-
.inject(true){ |accum, item| accum &= (item >= 2) }
|
472
|
-
.should be_true
|
470
|
+
results.map{ |result| result[-1] if result[0].to_s.split(' ').member?(query) }.compact.inject(true){ |accum, item| accum &= (item >= 2) }.should be_true
|
473
471
|
end
|
474
472
|
end
|
475
473
|
|
@@ -530,8 +528,8 @@ module Mongoid
|
|
530
528
|
|
531
529
|
it "removes a single record from the index" do
|
532
530
|
flowers1.remove_from_ngram_index
|
533
|
-
BasicArtwork.fulltext_search('flower').length.should == 1
|
534
|
-
end
|
531
|
+
BasicArtwork.fulltext_search('flower').length.should == 1
|
532
|
+
end
|
535
533
|
end
|
536
534
|
|
537
535
|
context "update_ngram_index" do
|
@@ -550,7 +548,12 @@ module Mongoid
|
|
550
548
|
end
|
551
549
|
|
552
550
|
after(:all) do
|
553
|
-
|
551
|
+
# Moped 1.0.0rc raises an error when removing a collection that does not exist
|
552
|
+
# Will be fixed soon.
|
553
|
+
begin
|
554
|
+
Mongoid.default_session["mongoid_fulltext.index_conditional"].drop
|
555
|
+
rescue Moped::Errors::OperationFailure => e
|
556
|
+
end
|
554
557
|
BasicArtwork.mongoid_fulltext_config.delete "mongoid_fulltext.index_conditional"
|
555
558
|
end
|
556
559
|
|
@@ -619,7 +622,7 @@ module Mongoid
|
|
619
622
|
context "from scratch" do
|
620
623
|
|
621
624
|
before(:each) do
|
622
|
-
Mongoid.
|
625
|
+
Mongoid.default_session["mongoid_fulltext.index_basicartwork_0"].drop
|
623
626
|
end
|
624
627
|
|
625
628
|
it "updates index on a single record" do
|
@@ -637,9 +640,9 @@ module Mongoid
|
|
637
640
|
context "incremental" do
|
638
641
|
|
639
642
|
it "removes an existing record" do
|
640
|
-
coll = Mongoid.
|
641
|
-
|
642
|
-
coll.
|
643
|
+
coll = Mongoid.default_session["mongoid_fulltext.index_basicartwork_0"]
|
644
|
+
coll.find('document_id' => flowers1._id).remove_all
|
645
|
+
coll.find('document_id' => flowers1._id).one.should == nil
|
643
646
|
flowers1.update_ngram_index
|
644
647
|
end
|
645
648
|
|
@@ -650,10 +653,14 @@ module Mongoid
|
|
650
653
|
it "can re-create dropped indexes" do
|
651
654
|
# there are no indexes by default because Mongoid.autocreate_indexes is set to false
|
652
655
|
# but mongo will automatically attempt to index _id in the background
|
653
|
-
Mongoid.
|
656
|
+
Mongoid.default_session["mongoid_fulltext.index_basicartwork_0"].indexes.count.should <= 1
|
654
657
|
BasicArtwork.create_indexes
|
655
658
|
expected_indexes = ['_id_', 'fts_index', 'document_id_1'].sort
|
656
|
-
|
659
|
+
current_indexes = []
|
660
|
+
Mongoid.default_session["mongoid_fulltext.index_basicartwork_0"].indexes.each do |idef|
|
661
|
+
current_indexes << idef['name']
|
662
|
+
end
|
663
|
+
current_indexes.sort.should == expected_indexes
|
657
664
|
end
|
658
665
|
|
659
666
|
it "doesn't fail on models that don't have a fulltext index" do
|
@@ -768,10 +775,24 @@ module Mongoid
|
|
768
775
|
context "with an unknown query operator used to override the default $all" do
|
769
776
|
context "with a fulltext search passing red, green, and blue to the colors filter" do
|
770
777
|
it "should raise an error" do
|
771
|
-
|
778
|
+
lambda {
|
779
|
+
FilteredArtwork.fulltext_search(title, :colors? => {:unknown => [red,green,blue]})
|
780
|
+
}.should raise_error(Mongoid::FullTextSearch::UnknownFilterQueryOperator)
|
772
781
|
end
|
773
782
|
end
|
774
783
|
end
|
784
|
+
|
785
|
+
context "should properly work with non-latin strings (i.e. cyrillic)" do
|
786
|
+
let!(:morning) { RussianArtwork.create(:title => "Утро в сосновом лесу Шишкин Morning in a Pine Forest Shishkin") }
|
787
|
+
|
788
|
+
it "should find a match if query is non-latin string" do
|
789
|
+
# RussianArtwork is just like BasicArtwork, except that we set :alphabet to
|
790
|
+
# 'abcdefghijklmnopqrstuvwxyz0123456789абвгдежзиклмнопрстуфхцчшщъыьэюя'
|
791
|
+
RussianArtwork.fulltext_search("shishkin").first.should == morning
|
792
|
+
RussianArtwork.fulltext_search("шишкин").first.should == morning
|
793
|
+
end
|
794
|
+
|
795
|
+
end
|
775
796
|
end
|
776
797
|
|
777
798
|
end
|