mongoid_fulltext 0.5.8 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +4 -0
- data/CHANGELOG.md +133 -0
- data/Gemfile +5 -7
- data/LICENSE +2 -2
- data/README.md +203 -142
- data/VERSION +1 -1
- data/lib/mongoid_fulltext.rb +57 -52
- data/mongoid_fulltext.gemspec +19 -37
- data/spec/config/mongoid.yml +6 -0
- data/spec/models/filtered_artwork.rb +2 -2
- data/spec/models/russian_artwork.rb +10 -0
- data/spec/mongoid/fulltext_spec.rb +47 -26
- data/spec/spec_helper.rb +8 -9
- metadata +54 -45
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.6.0
|
data/lib/mongoid_fulltext.rb
CHANGED
@@ -23,7 +23,7 @@ module Mongoid::FullTextSearch
|
|
23
23
|
index_name = 'mongoid_fulltext.index_%s_%s' % [self.name.downcase, self.mongoid_fulltext_config.count]
|
24
24
|
end
|
25
25
|
|
26
|
-
config = {
|
26
|
+
config = {
|
27
27
|
:alphabet => 'abcdefghijklmnopqrstuvwxyz0123456789 ',
|
28
28
|
:word_separators => "-_ \n\t",
|
29
29
|
:ngram_width => 3,
|
@@ -34,17 +34,17 @@ module Mongoid::FullTextSearch
|
|
34
34
|
:max_candidate_set_size => 1000,
|
35
35
|
:remove_accents => true,
|
36
36
|
:reindex_immediately => true,
|
37
|
-
:stop_words => Hash[['i', 'a', 's', 't', 'me', 'my', 'we', 'he', 'it', 'am', 'is', 'be', 'do', 'an', 'if',
|
38
|
-
'or', 'as', 'of', 'at', 'by', 'to', 'up', 'in', 'on', 'no', 'so', 'our', 'you', 'him',
|
39
|
-
'his', 'she', 'her', 'its', 'who', 'are', 'was', 'has', 'had', 'did', 'the', 'and',
|
40
|
-
'but', 'for', 'out', 'off', 'why', 'how', 'all', 'any', 'few', 'nor', 'not', 'own',
|
41
|
-
'too', 'can', 'don', 'now', 'ours', 'your', 'hers', 'they', 'them', 'what', 'whom',
|
42
|
-
'this', 'that', 'were', 'been', 'have', 'does', 'with', 'into', 'from', 'down', 'over',
|
43
|
-
'then', 'once', 'here', 'when', 'both', 'each', 'more', 'most', 'some', 'such', 'only',
|
44
|
-
'same', 'than', 'very', 'will', 'just', 'yours', 'their', 'which', 'these', 'those',
|
45
|
-
'being', 'doing', 'until', 'while', 'about', 'after', 'above', 'below', 'under',
|
46
|
-
'again', 'there', 'where', 'other', 'myself', 'itself', 'theirs', 'having', 'during',
|
47
|
-
'before', 'should', 'himself', 'herself', 'because', 'against', 'between', 'through',
|
37
|
+
:stop_words => Hash[['i', 'a', 's', 't', 'me', 'my', 'we', 'he', 'it', 'am', 'is', 'be', 'do', 'an', 'if',
|
38
|
+
'or', 'as', 'of', 'at', 'by', 'to', 'up', 'in', 'on', 'no', 'so', 'our', 'you', 'him',
|
39
|
+
'his', 'she', 'her', 'its', 'who', 'are', 'was', 'has', 'had', 'did', 'the', 'and',
|
40
|
+
'but', 'for', 'out', 'off', 'why', 'how', 'all', 'any', 'few', 'nor', 'not', 'own',
|
41
|
+
'too', 'can', 'don', 'now', 'ours', 'your', 'hers', 'they', 'them', 'what', 'whom',
|
42
|
+
'this', 'that', 'were', 'been', 'have', 'does', 'with', 'into', 'from', 'down', 'over',
|
43
|
+
'then', 'once', 'here', 'when', 'both', 'each', 'more', 'most', 'some', 'such', 'only',
|
44
|
+
'same', 'than', 'very', 'will', 'just', 'yours', 'their', 'which', 'these', 'those',
|
45
|
+
'being', 'doing', 'until', 'while', 'about', 'after', 'above', 'below', 'under',
|
46
|
+
'again', 'there', 'where', 'other', 'myself', 'itself', 'theirs', 'having', 'during',
|
47
|
+
'before', 'should', 'himself', 'herself', 'because', 'against', 'between', 'through',
|
48
48
|
'further', 'yourself', 'ourselves', 'yourselves', 'themselves'].map{ |x| [x,true] }]
|
49
49
|
}
|
50
50
|
|
@@ -68,45 +68,46 @@ module Mongoid::FullTextSearch
|
|
68
68
|
end
|
69
69
|
|
70
70
|
def fulltext_search_ensure_indexes(index_name, config)
|
71
|
-
db = collection.
|
72
|
-
coll = db
|
71
|
+
db = collection.database
|
72
|
+
coll = db[index_name]
|
73
73
|
|
74
74
|
# The order of filters matters when the same index is used from two or more collections.
|
75
75
|
filter_indexes = (config[:filters] || []).map do |key,value|
|
76
|
-
["filter_values.#{key}",
|
76
|
+
["filter_values.#{key}", 1]
|
77
77
|
end.sort_by { |filter_index| filter_index[0] }
|
78
78
|
|
79
|
-
index_definition = [['ngram',
|
79
|
+
index_definition = [['ngram', 1], ['score', -1]].concat(filter_indexes)
|
80
80
|
|
81
81
|
# Since the definition of the index could have changed, we'll clean up by
|
82
82
|
# removing any indexes that aren't on the exact.
|
83
83
|
correct_keys = index_definition.map{ |field_def| field_def[0] }
|
84
84
|
all_filter_keys = filter_indexes.map{ |field_def| field_def[0] }
|
85
|
-
coll.
|
86
|
-
keys =
|
85
|
+
coll.indexes.each do |idef|
|
86
|
+
keys = idef['key'].keys
|
87
87
|
next if !keys.member?('ngram')
|
88
88
|
all_filter_keys |= keys.find_all{ |key| key.starts_with?('filter_values.') }
|
89
89
|
if keys & correct_keys != correct_keys
|
90
|
-
Mongoid.logger.info "Dropping #{name} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger
|
91
|
-
coll.
|
90
|
+
Mongoid.logger.info "Dropping #{idef['name']} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger
|
91
|
+
coll.indexes.drop(idef['key'])
|
92
92
|
end
|
93
93
|
end
|
94
94
|
|
95
95
|
if all_filter_keys.length > filter_indexes.length
|
96
|
-
filter_indexes = all_filter_keys.map {
|
97
|
-
index_definition = [['ngram',
|
96
|
+
filter_indexes = all_filter_keys.map {|key| [key, 1] }.sort_by { |filter_index| filter_index[0] }
|
97
|
+
index_definition = [['ngram', 1], ['score', -1]].concat(filter_indexes)
|
98
98
|
end
|
99
99
|
|
100
100
|
Mongoid.logger.info "Ensuring fts_index on #{coll.name}: #{index_definition}" if Mongoid.logger
|
101
|
-
coll.
|
101
|
+
coll.indexes.create(Hash[index_definition], { :name => 'fts_index' })
|
102
|
+
|
102
103
|
Mongoid.logger.info "Ensuring document_id index on #{coll.name}" if Mongoid.logger
|
103
|
-
coll.
|
104
|
+
coll.indexes.create('document_id' => 1) # to make removes fast
|
104
105
|
end
|
105
106
|
|
106
107
|
def fulltext_search(query_string, options={})
|
107
108
|
max_results = options.has_key?(:max_results) ? options.delete(:max_results) : 10
|
108
109
|
return_scores = options.has_key?(:return_scores) ? options.delete(:return_scores) : false
|
109
|
-
if self.mongoid_fulltext_config.count > 1 and !options.has_key?(:index)
|
110
|
+
if self.mongoid_fulltext_config.count > 1 and !options.has_key?(:index)
|
110
111
|
error_message = '%s is indexed by multiple full-text indexes. You must specify one by passing an :index_name parameter'
|
111
112
|
raise UnspecifiedIndexError, error_message % self.name, caller
|
112
113
|
end
|
@@ -117,12 +118,12 @@ module Mongoid::FullTextSearch
|
|
117
118
|
ngrams = all_ngrams(query_string, self.mongoid_fulltext_config[index_name])
|
118
119
|
return [] if ngrams.empty?
|
119
120
|
|
120
|
-
# For each ngram, construct the query we'll use to pull index documents and
|
121
|
+
# For each ngram, construct the query we'll use to pull index documents and
|
121
122
|
# get a count of the number of index documents containing that n-gram
|
122
|
-
ordering =
|
123
|
+
ordering = {'score' => -1}
|
123
124
|
limit = self.mongoid_fulltext_config[index_name][:max_candidate_set_size]
|
124
|
-
coll = collection.
|
125
|
-
cursors = ngrams.map do |ngram|
|
125
|
+
coll = collection.database[index_name]
|
126
|
+
cursors = ngrams.map do |ngram|
|
126
127
|
query = {'ngram' => ngram[0]}
|
127
128
|
query.update(map_query_filters options)
|
128
129
|
count = coll.find(query).count
|
@@ -130,23 +131,23 @@ module Mongoid::FullTextSearch
|
|
130
131
|
end.sort!{ |record1, record2| record1[:count] <=> record2[:count] }
|
131
132
|
|
132
133
|
# Using the queries we just constructed and the n-gram frequency counts we
|
133
|
-
# just computed, pull in about *:max_candidate_set_size* candidates by
|
134
|
-
# considering the n-grams in order of increasing frequency. When we've
|
135
|
-
# spent all *:max_candidate_set_size* candidates, pull the top-scoring
|
134
|
+
# just computed, pull in about *:max_candidate_set_size* candidates by
|
135
|
+
# considering the n-grams in order of increasing frequency. When we've
|
136
|
+
# spent all *:max_candidate_set_size* candidates, pull the top-scoring
|
136
137
|
# *max_results* candidates for each remaining n-gram.
|
137
138
|
results_so_far = 0
|
138
139
|
candidates_list = cursors.map do |doc|
|
139
140
|
next if doc[:count] == 0
|
140
|
-
|
141
|
+
query_result = coll.find(doc[:query])
|
141
142
|
if results_so_far >= limit
|
142
|
-
|
143
|
+
query_result = query_result.sort(ordering).limit(max_results)
|
143
144
|
elsif doc[:count] > limit - results_so_far
|
144
|
-
|
145
|
+
query_result = query_result.sort(ordering).limit(limit - results_so_far)
|
145
146
|
end
|
146
147
|
results_so_far += doc[:count]
|
147
148
|
ngram_score = ngrams[doc[:ngram][0]]
|
148
|
-
Hash[
|
149
|
-
[candidate['document_id'],
|
149
|
+
Hash[query_result.map do |candidate|
|
150
|
+
[candidate['document_id'],
|
150
151
|
{:clazz => candidate['class'], :score => candidate['score'] * ngram_score}]
|
151
152
|
end]
|
152
153
|
end.compact
|
@@ -161,8 +162,8 @@ module Mongoid::FullTextSearch
|
|
161
162
|
while !candidates_list.empty?
|
162
163
|
candidates = candidates_list.pop
|
163
164
|
scores = candidates.map do |candidate_id, data|
|
164
|
-
{:id => candidate_id,
|
165
|
-
:clazz => data[:clazz],
|
165
|
+
{:id => candidate_id,
|
166
|
+
:clazz => data[:clazz],
|
166
167
|
:score => data[:score] + candidates_list.map{ |others| (others.delete(candidate_id) || {:score => 0})[:score] }.sum
|
167
168
|
}
|
168
169
|
end
|
@@ -173,7 +174,7 @@ module Mongoid::FullTextSearch
|
|
173
174
|
end
|
174
175
|
|
175
176
|
def instantiate_mapreduce_result(result)
|
176
|
-
result[:clazz].constantize.find(
|
177
|
+
result[:clazz].constantize.find(result[:id])
|
177
178
|
end
|
178
179
|
|
179
180
|
def instantiate_mapreduce_results(results, options)
|
@@ -188,11 +189,15 @@ module Mongoid::FullTextSearch
|
|
188
189
|
return {} if str.nil?
|
189
190
|
|
190
191
|
if config[:remove_accents]
|
191
|
-
|
192
|
+
if defined?(UnicodeUtils)
|
193
|
+
str = UnicodeUtils.nfkd(str)
|
194
|
+
elsif defined?(DiacriticsFu)
|
195
|
+
str = DiacriticsFu::escape(str)
|
196
|
+
end
|
192
197
|
end
|
193
198
|
|
194
199
|
# Remove any characters that aren't in the alphabet and aren't word separators
|
195
|
-
filtered_str = str.mb_chars.to_s.
|
200
|
+
filtered_str = str.mb_chars.downcase.to_s.split('').find_all{ |ch| config[:alphabet][ch] or config[:word_separators][ch] }.join('')
|
196
201
|
|
197
202
|
# Figure out how many ngrams to extract from the string. If we can't afford to extract all ngrams,
|
198
203
|
# step over the string in evenly spaced strides to extract ngrams. For example, to extract 3 3-letter
|
@@ -203,7 +208,7 @@ module Mongoid::FullTextSearch
|
|
203
208
|
step_size = 1
|
204
209
|
end
|
205
210
|
|
206
|
-
# Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the
|
211
|
+
# Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the
|
207
212
|
# input string using the step size that we just computed. Let score(x,y) be the score of string x
|
208
213
|
# compared with string y - assigning scores to ngrams with the square root-based scoring function
|
209
214
|
# below and multiplying scores of matching ngrams together yields a score function that has the
|
@@ -258,8 +263,8 @@ module Mongoid::FullTextSearch
|
|
258
263
|
|
259
264
|
def remove_from_ngram_index
|
260
265
|
self.mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
|
261
|
-
coll = collection.
|
262
|
-
coll.
|
266
|
+
coll = collection.database[index_name]
|
267
|
+
coll.find({'class' => self.name}).remove_all
|
263
268
|
end
|
264
269
|
end
|
265
270
|
|
@@ -300,8 +305,8 @@ module Mongoid::FullTextSearch
|
|
300
305
|
end
|
301
306
|
|
302
307
|
# remove existing ngrams from external index
|
303
|
-
coll = collection.
|
304
|
-
coll.
|
308
|
+
coll = collection.database[index_name.to_sym]
|
309
|
+
coll.find({'document_id' => self._id}).remove_all
|
305
310
|
# extract ngrams from fields
|
306
311
|
field_values = fulltext_config[:ngram_fields].map { |field| self.send(field) }
|
307
312
|
ngrams = field_values.inject({}) { |accum, item| accum.update(self.class.all_ngrams(item, fulltext_config, false))}
|
@@ -310,9 +315,9 @@ module Mongoid::FullTextSearch
|
|
310
315
|
filter_values = nil
|
311
316
|
if fulltext_config.has_key?(:filters)
|
312
317
|
filter_values = Hash[fulltext_config[:filters].map do |key,value|
|
313
|
-
begin
|
314
|
-
[key, value.call(self)]
|
315
|
-
rescue
|
318
|
+
begin
|
319
|
+
[key, value.call(self)]
|
320
|
+
rescue
|
316
321
|
# Suppress any exceptions caused by filters
|
317
322
|
end
|
318
323
|
end.compact]
|
@@ -328,8 +333,8 @@ module Mongoid::FullTextSearch
|
|
328
333
|
|
329
334
|
def remove_from_ngram_index
|
330
335
|
self.mongoid_fulltext_config.each_pair do |index_name, fulltext_config|
|
331
|
-
coll = collection.
|
332
|
-
coll.
|
336
|
+
coll = collection.database[index_name]
|
337
|
+
coll.find({'document_id' => self._id}).remove_all
|
333
338
|
end
|
334
339
|
end
|
335
340
|
|
data/mongoid_fulltext.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "mongoid_fulltext"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.6.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Aaron Windsor"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-07-16"
|
13
13
|
s.description = "Full-text search for the Mongoid ORM, using n-grams extracted from text"
|
14
14
|
s.email = "aaron.windsor@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -19,6 +19,8 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.files = [
|
20
20
|
".document",
|
21
21
|
".rspec",
|
22
|
+
".travis.yml",
|
23
|
+
"CHANGELOG.md",
|
22
24
|
"Gemfile",
|
23
25
|
"LICENSE",
|
24
26
|
"README.md",
|
@@ -27,6 +29,7 @@ Gem::Specification.new do |s|
|
|
27
29
|
"lib/mongoid_fulltext.rb",
|
28
30
|
"lib/mongoid_indexes.rb",
|
29
31
|
"mongoid_fulltext.gemspec",
|
32
|
+
"spec/config/mongoid.yml",
|
30
33
|
"spec/models/accentless_artwork.rb",
|
31
34
|
"spec/models/advanced_artwork.rb",
|
32
35
|
"spec/models/basic_artwork.rb",
|
@@ -43,6 +46,7 @@ Gem::Specification.new do |s|
|
|
43
46
|
"spec/models/multi_field_artist.rb",
|
44
47
|
"spec/models/multi_field_artwork.rb",
|
45
48
|
"spec/models/partitioned_artist.rb",
|
49
|
+
"spec/models/russian_artwork.rb",
|
46
50
|
"spec/models/short_prefixes_artwork.rb",
|
47
51
|
"spec/models/stopwords_artwork.rb",
|
48
52
|
"spec/mongoid/fulltext_spec.rb",
|
@@ -51,53 +55,31 @@ Gem::Specification.new do |s|
|
|
51
55
|
s.homepage = "http://github.com/aaw/mongoid_fulltext"
|
52
56
|
s.licenses = ["MIT"]
|
53
57
|
s.require_paths = ["lib"]
|
54
|
-
s.rubygems_version = "1.8.
|
58
|
+
s.rubygems_version = "1.8.24"
|
55
59
|
s.summary = "Full-text search for the Mongoid ORM"
|
56
|
-
s.test_files = [
|
57
|
-
"spec/models/accentless_artwork.rb",
|
58
|
-
"spec/models/advanced_artwork.rb",
|
59
|
-
"spec/models/basic_artwork.rb",
|
60
|
-
"spec/models/delayed_artwork.rb",
|
61
|
-
"spec/models/external_artist.rb",
|
62
|
-
"spec/models/external_artwork.rb",
|
63
|
-
"spec/models/external_artwork_no_fields_supplied.rb",
|
64
|
-
"spec/models/filtered_artist.rb",
|
65
|
-
"spec/models/filtered_artwork.rb",
|
66
|
-
"spec/models/filtered_other.rb",
|
67
|
-
"spec/models/gallery/basic_artwork.rb",
|
68
|
-
"spec/models/hidden_dragon.rb",
|
69
|
-
"spec/models/multi_external_artwork.rb",
|
70
|
-
"spec/models/multi_field_artist.rb",
|
71
|
-
"spec/models/multi_field_artwork.rb",
|
72
|
-
"spec/models/partitioned_artist.rb",
|
73
|
-
"spec/models/short_prefixes_artwork.rb",
|
74
|
-
"spec/models/stopwords_artwork.rb",
|
75
|
-
"spec/mongoid/fulltext_spec.rb",
|
76
|
-
"spec/spec_helper.rb"
|
77
|
-
]
|
78
60
|
|
79
61
|
if s.respond_to? :specification_version then
|
80
62
|
s.specification_version = 3
|
81
63
|
|
82
64
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
65
|
+
s.add_runtime_dependency(%q<mongoid>, ["~> 3.0.1"])
|
83
66
|
s.add_runtime_dependency(%q<unicode_utils>, ["~> 1.0.0"])
|
84
|
-
s.add_development_dependency(%q<
|
85
|
-
s.add_development_dependency(%q<
|
86
|
-
s.add_development_dependency(%q<
|
87
|
-
s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
|
67
|
+
s.add_development_dependency(%q<bundler>, [">= 0"])
|
68
|
+
s.add_development_dependency(%q<rspec>, ["~> 2.10.0"])
|
69
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
|
88
70
|
else
|
71
|
+
s.add_dependency(%q<mongoid>, ["~> 3.0.1"])
|
89
72
|
s.add_dependency(%q<unicode_utils>, ["~> 1.0.0"])
|
90
|
-
s.add_dependency(%q<
|
91
|
-
s.add_dependency(%q<
|
92
|
-
s.add_dependency(%q<
|
93
|
-
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
73
|
+
s.add_dependency(%q<bundler>, [">= 0"])
|
74
|
+
s.add_dependency(%q<rspec>, ["~> 2.10.0"])
|
75
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
|
94
76
|
end
|
95
77
|
else
|
78
|
+
s.add_dependency(%q<mongoid>, ["~> 3.0.1"])
|
96
79
|
s.add_dependency(%q<unicode_utils>, ["~> 1.0.0"])
|
97
|
-
s.add_dependency(%q<
|
98
|
-
s.add_dependency(%q<
|
99
|
-
s.add_dependency(%q<
|
100
|
-
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
80
|
+
s.add_dependency(%q<bundler>, [">= 0"])
|
81
|
+
s.add_dependency(%q<rspec>, ["~> 2.10.0"])
|
82
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
|
101
83
|
end
|
102
84
|
end
|
103
85
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
class FilteredArtwork
|
2
2
|
include Mongoid::Document
|
3
3
|
include Mongoid::FullTextSearch
|
4
|
-
field :title, type
|
5
|
-
field :colors, type
|
4
|
+
field :title, :type => String
|
5
|
+
field :colors, :type => Array, :default => []
|
6
6
|
fulltext_search_in :title, :index_name => 'mongoid_fulltext.artworks_and_artists',
|
7
7
|
:filters => { :is_foobar => lambda { |x| x.title == 'foobar' },
|
8
8
|
:is_artwork => lambda { |x| true },
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
require 'spec_helper'
|
2
3
|
|
3
4
|
module Mongoid
|
@@ -7,7 +8,7 @@ module Mongoid
|
|
7
8
|
|
8
9
|
let!(:abcdef) { AdvancedArtwork.create(:title => 'abcdefg hijklmn') }
|
9
10
|
let!(:cesar) { AccentlessArtwork.create(:title => "C\u00e9sar Galicia") }
|
10
|
-
let!(:julio) { AccentlessArtwork.create(:title => "Julio Cesar Morales") }
|
11
|
+
let!(:julio) { AccentlessArtwork.create(:title => "Julio Cesar Morales") }
|
11
12
|
|
12
13
|
it "should recognize all options" do
|
13
14
|
# AdvancedArtwork is defined with an ngram_width of 4 and a different alphabet (abcdefg)
|
@@ -226,8 +227,10 @@ module Mongoid
|
|
226
227
|
|
227
228
|
it "doesn't blow up if garbage is in the index collection" do
|
228
229
|
ExternalArtist.fulltext_search('warhol').should == [warhol, andy_warhol]
|
229
|
-
index_collection = ExternalArtist.collection.
|
230
|
-
index_collection.
|
230
|
+
index_collection = ExternalArtist.collection.database[ExternalArtist.mongoid_fulltext_config.keys.first]
|
231
|
+
index_collection.find('document_id' => warhol.id).each do |idef|
|
232
|
+
index_collection.find('_id' => idef['_id']).update('document_id' => Moped::BSON::ObjectId.new)
|
233
|
+
end
|
231
234
|
# We should no longer be able to find warhol, but that shouldn't keep it from returning results
|
232
235
|
ExternalArtist.fulltext_search('warhol').should == [andy_warhol]
|
233
236
|
end
|
@@ -374,14 +377,15 @@ module Mongoid
|
|
374
377
|
# fields as well as the union of all the filter fields to allow for efficient lookups.
|
375
378
|
|
376
379
|
it "creates a proper index for searching efficiently" do
|
377
|
-
[ FilteredArtwork, FilteredArtist, FilteredOther].each do |klass|
|
380
|
+
[ FilteredArtwork, FilteredArtist, FilteredOther].each do |klass|
|
378
381
|
klass.create_indexes
|
379
382
|
end
|
380
|
-
index_collection = FilteredArtwork.collection.
|
381
|
-
ngram_indexes =
|
383
|
+
index_collection = FilteredArtwork.collection.database['mongoid_fulltext.artworks_and_artists']
|
384
|
+
ngram_indexes = []
|
385
|
+
index_collection.indexes.each {|idef| ngram_indexes << idef if idef['key'].has_key?('ngram') }
|
382
386
|
ngram_indexes.length.should == 1
|
383
|
-
keys = ngram_indexes.first[
|
384
|
-
expected_keys = ['ngram','score', 'filter_values.is_fuzzy', 'filter_values.is_awesome',
|
387
|
+
keys = ngram_indexes.first['key'].keys
|
388
|
+
expected_keys = ['ngram','score', 'filter_values.is_fuzzy', 'filter_values.is_awesome',
|
385
389
|
'filter_values.is_foobar', 'filter_values.is_artwork', 'filter_values.is_artist', 'filter_values.colors?'].sort
|
386
390
|
keys.sort.should == expected_keys
|
387
391
|
end
|
@@ -455,10 +459,7 @@ module Mongoid
|
|
455
459
|
['warho', 'rand'].each do |query|
|
456
460
|
results = ExternalArtist.fulltext_search(query, { :return_scores => true })
|
457
461
|
results.length.should > 0
|
458
|
-
results.map{ |result| result[-1] if result[0].to_s.starts_with?(query)}
|
459
|
-
.compact
|
460
|
-
.inject(true){ |accum, item| accum &= (item >= 1 and item < 2) }
|
461
|
-
.should be_true
|
462
|
+
results.map{ |result| result[-1] if result[0].to_s.starts_with?(query)}.compact.inject(true){ |accum, item| accum &= (item >= 1 and item < 2) }.should be_true
|
462
463
|
end
|
463
464
|
end
|
464
465
|
|
@@ -466,10 +467,7 @@ module Mongoid
|
|
466
467
|
['andy', 'warhol', 'mao'].each do |query|
|
467
468
|
results = ExternalArtist.fulltext_search(query, { :return_scores => true })
|
468
469
|
results.length.should > 0
|
469
|
-
results.map{ |result| result[-1] if result[0].to_s.split(' ').member?(query) }
|
470
|
-
.compact
|
471
|
-
.inject(true){ |accum, item| accum &= (item >= 2) }
|
472
|
-
.should be_true
|
470
|
+
results.map{ |result| result[-1] if result[0].to_s.split(' ').member?(query) }.compact.inject(true){ |accum, item| accum &= (item >= 2) }.should be_true
|
473
471
|
end
|
474
472
|
end
|
475
473
|
|
@@ -530,8 +528,8 @@ module Mongoid
|
|
530
528
|
|
531
529
|
it "removes a single record from the index" do
|
532
530
|
flowers1.remove_from_ngram_index
|
533
|
-
BasicArtwork.fulltext_search('flower').length.should == 1
|
534
|
-
end
|
531
|
+
BasicArtwork.fulltext_search('flower').length.should == 1
|
532
|
+
end
|
535
533
|
end
|
536
534
|
|
537
535
|
context "update_ngram_index" do
|
@@ -550,7 +548,12 @@ module Mongoid
|
|
550
548
|
end
|
551
549
|
|
552
550
|
after(:all) do
|
553
|
-
|
551
|
+
# Moped 1.0.0rc raises an error when removing a collection that does not exist
|
552
|
+
# Will be fixed soon.
|
553
|
+
begin
|
554
|
+
Mongoid.default_session["mongoid_fulltext.index_conditional"].drop
|
555
|
+
rescue Moped::Errors::OperationFailure => e
|
556
|
+
end
|
554
557
|
BasicArtwork.mongoid_fulltext_config.delete "mongoid_fulltext.index_conditional"
|
555
558
|
end
|
556
559
|
|
@@ -619,7 +622,7 @@ module Mongoid
|
|
619
622
|
context "from scratch" do
|
620
623
|
|
621
624
|
before(:each) do
|
622
|
-
Mongoid.
|
625
|
+
Mongoid.default_session["mongoid_fulltext.index_basicartwork_0"].drop
|
623
626
|
end
|
624
627
|
|
625
628
|
it "updates index on a single record" do
|
@@ -637,9 +640,9 @@ module Mongoid
|
|
637
640
|
context "incremental" do
|
638
641
|
|
639
642
|
it "removes an existing record" do
|
640
|
-
coll = Mongoid.
|
641
|
-
|
642
|
-
coll.
|
643
|
+
coll = Mongoid.default_session["mongoid_fulltext.index_basicartwork_0"]
|
644
|
+
coll.find('document_id' => flowers1._id).remove_all
|
645
|
+
coll.find('document_id' => flowers1._id).one.should == nil
|
643
646
|
flowers1.update_ngram_index
|
644
647
|
end
|
645
648
|
|
@@ -650,10 +653,14 @@ module Mongoid
|
|
650
653
|
it "can re-create dropped indexes" do
|
651
654
|
# there're no indexes by default as Mongoid.autocreate_indexes is set to false
|
652
655
|
# but mongo will automatically attempt to index _id in the background
|
653
|
-
Mongoid.
|
656
|
+
Mongoid.default_session["mongoid_fulltext.index_basicartwork_0"].indexes.count.should <= 1
|
654
657
|
BasicArtwork.create_indexes
|
655
658
|
expected_indexes = ['_id_', 'fts_index', 'document_id_1'].sort
|
656
|
-
|
659
|
+
current_indexes = []
|
660
|
+
Mongoid.default_session["mongoid_fulltext.index_basicartwork_0"].indexes.each do |idef|
|
661
|
+
current_indexes << idef['name']
|
662
|
+
end
|
663
|
+
current_indexes.sort.should == expected_indexes
|
657
664
|
end
|
658
665
|
|
659
666
|
it "doesn't fail on models that don't have a fulltext index" do
|
@@ -768,10 +775,24 @@ module Mongoid
|
|
768
775
|
context "with an unknown query operator used to override the default $all" do
|
769
776
|
context "with a fulltext search passing red, green, and blue to the colors filter" do
|
770
777
|
it "should raise an error" do
|
771
|
-
|
778
|
+
lambda {
|
779
|
+
FilteredArtwork.fulltext_search(title, :colors? => {:unknown => [red,green,blue]})
|
780
|
+
}.should raise_error(Mongoid::FullTextSearch::UnknownFilterQueryOperator)
|
772
781
|
end
|
773
782
|
end
|
774
783
|
end
|
784
|
+
|
785
|
+
context "should properly work with non-latin strings (i.e. cyrillic)" do
|
786
|
+
let!(:morning) { RussianArtwork.create(:title => "Утро в сосновом лесу Шишкин Morning in a Pine Forest Shishkin") }
|
787
|
+
|
788
|
+
it "should find a match if query is non-latin string" do
|
789
|
+
# RussianArtwork is just like BasicArtwork, except that we set :alphabet to
|
790
|
+
# 'abcdefghijklmnopqrstuvwxyz0123456789абвгдежзиклмнопрстуфхцчшщъыьэюя'
|
791
|
+
RussianArtwork.fulltext_search("shishkin").first.should == morning
|
792
|
+
RussianArtwork.fulltext_search("шишкин").first.should == morning
|
793
|
+
end
|
794
|
+
|
795
|
+
end
|
775
796
|
end
|
776
797
|
|
777
798
|
end
|