pager-ultrasphinx 1.0.20080510
- data/LICENSE +184 -0
- data/README +140 -0
- data/Rakefile +27 -0
- data/lib/ultrasphinx/associations.rb +26 -0
- data/lib/ultrasphinx/autoload.rb +12 -0
- data/lib/ultrasphinx/configure.rb +367 -0
- data/lib/ultrasphinx/core_extensions.rb +132 -0
- data/lib/ultrasphinx/fields.rb +198 -0
- data/lib/ultrasphinx/is_indexed.rb +227 -0
- data/lib/ultrasphinx/postgresql/concat_ws.sql +35 -0
- data/lib/ultrasphinx/postgresql/crc32.sql +15 -0
- data/lib/ultrasphinx/postgresql/group_concat.sql +23 -0
- data/lib/ultrasphinx/postgresql/hex_to_int.sql +15 -0
- data/lib/ultrasphinx/postgresql/language.sql +1 -0
- data/lib/ultrasphinx/postgresql/unix_timestamp.sql +12 -0
- data/lib/ultrasphinx/search/internals.rb +385 -0
- data/lib/ultrasphinx/search/parser.rb +139 -0
- data/lib/ultrasphinx/search.rb +456 -0
- data/lib/ultrasphinx/spell.rb +57 -0
- data/lib/ultrasphinx/ultrasphinx.rb +199 -0
- data/lib/ultrasphinx.rb +36 -0
- data/rails/init.rb +2 -0
- data/tasks/ultrasphinx.rake +206 -0
- data/vendor/riddle/MIT-LICENCE +20 -0
- data/vendor/riddle/README +74 -0
- data/vendor/riddle/Rakefile +117 -0
- data/vendor/riddle/lib/riddle/client/filter.rb +44 -0
- data/vendor/riddle/lib/riddle/client/message.rb +65 -0
- data/vendor/riddle/lib/riddle/client/response.rb +84 -0
- data/vendor/riddle/lib/riddle/client.rb +593 -0
- data/vendor/riddle/lib/riddle.rb +25 -0
- data/vendor/will_paginate/LICENSE +18 -0
- metadata +84 -0
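
The two substantive files in this release are the search internals and the query parser, reproduced below. For orientation, here is a minimal usage sketch of the API they back, following the gem's documented Ultrasphinx::Search interface; the query string is made up, and a configured model plus a running searchd daemon are assumed:

search = Ultrasphinx::Search.new(:query => 'excelsior')
search.run       # builds a Riddle::Client request (build_request_with_options, below)
search.results   # ActiveRecord objects, loaded back in via reify_results (below)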
data/lib/ultrasphinx/search/internals.rb
@@ -0,0 +1,385 @@

module Ultrasphinx
  class Search
    module Internals

      INFINITY = 1/0.0

      include Associations

      # These methods are kept stateless to ease debugging

      private

      def build_request_with_options opts

        request = Riddle::Client.new

        # Basic options
        request.instance_eval do
          @server = Ultrasphinx::CLIENT_SETTINGS['server_host']
          @port = Ultrasphinx::CLIENT_SETTINGS['server_port']
          @match_mode = :extended # Force extended query mode
          @offset = opts['per_page'] * (opts['page'] - 1)
          @limit = opts['per_page']
          @max_matches = [@offset + @limit + Ultrasphinx::Search.client_options['max_matches_offset'], MAX_MATCHES].min
        end

        # Geosearch location
        loc = opts['location']
        loc.stringify_keys!
        lat, long = loc['lat'], loc['long']
        if lat and long
          # Convert degrees to radians, if requested
          if loc['units'] == 'degrees'
            lat = degrees_to_radians(lat)
            long = degrees_to_radians(long)
          end
          # Set the location/anchor point
          request.set_anchor(loc['lat_attribute_name'], lat, loc['long_attribute_name'], long)
        end

        # Sorting
        sort_by = opts['sort_by']
        if options['location']
          case sort_by
            when "distance asc", "distance" then sort_by = "@geodist asc"
            when "distance desc" then sort_by = "@geodist desc"
          end
        end

        # Use the additional sortable column if it is a text type
        sort_by += "_sortable" if Fields.instance.types[sort_by] == "text"

        unless sort_by.blank?
          if opts['sort_mode'].to_s == 'relevance'
            # If you're sorting by a field you don't want 'relevance' order
            raise UsageError, "Sort mode 'relevance' is not valid with a sort_by field"
          end
          request.sort_by = sort_by.to_s
        end

        if sort_mode = SPHINX_CLIENT_PARAMS['sort_mode'][opts['sort_mode']]
          request.sort_mode = sort_mode
        else
          raise UsageError, "Sort mode #{opts['sort_mode'].inspect} is invalid"
        end

        # Weighting
        weights = opts['weights']
        if weights.any?
          # Order according to the field order for Sphinx, and set the missing fields to 1.0
          ordered_weights = []
          Fields.instance.types.map do |name, type|
            name if type == 'text'
          end.compact.sort.each do |name|
            ordered_weights << (weights[name] || 1.0)
          end
          request.weights = ordered_weights
        end

        # Class names
        unless Array(opts['class_names']).empty?
          request.filters << Riddle::Client::Filter.new(
            'class_id',
            (opts['class_names'].map do |model|
              MODELS_TO_IDS[model.to_s] or
                MODELS_TO_IDS[model.to_s.constantize.base_class.to_s] or
                raise UsageError, "Invalid class name #{model.inspect}"
            end),
            false)
        end

        # Extract raw filters
        # XXX This is poorly done. We should coerce based on the Field types, not the value class.
        # That would also allow us to move numeric filters from the query string into the hash.
        Array(opts['filters']).each do |field, value|

          field = field.to_s
          type = Fields.instance.types[field]

          # Special derived attribute
          if field == 'distance' and options['location']
            field, type = '@geodist', 'float'
          end

          raise UsageError, "field #{field.inspect} is invalid" unless type

          begin
            case value
              when Integer, Float, BigDecimal, NilClass, Array
                # XXX Hack to force floats to be floats
                value = value.to_f if type == 'float'
                # Just bomb the filter in there
                request.filters << Riddle::Client::Filter.new(field, Array(value), false)
              when Range
                # Make sure ranges point in the right direction
                min, max = [value.begin, value.end].map {|x| x._to_numeric }
                raise NoMethodError unless min <=> max and max <=> min
                min, max = max, min if min > max
                # XXX Hack to force floats to be floats
                min, max = min.to_f, max.to_f if type == 'float'
                request.filters << Riddle::Client::Filter.new(field, min..max, false)
              when String
                # XXX Hack to move text filters into the query
                opts['parsed_query'] << " @#{field} #{value}"
              else
                raise NoMethodError
            end
          rescue NoMethodError => e
            raise UsageError, "Filter value #{value.inspect} for field #{field.inspect} is invalid"
          end
        end

        request
      end

      def get_subtotals(original_request, query)
        request = original_request._deep_dup
        request.instance_eval { @filters.delete_if {|filter| filter.attribute == 'class_id'} }

        facets = get_facets(request, query, 'class_id')

        # Not using the standard facet caching here
        Hash[*(MODELS_TO_IDS.map do |klass, id|
          [klass, facets[id] || 0]
        end.flatten)]
      end

      def get_facets(original_request, query, original_facet)
        request, facet = original_request._deep_dup, original_facet
        facet += "_facet" if Fields.instance.types[original_facet] == 'text'

        unless Fields.instance.types[facet]
          if facet == original_facet
            raise UsageError, "Field #{original_facet} does not exist"
          else
            raise UsageError, "Field #{original_facet} is a text field, but was not configured for text faceting"
          end
        end

        # Set the facet query parameter and modify per-page setting so we snag all the facets
        request.instance_eval do
          @group_by = facet
          @group_function = :attr
          @group_clauses = '@count desc'
          @offset = 0
          @limit = Ultrasphinx::Search.client_options['max_facets']
          @max_matches = [@limit + Ultrasphinx::Search.client_options['max_matches_offset'], MAX_MATCHES].min
        end

        # Run the query
        begin
          matches = request.query(query, options['indexes'])[:matches]
        rescue DaemonError
          raise ConfigurationError, "Index seems out of date. Run 'rake ultrasphinx:index'"
        end

        # Map the facets back to something sane
        facets = {}
        matches.each do |match|
          attributes = match[:attributes]
          raise DaemonError if facets[attributes['@groupby']]
          facets[attributes['@groupby']] = attributes['@count']
        end

        # Invert the hashes, if we have them
        reverse_map_facets(facets, original_facet)
      end

      def reverse_map_facets(facets, facet)
        facets = facets.dup

        if Fields.instance.types[facet] == 'text'
          # Apply the map, rebuilding if the cache is missing or out-of-date
          facets = Hash[*(facets.map do |hash, value|
            rebuild_facet_cache(facet) unless FACET_CACHE[facet] and FACET_CACHE[facet].has_key?(hash)
            [FACET_CACHE[facet][hash], value]
          end.flatten)]
        end

        facets
      end

      def rebuild_facet_cache(facet)
        # Cache the reverse hash map for the textual facet if it hasn't been done yet
        # XXX Not necessarily optimal since it requires a direct DB hit once per mongrel
        Ultrasphinx.say "caching hash reverse map for text facet #{facet}"

        configured_classes = Fields.instance.classes[facet].map do |klass|

          # Concatenates might not work well
          type, configuration = nil, nil
          MODEL_CONFIGURATION[klass.name].except('conditions', 'delta').each do |_type, values|
            type = _type
            configuration = values.detect { |this_field| this_field['as'] == facet }
            break if configuration
          end

          unless configuration and configuration['facet']
            Ultrasphinx.say "model #{klass.name} has the requested '#{facet}' field, but it was not configured for faceting, and will be skipped"
            next
          end

          FACET_CACHE[facet] ||= {}

          # XXX This is a duplication of stuff already known in configure.rb, and ought to be cleaned up,
          # but that would mean we have to either parse the .conf or configure every time at boot

          field_string, join_string = case type
            when 'fields'
              [configuration['field'], ""]
            when 'include'
              # XXX Only handles the basic case. No test coverage.

              table_alias = configuration['table_alias']
              association_model = if configuration['class_name']
                configuration['class_name'].constantize
              else
                get_association_model(klass, configuration)
              end

              ["#{table_alias}.#{configuration['field']}",
                (configuration['association_sql'] or "LEFT OUTER JOIN #{association_model.table_name} AS #{table_alias} ON #{table_alias}.#{klass.to_s.downcase}_id = #{klass.table_name}.#{association_model.primary_key}")
              ]
            when 'concatenate'
              # Wait for someone to complain before worrying about this
              raise "Concatenation text facets have not been implemented"
          end

          klass.connection.execute("SELECT #{field_string} AS value, #{SQL_FUNCTIONS[ADAPTER]['hash']._interpolate(field_string)} AS hash FROM #{klass.table_name} #{join_string} GROUP BY value").each do |value, hash|
            FACET_CACHE[facet][hash.to_i] = value
          end
          klass
        end

        configured_classes.compact!
        raise ConfigurationError, "no classes were correctly configured for text faceting on '#{facet}'" if configured_classes.empty?

        FACET_CACHE[facet]
      end

      # Inverse-modulus map the Sphinx ids to the table-specific ids
      def convert_sphinx_ids(sphinx_ids)

        number_of_models = IDS_TO_MODELS.size
        raise ConfigurationError, "No model mappings were found. Your #{RAILS_ENV}.conf file is corrupted, or your application container needs to be restarted." if number_of_models == 0

        sphinx_ids.sort_by do |item|
          item[:index]
        end.map do |item|
          class_name = IDS_TO_MODELS[item[:doc] % number_of_models]
          raise DaemonError, "Impossible Sphinx document id #{item[:doc]} in query result" unless class_name
          [class_name, item[:doc] / number_of_models]
        end
      end

      # Fetch them for real
      def reify_results(ids)
        results = []

        ids_hash = {}
        ids.each do |class_name, id|
          (ids_hash[class_name] ||= []) << id
        end

        ids.map {|ary| ary.first}.uniq.each do |class_name|
          klass = class_name.constantize

          finder = (
            Ultrasphinx::Search.client_options['finder_methods'].detect do |method_name|
              klass.respond_to? method_name
            end or
            # XXX This default is kind of buried, but I'm not sure why you would need it to be
            # configurable, since you can use ['finder_methods'].
            "find_all_by_#{klass.primary_key}"
          )

          records = klass.send(finder, ids_hash[class_name])

          unless Ultrasphinx::Search.client_options['ignore_missing_records']
            if records.size != ids_hash[class_name].size
              missed_ids = ids_hash[class_name] - records.map(&:id)
              msg = if missed_ids.size == 1
                "Couldn't find #{class_name} with ID=#{missed_ids.first}"
              else
                "Couldn't find #{class_name.pluralize} with IDs: #{missed_ids.join(',')} (found #{records.size} results, but was looking for #{ids_hash[class_name].size})"
              end
              raise ActiveRecord::RecordNotFound, msg
            end
          end

          records.each do |record|
            results[ids.index([class_name, record.id])] = record
          end
        end

        # Add an accessor for global search rank for each record, if requested
        if self.class.client_options['with_global_rank']
          # XXX Nobody uses this
          results.each_with_index do |result, index|
            if result
              global_index = per_page * (current_page - 1) + index
              result.instance_variable_get('@attributes')['result_index'] = global_index
            end
          end
        end

        # Add an accessor for distance, if requested
        if self.options['location']['lat'] and self.options['location']['long']
          results.each_with_index do |result, index|
            if result
              distance = (response[:matches][index][:attributes]['@geodist'] or INFINITY)
              result.instance_variable_get('@attributes')['distance'] = distance
            end
          end
        end

        results.compact!

        if ids.size - results.size > Ultrasphinx::Search.client_options['max_missing_records']
          # Never reached if Ultrasphinx::Search.client_options['ignore_missing_records'] is false due to raise
          raise ConfigurationError, "Too many results for this query returned ActiveRecord::RecordNotFound. The index is probably out of date"
        end

        results
      end

      def perform_action_with_retries
        tries = 0
        exceptions = [NoMethodError, Riddle::VersionError, Riddle::ResponseError, Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::EPIPE]
        begin
          yield
        rescue *exceptions => e
          tries += 1
          if tries <= Ultrasphinx::Search.client_options['max_retries']
            say "restarting query (#{tries} attempts already) (#{e})"
            sleep(Ultrasphinx::Search.client_options['retry_sleep_time'])
            retry
          else
            say "query failed"
            # Clear the rescue list, retry one last time, and let the error fail up the stack
            exceptions = []
            retry
          end
        end
      end

      def strip_bogus_characters(s)
        # Used to remove some garbage before highlighting
        s.gsub(/<.*?>|\.\.\.|\342\200\246|\n|\r/, " ").gsub(/http.*?( |$)/, ' ') if s
      end

      def strip_query_commands(s)
        # XXX Hack for query commands, since Sphinx doesn't intelligently parse the query in excerpt mode
        # Also removes apostrophes in the middle of words so that they don't get split in two.
        s.gsub(/(^|\s)(AND|OR|NOT|\@\w+)(\s|$)/i, "").gsub(/(\w)\'(\w)/, '\1\2')
      end

      def degrees_to_radians(value)
        Math::PI * value / 180.0
      end

    end
  end
end
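
convert_sphinx_ids above works because, at index time, Ultrasphinx packs a record's primary key and its model's class id into a single Sphinx document id. A standalone sketch of the round trip; the two-model table is hypothetical, standing in for the generated IDS_TO_MODELS map:

ids_to_models = { 0 => 'User', 1 => 'Post' }   # hypothetical; the real map is generated at configure time
number_of_models = ids_to_models.size

# Index time: doc_id = primary_key * number_of_models + class_id
doc = 7 * number_of_models + 1                 # Post with primary key 7 => doc id 15

# Query time: invert with modulus and integer division
ids_to_models[doc % number_of_models]          # => "Post"
doc / number_of_models                         # => 7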
data/lib/ultrasphinx/search/parser.rb
@@ -0,0 +1,139 @@

module Ultrasphinx
  class Search
    module Parser
      # We could rewrite this in Treetop, but for now it works well.

      class Error < RuntimeError
      end

      OPERATORS = {
        'OR' => '|',
        'AND' => '',
        'NOT' => '-',
        'or' => '|',
        'and' => '',
        'not' => '-'
      }

      private

      def parse query
        # Alters a Google query string into Sphinx 0.97 style
        return "" if query.blank?
        # Parse
        token_hash = token_stream_to_hash(query_to_token_stream(query))
        # Join everything up and remove some spaces
        token_hash_to_array(token_hash).join(" ").squeeze(" ").strip
      end


      def token_hash_to_array(token_hash)
        query = []

        token_hash.sort_by do |key, value|
          key or ""
        end.each do |field, contents|
          # First operator always goes outside
          query << contents.first.first

          query << "@#{field}" if field
          query << "(" if field and contents.size > 1

          contents.each_with_index do |op_and_content, index|
            op, content = op_and_content
            query << op unless index == 0
            query << content
          end

          query << ")" if field and contents.size > 1
        end

        # Collapse fieldsets early so that the swap doesn't split them
        query.each_with_index do |token, index|
          if token =~ /^@/
            query[index] = "#{token} #{query[index + 1]}"
            query[index + 1] = nil
          end
        end

        # Swap the first pair if the order is reversed
        if [OPERATORS['NOT'], OPERATORS['OR']].include? query.first.upcase
          query[0], query[1] = query[1], query[0]
        end

        query
      end


      def query_to_token_stream(query)
        # First, split query on spaces that are not inside sets of quotes or parens

        query = query.scan(/[^"() ]*["(][^")]*[")]|[^"() ]+/)

        token_stream = []
        has_operator = false

        query.each_with_index do |subtoken, index|

          # Recurse for parens, if necessary
          if subtoken =~ /^(.*?)\((.*)\)(.*?$)/
            subtoken = query[index] = "#{$1}(#{parse $2})#{$3}"
          end

          # Reappend missing closing quotes
          if subtoken =~ /(^|\:)\"/
            subtoken = subtoken.chomp('"') + '"'
          end

          # Strip parentheses within quoted strings
          if subtoken =~ /\"(.*)\"/
            subtoken.sub!($1, $1.gsub(/[()]/, ''))
          end

          # Add to the stream, converting the operator
          if !has_operator
            if OPERATORS.to_a.flatten.include? subtoken and index != (query.size - 1)
              # Note that operators at the end of the string are not parsed
              token_stream << (OPERATORS[subtoken] || subtoken) # Parenthesized: << binds tighter than ||, so a bare '|' or '-' passes through instead of nil
              has_operator = true # flip
            else
              token_stream << ""
              token_stream << subtoken
            end
          else
            if OPERATORS.to_a.flatten.include? subtoken
              # Drop extra operator
            else
              token_stream << subtoken
              has_operator = false # flop
            end
          end
        end

        if token_stream.size.zero? or token_stream.size.odd?
          raise Error, "#{token_stream.inspect} is not a valid token stream"
        end
        token_stream.in_groups_of(2)
      end


      def token_stream_to_hash(token_stream)
        token_hash = Hash.new([])
        token_stream.map do |operator, content|
          # Remove some spaces
          content.gsub!(/^"\s+|\s+"$/, '"')
          # Convert fields into sphinx style, reformat the stream object
          if content =~ /(.*?):(.*)/
            token_hash[$1] += [[operator, $2]]
          else
            token_hash[nil] += [[operator, content]]
          end
        end
        token_hash
      end


    end
  end
end
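
To make the parser pipeline concrete, a worked example. Assumptions: ActiveSupport is loaded (parse uses blank? and in_groups_of), and ParserDemo is a throwaway class invented here to reach the private parse method:

class ParserDemo
  include Ultrasphinx::Search::Parser
  public :parse   # parse is declared private in the module
end

ParserDemo.new.parse('rails OR merb title:deployment')
# query_to_token_stream pairs tokens as [operator, content]:
#   [["", "rails"], ["|", "merb"], ["", "title:deployment"]]
# token_stream_to_hash groups them by field:
#   { nil => [["", "rails"], ["|", "merb"]], "title" => [["", "deployment"]] }
# token_hash_to_array then emits Sphinx extended syntax:
#   => "rails | merb @title deployment"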