ultrasphinx 1.5.2 → 1.5.3
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +2 -3
- data/CHANGELOG +2 -0
- data/Manifest +134 -28
- data/TODO +0 -1
- data/examples/{app.multi → ap.multi} +1 -1
- data/examples/default.base +2 -0
- data/lib/ultrasphinx.rb +19 -17
- data/lib/ultrasphinx/configure.rb +36 -28
- data/lib/ultrasphinx/core_extensions.rb +11 -1
- data/lib/ultrasphinx/fields.rb +16 -9
- data/lib/ultrasphinx/hex_to_int.sql +15 -0
- data/lib/ultrasphinx/is_indexed.rb +9 -7
- data/lib/ultrasphinx/search.rb +27 -13
- data/lib/ultrasphinx/search/internals.rb +36 -16
- data/lib/ultrasphinx/spell.rb +13 -4
- data/lib/ultrasphinx/ultrasphinx.rb +30 -20
- data/tasks/ultrasphinx.rake +31 -6
- data/test/integration/app/README +182 -0
- data/test/integration/app/Rakefile +10 -0
- data/test/integration/app/app/controllers/addresses_controller.rb +85 -0
- data/test/integration/app/app/controllers/application.rb +7 -0
- data/test/integration/app/app/controllers/sellers_controller.rb +85 -0
- data/test/integration/app/app/controllers/states_controller.rb +85 -0
- data/test/integration/app/app/controllers/users_controller.rb +85 -0
- data/test/integration/app/app/helpers/addresses_helper.rb +2 -0
- data/test/integration/app/app/helpers/application_helper.rb +3 -0
- data/test/integration/app/app/helpers/sellers_helper.rb +28 -0
- data/test/integration/app/app/helpers/states_helper.rb +2 -0
- data/test/integration/app/app/helpers/users_helper.rb +2 -0
- data/test/integration/app/app/models/geo/address.rb +8 -0
- data/test/integration/app/app/models/geo/state.rb +5 -0
- data/test/integration/app/app/models/person/user.rb +9 -0
- data/test/integration/app/app/models/seller.rb +20 -0
- data/test/integration/app/app/views/addresses/edit.html.erb +12 -0
- data/test/integration/app/app/views/addresses/index.html.erb +18 -0
- data/test/integration/app/app/views/addresses/new.html.erb +11 -0
- data/test/integration/app/app/views/addresses/show.html.erb +3 -0
- data/test/integration/app/app/views/layouts/addresses.html.erb +17 -0
- data/test/integration/app/app/views/layouts/sellers.html.erb +17 -0
- data/test/integration/app/app/views/layouts/states.html.erb +17 -0
- data/test/integration/app/app/views/layouts/users.html.erb +17 -0
- data/test/integration/app/app/views/sellers/edit.html.erb +12 -0
- data/test/integration/app/app/views/sellers/index.html.erb +20 -0
- data/test/integration/app/app/views/sellers/new.html.erb +11 -0
- data/test/integration/app/app/views/sellers/show.html.erb +3 -0
- data/test/integration/app/app/views/states/edit.html.erb +12 -0
- data/test/integration/app/app/views/states/index.html.erb +19 -0
- data/test/integration/app/app/views/states/new.html.erb +11 -0
- data/test/integration/app/app/views/states/show.html.erb +3 -0
- data/test/integration/app/app/views/users/edit.html.erb +12 -0
- data/test/integration/app/app/views/users/index.html.erb +22 -0
- data/test/integration/app/app/views/users/new.html.erb +11 -0
- data/test/integration/app/app/views/users/show.html.erb +3 -0
- data/test/integration/app/config/boot.rb +45 -0
- data/test/integration/app/config/database.yml +21 -0
- data/test/integration/app/config/environment.rb +11 -0
- data/test/integration/app/config/environments/development.rb +7 -0
- data/test/integration/app/config/environments/production.rb +18 -0
- data/test/integration/app/config/environments/test.rb +19 -0
- data/test/integration/app/config/locomotive.yml +6 -0
- data/test/integration/app/config/routes.rb +33 -0
- data/test/integration/app/config/ultrasphinx/default.base +56 -0
- data/test/integration/app/config/ultrasphinx/development.conf +159 -0
- data/test/integration/app/config/ultrasphinx/development.conf.canonical +159 -0
- data/test/integration/app/db/migrate/001_create_users.rb +16 -0
- data/test/integration/app/db/migrate/002_create_sellers.rb +14 -0
- data/test/integration/app/db/migrate/003_create_addresses.rb +19 -0
- data/test/integration/app/db/migrate/004_create_states.rb +12 -0
- data/test/integration/app/db/migrate/005_add_capitalization_to_seller.rb +9 -0
- data/test/integration/app/db/migrate/006_add_deleted_to_user.rb +9 -0
- data/test/integration/app/db/migrate/007_add_lat_and_long_to_address.rb +11 -0
- data/test/integration/app/db/migrate/008_add_mission_statement_to_seller.rb +9 -0
- data/test/integration/app/db/schema.rb +45 -0
- data/test/integration/app/doc/README_FOR_APP +2 -0
- data/test/integration/app/public/404.html +30 -0
- data/test/integration/app/public/500.html +30 -0
- data/test/integration/app/public/dispatch.cgi +10 -0
- data/test/integration/app/public/dispatch.fcgi +24 -0
- data/test/integration/app/public/dispatch.rb +10 -0
- data/test/integration/app/public/favicon.ico +0 -0
- data/test/integration/app/public/images/rails.png +0 -0
- data/test/integration/app/public/index.html +277 -0
- data/test/integration/app/public/javascripts/application.js +2 -0
- data/test/integration/app/public/javascripts/controls.js +833 -0
- data/test/integration/app/public/javascripts/dragdrop.js +942 -0
- data/test/integration/app/public/javascripts/effects.js +1088 -0
- data/test/integration/app/public/javascripts/prototype.js +2515 -0
- data/test/integration/app/public/robots.txt +1 -0
- data/test/integration/app/public/stylesheets/scaffold.css +74 -0
- data/test/integration/app/script/about +3 -0
- data/test/integration/app/script/breakpointer +3 -0
- data/test/integration/app/script/console +3 -0
- data/test/integration/app/script/destroy +3 -0
- data/test/integration/app/script/generate +3 -0
- data/test/integration/app/script/performance/benchmarker +3 -0
- data/test/integration/app/script/performance/profiler +3 -0
- data/test/integration/app/script/plugin +3 -0
- data/test/integration/app/script/process/inspector +3 -0
- data/test/integration/app/script/process/reaper +3 -0
- data/test/integration/app/script/process/spawner +3 -0
- data/test/integration/app/script/runner +3 -0
- data/test/integration/app/script/server +3 -0
- data/test/integration/app/test/fixtures/addresses.yml +13 -0
- data/test/integration/app/test/fixtures/sellers.yml +11 -0
- data/test/integration/app/test/fixtures/states.yml +216 -0
- data/test/integration/app/test/fixtures/users.yml +11 -0
- data/test/integration/app/test/functional/addresses_controller_test.rb +57 -0
- data/test/integration/app/test/functional/sellers_controller_test.rb +57 -0
- data/test/integration/app/test/functional/states_controller_test.rb +57 -0
- data/test/integration/app/test/functional/users_controller_test.rb +57 -0
- data/test/integration/app/test/test_helper.rb +28 -0
- data/test/integration/app/test/unit/address_test.rb +10 -0
- data/test/integration/app/test/unit/seller_test.rb +10 -0
- data/test/integration/app/test/unit/state_test.rb +10 -0
- data/test/integration/app/test/unit/user_test.rb +10 -0
- data/test/integration/configure_test.rb +23 -0
- data/test/integration/search_test.rb +221 -0
- data/test/integration/server_test.rb +38 -0
- data/test/integration/spell_test.rb +15 -0
- data/test/setup.rb +12 -0
- data/test/test_all.rb +1 -0
- data/test/test_helper.rb +13 -24
- data/test/ts.multi +2 -0
- data/test/unit/parser_test.rb +86 -86
- data/ultrasphinx.gemspec +12 -5
- metadata +136 -30
- metadata.gz.sig +0 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
|
2
|
+
CREATE OR REPLACE FUNCTION hex_to_int(varchar) RETURNS int4 AS '
|
3
|
+
DECLARE
|
4
|
+
h alias for $1;
|
5
|
+
exec varchar;
|
6
|
+
curs refcursor;
|
7
|
+
res int;
|
8
|
+
BEGIN
|
9
|
+
exec := ''SELECT x'''''' || h || ''''''::int4'';
|
10
|
+
OPEN curs FOR EXECUTE exec;
|
11
|
+
FETCH curs INTO res;
|
12
|
+
CLOSE curs;
|
13
|
+
return res;
|
14
|
+
END;'
|
15
|
+
LANGUAGE 'plpgsql' IMMUTABLE STRICT;
|
@@ -38,6 +38,8 @@ Use the <tt>:include</tt> key.
|
|
38
38
|
|
39
39
|
Accepts an array of hashes.
|
40
40
|
|
41
|
+
:include => [{:class_name => 'Category', :field => 'name', :as => 'category'}]
|
42
|
+
|
41
43
|
Each should contain a <tt>:class_name</tt> key (the class name of the included model), a <tt>:field</tt> key (the name of the field to include), and an optional <tt>:as</tt> key (what to name the field in the parent). You can use the optional key <tt>:association_sql</tt> if you need to pass a custom JOIN string, in which case the default JOIN for <tt>belongs_to</tt> will not be generated.
|
42
44
|
|
43
45
|
The keys <tt>:facet</tt>, <tt>:sortable</tt>, and <tt>:function_sql</tt> are also recognized, just like for regular fields.
|
@@ -68,7 +70,7 @@ Also use the <tt>:concatenate</tt> key.
|
|
68
70
|
To concatenate one field from a set of associated records as a combined field in the parent record, use a group (or vertical) concatenation. A group concatenation should contain a <tt>:class_name</tt> key (the class name of the included model), a <tt>:field</tt> key (the field on the included model to concatenate), and an optional <tt>:as</tt> key (also the name of the result of the concatenation). For example, to concatenate all <tt>Post#body</tt> contents into the parent's <tt>responses</tt> field:
|
69
71
|
:concatenate => [{:class_name => 'Post', :field => 'body', :as => 'responses'}]
|
70
72
|
|
71
|
-
Optional group concatenation keys are <tt>:
|
73
|
+
Optional group concatenation keys are <tt>:association_sql</tt>, if you need to pass a custom JOIN string (for example, a double JOIN for a <tt>has_many :through</tt>), and <tt>:conditions</tt> (if you need custom WHERE conditions for this particular association).
|
72
74
|
|
73
75
|
The keys <tt>:facet</tt>, <tt>:sortable</tt>, and <tt>:function_sql</tt> are also recognized, just like for regular fields.
|
74
76
|
|
@@ -92,8 +94,7 @@ Here's an example configuration using most of the options, taken from production
|
|
92
94
|
:concatenate => [
|
93
95
|
{:fields => ['title', 'long_description', 'short_description'],
|
94
96
|
:as => 'editorial'},
|
95
|
-
{:class_name => 'Page', :field => 'body', :as => 'body',
|
96
|
-
:association_name => 'pages'},
|
97
|
+
{:class_name => 'Page', :field => 'body', :as => 'body'},
|
97
98
|
{:class_name => 'Comment', :field => 'body', :as => 'comments',
|
98
99
|
:conditions => "comments.item_type = '#{base_class}'"}
|
99
100
|
],
|
@@ -135,10 +136,11 @@ If the associations weren't just <tt>has_many</tt> and <tt>belongs_to</tt>, you
|
|
135
136
|
|
136
137
|
Array(opts['concatenate']).each do |entry|
|
137
138
|
entry.stringify_keys!
|
138
|
-
entry.assert_valid_keys ['class_name', 'conditions', 'field', 'as', 'fields', '
|
139
|
-
raise Ultrasphinx::ConfigurationError, "You can't mix regular concat and group concats" if entry['fields'] and (entry['field'] or entry['class_name']
|
140
|
-
raise Ultrasphinx::ConfigurationError, "
|
141
|
-
raise Ultrasphinx::ConfigurationError, "
|
139
|
+
entry.assert_valid_keys ['class_name', 'conditions', 'field', 'as', 'fields', 'association_sql', 'facet', 'function_sql', 'sortable']
|
140
|
+
raise Ultrasphinx::ConfigurationError, "You can't mix regular concat and group concats" if entry['fields'] and (entry['field'] or entry['class_name'])
|
141
|
+
raise Ultrasphinx::ConfigurationError, "Concatenations must specify an :as key" unless entry['as']
|
142
|
+
raise Ultrasphinx::ConfigurationError, "Group concatenations must not have multiple fields" if entry['field'].is_a? Array
|
143
|
+
raise Ultrasphinx::ConfigurationError, "Regular concatenations should have multiple fields" if entry['fields'] and !entry['fields'].is_a?(Array)
|
142
144
|
end
|
143
145
|
|
144
146
|
Array(opts['include']).each do |entry|
|
data/lib/ultrasphinx/search.rb
CHANGED
@@ -45,11 +45,11 @@ The hash lets you customize internal aspects of the search.
|
|
45
45
|
|
46
46
|
Note that you can set up your own query defaults in <tt>environment.rb</tt>:
|
47
47
|
|
48
|
-
Ultrasphinx::Search.query_defaults = {
|
48
|
+
Ultrasphinx::Search.query_defaults = HashWithIndifferentAccess.new({
|
49
49
|
:per_page => 10,
|
50
50
|
:sort_mode => 'relevance',
|
51
51
|
:weights => {'title' => 2.0}
|
52
|
-
}
|
52
|
+
})
|
53
53
|
|
54
54
|
= Advanced features
|
55
55
|
|
@@ -75,14 +75,14 @@ You need to set the <tt>content_methods</tt> key on Ultrasphinx::Search.excerpti
|
|
75
75
|
|
76
76
|
There are some other keys you can set, such as excerpt size, HTML tags to highlight with, and number of words on either side of each excerpt chunk. Example (in <tt>environment.rb</tt>):
|
77
77
|
|
78
|
-
Ultrasphinx::Search.excerpting_options = {
|
78
|
+
Ultrasphinx::Search.excerpting_options = HashWithIndifferentAccess.new({
|
79
79
|
:before_match => '<strong>',
|
80
80
|
:after_match => '</strong>',
|
81
81
|
:chunk_separator => "...",
|
82
82
|
:limit => 256,
|
83
83
|
:around => 3,
|
84
84
|
:content_methods => [['title'], ['body', 'description', 'content'], ['metadata']]
|
85
|
-
}
|
85
|
+
})
|
86
86
|
|
87
87
|
Note that your database is never changed by anything Ultrasphinx does.
|
88
88
|
|
@@ -151,8 +151,18 @@ Note that your database is never changed by anything Ultrasphinx does.
|
|
151
151
|
else
|
152
152
|
begin
|
153
153
|
lines = open(CONF_PATH).readlines
|
154
|
-
|
155
|
-
|
154
|
+
|
155
|
+
sources = lines.select do |line|
|
156
|
+
line =~ /^source \w/
|
157
|
+
end.map do |line|
|
158
|
+
line[/source ([\w\d_-]*)/, 1].gsub('__', '/').classify
|
159
|
+
end
|
160
|
+
|
161
|
+
ids = lines.select do |line|
|
162
|
+
line =~ /^sql_query /
|
163
|
+
end.map do |line|
|
164
|
+
line[/(\d*) AS class_id/, 1].to_i
|
165
|
+
end
|
156
166
|
|
157
167
|
raise unless sources.size == ids.size
|
158
168
|
Hash[*sources.zip(ids).flatten]
|
@@ -195,43 +205,47 @@ Note that your database is never changed by anything Ultrasphinx does.
|
|
195
205
|
|
196
206
|
# Returns the facet map for this query, if facets were used.
|
197
207
|
def facets
|
198
|
-
raise UsageError, "No facet field was configured" unless @options['facets']
|
199
208
|
run?(true)
|
209
|
+
raise UsageError, "No facet field was configured" unless @options['facets']
|
200
210
|
@facets
|
201
211
|
end
|
202
212
|
|
203
213
|
# Returns the raw response from the Sphinx client.
|
204
214
|
def response
|
215
|
+
run?(true)
|
205
216
|
@response
|
206
217
|
end
|
207
218
|
|
208
219
|
def class_name #:nodoc:
|
209
220
|
# Legacy accessor
|
210
|
-
@options['
|
221
|
+
@options['class_names']
|
211
222
|
end
|
212
223
|
|
213
224
|
# Returns a hash of total result counts, scoped to each available model. This requires extra queries against the search daemon right now. Set <tt>Ultrasphinx::Search.client_options[:with_subtotals] = true</tt> to enable the extra queries. Most of the overhead is in instantiating the AR result sets, so the performance hit is not usually significant.
|
214
225
|
def subtotals
|
226
|
+
run?(true)
|
215
227
|
raise UsageError, "Subtotals are not enabled" unless self.class.client_options['with_subtotals']
|
216
228
|
@subtotals
|
217
229
|
end
|
218
230
|
|
219
231
|
# Returns the total result count.
|
220
232
|
def total_entries
|
233
|
+
run?(true)
|
221
234
|
[response['total_found'] || 0, MAX_MATCHES].min
|
222
235
|
end
|
223
236
|
|
224
237
|
# Returns the response time of the query, in milliseconds.
|
225
238
|
def time
|
239
|
+
run?(true)
|
226
240
|
response['time']
|
227
241
|
end
|
228
242
|
|
229
243
|
# Returns whether the query has been run.
|
230
244
|
def run?(should_raise = false)
|
231
|
-
if response.blank? and should_raise
|
245
|
+
if @response.blank? and should_raise
|
232
246
|
raise UsageError, "Search has not yet been run" unless run?
|
233
247
|
else
|
234
|
-
|
248
|
+
!@response.blank?
|
235
249
|
end
|
236
250
|
end
|
237
251
|
|
@@ -247,6 +261,7 @@ Note that your database is never changed by anything Ultrasphinx does.
|
|
247
261
|
|
248
262
|
# Returns the last available page number in the result set.
|
249
263
|
def page_count
|
264
|
+
run?(true)
|
250
265
|
(total_entries / per_page) + (total_entries % per_page == 0 ? 0 : 1)
|
251
266
|
end
|
252
267
|
|
@@ -302,7 +317,6 @@ Note that your database is never changed by anything Ultrasphinx does.
|
|
302
317
|
say "searching for #{@options.inspect}"
|
303
318
|
|
304
319
|
begin
|
305
|
-
|
306
320
|
@response = @request.Query(parsed_query)
|
307
321
|
say "search returned, error #{@request.GetLastError.inspect}, warning #{@request.GetLastWarning.inspect}, returned #{total_entries}/#{response['total_found']} in #{time} seconds."
|
308
322
|
|
@@ -317,14 +331,14 @@ Note that your database is never changed by anything Ultrasphinx does.
|
|
317
331
|
# if you don't reify, you'll have to do the modulus reversal yourself to get record ids
|
318
332
|
@results = reify_results(@results) if reify
|
319
333
|
|
320
|
-
rescue Sphinx::SphinxResponseError, Sphinx::SphinxTemporaryError, Errno::EPIPE => e
|
334
|
+
rescue Sphinx::SphinxConnectError, Sphinx::SphinxResponseError, Sphinx::SphinxTemporaryError, Errno::ECONNRESET, Errno::EPIPE => e
|
321
335
|
if (tries += 1) <= self.class.client_options['max_retries']
|
322
336
|
say "restarting query (#{tries} attempts already) (#{e})"
|
323
337
|
sleep(self.class.client_options['retry_sleep_time']) if tries == self.class.client_options['max_retries']
|
324
338
|
retry
|
325
339
|
else
|
326
340
|
say "query failed"
|
327
|
-
raise e
|
341
|
+
raise Sphinx::SphinxConnectError, e.to_s
|
328
342
|
end
|
329
343
|
end
|
330
344
|
|
@@ -22,7 +22,12 @@ module Ultrasphinx
|
|
22
22
|
offset, limit = opts['per_page'] * (opts['page'] - 1), opts['per_page']
|
23
23
|
|
24
24
|
request.SetLimits offset, limit, [offset + limit, MAX_MATCHES].min
|
25
|
-
|
25
|
+
|
26
|
+
if SPHINX_CLIENT_PARAMS['sort_mode'][opts['sort_mode']]
|
27
|
+
request.SetSortMode SPHINX_CLIENT_PARAMS['sort_mode'][opts['sort_mode']], opts['sort_by'].to_s
|
28
|
+
else
|
29
|
+
raise UsageError, "Sort mode #{opts['sort_mode'].inspect} is invalid"
|
30
|
+
end
|
26
31
|
|
27
32
|
if weights = opts['weights']
|
28
33
|
# Order the weights hash according to the field order for Sphinx, and set the missing fields to 1.0
|
@@ -30,10 +35,14 @@ module Ultrasphinx
|
|
30
35
|
array << (weights[field] || 1.0)
|
31
36
|
end)
|
32
37
|
end
|
33
|
-
|
38
|
+
|
34
39
|
unless opts['class_names'].compact.empty?
|
35
|
-
request.SetFilter
|
36
|
-
|
40
|
+
request.SetFilter('class_id', (opts['class_names'].map do |model|
|
41
|
+
MODELS_TO_IDS[model.to_s] or
|
42
|
+
MODELS_TO_IDS[model.to_s.constantize.base_class.to_s] or
|
43
|
+
raise UsageError, "Invalid class name #{model.inspect}"
|
44
|
+
end))
|
45
|
+
end
|
37
46
|
|
38
47
|
# Extract ranged raw filters
|
39
48
|
# Some of this mangling might not be necessary
|
@@ -90,7 +99,11 @@ module Ultrasphinx
|
|
90
99
|
request.SetLimits 0, limit, [limit, MAX_MATCHES].min
|
91
100
|
|
92
101
|
# Run the query
|
93
|
-
|
102
|
+
begin
|
103
|
+
matches = request.Query(query)['matches']
|
104
|
+
rescue Sphinx::SphinxInternalError
|
105
|
+
raise ConfigurationError, "Index is out of date. Run 'rake ultrasphinx:index'"
|
106
|
+
end
|
94
107
|
|
95
108
|
# Map the facets back to something sane
|
96
109
|
facets = {}
|
@@ -100,7 +113,7 @@ module Ultrasphinx
|
|
100
113
|
facets[match['@groupby']] = match['@count']
|
101
114
|
end
|
102
115
|
|
103
|
-
# Invert
|
116
|
+
# Invert hashes, if we have them
|
104
117
|
reverse_map_facets(facets, original_facet)
|
105
118
|
end
|
106
119
|
|
@@ -109,9 +122,9 @@ module Ultrasphinx
|
|
109
122
|
|
110
123
|
if Fields.instance.types[facet] == 'text'
|
111
124
|
# Apply the map, rebuilding if the cache is missing or out-of-date
|
112
|
-
facets = Hash[*(facets.map do |
|
113
|
-
rebuild_facet_cache(facet) unless FACET_CACHE[facet] and FACET_CACHE[facet].has_key?(
|
114
|
-
[FACET_CACHE[facet][
|
125
|
+
facets = Hash[*(facets.map do |hash, value|
|
126
|
+
rebuild_facet_cache(facet) unless FACET_CACHE[facet] and FACET_CACHE[facet].has_key?(hash)
|
127
|
+
[FACET_CACHE[facet][hash], value]
|
115
128
|
end.flatten)]
|
116
129
|
end
|
117
130
|
|
@@ -119,18 +132,25 @@ module Ultrasphinx
|
|
119
132
|
end
|
120
133
|
|
121
134
|
def rebuild_facet_cache(facet)
|
122
|
-
# Cache the reverse
|
135
|
+
# Cache the reverse hash map for the textual facet if it hasn't been done yet
|
123
136
|
# XXX not necessarily optimal since it requires a direct DB hit once per mongrel
|
124
|
-
Ultrasphinx.say "caching
|
137
|
+
Ultrasphinx.say "caching hash reverse map for text facet #{facet}"
|
125
138
|
|
126
139
|
Fields.instance.classes[facet].each do |klass|
|
127
140
|
# you can only use a facet from your own self right now; no includes allowed
|
128
|
-
field =
|
141
|
+
field = MODEL_CONFIGURATION[klass.name]['fields'].detect do |field_hash|
|
129
142
|
field_hash['as'] == facet
|
130
|
-
end
|
131
|
-
|
132
|
-
|
133
|
-
|
143
|
+
end
|
144
|
+
|
145
|
+
raise ConfigurationError, "Model #{klass.name} has the requested '#{facet}' field, but it was not configured for faceting" unless field
|
146
|
+
field = field['field']
|
147
|
+
|
148
|
+
if hash_stored_procedure = ADAPTER_SQL_FUNCTIONS[ADAPTER]['hash_stored_procedure']
|
149
|
+
klass.connection.execute(hash_stored_procedure)
|
150
|
+
end
|
151
|
+
|
152
|
+
klass.connection.execute("SELECT #{field} AS value, #{ADAPTER_SQL_FUNCTIONS[ADAPTER]['hash']._interpolate(field)} AS hash FROM #{klass.table_name} GROUP BY #{field}").each_hash do |hash|
|
153
|
+
(FACET_CACHE[facet] ||= {})[hash['hash'].to_i] = hash['value']
|
134
154
|
end
|
135
155
|
end
|
136
156
|
FACET_CACHE[facet]
|
data/lib/ultrasphinx/spell.rb
CHANGED
@@ -10,13 +10,14 @@ In order to spellcheck your user's query, Ultrasphinx bundles a small spelling m
|
|
10
10
|
|
11
11
|
Make sure Aspell and the Rubygem <tt>raspell</tt> are installed. See http://blog.evanweaver.com/files/doc/fauna/raspell/ for detailed instructions.
|
12
12
|
|
13
|
-
Copy the <tt>examples/
|
13
|
+
Copy the <tt>examples/ap.multi</tt> file into your Aspell dictionary folder (<tt>/opt/local/share/aspell/</tt> on Mac, <tt>/usr/lib/aspell-0.60/</tt> on Linux). This file lets Aspell load a custom wordlist generated by Sphinx from your app data (you can configure its filename in the <tt>config/ultrasphinx/*.base</tt> files). Modify the file if you don't want to also use the default American English dictionary.
|
14
14
|
|
15
15
|
Finally, to build the custom wordlist, run:
|
16
16
|
sudo rake ultrasphinx:spelling:build
|
17
17
|
|
18
18
|
You need to use <tt>sudo</tt> because Ultrasphinx needs to write to the Aspell dictionary folder. Also note that Aspell, <tt>raspell</tt>, and the custom dictionary must be available on each application server, not on the Sphinx daemon server.
|
19
19
|
|
20
|
+
|
20
21
|
== Usage
|
21
22
|
|
22
23
|
Now you can check whether a query is correctly spelled, like so:
|
@@ -27,11 +28,19 @@ If <tt>@correction</tt> is not <tt>nil</tt>, go ahead and suggest it to the user
|
|
27
28
|
=end
|
28
29
|
|
29
30
|
module Spell
|
30
|
-
|
31
|
-
|
32
|
-
|
31
|
+
|
32
|
+
begin
|
33
|
+
SP = Aspell.new(Ultrasphinx::DICTIONARY)
|
34
|
+
SP.suggestion_mode = Aspell::NORMAL
|
35
|
+
SP.set_option("ignore-case", "true")
|
36
|
+
Ultrasphinx.say "spelling support enabled"
|
37
|
+
rescue Object => e
|
38
|
+
SP = nil
|
39
|
+
Ultrasphinx.say "spelling support not available (raspell configuration raised \"#{e}\")"
|
40
|
+
end
|
33
41
|
|
34
42
|
def self.correct string
|
43
|
+
return nil unless SP
|
35
44
|
correction = string.gsub(/[\w\']+/) do |word|
|
36
45
|
unless SP.check(word)
|
37
46
|
SP.suggest(word).first
|
@@ -1,13 +1,13 @@
|
|
1
1
|
|
2
2
|
module Ultrasphinx
|
3
3
|
|
4
|
-
class
|
4
|
+
class Error < ::StandardError #:nodoc:
|
5
5
|
end
|
6
|
-
class ConfigurationError <
|
6
|
+
class ConfigurationError < Error #:nodoc:
|
7
7
|
end
|
8
|
-
class DaemonError <
|
8
|
+
class DaemonError < Error #:nodoc:
|
9
9
|
end
|
10
|
-
class UsageError <
|
10
|
+
class UsageError < Error #:nodoc:
|
11
11
|
end
|
12
12
|
|
13
13
|
# Internal file paths
|
@@ -15,6 +15,8 @@ module Ultrasphinx
|
|
15
15
|
SUBDIR = "config/ultrasphinx"
|
16
16
|
|
17
17
|
DIR = "#{RAILS_ROOT}/#{SUBDIR}"
|
18
|
+
|
19
|
+
THIS_DIR = File.expand_path(File.dirname(__FILE__))
|
18
20
|
|
19
21
|
CONF_PATH = "#{DIR}/#{RAILS_ENV}.conf"
|
20
22
|
|
@@ -55,29 +57,34 @@ module Ultrasphinx
|
|
55
57
|
CONNECTION_DEFAULTS = {
|
56
58
|
:host => 'localhost'
|
57
59
|
}
|
58
|
-
|
59
|
-
ADAPTER_DEFAULTS = {
|
60
|
-
'mysql' => %(
|
61
|
-
type = mysql
|
62
|
-
sql_query_pre = SET SESSION group_concat_max_len = 65535
|
63
|
-
sql_query_pre = SET NAMES utf8
|
64
|
-
),
|
65
|
-
'postgresql' => %(
|
66
|
-
type = pgsql
|
67
|
-
)}
|
68
|
-
|
60
|
+
|
69
61
|
ADAPTER_SQL_FUNCTIONS = {
|
70
62
|
'mysql' => {
|
71
63
|
'group_by' => 'GROUP BY id',
|
72
|
-
'timestamp' => 'UNIX_TIMESTAMP('
|
64
|
+
'timestamp' => 'UNIX_TIMESTAMP(?)',
|
65
|
+
'hash' => 'CRC32(?)'
|
73
66
|
},
|
74
67
|
'postgresql' => {
|
75
68
|
'group_by' => '',
|
76
|
-
'timestamp' => 'EXTRACT(EPOCH FROM '
|
69
|
+
'timestamp' => 'EXTRACT(EPOCH FROM ?)',
|
70
|
+
'hash' => 'hex_to_int(SUBSTRING(MD5(?) FROM 1 FOR 8))',
|
71
|
+
'hash_stored_procedure' => open("#{THIS_DIR}/hex_to_int.sql").read.gsub("\n", ' ')
|
77
72
|
}
|
78
73
|
}
|
79
74
|
|
80
|
-
|
75
|
+
ADAPTER_DEFAULTS = {
|
76
|
+
'mysql' => %(
|
77
|
+
type = mysql
|
78
|
+
sql_query_pre = SET SESSION group_concat_max_len = 65535
|
79
|
+
sql_query_pre = SET NAMES utf8
|
80
|
+
),
|
81
|
+
'postgresql' => %(
|
82
|
+
type = pgsql
|
83
|
+
sql_query_pre = ) + ADAPTER_SQL_FUNCTIONS['postgresql']['hash_stored_procedure'] + %(
|
84
|
+
)
|
85
|
+
}
|
86
|
+
|
87
|
+
ADAPTER = ActiveRecord::Base.connection.instance_variable_get("@config")[:adapter] rescue 'mysql'
|
81
88
|
|
82
89
|
mattr_accessor :with_rake
|
83
90
|
|
@@ -97,7 +104,7 @@ type = pgsql
|
|
97
104
|
|
98
105
|
# Configuration file parser.
|
99
106
|
def self.options_for(heading, path)
|
100
|
-
section = open(path).read[/^#{heading}\s*?\{(.*?)\}/m, 1]
|
107
|
+
section = open(path).read[/^#{heading.gsub('/', '__')}\s*?\{(.*?)\}/m, 1]
|
101
108
|
|
102
109
|
unless section
|
103
110
|
Ultrasphinx.say "warning; heading #{heading} not found in #{path}; it may be corrupted. "
|
@@ -121,8 +128,11 @@ type = pgsql
|
|
121
128
|
|
122
129
|
# Make sure there's a trailing slash
|
123
130
|
INDEX_SETTINGS['path'] = INDEX_SETTINGS['path'].chomp("/") + "/"
|
131
|
+
|
132
|
+
DICTIONARY = CLIENT_SETTINGS['dictionary_name'] || 'ap'
|
133
|
+
raise ConfigurationError, "Aspell does not support dictionary names longer than two letters" if DICTIONARY.size > 2
|
124
134
|
|
125
|
-
STOPWORDS_PATH = "#{Ultrasphinx::INDEX_SETTINGS['path']}
|
135
|
+
STOPWORDS_PATH = "#{Ultrasphinx::INDEX_SETTINGS['path']}/#{DICTIONARY}-stopwords.txt"
|
126
136
|
|
127
137
|
MODEL_CONFIGURATION = {}
|
128
138
|
|
data/tasks/ultrasphinx.rake
CHANGED
@@ -11,6 +11,8 @@ namespace :ultrasphinx do
|
|
11
11
|
|
12
12
|
desc "Bootstrap a full Sphinx environment"
|
13
13
|
task :bootstrap => [:_environment, :configure, :index, :"daemon:restart"] do
|
14
|
+
say "done"
|
15
|
+
say "please restart Mongrel"
|
14
16
|
end
|
15
17
|
|
16
18
|
desc "Rebuild the configuration file for this particular environment."
|
@@ -20,13 +22,27 @@ namespace :ultrasphinx do
|
|
20
22
|
|
21
23
|
desc "Reindex the database and send an update signal to the search daemon."
|
22
24
|
task :index => [:_environment] do
|
25
|
+
rotate = ultrasphinx_daemon_running?
|
23
26
|
mkdir_p Ultrasphinx::INDEX_SETTINGS['path']
|
27
|
+
|
24
28
|
cmd = "indexer --config #{Ultrasphinx::CONF_PATH}"
|
25
29
|
cmd << " #{ENV['OPTS']} " if ENV['OPTS']
|
26
|
-
cmd << " --rotate" if
|
30
|
+
cmd << " --rotate" if rotate
|
27
31
|
cmd << " #{Ultrasphinx::UNIFIED_INDEX_NAME}"
|
32
|
+
|
28
33
|
say cmd
|
29
34
|
system cmd
|
35
|
+
|
36
|
+
if rotate
|
37
|
+
sleep(4)
|
38
|
+
failed = Dir[Ultrasphinx::INDEX_SETTINGS['path'] + "/*.new.*"]
|
39
|
+
if failed.any?
|
40
|
+
say "warning; index failed to rotate! Deleting new indexes"
|
41
|
+
failed.each {|f| File.delete f }
|
42
|
+
else
|
43
|
+
say "index rotated ok"
|
44
|
+
end
|
45
|
+
end
|
30
46
|
end
|
31
47
|
|
32
48
|
|
@@ -36,7 +52,7 @@ namespace :ultrasphinx do
|
|
36
52
|
FileUtils.mkdir_p File.dirname(Ultrasphinx::DAEMON_SETTINGS["log"]) rescue nil
|
37
53
|
raise Ultrasphinx::DaemonError, "Already running" if ultrasphinx_daemon_running?
|
38
54
|
system "searchd --config #{Ultrasphinx::CONF_PATH}"
|
39
|
-
sleep(
|
55
|
+
sleep(4) # give daemon a chance to write the pid file
|
40
56
|
if ultrasphinx_daemon_running?
|
41
57
|
say "started successfully"
|
42
58
|
else
|
@@ -48,7 +64,16 @@ namespace :ultrasphinx do
|
|
48
64
|
task :stop => [:_environment] do
|
49
65
|
raise Ultrasphinx::DaemonError, "Doesn't seem to be running" unless ultrasphinx_daemon_running?
|
50
66
|
system "kill #{pid = ultrasphinx_daemon_pid}"
|
51
|
-
|
67
|
+
sleep(1)
|
68
|
+
if ultrasphinx_daemon_running?
|
69
|
+
system "kill -9 #{pid}"
|
70
|
+
sleep(1)
|
71
|
+
end
|
72
|
+
if ultrasphinx_daemon_running?
|
73
|
+
say "#{pid} could not be stopped"
|
74
|
+
else
|
75
|
+
say "stopped #{pid}"
|
76
|
+
end
|
52
77
|
end
|
53
78
|
|
54
79
|
desc "Restart the search daemon"
|
@@ -74,7 +99,7 @@ namespace :ultrasphinx do
|
|
74
99
|
task :build => [:_environment] do
|
75
100
|
ENV['OPTS'] = "--buildstops #{Ultrasphinx::STOPWORDS_PATH} #{Ultrasphinx::MAX_WORDS} --buildfreqs"
|
76
101
|
Rake::Task["ultrasphinx:index"].invoke
|
77
|
-
tmpfile = "/tmp/
|
102
|
+
tmpfile = "/tmp/ultrasphinx-stopwords.txt"
|
78
103
|
words = []
|
79
104
|
say "filtering"
|
80
105
|
File.open(Ultrasphinx::STOPWORDS_PATH).each do |line|
|
@@ -87,8 +112,8 @@ namespace :ultrasphinx do
|
|
87
112
|
end
|
88
113
|
say "writing #{words.size} words"
|
89
114
|
File.open(tmpfile, 'w').write(words.join("\n"))
|
90
|
-
say "loading into aspell"
|
91
|
-
system("aspell --lang=en create master
|
115
|
+
say "loading dictionary '#{Ultrasphinx::DICTIONARY}' into aspell"
|
116
|
+
system("aspell --lang=en create master #{Ultrasphinx::DICTIONARY}.rws < #{tmpfile}")
|
92
117
|
end
|
93
118
|
end
|
94
119
|
|