ultrasphinx 1 → 1.5
- data.tar.gz.sig +3 -0
- data/CHANGELOG +13 -2
- data/Manifest +29 -21
- data/README +44 -18
- data/TODO +6 -0
- data/examples/default.base +31 -9
- data/lib/ultrasphinx.rb +6 -0
- data/lib/ultrasphinx/autoload.rb +1 -1
- data/lib/ultrasphinx/configure.rb +266 -0
- data/lib/ultrasphinx/core_extensions.rb +37 -5
- data/lib/ultrasphinx/fields.rb +74 -24
- data/lib/ultrasphinx/is_indexed.rb +90 -34
- data/lib/ultrasphinx/search.rb +199 -246
- data/lib/ultrasphinx/search/internals.rb +204 -0
- data/lib/ultrasphinx/search/parser.rb +115 -0
- data/lib/ultrasphinx/spell.rb +13 -6
- data/lib/ultrasphinx/ultrasphinx.rb +50 -213
- data/tasks/ultrasphinx.rake +18 -25
- data/test/config/ultrasphinx/test.base +56 -0
- data/test/test_helper.rb +32 -0
- data/test/unit/parser_test.rb +93 -0
- data/ultrasphinx.gemspec +35 -0
- data/vendor/sphinx/LICENSE +58 -0
- data/vendor/will_paginate/LICENSE +18 -0
- metadata +66 -27
- metadata.gz.sig +3 -0
- data/Rakefile +0 -21
data/lib/ultrasphinx/search/internals.rb
ADDED
@@ -0,0 +1,204 @@
+
+module Ultrasphinx
+  class Search
+    module Internals
+
+      # These methods are kept stateless to ease debugging
+
+      private
+
+      def build_request_with_options opts
+
+        request = Sphinx::Client.new
+
+        request.SetServer(
+          Ultrasphinx::CLIENT_SETTINGS['server_host'],
+          Ultrasphinx::CLIENT_SETTINGS['server_port']
+        )
+
+        # Force extended query mode
+        request.SetMatchMode(Sphinx::Client::SPH_MATCH_EXTENDED)
+
+        offset, limit = opts['per_page'] * (opts['page'] - 1), opts['per_page']
+
+        request.SetLimits offset, limit, [offset + limit, MAX_MATCHES].min
+        request.SetSortMode SPHINX_CLIENT_PARAMS['sort_mode'][opts['sort_mode']], opts['sort_by'].to_s
+
+        if weights = opts['weight']
+          # Order the weights hash according to the field order for Sphinx, and set the missing fields to 1.0
+          request.SetWeights(Fields.instance.types.select{|n,t| t == 'text'}.map(&:first).sort.inject([]) do |array, field|
+            array << (weights[field] || 1.0)
+          end)
+        end
+
+        unless opts['class_name'].compact.empty?
+          request.SetFilter 'class_id', opts['class_name'].map{|m| MODELS_TO_IDS[m.to_s]}
+        end
+
+        # Extract ranged raw filters
+        # Some of this mangling might not be necessary
+        opts['filter'].each do |field, value|
+          begin
+            case value
+              when Fixnum, Float, BigDecimal, NilClass, Array
+                request.SetFilter field, Array(value)
+              when Range
+                min, max = [value.begin, value.end].map do |x|
+                  x._to_numeric
+                end
+                raise NoMethodError unless min <=> max and max <=> min
+                min, max = max, min if min > max
+                request.SetFilterRange field, min, max
+              when String
+                opts['parsed_query'] << " @#{field} #{value}"
+              else
+                raise NoMethodError
+            end
+          rescue NoMethodError => e
+            raise Sphinx::SphinxArgumentError, "filter: #{field.inspect}:#{value.inspect} is invalid"
+          end
+        end
+
+        request
+      end
+
+      def get_subtotals(original_request, query)
+        request = original_request._deep_dup
+        request.instance_eval { @filters.delete_if {|f| f['attr'] == 'class_id'} }
+
+        facets = get_facets(request, query, 'class_id')
+
+        # Not using the standard facet caching here
+        Hash[*(MODELS_TO_IDS.map do |klass, id|
+          [klass, facets[id] || 0]
+        end.flatten)]
+      end
+
+      def get_facets(original_request, query, original_facet)
+        request, facet = original_request._deep_dup, original_facet
+        facet += "_facet" if Fields.instance.types[original_facet] == 'text'
+
+        raise UsageError, "Field #{original_facet} does not exist or was not configured for faceting" unless Fields.instance.types[facet]
+
+        # Set the facet query parameter and modify per-page setting so we snag all the facets
+        request.SetGroupBy(facet, Sphinx::Client::SPH_GROUPBY_ATTR, '@count desc')
+        limit = self.class.client_options['max_facets']
+        request.SetLimits 0, limit, [limit, MAX_MATCHES].min
+
+        # Run the query
+        matches = request.Query(query)['matches']
+
+        # Map the facets back to something sane
+        facets = {}
+        matches.each do |match|
+          match = match.last['attrs'] # :(
+          raise ResponseError if facets[match['@groupby']]
+          facets[match['@groupby']] = match['@count']
+        end
+
+        # Invert crc's, if we have them
+        reverse_map_facets(facets, original_facet)
+      end
+
+      def reverse_map_facets(facets, facet)
+        facets = facets.dup
+
+        if Fields.instance.types[facet] == 'text'
+          unless FACET_CACHE[facet]
+            # Cache the reverse CRC map for the textual facet if it hasn't been done yet
+            # XXX not necessarily optimal since it requires a direct DB hit once per mongrel
+            Ultrasphinx.say "caching crc reverse map for text facet #{facet}"
+
+            Fields.instance.classes[facet].each do |klass|
+              # you can only use a facet from your own self right now; no includes allowed
+              field = (MODEL_CONFIGURATION[klass.name]['fields'].detect do |field_hash|
+                field_hash['as'] == facet
+              end)['field']
+
+              klass.connection.execute("SELECT #{field} AS value, CRC32(#{field}) AS crc FROM #{klass.table_name} GROUP BY #{field}").each_hash do |hash|
+                (FACET_CACHE[facet] ||= {})[hash['crc'].to_i] = hash['value']
+              end
+            end
+          end
+
+          # Apply the map
+          facets = Hash[*(facets.map do |crc, value|
+            [FACET_CACHE[facet][crc], value]
+          end.flatten)]
+        end
+
+        facets
+      end
+
+      def reify_results(sphinx_ids)
+
+        # Order by position and then toss the rest of the data
+        sphinx_ids = sphinx_ids.sort_by do |key, value|
+          value['index'] or raise ConfigurationError, "Your Sphinx client is not properly patched."
+        end.map(&:first)
+
+        # Inverse-modulus map the sphinx ids to the table-specific ids
+        ids = Hash.new([])
+        sphinx_ids.each do |id|
+          ids[MODELS_TO_IDS.invert[id % MODELS_TO_IDS.size]] += [id / MODELS_TO_IDS.size] # yay math
+        end
+        raise Sphinx::SphinxResponseError, "impossible document id in query result" unless ids.values.flatten.size == sphinx_ids.size
+
+        # Fetch them for real
+        results = []
+        ids.each do |model, id_set|
+          klass = model.constantize
+
+          finder = self.class.client_options['finder_methods'].detect do |method_name|
+            klass.respond_to? method_name
+          end
+
+          logger.debug "** ultrasphinx: using #{klass.name}.#{finder} as finder method"
+
+          begin
+            # XXX Does not use Memcached's multiget
+            results += case instances = id_set.map { |id| klass.send(finder, id) }
+              when Hash
+                instances.values
+              when Array
+                instances
+              else
+                Array(instances)
+            end
+          rescue ActiveRecord::ActiveRecordError => e
+            raise Sphinx::SphinxResponseError, e.inspect
+          end
+        end
+
+        # Put them back in order
+        results.sort_by do |r|
+          raise Sphinx::SphinxResponseError, "Bogus ActiveRecord id for #{r.class}:#{r.id}" unless r.id
+          index = (sphinx_ids.index(sphinx_id = r.id * MODELS_TO_IDS.size + MODELS_TO_IDS[r.class.base_class.name]))
+          raise Sphinx::SphinxResponseError, "Bogus reverse id for #{r.class}:#{r.id} (Sphinx:#{sphinx_id})" unless index
+          index / sphinx_ids.size.to_f
+        end
+
+        # Add an accessor for absolute search rank for each record
+        results.each_with_index do |r, index|
+          i = per_page * (current_page - 1) + index
+          r._metaclass.send('define_method', 'result_index') { i }
+        end
+
+        results
+      end
+
+
+      def strip_bogus_characters(s)
+        # Used to remove some garbage before highlighting
+        s.gsub(/<.*?>|\.\.\.|\342\200\246|\n|\r/, " ").gsub(/http.*?( |$)/, ' ') if s
+      end
+
+      def strip_query_commands(s)
+        # XXX Hack for query commands, since sphinx doesn't intelligently parse the query in excerpt mode
+        # Also removes apostrophes in the middle of words so that they don't get split in two.
+        s.gsub(/(^|\s)(AND|OR|NOT|\@\w+)(\s|$)/i, "").gsub(/(\w)\'(\w)/, '\1\2')
+      end
+
+    end
+  end
+end
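
The most opaque part of internals.rb is the id arithmetic in reify_results: every Sphinx document id packs a record id and a class id into a single integer, so one unified index can serve every indexed model. A minimal sketch of that round trip, assuming two indexed models (the mapping and the sample ids below are hypothetical; the real MODELS_TO_IDS is generated from the plugin configuration):

  # Hypothetical mapping; built at configure time in the real plugin
  MODELS_TO_IDS = {'User' => 0, 'Post' => 1}

  # Encoding: sphinx_id = record_id * number_of_models + class_id
  sphinx_id = 42 * MODELS_TO_IDS.size + MODELS_TO_IDS['Post']    # => 85

  # Decoding, the "inverse-modulus map" step in reify_results:
  klass = MODELS_TO_IDS.invert[sphinx_id % MODELS_TO_IDS.size]   # => "Post"
  record_id = sphinx_id / MODELS_TO_IDS.size                     # => 42

This is also why the generated sql_query_info (see the ultrasphinx.rb diff below) recovers a primary key by computing ($id - class_id) divided by the number of indexed models.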
data/lib/ultrasphinx/search/parser.rb
ADDED
@@ -0,0 +1,115 @@
+
+module Ultrasphinx
+  class Search
+    module Parser
+
+      class Error < RuntimeError; end
+
+      OPERATORS = {
+        'OR' => '|',
+        'AND' => '',
+        'NOT' => '-',
+        'or' => '|',
+        'and' => '',
+        'not' => '-'
+      }
+
+      private
+
+      def parse query
+        # Alters a Google query string into Sphinx 0.97 style
+        return "" if query.blank?
+        # Parse
+        token_hash = token_stream_to_hash(query_to_token_stream(query))
+        # Join everything up and remove some spaces
+        token_hash_to_array(token_hash).join(" ").squeeze(" ").strip
+      end
+
+
+      def token_hash_to_array(token_hash)
+        query = []
+
+        token_hash.sort_by do |key, value|
+          key or ""
+        end.each do |field, contents|
+          # first operator always goes outside
+          query << contents.first.first
+
+          query << "@#{field}" if field
+          query << "(" if field and contents.size > 1
+
+          contents.each_with_index do |op_and_content, index|
+            op, content = op_and_content
+            query << op unless index == 0
+            query << content
+          end
+
+          query << ")" if field and contents.size > 1
+        end
+
+        # XXX swap the first pair if the order is reversed
+        if [OPERATORS['NOT'], OPERATORS['OR']].include? query.first.upcase
+          query[0], query[1] = query[1], query[0]
+        end
+
+        query
+      end
+
+
+      def query_to_token_stream(query)
+        # First, split query on spaces that are not inside sets of quotes or parens
+        query = query.scan(/[^"() ]*["(][^")]*[")]|[^"() ]+/)
+
+        token_stream = []
+        has_operator = false
+
+        query.each_with_index do |subtoken, index|
+
+          # recurse for parens, if necessary
+          if subtoken =~ /^(.*?)\((.*)\)(.*?$)/
+            subtoken = query[index] = "#{$1}(#{parse $2})#{$3}"
+          end
+
+          # add to the stream, converting the operator
+          if !has_operator
+            if OPERATORS.to_a.flatten.include? subtoken and index != (query.size - 1) # operators at the end of the string are not parsed
+              token_stream << OPERATORS[subtoken] || subtoken
+              has_operator = true # flip
+            else
+              token_stream << ""
+              token_stream << subtoken
+            end
+          else
+            if OPERATORS.to_a.flatten.include? subtoken
+              # drop extra operator
+            else
+              token_stream << subtoken
+              has_operator = false # flop
+            end
+          end
+        end
+
+        raise Error, "#{token_stream.inspect} is not a valid token stream" unless token_stream.size % 2 == 0
+        token_stream.in_groups_of(2)
+      end
+
+
+      def token_stream_to_hash(token_stream)
+        token_hash = Hash.new([])
+        token_stream.map do |operator, content|
+          # remove some spaces
+          content.gsub!(/^"\s+|\s+"$/, '"')
+          # convert fields into sphinx style, reformat the stream object
+          if content =~ /(.*?):(.*)/
+            token_hash[$1] += [[operator, $2]]
+          else
+            token_hash[nil] += [[operator, content]]
+          end
+        end
+        token_hash
+      end
+
+
+    end
+  end
+end
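
The parser turns Google-style queries into Sphinx 0.97 extended syntax by pairing each term with a normalized operator and regrouping field-prefixed terms under @field. A stripped-down, standalone sketch of just the operator-translation step (illustrative only; the real parse also handles quotes, parentheses, and field grouping, and relies on ActiveSupport for blank? and in_groups_of):

  OPERATORS = {
    'OR' => '|', 'AND' => '', 'NOT' => '-',
    'or' => '|', 'and' => '', 'not' => '-'
  }

  # Swap spelled-out operators for Sphinx symbols, token by token
  def translate_operators(query)
    query.split(' ').map { |token|
      OPERATORS.key?(token) ? OPERATORS[token] : token
    }.join(' ').squeeze(' ').strip
  end

  translate_operators('cats AND dogs')   # => "cats dogs"
  translate_operators('cats OR dogs')    # => "cats | dogs"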
data/lib/ultrasphinx/spell.rb
CHANGED
@@ -3,19 +3,26 @@
 module Ultrasphinx
 
 =begin rdoc
-== Spelling support
 
-In order to spellcheck your user's query, Ultrasphinx bundles a small spelling module.
+In order to spellcheck your user's query, Ultrasphinx bundles a small spelling module.
+
+== Setup
+
+Make sure Aspell and the Rubygem <tt>raspell</tt> are installed. See http://blog.evanweaver.com/files/doc/fauna/raspell/ for detailed instructions.
 
-
+Copy the <tt>examples/app.multi</tt> file into your Aspell dictionary folder (<tt>/opt/local/share/aspell/</tt> on Mac, <tt>/usr/lib/aspell-0.60/</tt> on Linux). This file lets Aspell load a custom wordlist generated by Sphinx from your app data. Modify the file if you don't want to also use the default American English dictionary.
 
-
-  rake ultrasphinx:spelling:build
+Finally, to build the custom wordlist, run:
+  sudo rake ultrasphinx:spelling:build
+
+You need to use <tt>sudo</tt> because Ultrasphinx needs to write to the Aspell dictionary folder. Also note that Aspell, <tt>raspell</tt>, and the custom dictionary must be available on each application server, not on the Sphinx daemon server.
+
+== Usage
 
 Now you can see if a query is correctly spelled as so:
   @correction = Ultrasphinx::Spell.correct(@search.query)
 
-If
+If <tt>@correction</tt> is not <tt>nil</tt>, go ahead and suggest it to the user.
 
 =end
 
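
Putting the documented pieces together, a controller action might use the spelling module like this (a sketch only; the action and variable names are illustrative):

  def search
    @search = Ultrasphinx::Search.new(:query => params[:query])
    @search.run
    # Spell.correct returns nil when Aspell considers the query well-spelled
    @correction = Ultrasphinx::Spell.correct(@search.query)
  end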
data/lib/ultrasphinx/ultrasphinx.rb
CHANGED
@@ -7,6 +7,8 @@ module Ultrasphinx
   end
   class DaemonError < Exception #:nodoc:
   end
+  class UsageError < Exception #:nodoc:
+  end
 
   # internal file paths
 
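
UsageError is the exception the new internals.rb raises when a search asks for something the index was never configured for (for example, faceting on an unconfigured field), so applications can distinguish caller mistakes from daemon failures. A hypothetical guard:

  begin
    @search.run
  rescue Ultrasphinx::UsageError => e
    # The caller passed unsupported search options; degrade gracefully
    flash[:error] = "Unsupported search option: #{e.message}"
  end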
@@ -30,247 +32,82 @@
 
   MAX_WORDS = 2**16 # maximum number of stopwords built
 
+  EMPTY_SEARCHABLE = "__empty_searchable__"
+
   UNIFIED_INDEX_NAME = "complete"
 
-
-
-
+  CONFIG_MAP = {
+    # These must be symbols for key mapping against Rails itself
+    :username => 'sql_user',
     :password => 'sql_pass',
     :host => 'sql_host',
     :database => 'sql_db',
     :port => 'sql_port',
-    :socket => 'sql_sock'
-
-  OPTIONAL_SPHINX_KEYS = ['morphology', 'stopwords', 'min_word_len', 'charset_type', 'charset_table', 'docinfo']
-
-  # some default settings for the sphinx conf files
+    :socket => 'sql_sock'
+  }
 
-
-
-
-    sql_query_post =
-    sql_range_step = 20000
-  )
+  CONNECTION_DEFAULTS = {
+    :host => 'localhost'
+  }
 
   ADAPTER_DEFAULTS = {
-
+    'mysql' => %(
       type = mysql
       sql_query_pre = SET SESSION group_concat_max_len = 65535
       sql_query_pre = SET NAMES utf8
     ),
-
+    'postgresql' => %(
       type = pgsql
   )}
-
+
+  # Logger.
+  def self.say msg
+    STDERR.puts "** ultrasphinx: #{msg}"
+  end
 
   # Configuration file parser.
   def self.options_for(heading, path)
+    section = open(path).read[/^#{heading}\s*?\{(.*?)\}/m, 1]
 
-    section = open(path).read[/^#{heading}.*?\{(.*?)\}/m, 1]
     unless section
-      Ultrasphinx.say "#{path}
-
-
-
-
-
-
+      Ultrasphinx.say "warning; heading #{heading} not found in #{path}; it may be corrupted. "
+      {}
+    else
+      options = section.split("\n").map do |line|
+        line =~ /\s*(.*?)\s*=\s*([^\#]*)/
+        $1 ? [$1, $2.strip] : []
+      end
+      Hash[*options.flatten]
     end
 
-    Hash[*options.flatten]
   end
 
   # introspect on the existing generated conf files
 
-
-
+  INDEXER_SETTINGS = options_for('indexer', BASE_PATH)
+  CLIENT_SETTINGS = options_for('client', BASE_PATH)
   DAEMON_SETTINGS = options_for('searchd', BASE_PATH)
-
-
-
-  MODEL_CONFIGURATION = {}
-
-
-
-  class << self
-
-    # Logger.
-    def say msg
-      $stderr.puts "** ultrasphinx: #{msg}"
-    end
-
-    # Force all the indexed models to load and fill the MODEL_CONFIGURATION hash.
-    def load_constants
-
-      Dir["#{RAILS_ROOT}/app/models/**/*.rb"].each do |filename|
-        next if filename =~ /\/(\.svn|CVS|\.bzr)\//
-        begin
-          open(filename) {|file| load filename if file.grep(/is_indexed/).any?}
-        rescue Object => e
-          say "warning; possibly critical autoload error on #{filename}"
-          say e.inspect
-        end
-      end
-
-      # build the field-to-type mappings
-      Fields.instance.configure(MODEL_CONFIGURATION)
-    end
-
-    # Complain if the database names go out of sync.
-    def verify_database_name
-      if File.exist? CONF_PATH
-        if options_for("source", CONF_PATH)['sql_db'] != ActiveRecord::Base.connection.instance_variable_get("@config")[:database]
-          say "warning; configured database name is out-of-date"
-          say "please run 'rake ultrasphinx:configure'"
-        end rescue nil
-      end
-    end
-
-
-    # Main SQL builder.
-    def configure
-      load_constants
-
-      puts "Rebuilding Ultrasphinx configurations for #{ENV['RAILS_ENV']} environment"
-      puts "Available models are #{MODEL_CONFIGURATION.keys.to_sentence}"
-      File.open(CONF_PATH, "w") do |conf|
-        conf.puts "\n# Auto-generated at #{Time.now}.\n# Hand modifications will be overwritten.\n"
-
-        conf.puts "\n# #{BASE_PATH}"
-        conf.puts open(BASE_PATH).read.sub(/^ultrasphinx.*?\{.*?\}/m, '') + "\n"
-
-        sphinx_source_list = []
-
-        conf.puts "\n# Source configuration\n\n"
-
-        puts "Generating SQL"
-        MODEL_CONFIGURATION.each_with_index do |model_options, class_id|
-          model, options = model_options
-          klass, source = model.constantize, model.tableize
-
-          # puts "SQL for #{model}"
-
-          sphinx_source_list << source
+  SOURCE_SETTINGS = options_for('source', BASE_PATH)
+  INDEX_SETTINGS = options_for('index', BASE_PATH)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            "#{class_id} AS class_id", "'#{klass.name}' AS class"]
-          remaining_columns = Fields.instance.keys - ["class", "class_id"]
-
-          conf.puts "\nsql_query_range = SELECT MIN(#{pkey}), MAX(#{pkey}) FROM #{table}"
-
-          options[:fields].to_a.each do |f|
-            column, as = f.is_a?(Hash) ? [f[:field], f[:as]] : [f, f]
-            column_strings << Fields.instance.cast("#{table}.#{column}", as)
-            remaining_columns.delete(as)
-          end
-
-          options[:includes].to_a.each do |join|
-            join_klass = join[:model].constantize
-            association = klass.reflect_on_association(join[:model].underscore.to_sym)
-            if not association
-              if not join[:association_sql]
-                raise ConfigurationError, "Unknown association from #{klass} to #{join[:model]}"
-              else
-                join_strings << join[:association_sql]
-              end
-            else
-              join_strings << "LEFT OUTER JOIN #{join_klass.table_name} ON " +
-                if (macro = association.macro) == :belongs_to
-                  "#{join_klass.table_name}.#{join_klass.primary_key} = #{table}.#{association.primary_key_name}"
-                elsif macro == :has_one
-                  "#{table}.#{klass.primary_key} = #{join_klass.table_name}.#{association.instance_variable_get('@foreign_key_name')}"
-                else
-                  raise ConfigurationError, "Unidentified association macro #{macro.inspect}"
-                end
-            end
-            column_strings << "#{join_klass.table_name}.#{join[:field]} AS #{join[:as] or join[:field]}"
-            remaining_columns.delete(join[:as] || join[:field])
-          end
-
-          options[:concats].to_a.select{|concat| concat[:model] and concat[:field]}.each do |group|
-            # only has_many's or explicit sql right now
-            join_klass = group[:model].constantize
-            if group[:association_sql]
-              join_strings << group[:association_sql]
-            else
-              association = klass.reflect_on_association(group[:association_name] ? group[:association_name].to_sym : group[:model].underscore.pluralize.to_sym)
-              join_strings << "LEFT OUTER JOIN #{join_klass.table_name} ON #{table}.#{klass.primary_key} = #{join_klass.table_name}.#{association.primary_key_name}" + (" AND (#{group[:conditions]})" if group[:conditions]).to_s # XXX make sure foreign key is right for polymorphic relationships
-            end
-            column_strings << Fields.instance.cast("GROUP_CONCAT(#{join_klass.table_name}.#{group[:field]} SEPARATOR ' ')", group[:as])
-            remaining_columns.delete(group[:as])
-          end
-
-          options[:concats].to_a.select{|concat| concat[:fields]}.each do |concat|
-            column_strings << Fields.instance.cast("CONCAT_WS(' ', #{concat[:fields].map{|field| "#{table}.#{field}"}.join(', ')})", concat[:as])
-            remaining_columns.delete(concat[:as])
-          end
-
-          # puts "#{model} has #{remaining_columns.inspect} remaining"
-          remaining_columns.each do |field|
-            column_strings << Fields.instance.null(field)
-          end
-
-          query_strings = ["SELECT", column_strings.sort_by do |string|
-            # sphinx wants them always in the same order, but "id" must be first
-            (field = string[/.*AS (.*)/, 1]) == "id" ? "*" : field
-          end.join(", ")]
-          query_strings << "FROM #{table}"
-          query_strings += join_strings.uniq
-          query_strings << "WHERE #{table}.#{pkey} >= $start AND #{table}.#{pkey} <= $end"
-          query_strings += condition_strings.uniq.map{|s| "AND #{s}"}
-          query_strings << "GROUP BY id"
-
-          conf.puts "sql_query = #{query_strings.join(" ")}"
-
-          groups = []
-          # group and date sorting params... this really only would have to be run once
-          Fields.instance.each do |field, type|
-            case type
-              when 'numeric'
-                groups << "sql_group_column = #{field}"
-              when 'date'
-                groups << "sql_date_column = #{field}"
-            end
-          end
-          conf.puts "\n" + groups.sort_by{|s| s[/= (.*)/, 1]}.join("\n")
-          conf.puts "\nsql_query_info = SELECT * FROM #{table} WHERE #{table}.#{pkey} = (($id - #{class_id}) / #{MODEL_CONFIGURATION.size})"
-          conf.puts "}\n\n"
-        end
-
-        conf.puts "\n# Index configuration\n\n"
-
-
-        # only output the unified index; no one uses the individual ones anyway
-
-        conf.puts "index #{UNIFIED_INDEX_NAME}"
-        conf.puts "{"
-        conf.puts sphinx_source_list.map {|s| "source = #{s}" }
-
-        OPTIONAL_SPHINX_KEYS.each do |key|
-          conf.puts "#{key} = #{PLUGIN_SETTINGS[key]}" if PLUGIN_SETTINGS[key]
-        end
-
-        conf.puts "path = #{PLUGIN_SETTINGS["path"]}/sphinx_index_#{UNIFIED_INDEX_NAME}"
-        conf.puts "}\n\n"
-      end
-
+  # Make sure there's a trailing slash
+  INDEX_SETTINGS['path'] = INDEX_SETTINGS['path'].chomp("/") + "/"
+
+  STOPWORDS_PATH = "#{Ultrasphinx::INDEX_SETTINGS['path']}/stopwords.txt"
+
+  MODEL_CONFIGURATION = {}
+
+  # Complain if the database names go out of sync.
+  def self.verify_database_name
+    if File.exist? CONF_PATH
+      if options_for(
+          "source #{MODEL_CONFIGURATION.keys.first.tableize}",
+          CONF_PATH
+        )['sql_db'] != ActiveRecord::Base.connection.instance_variable_get("@config")[:database]
+        say "warning; configured database name is out-of-date"
+        say "please run 'rake ultrasphinx:configure'"
+      end rescue nil
     end
-
   end
+
 end
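
The rewritten options_for is effectively a tiny INI-style parser: it captures the brace-delimited body under the named heading, then splits each key = value line into a flat string hash. Given a generated configuration file containing something like the following (contents hypothetical):

  searchd {
    port = 3312
    log = /var/log/sphinx/searchd.log
  }

options_for('searchd', path) would return {"port" => "3312", "log" => "/var/log/sphinx/searchd.log"}. This is how INDEXER_SETTINGS, CLIENT_SETTINGS, DAEMON_SETTINGS, SOURCE_SETTINGS, and INDEX_SETTINGS are all populated from the .base file at load time.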