ultrasphinx 1 → 1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +3 -0
- data/CHANGELOG +13 -2
- data/Manifest +29 -21
- data/README +44 -18
- data/TODO +6 -0
- data/examples/default.base +31 -9
- data/lib/ultrasphinx.rb +6 -0
- data/lib/ultrasphinx/autoload.rb +1 -1
- data/lib/ultrasphinx/configure.rb +266 -0
- data/lib/ultrasphinx/core_extensions.rb +37 -5
- data/lib/ultrasphinx/fields.rb +74 -24
- data/lib/ultrasphinx/is_indexed.rb +90 -34
- data/lib/ultrasphinx/search.rb +199 -246
- data/lib/ultrasphinx/search/internals.rb +204 -0
- data/lib/ultrasphinx/search/parser.rb +115 -0
- data/lib/ultrasphinx/spell.rb +13 -6
- data/lib/ultrasphinx/ultrasphinx.rb +50 -213
- data/tasks/ultrasphinx.rake +18 -25
- data/test/config/ultrasphinx/test.base +56 -0
- data/test/test_helper.rb +32 -0
- data/test/unit/parser_test.rb +93 -0
- data/ultrasphinx.gemspec +35 -0
- data/vendor/sphinx/LICENSE +58 -0
- data/vendor/will_paginate/LICENSE +18 -0
- metadata +66 -27
- metadata.gz.sig +3 -0
- data/Rakefile +0 -21
data/lib/ultrasphinx/search/internals.rb
ADDED
@@ -0,0 +1,204 @@
+
+module Ultrasphinx
+  class Search
+    module Internals
+
+      # These methods are kept stateless to ease debugging
+
+      private
+
+      def build_request_with_options opts
+
+        request = Sphinx::Client.new
+
+        request.SetServer(
+          Ultrasphinx::CLIENT_SETTINGS['server_host'],
+          Ultrasphinx::CLIENT_SETTINGS['server_port']
+        )
+
+        # Force extended query mode
+        request.SetMatchMode(Sphinx::Client::SPH_MATCH_EXTENDED)
+
+        offset, limit = opts['per_page'] * (opts['page'] - 1), opts['per_page']
+
+        request.SetLimits offset, limit, [offset + limit, MAX_MATCHES].min
+        request.SetSortMode SPHINX_CLIENT_PARAMS['sort_mode'][opts['sort_mode']], opts['sort_by'].to_s
+
+        if weights = opts['weight']
+          # Order the weights hash according to the field order for Sphinx, and set the missing fields to 1.0
+          request.SetWeights(Fields.instance.types.select{|n,t| t == 'text'}.map(&:first).sort.inject([]) do |array, field|
+            array << (weights[field] || 1.0)
+          end)
+        end
+
+        unless opts['class_name'].compact.empty?
+          request.SetFilter 'class_id', opts['class_name'].map{|m| MODELS_TO_IDS[m.to_s]}
+        end
+
+        # Extract ranged raw filters
+        # Some of this mangling might not be necessary
+        opts['filter'].each do |field, value|
+          begin
+            case value
+            when Fixnum, Float, BigDecimal, NilClass, Array
+              request.SetFilter field, Array(value)
+            when Range
+              min, max = [value.begin, value.end].map do |x|
+                x._to_numeric
+              end
+              raise NoMethodError unless min <=> max and max <=> min
+              min, max = max, min if min > max
+              request.SetFilterRange field, min, max
+            when String
+              opts['parsed_query'] << " @#{field} #{value}"
+            else
+              raise NoMethodError
+            end
+          rescue NoMethodError => e
+            raise Sphinx::SphinxArgumentError, "filter: #{field.inspect}:#{value.inspect} is invalid"
+          end
+        end
+
+        request
+      end
+
+      def get_subtotals(original_request, query)
+        request = original_request._deep_dup
+        request.instance_eval { @filters.delete_if {|f| f['attr'] == 'class_id'} }
+
+        facets = get_facets(request, query, 'class_id')
+
+        # Not using the standard facet caching here
+        Hash[*(MODELS_TO_IDS.map do |klass, id|
+          [klass, facets[id] || 0]
+        end.flatten)]
+      end
+
+      def get_facets(original_request, query, original_facet)
+        request, facet = original_request._deep_dup, original_facet
+        facet += "_facet" if Fields.instance.types[original_facet] == 'text'
+
+        raise UsageError, "Field #{original_facet} does not exist or was not configured for faceting" unless Fields.instance.types[facet]
+
+        # Set the facet query parameter and modify per-page setting so we snag all the facets
+        request.SetGroupBy(facet, Sphinx::Client::SPH_GROUPBY_ATTR, '@count desc')
+        limit = self.class.client_options['max_facets']
+        request.SetLimits 0, limit, [limit, MAX_MATCHES].min
+
+        # Run the query
+        matches = request.Query(query)['matches']
+
+        # Map the facets back to something sane
+        facets = {}
+        matches.each do |match|
+          match = match.last['attrs'] # :(
+          raise ResponseError if facets[match['@groupby']]
+          facets[match['@groupby']] = match['@count']
+        end
+
+        # Invert crc's, if we have them
+        reverse_map_facets(facets, original_facet)
+      end
+
+      def reverse_map_facets(facets, facet)
+        facets = facets.dup
+
+        if Fields.instance.types[facet] == 'text'
+          unless FACET_CACHE[facet]
+            # Cache the reverse CRC map for the textual facet if it hasn't been done yet
+            # XXX not necessarily optimal since it requires a direct DB hit once per mongrel
+            Ultrasphinx.say "caching crc reverse map for text facet #{facet}"
+
+            Fields.instance.classes[facet].each do |klass|
+              # you can only use a facet from your own self right now; no includes allowed
+              field = (MODEL_CONFIGURATION[klass.name]['fields'].detect do |field_hash|
+                field_hash['as'] == facet
+              end)['field']
+
+              klass.connection.execute("SELECT #{field} AS value, CRC32(#{field}) AS crc FROM #{klass.table_name} GROUP BY #{field}").each_hash do |hash|
+                (FACET_CACHE[facet] ||= {})[hash['crc'].to_i] = hash['value']
+              end
+            end
+          end
+
+          # Apply the map
+          facets = Hash[*(facets.map do |crc, value|
+            [FACET_CACHE[facet][crc], value]
+          end.flatten)]
+        end
+
+        facets
+      end
+
+      def reify_results(sphinx_ids)
+
+        # Order by position and then toss the rest of the data
+        sphinx_ids = sphinx_ids.sort_by do |key, value|
+          value['index'] or raise ConfigurationError, "Your Sphinx client is not properly patched."
+        end.map(&:first)
+
+        # Inverse-modulus map the sphinx ids to the table-specific ids
+        ids = Hash.new([])
+        sphinx_ids.each do |id|
+          ids[MODELS_TO_IDS.invert[id % MODELS_TO_IDS.size]] += [id / MODELS_TO_IDS.size] # yay math
+        end
+        raise Sphinx::SphinxResponseError, "impossible document id in query result" unless ids.values.flatten.size == sphinx_ids.size
+
+        # Fetch them for real
+        results = []
+        ids.each do |model, id_set|
+          klass = model.constantize
+
+          finder = self.class.client_options['finder_methods'].detect do |method_name|
+            klass.respond_to? method_name
+          end
+
+          logger.debug "** ultrasphinx: using #{klass.name}.#{finder} as finder method"
+
+          begin
+            # XXX Does not use Memcached's multiget
+            results += case instances = id_set.map { |id| klass.send(finder, id) }
+            when Hash
+              instances.values
+            when Array
+              instances
+            else
+              Array(instances)
+            end
+          rescue ActiveRecord::ActiveRecordError => e
+            raise Sphinx::SphinxResponseError, e.inspect
+          end
+        end
+
+        # Put them back in order
+        results.sort_by do |r|
+          raise Sphinx::SphinxResponseError, "Bogus ActiveRecord id for #{r.class}:#{r.id}" unless r.id
+          index = (sphinx_ids.index(sphinx_id = r.id * MODELS_TO_IDS.size + MODELS_TO_IDS[r.class.base_class.name]))
+          raise Sphinx::SphinxResponseError, "Bogus reverse id for #{r.class}:#{r.id} (Sphinx:#{sphinx_id})" unless index
+          index / sphinx_ids.size.to_f
+        end
+
+        # Add an accessor for absolute search rank for each record
+        results.each_with_index do |r, index|
+          i = per_page * (current_page - 1) + index
+          r._metaclass.send('define_method', 'result_index') { i }
+        end
+
+        results
+      end
+
+
+      def strip_bogus_characters(s)
+        # Used to remove some garbage before highlighting
+        s.gsub(/<.*?>|\.\.\.|\342\200\246|\n|\r/, " ").gsub(/http.*?( |$)/, ' ') if s
+      end
+
+      def strip_query_commands(s)
+        # XXX Hack for query commands, since sphinx doesn't intelligently parse the query in excerpt mode
+        # Also removes apostrophes in the middle of words so that they don't get split in two.
+        s.gsub(/(^|\s)(AND|OR|NOT|\@\w+)(\s|$)/i, "").gsub(/(\w)\'(\w)/, '\1\2')
+      end
+
+    end
+  end
+end
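A note on the compound id scheme used by reify_results above: a Sphinx document id encodes both the model and the record id, so recovering them is just modulus and integer division. A minimal hand-worked sketch, with a hypothetical two-model MODELS_TO_IDS map:

  MODELS_TO_IDS = {'User' => 0, 'Post' => 1}                   # hypothetical mapping
  sphinx_id = 7 * MODELS_TO_IDS.size + MODELS_TO_IDS['Post']   # encode Post #7 => 15
  MODELS_TO_IDS.invert[sphinx_id % MODELS_TO_IDS.size]         # => 'Post'
  sphinx_id / MODELS_TO_IDS.size                               # => 7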
data/lib/ultrasphinx/search/parser.rb
ADDED
@@ -0,0 +1,115 @@
+
+module Ultrasphinx
+  class Search
+    module Parser
+
+      class Error < RuntimeError; end
+
+      OPERATORS = {
+        'OR' => '|',
+        'AND' => '',
+        'NOT' => '-',
+        'or' => '|',
+        'and' => '',
+        'not' => '-'
+      }
+
+      private
+
+      def parse query
+        # Alters a Google query string into Sphinx 0.97 style
+        return "" if query.blank?
+        # Parse
+        token_hash = token_stream_to_hash(query_to_token_stream(query))
+        # Join everything up and remove some spaces
+        token_hash_to_array(token_hash).join(" ").squeeze(" ").strip
+      end
+
+
+      def token_hash_to_array(token_hash)
+        query = []
+
+        token_hash.sort_by do |key, value|
+          key or ""
+        end.each do |field, contents|
+          # first operator always goes outside
+          query << contents.first.first
+
+          query << "@#{field}" if field
+          query << "(" if field and contents.size > 1
+
+          contents.each_with_index do |op_and_content, index|
+            op, content = op_and_content
+            query << op unless index == 0
+            query << content
+          end
+
+          query << ")" if field and contents.size > 1
+        end
+
+        # XXX swap the first pair if the order is reversed
+        if [OPERATORS['NOT'], OPERATORS['OR']].include? query.first.upcase
+          query[0], query[1] = query[1], query[0]
+        end
+
+        query
+      end
+
+
+      def query_to_token_stream(query)
+        # First, split query on spaces that are not inside sets of quotes or parens
+        query = query.scan(/[^"() ]*["(][^")]*[")]|[^"() ]+/)
+
+        token_stream = []
+        has_operator = false
+
+        query.each_with_index do |subtoken, index|
+
+          # recurse for parens, if necessary
+          if subtoken =~ /^(.*?)\((.*)\)(.*?$)/
+            subtoken = query[index] = "#{$1}(#{parse $2})#{$3}"
+          end
+
+          # add to the stream, converting the operator
+          if !has_operator
+            if OPERATORS.to_a.flatten.include? subtoken and index != (query.size - 1) # operators at the end of the string are not parsed
+              token_stream << OPERATORS[subtoken] || subtoken
+              has_operator = true # flip
+            else
+              token_stream << ""
+              token_stream << subtoken
+            end
+          else
+            if OPERATORS.to_a.flatten.include? subtoken
+              # drop extra operator
+            else
+              token_stream << subtoken
+              has_operator = false # flop
+            end
+          end
+        end
+
+        raise Error, "#{token_stream.inspect} is not a valid token stream" unless token_stream.size % 2 == 0
+        token_stream.in_groups_of(2)
+      end
+
+
+      def token_stream_to_hash(token_stream)
+        token_hash = Hash.new([])
+        token_stream.map do |operator, content|
+          # remove some spaces
+          content.gsub!(/^"\s+|\s+"$/, '"')
+          # convert fields into sphinx style, reformat the stream object
+          if content =~ /(.*?):(.*)/
+            token_hash[$1] += [[operator, $2]]
+          else
+            token_hash[nil] += [[operator, content]]
+          end
+        end
+        token_hash
+      end
+
+
+    end
+  end
+end
data/lib/ultrasphinx/spell.rb
CHANGED
@@ -3,19 +3,26 @@
 module Ultrasphinx
 
 =begin rdoc
-== Spelling support
 
-In order to spellcheck your user's query, Ultrasphinx bundles a small spelling module.
+In order to spellcheck your user's query, Ultrasphinx bundles a small spelling module.
+
+== Setup
+
+Make sure Aspell and the Rubygem <tt>raspell</tt> are installed. See http://blog.evanweaver.com/files/doc/fauna/raspell/ for detailed instructions.
 
-
+Copy the <tt>examples/app.multi</tt> file into your Aspell dictionary folder (<tt>/opt/local/share/aspell/</tt> on Mac, <tt>/usr/lib/aspell-0.60/</tt> on Linux). This file lets Aspell load a custom wordlist generated by Sphinx from your app data. Modify the file if you don't want to also use the default American English dictionary.
 
-
-  rake ultrasphinx:spelling:build
+Finally, to build the custom wordlist, run:
+  sudo rake ultrasphinx:spelling:build
+
+You need to use <tt>sudo</tt> because Ultrasphinx needs to write to the Aspell dictionary folder. Also note that Aspell, <tt>raspell</tt>, and the custom dictionary must be available on each application server, not on the Sphinx daemon server.
+
+== Usage
 
 Now you can see if a query is correctly spelled as so:
   @correction = Ultrasphinx::Spell.correct(@search.query)
 
-If
+If <tt>@correction</tt> is not <tt>nil</tt>, go ahead and suggest it to the user.
 
 =end
 
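A minimal controller sketch tying the spelling check to a search (illustrative only; the search calls shown follow the plugin's README conventions):

  def search
    @search = Ultrasphinx::Search.new(:query => params[:query])
    @search.run
    # nil when the query is already well-spelled
    @correction = Ultrasphinx::Spell.correct(@search.query)
  end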
data/lib/ultrasphinx/ultrasphinx.rb
CHANGED
@@ -7,6 +7,8 @@ module Ultrasphinx
   end
   class DaemonError < Exception #:nodoc:
   end
+  class UsageError < Exception #:nodoc:
+  end
 
   # internal file paths
 
@@ -30,247 +32,82 @@ module Ultrasphinx
 
   MAX_WORDS = 2**16 # maximum number of stopwords built
 
+  EMPTY_SEARCHABLE = "__empty_searchable__"
+
   UNIFIED_INDEX_NAME = "complete"
 
-
-
-
+  CONFIG_MAP = {
+    # These must be symbols for key mapping against Rails itself
+    :username => 'sql_user',
     :password => 'sql_pass',
     :host => 'sql_host',
     :database => 'sql_db',
     :port => 'sql_port',
-    :socket => 'sql_sock'
-
-  OPTIONAL_SPHINX_KEYS = ['morphology', 'stopwords', 'min_word_len', 'charset_type', 'charset_table', 'docinfo']
-
-  # some default settings for the sphinx conf files
+    :socket => 'sql_sock'
+  }
 
-
-
-
-    sql_query_post =
-    sql_range_step = 20000
-  )
+  CONNECTION_DEFAULTS = {
+    :host => 'localhost'
+  }
 
   ADAPTER_DEFAULTS = {
-
+    'mysql' => %(
       type = mysql
       sql_query_pre = SET SESSION group_concat_max_len = 65535
       sql_query_pre = SET NAMES utf8
     ),
-
+    'postgresql' => %(
       type = pgsql
   )}
-
+
+  # Logger.
+  def self.say msg
+    STDERR.puts "** ultrasphinx: #{msg}"
+  end
 
   # Configuration file parser.
   def self.options_for(heading, path)
+    section = open(path).read[/^#{heading}\s*?\{(.*?)\}/m, 1]
 
-    section = open(path).read[/^#{heading}.*?\{(.*?)\}/m, 1]
    unless section
-      Ultrasphinx.say "#{path}
-
-
-
-
-
-
+      Ultrasphinx.say "warning; heading #{heading} not found in #{path}; it may be corrupted. "
+      {}
+    else
+      options = section.split("\n").map do |line|
+        line =~ /\s*(.*?)\s*=\s*([^\#]*)/
+        $1 ? [$1, $2.strip] : []
+      end
+      Hash[*options.flatten]
    end
 
-    Hash[*options.flatten]
  end
 
  # introspect on the existing generated conf files
 
-
-
+  INDEXER_SETTINGS = options_for('indexer', BASE_PATH)
+  CLIENT_SETTINGS = options_for('client', BASE_PATH)
   DAEMON_SETTINGS = options_for('searchd', BASE_PATH)
-
-
-
-  MODEL_CONFIGURATION = {}
-
-
-
-  class << self
-
-    # Logger.
-    def say msg
-      $stderr.puts "** ultrasphinx: #{msg}"
-    end
-
-    # Force all the indexed models to load and fill the MODEL_CONFIGURATION hash.
-    def load_constants
-
-      Dir["#{RAILS_ROOT}/app/models/**/*.rb"].each do |filename|
-        next if filename =~ /\/(\.svn|CVS|\.bzr)\//
-        begin
-          open(filename) {|file| load filename if file.grep(/is_indexed/).any?}
-        rescue Object => e
-          say "warning; possibly critical autoload error on #{filename}"
-          say e.inspect
-        end
-      end
-
-      # build the field-to-type mappings
-      Fields.instance.configure(MODEL_CONFIGURATION)
-    end
-
-    # Complain if the database names go out of sync.
-    def verify_database_name
-      if File.exist? CONF_PATH
-        if options_for("source", CONF_PATH)['sql_db'] != ActiveRecord::Base.connection.instance_variable_get("@config")[:database]
-          say "warning; configured database name is out-of-date"
-          say "please run 'rake ultrasphinx:configure'"
-        end rescue nil
-      end
-    end
-
-
-    # Main SQL builder.
-    def configure
-      load_constants
-
-      puts "Rebuilding Ultrasphinx configurations for #{ENV['RAILS_ENV']} environment"
-      puts "Available models are #{MODEL_CONFIGURATION.keys.to_sentence}"
-      File.open(CONF_PATH, "w") do |conf|
-        conf.puts "\n# Auto-generated at #{Time.now}.\n# Hand modifications will be overwritten.\n"
-
-        conf.puts "\n# #{BASE_PATH}"
-        conf.puts open(BASE_PATH).read.sub(/^ultrasphinx.*?\{.*?\}/m, '') + "\n"
-
-        sphinx_source_list = []
-
-        conf.puts "\n# Source configuration\n\n"
-
-        puts "Generating SQL"
-        MODEL_CONFIGURATION.each_with_index do |model_options, class_id|
-          model, options = model_options
-          klass, source = model.constantize, model.tableize
-
-          # puts "SQL for #{model}"
-
-          sphinx_source_list << source
+  SOURCE_SETTINGS = options_for('source', BASE_PATH)
+  INDEX_SETTINGS = options_for('index', BASE_PATH)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            "#{class_id} AS class_id", "'#{klass.name}' AS class"]
-          remaining_columns = Fields.instance.keys - ["class", "class_id"]
-
-          conf.puts "\nsql_query_range = SELECT MIN(#{pkey}), MAX(#{pkey}) FROM #{table}"
-
-          options[:fields].to_a.each do |f|
-            column, as = f.is_a?(Hash) ? [f[:field], f[:as]] : [f, f]
-            column_strings << Fields.instance.cast("#{table}.#{column}", as)
-            remaining_columns.delete(as)
-          end
-
-          options[:includes].to_a.each do |join|
-            join_klass = join[:model].constantize
-            association = klass.reflect_on_association(join[:model].underscore.to_sym)
-            if not association
-              if not join[:association_sql]
-                raise ConfigurationError, "Unknown association from #{klass} to #{join[:model]}"
-              else
-                join_strings << join[:association_sql]
-              end
-            else
-              join_strings << "LEFT OUTER JOIN #{join_klass.table_name} ON " +
-                if (macro = association.macro) == :belongs_to
-                  "#{join_klass.table_name}.#{join_klass.primary_key} = #{table}.#{association.primary_key_name}"
-                elsif macro == :has_one
-                  "#{table}.#{klass.primary_key} = #{join_klass.table_name}.#{association.instance_variable_get('@foreign_key_name')}"
-                else
-                  raise ConfigurationError, "Unidentified association macro #{macro.inspect}"
-                end
-            end
-            column_strings << "#{join_klass.table_name}.#{join[:field]} AS #{join[:as] or join[:field]}"
-            remaining_columns.delete(join[:as] || join[:field])
-          end
-
-          options[:concats].to_a.select{|concat| concat[:model] and concat[:field]}.each do |group|
-            # only has_many's or explicit sql right now
-            join_klass = group[:model].constantize
-            if group[:association_sql]
-              join_strings << group[:association_sql]
-            else
-              association = klass.reflect_on_association(group[:association_name] ? group[:association_name].to_sym : group[:model].underscore.pluralize.to_sym)
-              join_strings << "LEFT OUTER JOIN #{join_klass.table_name} ON #{table}.#{klass.primary_key} = #{join_klass.table_name}.#{association.primary_key_name}" + (" AND (#{group[:conditions]})" if group[:conditions]).to_s # XXX make sure foreign key is right for polymorphic relationships
-            end
-            column_strings << Fields.instance.cast("GROUP_CONCAT(#{join_klass.table_name}.#{group[:field]} SEPARATOR ' ')", group[:as])
-            remaining_columns.delete(group[:as])
-          end
-
-          options[:concats].to_a.select{|concat| concat[:fields]}.each do |concat|
-            column_strings << Fields.instance.cast("CONCAT_WS(' ', #{concat[:fields].map{|field| "#{table}.#{field}"}.join(', ')})", concat[:as])
-            remaining_columns.delete(concat[:as])
-          end
-
-          # puts "#{model} has #{remaining_columns.inspect} remaining"
-          remaining_columns.each do |field|
-            column_strings << Fields.instance.null(field)
-          end
-
-          query_strings = ["SELECT", column_strings.sort_by do |string|
-            # sphinx wants them always in the same order, but "id" must be first
-            (field = string[/.*AS (.*)/, 1]) == "id" ? "*" : field
-          end.join(", ")]
-          query_strings << "FROM #{table}"
-          query_strings += join_strings.uniq
-          query_strings << "WHERE #{table}.#{pkey} >= $start AND #{table}.#{pkey} <= $end"
-          query_strings += condition_strings.uniq.map{|s| "AND #{s}"}
-          query_strings << "GROUP BY id"
-
-          conf.puts "sql_query = #{query_strings.join(" ")}"
-
-          groups = []
-          # group and date sorting params... this really only would have to be run once
-          Fields.instance.each do |field, type|
-            case type
-            when 'numeric'
-              groups << "sql_group_column = #{field}"
-            when 'date'
-              groups << "sql_date_column = #{field}"
-            end
-          end
-          conf.puts "\n" + groups.sort_by{|s| s[/= (.*)/, 1]}.join("\n")
-          conf.puts "\nsql_query_info = SELECT * FROM #{table} WHERE #{table}.#{pkey} = (($id - #{class_id}) / #{MODEL_CONFIGURATION.size})"
-          conf.puts "}\n\n"
-        end
-
-        conf.puts "\n# Index configuration\n\n"
-
-
-        # only output the unified index; no one uses the individual ones anyway
-
-        conf.puts "index #{UNIFIED_INDEX_NAME}"
-        conf.puts "{"
-        conf.puts sphinx_source_list.map {|s| "source = #{s}" }
-
-        OPTIONAL_SPHINX_KEYS.each do |key|
-          conf.puts "#{key} = #{PLUGIN_SETTINGS[key]}" if PLUGIN_SETTINGS[key]
-        end
-
-        conf.puts "path = #{PLUGIN_SETTINGS["path"]}/sphinx_index_#{UNIFIED_INDEX_NAME}"
-        conf.puts "}\n\n"
-      end
-
+  # Make sure there's a trailing slash
+  INDEX_SETTINGS['path'] = INDEX_SETTINGS['path'].chomp("/") + "/"
+
+  STOPWORDS_PATH = "#{Ultrasphinx::INDEX_SETTINGS['path']}/stopwords.txt"
+
+  MODEL_CONFIGURATION = {}
+
+  # Complain if the database names go out of sync.
+  def self.verify_database_name
+    if File.exist? CONF_PATH
+      if options_for(
+          "source #{MODEL_CONFIGURATION.keys.first.tableize}",
+          CONF_PATH
+        )['sql_db'] != ActiveRecord::Base.connection.instance_variable_get("@config")[:database]
+        say "warning; configured database name is out-of-date"
+        say "please run 'rake ultrasphinx:configure'"
+      end rescue nil
    end
-
  end
+
 end