thinking-sphinx 1.2.12
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENCE +20 -0
- data/README.textile +157 -0
- data/VERSION.yml +4 -0
- data/lib/thinking_sphinx.rb +211 -0
- data/lib/thinking_sphinx/active_record.rb +307 -0
- data/lib/thinking_sphinx/active_record/attribute_updates.rb +48 -0
- data/lib/thinking_sphinx/active_record/delta.rb +87 -0
- data/lib/thinking_sphinx/active_record/has_many_association.rb +28 -0
- data/lib/thinking_sphinx/active_record/scopes.rb +39 -0
- data/lib/thinking_sphinx/adapters/abstract_adapter.rb +42 -0
- data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
- data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +136 -0
- data/lib/thinking_sphinx/association.rb +164 -0
- data/lib/thinking_sphinx/attribute.rb +342 -0
- data/lib/thinking_sphinx/class_facet.rb +15 -0
- data/lib/thinking_sphinx/configuration.rb +282 -0
- data/lib/thinking_sphinx/core/array.rb +7 -0
- data/lib/thinking_sphinx/core/string.rb +15 -0
- data/lib/thinking_sphinx/deltas.rb +30 -0
- data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
- data/lib/thinking_sphinx/deltas/default_delta.rb +68 -0
- data/lib/thinking_sphinx/deltas/delayed_delta.rb +30 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
- data/lib/thinking_sphinx/deploy/capistrano.rb +100 -0
- data/lib/thinking_sphinx/excerpter.rb +22 -0
- data/lib/thinking_sphinx/facet.rb +125 -0
- data/lib/thinking_sphinx/facet_search.rb +134 -0
- data/lib/thinking_sphinx/field.rb +82 -0
- data/lib/thinking_sphinx/index.rb +99 -0
- data/lib/thinking_sphinx/index/builder.rb +286 -0
- data/lib/thinking_sphinx/index/faux_column.rb +110 -0
- data/lib/thinking_sphinx/property.rb +162 -0
- data/lib/thinking_sphinx/rails_additions.rb +150 -0
- data/lib/thinking_sphinx/search.rb +707 -0
- data/lib/thinking_sphinx/search_methods.rb +421 -0
- data/lib/thinking_sphinx/source.rb +150 -0
- data/lib/thinking_sphinx/source/internal_properties.rb +46 -0
- data/lib/thinking_sphinx/source/sql.rb +128 -0
- data/lib/thinking_sphinx/tasks.rb +165 -0
- data/rails/init.rb +14 -0
- data/spec/lib/thinking_sphinx/active_record/delta_spec.rb +130 -0
- data/spec/lib/thinking_sphinx/active_record/has_many_association_spec.rb +49 -0
- data/spec/lib/thinking_sphinx/active_record/scopes_spec.rb +96 -0
- data/spec/lib/thinking_sphinx/active_record_spec.rb +364 -0
- data/spec/lib/thinking_sphinx/association_spec.rb +239 -0
- data/spec/lib/thinking_sphinx/attribute_spec.rb +500 -0
- data/spec/lib/thinking_sphinx/configuration_spec.rb +268 -0
- data/spec/lib/thinking_sphinx/core/array_spec.rb +9 -0
- data/spec/lib/thinking_sphinx/core/string_spec.rb +9 -0
- data/spec/lib/thinking_sphinx/excerpter_spec.rb +49 -0
- data/spec/lib/thinking_sphinx/facet_search_spec.rb +176 -0
- data/spec/lib/thinking_sphinx/facet_spec.rb +333 -0
- data/spec/lib/thinking_sphinx/field_spec.rb +154 -0
- data/spec/lib/thinking_sphinx/index/builder_spec.rb +455 -0
- data/spec/lib/thinking_sphinx/index/faux_column_spec.rb +30 -0
- data/spec/lib/thinking_sphinx/index_spec.rb +45 -0
- data/spec/lib/thinking_sphinx/rails_additions_spec.rb +203 -0
- data/spec/lib/thinking_sphinx/search_methods_spec.rb +152 -0
- data/spec/lib/thinking_sphinx/search_spec.rb +1092 -0
- data/spec/lib/thinking_sphinx/source_spec.rb +227 -0
- data/spec/lib/thinking_sphinx_spec.rb +162 -0
- data/tasks/distribution.rb +50 -0
- data/tasks/rails.rake +1 -0
- data/tasks/testing.rb +83 -0
- data/vendor/after_commit/LICENSE +20 -0
- data/vendor/after_commit/README +16 -0
- data/vendor/after_commit/Rakefile +22 -0
- data/vendor/after_commit/init.rb +8 -0
- data/vendor/after_commit/lib/after_commit.rb +45 -0
- data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
- data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
- data/vendor/after_commit/test/after_commit_test.rb +53 -0
- data/vendor/delayed_job/lib/delayed/job.rb +251 -0
- data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
- data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
- data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
- data/vendor/riddle/lib/riddle.rb +30 -0
- data/vendor/riddle/lib/riddle/client.rb +635 -0
- data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
- data/vendor/riddle/lib/riddle/client/message.rb +66 -0
- data/vendor/riddle/lib/riddle/client/response.rb +84 -0
- data/vendor/riddle/lib/riddle/configuration.rb +33 -0
- data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
- data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
- data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
- data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
- data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
- data/vendor/riddle/lib/riddle/configuration/section.rb +43 -0
- data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
- data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
- data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
- data/vendor/riddle/lib/riddle/controller.rb +53 -0
- metadata +172 -0
@@ -0,0 +1,342 @@
|
|
1
|
+
module ThinkingSphinx
|
2
|
+
# Attributes - eternally useful when it comes to filtering, sorting or
|
3
|
+
# grouping. This class isn't really useful to you unless you're hacking
|
4
|
+
# around with the internals of Thinking Sphinx - but hey, don't let that
|
5
|
+
# stop you.
|
6
|
+
#
|
7
|
+
# One key thing to remember - if you're using the attribute manually to
|
8
|
+
# generate SQL statements, you'll need to set the base model, and all the
|
9
|
+
# associations. Which can get messy. Use Index.link!, it really helps.
|
10
|
+
#
|
11
|
+
class Attribute < ThinkingSphinx::Property
|
12
|
+
attr_accessor :query_source
|
13
|
+
|
14
|
+
# To create a new attribute, you'll need to pass in either a single Column
|
15
|
+
# or an array of them, and some (optional) options.
|
16
|
+
#
|
17
|
+
# Valid options are:
|
18
|
+
# - :as => :alias_name
|
19
|
+
# - :type => :attribute_type
|
20
|
+
# - :source => :field, :query, :ranged_query
|
21
|
+
#
|
22
|
+
# Alias is only required in three circumstances: when there's
|
23
|
+
# another attribute or field with the same name, when the column name is
|
24
|
+
# 'id', or when there's more than one column.
|
25
|
+
#
|
26
|
+
# Type is not required, unless you want to force a column to be a certain
|
27
|
+
# type (but keep in mind the value will not be CASTed in the SQL
|
28
|
+
# statements). The only time you really need to use this is when the type
|
29
|
+
# can't be figured out by the column - ie: when not actually using a
|
30
|
+
# database column as your source.
|
31
|
+
#
|
32
|
+
# Source is only used for multi-value attributes (MVA). By default this will
|
33
|
+
# use a left-join and a group_concat to obtain the values. For better performance
|
34
|
+
# during indexing it can be beneficial to let Sphinx use a separate query to retrieve
|
35
|
+
# all document,value-pairs.
|
36
|
+
# Either :query or :ranged_query will enable this feature, where :ranged_query will cause
|
37
|
+
# the query to be executed incrementally.
|
38
|
+
#
|
39
|
+
# Example usage:
|
40
|
+
#
|
41
|
+
# Attribute.new(
|
42
|
+
# Column.new(:created_at)
|
43
|
+
# )
|
44
|
+
#
|
45
|
+
# Attribute.new(
|
46
|
+
# Column.new(:posts, :id),
|
47
|
+
# :as => :post_ids
|
48
|
+
# )
|
49
|
+
#
|
50
|
+
# Attribute.new(
|
51
|
+
# Column.new(:posts, :id),
|
52
|
+
# :as => :post_ids,
|
53
|
+
# :source => :ranged_query
|
54
|
+
# )
|
55
|
+
#
|
56
|
+
# Attribute.new(
|
57
|
+
# [Column.new(:pages, :id), Column.new(:articles, :id)],
|
58
|
+
# :as => :content_ids
|
59
|
+
# )
|
60
|
+
#
|
61
|
+
# Attribute.new(
|
62
|
+
# Column.new("NOW()"),
|
63
|
+
# :as => :indexed_at,
|
64
|
+
# :type => :datetime
|
65
|
+
# )
|
66
|
+
#
|
67
|
+
# If you're creating attributes for latitude and longitude, don't forget
|
68
|
+
# that Sphinx expects these values to be in radians.
|
69
|
+
#
|
70
|
+
def initialize(source, columns, options = {})
  super

  @type, @query_source, @crc = options.values_at(:type, :source, :crc)

  # Any explicit query source means the attribute is multi-valued, unless
  # a type was given outright.
  @type ||= :multi unless @query_source.nil?

  # CRC'd strings are stored as integers (or as a multi when there are
  # many values).
  @type = (is_many? ? :multi : :integer) if @type == :string && @crc

  source.attributes << self
end
|
84
|
+
|
85
|
+
# Get the part of the SELECT clause related to this attribute. Don't forget
|
86
|
+
# to set your model and associations first though.
|
87
|
+
#
|
88
|
+
# This will concatenate strings and arrays of integers, and convert
|
89
|
+
# datetimes to timestamps, as needed.
|
90
|
+
#
|
91
|
+
def to_select_sql
  return nil unless include_as_association?

  separator = (all_ints? || all_datetimes? || @crc) ? ',' : ' '

  parts = @columns.collect do |column|
    prefixed = column_with_prefix(column)

    case type
    when :string   then adapter.convert_nulls(prefixed)
    when :datetime then adapter.cast_to_datetime(prefixed)
    when :multi    then adapter.convert_nulls(prefixed, 0)
    else                prefixed
    end
  end

  clause = parts.join(', ')
  clause = adapter.crc(clause)                          if @crc
  clause = adapter.concatenate(clause, separator)       if concat_ws?
  clause = adapter.group_concatenate(clause, separator) if is_many?

  "#{clause} AS #{quote_column(unique_name)}"
end
|
117
|
+
|
118
|
+
# Maps this attribute's type to the matching Sphinx source setting name.
# Returns nil for any type without a mapping.
def type_to_config
  case type
  when :multi    then :sql_attr_multi
  when :datetime then :sql_attr_timestamp
  when :string   then :sql_attr_str2ordinal
  when :float    then :sql_attr_float
  when :boolean  then :sql_attr_bool
  when :integer  then :sql_attr_uint
  end
end
|
128
|
+
|
129
|
+
# False only for multi-value attributes sourced from a :query or
# :ranged_query; true for everything else.
def include_as_association?
  type != :multi || ![:query, :ranged_query].include?(query_source)
end
|
132
|
+
|
133
|
+
# Returns the configuration value that should be used for
|
134
|
+
# the attribute.
|
135
|
+
# Special case is the multi-valued attribute that needs some
|
136
|
+
# extra configuration.
|
137
|
+
#
|
138
|
+
def config_value(offset = nil, delta = false)
  return unique_name unless type == :multi

  multi_config = if include_as_association?
    "field"
  else
    # Collapse the generated SQL onto a single line.
    source_value(offset, delta).gsub(/\s+/m, " ").strip
  end

  "uint #{unique_name} from #{multi_config}"
end
|
147
|
+
|
148
|
+
# Returns the type of the column. If that's not already set, it returns
|
149
|
+
# :multi if there's the possibility of more than one value, :string if
|
150
|
+
# there's more than one association, otherwise it figures out what the
|
151
|
+
# actual column's datatype is and returns that.
|
152
|
+
#
|
153
|
+
def type
  @type ||= begin
    inferred = if is_many_datetimes?
      :datetime
    elsif is_many? || is_many_ints?
      :multi
    elsif @associations.values.flatten.length > 1
      :string
    else
      translated_type_from_database
    end

    if inferred == :string && @crc
      # A CRC'd string is stored as its integer checksum.
      :integer
    else
      # CRC only stays enabled for multi-value string attributes.
      @crc = false unless inferred == :multi && is_many_strings? && @crc
      inferred
    end
  end
end
|
175
|
+
|
176
|
+
# True for integer, datetime and boolean attributes that are not defined
# by a string.
def updatable?
  return false unless [:integer, :datetime, :boolean].include?(type)

  !is_string?
end
|
179
|
+
|
180
|
+
# Reads this attribute's current value from a model instance by walking the
# first column's method stack and then calling the column's name on the
# object reached.
#
# Returns nil when any object along the way is nil (for example an unset
# association) instead of raising NoMethodError.
def live_value(instance)
  column = @columns.first
  object = instance

  column.__stack.each do |method|
    return nil if object.nil?
    object = object.send(method)
  end

  object.nil? ? nil : object.send(column.__name)
end
|
186
|
+
|
187
|
+
# True when every column is backed by an :integer database column.
def all_ints?
  all_of_type? :integer
end

# True when every column is backed by a :datetime, :date or :timestamp
# database column.
def all_datetimes?
  all_of_type? :datetime, :date, :timestamp
end

# True when every column is backed by a :string or :text database column.
def all_strings?
  all_of_type? :string, :text
end
|
198
|
+
|
199
|
+
private
|
200
|
+
|
201
|
+
# Builds the "from" part of a multi-value attribute definition: the source
# kind followed by the SQL (and, for ranged queries, the range query).
def source_value(offset, delta)
  return "#{query_source.to_s.dasherize}; #{columns.first.__name}" if is_string?

  sql = query(offset)

  case query_source
  when :ranged_query
    sql = "#{sql}#{query_clause}"
    sql = "#{sql} AND #{query_delta.strip}" if delta
    "ranged-query; #{sql}; #{range_query}"
  else
    sql = "#{sql}WHERE #{query_delta.strip}" if delta
    "query; #{sql}"
  end
end
|
217
|
+
|
218
|
+
# Builds the SELECT that pairs each document id with one attribute value.
# Raises if no base association can be found for the attribute.
def query(offset)
  base_assoc = base_association_for_mva
  end_assoc  = end_association_for_mva
  raise "Could not determine SQL for MVA" if base_assoc.nil?

  lines = [
    "SELECT #{foreign_key_for_mva base_assoc}",
    "#{ThinkingSphinx.unique_id_expression(offset)} AS #{quote_column('id')},",
    "#{primary_key_for_mva(end_assoc)} AS #{quote_column(unique_name)}",
    "FROM #{quote_table_name base_assoc.table} #{association_joins}"
  ]

  # Trailing newline kept so callers can append a WHERE clause directly.
  lines.join("\n") + "\n"
end
|
230
|
+
|
231
|
+
# WHERE clause limiting a ranged query to Sphinx's $start..$end window.
def query_clause
  key = foreign_key_for_mva(base_association_for_mva)
  lower_bound = "#{key} >= $start"
  upper_bound = "#{key} <= $end"

  "WHERE #{lower_bound} AND #{upper_bound}"
end
|
235
|
+
|
236
|
+
# Condition restricting the MVA query to rows whose foreign key belongs to
# a model record matched by the delta object's clause.
def query_delta
  foreign_key = foreign_key_for_mva base_association_for_mva

  lines = [
    "#{foreign_key} IN (SELECT #{quote_column model.primary_key}",
    "FROM #{model.quoted_table_name}",
    "WHERE #{@source.index.delta_object.clause(model, true)})"
  ]

  lines.join("\n") + "\n"
end
|
244
|
+
|
245
|
+
# Query returning the minimum and maximum foreign key values, used as the
# range for a ranged query.
def range_query
  base = base_association_for_mva
  key  = foreign_key_for_mva(base)
  bounds = %w( MIN MAX ).collect { |function| "#{function}(#{key})" }

  "SELECT #{bounds.join(', ')} FROM #{quote_table_name base.table}"
end
|
250
|
+
|
251
|
+
# Quoted "table.column" reference for the attribute's value column: the
# association's reflected primary key, or the first column's name when
# there is none.
def primary_key_for_mva(assoc)
  table = assoc.table
  key   = assoc.primary_key_from_reflection || columns.first.__name

  quote_with_table(table, key)
end
|
256
|
+
|
257
|
+
# Quoted "table.column" reference for the key named by the association's
# reflection.
def foreign_key_for_mva(assoc)
  table = assoc.table
  key   = assoc.reflection.primary_key_name

  quote_with_table(table, key)
end
|
260
|
+
|
261
|
+
# The first association of the first column that has that column's name.
# The result is memoised once found.
def end_association_for_mva
  return @association_for_mva if @association_for_mva

  column = columns.first
  @association_for_mva = associations[column].detect do |candidate|
    candidate.has_column?(column.__name)
  end
end
|
266
|
+
|
267
|
+
# The root of the association chain: starts at the end association and
# follows #parent until an association without a parent is reached.
#
# Returns nil when there is no end association, rather than raising
# NoMethodError on nil, so callers can do their own nil check.
def base_association_for_mva
  @first_association_for_mva ||= begin
    assoc = end_association_for_mva
    assoc = assoc.parent until assoc.nil? || assoc.parent.nil?
    assoc
  end
end
|
277
|
+
|
278
|
+
# SQL for every association from the end association up to, but not
# including, the base association, separated by spaces.
def association_joins
  base      = base_association_for_mva
  current   = end_association_for_mva
  fragments = []

  until current == base
    fragments.push current.to_sql
    current = current.parent
  end

  fragments.join(' ')
end
|
288
|
+
|
289
|
+
# Several concatenated columns, all of them integers.
def is_many_ints?
  concat_ws? and all_ints?
end

# Many values, all of them date or time columns.
def is_many_datetimes?
  is_many? and all_datetimes?
end

# Many values, all of them string or text columns.
def is_many_strings?
  is_many? and all_strings?
end
|
300
|
+
|
301
|
+
# Looks up the database type of the first table column whose name matches
# one of this attribute's columns. The table is that of the first
# association's class, or the model itself when there are no associations.
# Returns nil when no column matches.
def type_from_database
  first_assoc = @associations.values.flatten.first
  klass       = first_assoc ? first_assoc.reflection.klass : @model
  names       = @columns.collect { |c| c.__name.to_s }

  match = klass.columns.detect { |col| names.include?(col.name) }
  match && match.type
end
|
310
|
+
|
311
|
+
# Translates the database column type into the attribute type used for
# Sphinx, raising when there is no equivalent.
def translated_type_from_database
  db_type = type_from_database

  case db_type
  when :datetime, :timestamp, :date
    :datetime
  when :float, :decimal
    :float
  when :string, :boolean, :integer
    db_type
  else
    raise <<-MESSAGE

Cannot automatically map attribute #{unique_name} in #{@model.name} to an
equivalent Sphinx type (integer, float, boolean, datetime, string as ordinal).
You could try to explicitly convert the column's value in your define_index
block:
has "CAST(column AS INT)", :type => :integer, :as => :column
    MESSAGE
  end
end
|
330
|
+
|
331
|
+
# True when, for every column of this attribute, each class it can come
# from (the associations' classes, or the model when there are none) has a
# database column of that name whose type is one of +column_types+.
def all_of_type?(*column_types)
  @columns.all? do |col|
    assocs  = @associations[col]
    klasses = assocs.empty? ? [@model] : assocs.collect { |assoc| assoc.reflection.klass }

    klasses.all? do |klass|
      match = klass.columns.detect { |candidate| candidate.name == col.__name.to_s }
      !match.nil? && column_types.include?(match.type)
    end
  end
end
|
341
|
+
end
|
342
|
+
end
|
@@ -0,0 +1,282 @@
|
|
1
|
+
require 'erb'
|
2
|
+
require 'singleton'
|
3
|
+
|
4
|
+
module ThinkingSphinx
|
5
|
+
# This class both keeps track of the configuration settings for Sphinx and
|
6
|
+
# also generates the resulting file for Sphinx to use.
|
7
|
+
#
|
8
|
+
# Here are the default settings, relative to RAILS_ROOT where relevant:
|
9
|
+
#
|
10
|
+
# config file:: config/#{environment}.sphinx.conf
|
11
|
+
# searchd log file:: log/searchd.log
|
12
|
+
# query log file:: log/searchd.query.log
|
13
|
+
# pid file:: log/searchd.#{environment}.pid
|
14
|
+
# searchd files:: db/sphinx/#{environment}/
|
15
|
+
# address:: 127.0.0.1
|
16
|
+
# port:: 3312
|
17
|
+
# allow star:: false
|
18
|
+
# min prefix length:: 1
|
19
|
+
# min infix length:: 1
|
20
|
+
# mem limit:: 64M
|
21
|
+
# max matches:: 1000
|
22
|
+
# morphology:: nil
|
23
|
+
# charset type:: utf-8
|
24
|
+
# charset table:: nil
|
25
|
+
# ignore chars:: nil
|
26
|
+
# html strip:: false
|
27
|
+
# html remove elements:: ''
|
28
|
+
# searchd_binary_name:: searchd
|
29
|
+
# indexer_binary_name:: indexer
|
30
|
+
#
|
31
|
+
# If you want to change these settings, create a YAML file at
|
32
|
+
# config/sphinx.yml with settings for each environment, in a similar
|
33
|
+
# fashion to database.yml - using the following keys: config_file,
|
34
|
+
# searchd_log_file, query_log_file, pid_file, searchd_file_path, port,
|
35
|
+
# allow_star, enable_star, min_prefix_len, min_infix_len, mem_limit,
|
36
|
+
# max_matches, morphology, charset_type, charset_table, ignore_chars,
|
37
|
+
# html_strip, html_remove_elements, delayed_job_priority,
|
38
|
+
# searchd_binary_name, indexer_binary_name.
|
39
|
+
#
|
40
|
+
# I think you've got the idea.
|
41
|
+
#
|
42
|
+
# Each setting in the YAML file is optional - so only put in the ones you
|
43
|
+
# want to change.
|
44
|
+
#
|
45
|
+
# Keep in mind, if for some particular reason you're using a version of
|
46
|
+
# Sphinx older than 0.9.8 r871 (that's prior to the proper 0.9.8 release),
|
47
|
+
# don't set allow_star to true.
|
48
|
+
#
|
49
|
+
class Configuration
|
50
|
+
include Singleton
|
51
|
+
|
52
|
+
SourceOptions = %w( mysql_connect_flags sql_range_step sql_query_pre
|
53
|
+
sql_query_post sql_ranged_throttle sql_query_post_index )
|
54
|
+
|
55
|
+
IndexOptions = %w( charset_table charset_type docinfo enable_star
|
56
|
+
exceptions html_index_attrs html_remove_elements html_strip ignore_chars
|
57
|
+
min_infix_len min_prefix_len min_word_len mlock morphology ngram_chars
|
58
|
+
ngram_len phrase_boundary phrase_boundary_step preopen stopwords
|
59
|
+
wordforms )
|
60
|
+
|
61
|
+
CustomOptions = %w( disable_range )
|
62
|
+
|
63
|
+
attr_accessor :config_file, :searchd_log_file, :query_log_file,
|
64
|
+
:pid_file, :searchd_file_path, :address, :port, :allow_star,
|
65
|
+
:database_yml_file, :app_root, :bin_path, :model_directories,
|
66
|
+
:delayed_job_priority, :searchd_binary_name, :indexer_binary_name
|
67
|
+
|
68
|
+
attr_accessor :source_options, :index_options
|
69
|
+
|
70
|
+
attr_reader :environment, :configuration
|
71
|
+
|
72
|
+
# Load in the configuration settings - this will look for config/sphinx.yml
|
73
|
+
# and parse it according to the current environment.
|
74
|
+
#
|
75
|
+
def initialize(app_root = Dir.pwd)
  # Record the requested root before resetting: #reset only overrides it
  # when running under Rails or Merb, so the Dir.pwd default takes effect
  # everywhere else.
  self.app_root = app_root
  self.reset
end
|
78
|
+
|
79
|
+
# Yields the singleton instance to the given block, then resets it using
# whatever app_root the instance has at that point.
def self.configure(&block)
  config = instance
  yield config
  config.reset(config.app_root)
end
|
83
|
+
|
84
|
+
# Restores every setting to its default, relative to the application root,
# and then applies config/sphinx.yml via parse_config.
#
# custom_app_root - root directory to use. When omitted, RAILS_ROOT or
#                   Merb.root is used if defined; otherwise the current
#                   root is kept, falling back to the working directory.
#
# Returns self.
def reset(custom_app_root=nil)
  if custom_app_root
    self.app_root = custom_app_root
  else
    self.app_root   = RAILS_ROOT if defined?(RAILS_ROOT)
    self.app_root   = Merb.root  if defined?(Merb)
    self.app_root ||= Dir.pwd
  end

  @configuration = Riddle::Configuration.new
  # The memoised controller wraps the previous configuration object, so
  # drop it and let #controller build a new one on demand.
  @controller = nil

  searchd = @configuration.searchd
  searchd.address   = "127.0.0.1"
  searchd.port      = 3312
  searchd.pid_file  = "#{self.app_root}/log/searchd.#{environment}.pid"
  searchd.log       = "#{self.app_root}/log/searchd.log"
  searchd.query_log = "#{self.app_root}/log/searchd.query.log"

  self.database_yml_file    = "#{self.app_root}/config/database.yml"
  self.config_file          = "#{self.app_root}/config/#{environment}.sphinx.conf"
  self.searchd_file_path    = "#{self.app_root}/db/sphinx/#{environment}"
  self.allow_star           = false
  self.bin_path             = ""
  self.model_directories    = ["#{app_root}/app/models/"] +
    Dir.glob("#{app_root}/vendor/plugins/*/app/models/")
  self.delayed_job_priority = 0

  self.source_options = {}
  self.index_options  = {
    :charset_type => "utf-8"
  }

  self.searchd_binary_name = "searchd"
  self.indexer_binary_name = "indexer"

  parse_config

  self
end
|
121
|
+
|
122
|
+
# The current environment name: Merb's when Merb is defined, otherwise
# ENV['RAILS_ENV'], defaulting to "development". Memoised in a class
# variable.
def self.environment
  @@environment ||= begin
    detected = defined?(Merb) ? Merb.environment : ENV['RAILS_ENV']
    detected || "development"
  end
end
|
127
|
+
|
128
|
+
# Instance-level shortcut to the class-level environment name.
def environment
  klass = self.class
  klass.environment
end
|
131
|
+
|
132
|
+
# Riddle controller for the current configuration and config file,
# created on first use and memoised.
def controller
  return @controller if @controller

  @controller = Riddle::Controller.new(@configuration, self.config_file)
end
|
135
|
+
|
136
|
+
# Generate the config file for Sphinx by using all the settings defined and
|
137
|
+
# looping through all the models with indexes to build the relevant
|
138
|
+
# indexer and searchd configuration, and sources and indexes details.
|
139
|
+
#
|
140
|
+
def build(file_path=nil)
  load_models
  file_path ||= "#{self.config_file}"

  @configuration.indexes.clear

  ThinkingSphinx.indexed_models.each_with_index do |model, model_index|
    @configuration.indexes.concat model.constantize.to_riddle(model_index)
  end

  # File.open, not Kernel#open: the latter treats a path starting with "|"
  # as a command to run.
  File.open(file_path, "w") do |file|
    file.write @configuration.render
  end
end
|
154
|
+
|
155
|
+
# Make sure all models are loaded - without reloading any that
|
156
|
+
# ActiveRecord::Base is already aware of (otherwise we start to hit some
|
157
|
+
# messy dependencies issues).
|
158
|
+
#
|
159
|
+
def load_models
  return if defined?(Rails) &&
    Rails::VERSION::STRING.to_f > 2.1 &&
    Rails.configuration.cache_classes

  self.model_directories.each do |base|
    # Escape the directory so regexp metacharacters in the path are matched
    # literally.
    path_pattern = /^#{Regexp.escape(base)}([\w_\/\\]+)\.rb/

    Dir["#{base}**/*.rb"].each do |file|
      model_name = file.gsub(path_pattern, '\1')

      # File paths are underscored while class names are camelized, so
      # camelize before checking what ActiveRecord already knows about.
      class_name = model_name.camelize
      next if ::ActiveRecord::Base.send(:subclasses).detect { |model|
        model.name == class_name
      }

      begin
        model_name.camelize.constantize
      rescue LoadError
        # Strip the leading directories and retry with the bare file name;
        # give up on this file once there is nothing left to strip.
        model_name.gsub!(/.*[\/\\]/, '').nil? ? next : retry
      rescue NameError
        next
      rescue StandardError
        puts "Warning: Error loading #{file}"
      end
    end
  end
end
|
185
|
+
|
186
|
+
# The accessors below read and write the searchd section of the Riddle
# configuration rather than instance variables on this object.

# searchd address.
def address
  @configuration.searchd.address
end

def address=(address)
  @configuration.searchd.address = address
end

# searchd port.
def port
  @configuration.searchd.port
end

def port=(port)
  @configuration.searchd.port = port
end

# searchd pid file path.
def pid_file
  @configuration.searchd.pid_file
end

def pid_file=(pid_file)
  @configuration.searchd.pid_file = pid_file
end

# searchd log file path (stored as searchd's +log+ setting).
def searchd_log_file
  @configuration.searchd.log
end

def searchd_log_file=(file)
  @configuration.searchd.log = file
end

# searchd query log file path (stored as searchd's +query_log+ setting).
def query_log_file
  @configuration.searchd.query_log
end

def query_log_file=(file)
  @configuration.searchd.query_log = file
end
|
225
|
+
|
226
|
+
# Builds a new Riddle client for the configured address and port, with
# max_matches taken from the searchd settings (1000 when unset).
def client
  limit  = configuration.searchd.max_matches || 1000
  riddle = Riddle::Client.new(address, port)
  riddle.max_matches = limit
  riddle
end
|
231
|
+
|
232
|
+
# Hash of CRC32 value => model name, covering every indexed model and each
# of its subclasses. Built on first use and memoised.
def models_by_crc
  @models_by_crc ||= begin
    lookup = {}

    ThinkingSphinx.indexed_models.each do |model|
      klass = model.constantize
      lookup[klass.to_crc32] = model

      Object.subclasses_of(klass).each do |subclass|
        lookup[subclass.to_crc32] = subclass.name
      end
    end

    lookup
  end
end
|
243
|
+
|
244
|
+
private
|
245
|
+
|
246
|
+
# Parse the config/sphinx.yml file - if it exists - then use the attribute
|
247
|
+
# accessors to set the appropriate values. Nothing too clever.
|
248
|
+
#
|
249
|
+
def parse_config
  path = "#{app_root}/config/sphinx.yml"
  # File.exist? rather than File.exists?, which Ruby 3.2 removed.
  return unless File.exist?(path)

  # An empty file parses to false/nil, so fall back to an empty hash
  # before looking up the environment's section.
  settings = YAML::load(ERB.new(IO.read(path)).result) || {}
  conf     = settings[environment]

  conf.each do |key,value|
    self.send("#{key}=", value) if self.respond_to?("#{key}=")

    set_sphinx_setting self.source_options, key, value, SourceOptions
    set_sphinx_setting self.index_options,  key, value, IndexOptions
    set_sphinx_setting self.index_options,  key, value, CustomOptions
    set_sphinx_setting @configuration.searchd, key, value
    set_sphinx_setting @configuration.indexer, key, value
  end unless conf.nil?

  self.bin_path += '/' unless self.bin_path.blank?

  if self.allow_star
    self.index_options[:enable_star]    = true
    self.index_options[:min_prefix_len] = 1
  end
end
|
272
|
+
|
273
|
+
# Applies one key/value pair from sphinx.yml to +object+.
#
# object  - a Hash of options, or an object exposing setter methods.
# key     - setting name (String or Symbol).
# value   - value to assign.
# allowed - names accepted when +object+ is a Hash; other keys are ignored.
#
# For non-Hash objects the value is assigned through "key=" on the object
# and on self, in each case only when that setter exists. Checking the
# setter (not the getter) keeps read-only attributes from raising
# NoMethodError.
def set_sphinx_setting(object, key, value, allowed = {})
  if object.is_a?(Hash)
    object[key.to_sym] = value if allowed.include?(key.to_s)
  else
    setter = "#{key}="
    object.send(setter, value) if object.respond_to?(setter)
    send(setter, value) if self.respond_to?(setter)
  end
end
|
281
|
+
end
|
282
|
+
end
|