thinking-sphinx 1.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENCE +20 -0
- data/README.textile +157 -0
- data/VERSION.yml +4 -0
- data/lib/thinking_sphinx.rb +211 -0
- data/lib/thinking_sphinx/active_record.rb +307 -0
- data/lib/thinking_sphinx/active_record/attribute_updates.rb +48 -0
- data/lib/thinking_sphinx/active_record/delta.rb +87 -0
- data/lib/thinking_sphinx/active_record/has_many_association.rb +28 -0
- data/lib/thinking_sphinx/active_record/scopes.rb +39 -0
- data/lib/thinking_sphinx/adapters/abstract_adapter.rb +42 -0
- data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
- data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +136 -0
- data/lib/thinking_sphinx/association.rb +164 -0
- data/lib/thinking_sphinx/attribute.rb +342 -0
- data/lib/thinking_sphinx/class_facet.rb +15 -0
- data/lib/thinking_sphinx/configuration.rb +282 -0
- data/lib/thinking_sphinx/core/array.rb +7 -0
- data/lib/thinking_sphinx/core/string.rb +15 -0
- data/lib/thinking_sphinx/deltas.rb +30 -0
- data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
- data/lib/thinking_sphinx/deltas/default_delta.rb +68 -0
- data/lib/thinking_sphinx/deltas/delayed_delta.rb +30 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
- data/lib/thinking_sphinx/deploy/capistrano.rb +100 -0
- data/lib/thinking_sphinx/excerpter.rb +22 -0
- data/lib/thinking_sphinx/facet.rb +125 -0
- data/lib/thinking_sphinx/facet_search.rb +134 -0
- data/lib/thinking_sphinx/field.rb +82 -0
- data/lib/thinking_sphinx/index.rb +99 -0
- data/lib/thinking_sphinx/index/builder.rb +286 -0
- data/lib/thinking_sphinx/index/faux_column.rb +110 -0
- data/lib/thinking_sphinx/property.rb +162 -0
- data/lib/thinking_sphinx/rails_additions.rb +150 -0
- data/lib/thinking_sphinx/search.rb +707 -0
- data/lib/thinking_sphinx/search_methods.rb +421 -0
- data/lib/thinking_sphinx/source.rb +150 -0
- data/lib/thinking_sphinx/source/internal_properties.rb +46 -0
- data/lib/thinking_sphinx/source/sql.rb +128 -0
- data/lib/thinking_sphinx/tasks.rb +165 -0
- data/rails/init.rb +14 -0
- data/spec/lib/thinking_sphinx/active_record/delta_spec.rb +130 -0
- data/spec/lib/thinking_sphinx/active_record/has_many_association_spec.rb +49 -0
- data/spec/lib/thinking_sphinx/active_record/scopes_spec.rb +96 -0
- data/spec/lib/thinking_sphinx/active_record_spec.rb +364 -0
- data/spec/lib/thinking_sphinx/association_spec.rb +239 -0
- data/spec/lib/thinking_sphinx/attribute_spec.rb +500 -0
- data/spec/lib/thinking_sphinx/configuration_spec.rb +268 -0
- data/spec/lib/thinking_sphinx/core/array_spec.rb +9 -0
- data/spec/lib/thinking_sphinx/core/string_spec.rb +9 -0
- data/spec/lib/thinking_sphinx/excerpter_spec.rb +49 -0
- data/spec/lib/thinking_sphinx/facet_search_spec.rb +176 -0
- data/spec/lib/thinking_sphinx/facet_spec.rb +333 -0
- data/spec/lib/thinking_sphinx/field_spec.rb +154 -0
- data/spec/lib/thinking_sphinx/index/builder_spec.rb +455 -0
- data/spec/lib/thinking_sphinx/index/faux_column_spec.rb +30 -0
- data/spec/lib/thinking_sphinx/index_spec.rb +45 -0
- data/spec/lib/thinking_sphinx/rails_additions_spec.rb +203 -0
- data/spec/lib/thinking_sphinx/search_methods_spec.rb +152 -0
- data/spec/lib/thinking_sphinx/search_spec.rb +1092 -0
- data/spec/lib/thinking_sphinx/source_spec.rb +227 -0
- data/spec/lib/thinking_sphinx_spec.rb +162 -0
- data/tasks/distribution.rb +50 -0
- data/tasks/rails.rake +1 -0
- data/tasks/testing.rb +83 -0
- data/vendor/after_commit/LICENSE +20 -0
- data/vendor/after_commit/README +16 -0
- data/vendor/after_commit/Rakefile +22 -0
- data/vendor/after_commit/init.rb +8 -0
- data/vendor/after_commit/lib/after_commit.rb +45 -0
- data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
- data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
- data/vendor/after_commit/test/after_commit_test.rb +53 -0
- data/vendor/delayed_job/lib/delayed/job.rb +251 -0
- data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
- data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
- data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
- data/vendor/riddle/lib/riddle.rb +30 -0
- data/vendor/riddle/lib/riddle/client.rb +635 -0
- data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
- data/vendor/riddle/lib/riddle/client/message.rb +66 -0
- data/vendor/riddle/lib/riddle/client/response.rb +84 -0
- data/vendor/riddle/lib/riddle/configuration.rb +33 -0
- data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
- data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
- data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
- data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
- data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
- data/vendor/riddle/lib/riddle/configuration/section.rb +43 -0
- data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
- data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
- data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
- data/vendor/riddle/lib/riddle/controller.rb +53 -0
- metadata +172 -0
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
module ThinkingSphinx
|
|
2
|
+
# Attributes - eternally useful when it comes to filtering, sorting or
|
|
3
|
+
# grouping. This class isn't really useful to you unless you're hacking
|
|
4
|
+
# around with the internals of Thinking Sphinx - but hey, don't let that
|
|
5
|
+
# stop you.
|
|
6
|
+
#
|
|
7
|
+
# One key thing to remember - if you're using the attribute manually to
|
|
8
|
+
# generate SQL statements, you'll need to set the base model, and all the
|
|
9
|
+
# associations. Which can get messy. Use Index.link!, it really helps.
|
|
10
|
+
#
|
|
11
|
+
class Attribute < ThinkingSphinx::Property
|
|
12
|
+
attr_accessor :query_source
|
|
13
|
+
|
|
14
|
+
# To create a new attribute, you'll need to pass in either a single Column
|
|
15
|
+
# or an array of them, and some (optional) options.
|
|
16
|
+
#
|
|
17
|
+
# Valid options are:
|
|
18
|
+
# - :as => :alias_name
|
|
19
|
+
# - :type => :attribute_type
|
|
20
|
+
# - :source => :field, :query, :ranged_query
|
|
21
|
+
#
|
|
22
|
+
# Alias is only required in three circumstances: when there's
|
|
23
|
+
# another attribute or field with the same name, when the column name is
|
|
24
|
+
# 'id', or when there's more than one column.
|
|
25
|
+
#
|
|
26
|
+
# Type is not required, unless you want to force a column to be a certain
|
|
27
|
+
# type (but keep in mind the value will not be CASTed in the SQL
|
|
28
|
+
# statements). The only time you really need to use this is when the type
|
|
29
|
+
# can't be figured out by the column - ie: when not actually using a
|
|
30
|
+
# database column as your source.
|
|
31
|
+
#
|
|
32
|
+
# Source is only used for multi-value attributes (MVA). By default this will
|
|
33
|
+
# use a left-join and a group_concat to obtain the values. For better performance
|
|
34
|
+
# during indexing it can be beneficial to let Sphinx use a separate query to retrieve
|
|
35
|
+
# all document,value-pairs.
|
|
36
|
+
# Either :query or :ranged_query will enable this feature, where :ranged_query will cause
|
|
37
|
+
# the query to be executed incrementally.
|
|
38
|
+
#
|
|
39
|
+
# Example usage:
|
|
40
|
+
#
|
|
41
|
+
# Attribute.new(
|
|
42
|
+
# Column.new(:created_at)
|
|
43
|
+
# )
|
|
44
|
+
#
|
|
45
|
+
# Attribute.new(
|
|
46
|
+
# Column.new(:posts, :id),
|
|
47
|
+
# :as => :post_ids
|
|
48
|
+
# )
|
|
49
|
+
#
|
|
50
|
+
# Attribute.new(
|
|
51
|
+
# Column.new(:posts, :id),
|
|
52
|
+
# :as => :post_ids,
|
|
53
|
+
# :source => :ranged_query
|
|
54
|
+
# )
|
|
55
|
+
#
|
|
56
|
+
# Attribute.new(
|
|
57
|
+
# [Column.new(:pages, :id), Column.new(:articles, :id)],
|
|
58
|
+
# :as => :content_ids
|
|
59
|
+
# )
|
|
60
|
+
#
|
|
61
|
+
# Attribute.new(
|
|
62
|
+
# Column.new("NOW()"),
|
|
63
|
+
# :as => :indexed_at,
|
|
64
|
+
# :type => :datetime
|
|
65
|
+
# )
|
|
66
|
+
#
|
|
67
|
+
# If you're creating attributes for latitude and longitude, don't forget
|
|
68
|
+
# that Sphinx expects these values to be in radians.
|
|
69
|
+
#
|
|
70
|
+
def initialize(source, columns, options = {})
|
|
71
|
+
super
|
|
72
|
+
|
|
73
|
+
@type = options[:type]
|
|
74
|
+
@query_source = options[:source]
|
|
75
|
+
@crc = options[:crc]
|
|
76
|
+
|
|
77
|
+
@type ||= :multi unless @query_source.nil?
|
|
78
|
+
if @type == :string && @crc
|
|
79
|
+
@type = is_many? ? :multi : :integer
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
source.attributes << self
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Get the part of the SELECT clause related to this attribute. Don't forget
|
|
86
|
+
# to set your model and associations first though.
|
|
87
|
+
#
|
|
88
|
+
# This will concatenate strings and arrays of integers, and convert
|
|
89
|
+
# datetimes to timestamps, as needed.
|
|
90
|
+
#
|
|
91
|
+
def to_select_sql
|
|
92
|
+
return nil unless include_as_association?
|
|
93
|
+
|
|
94
|
+
separator = all_ints? || all_datetimes? || @crc ? ',' : ' '
|
|
95
|
+
|
|
96
|
+
clause = @columns.collect { |column|
|
|
97
|
+
part = column_with_prefix(column)
|
|
98
|
+
case type
|
|
99
|
+
when :string
|
|
100
|
+
adapter.convert_nulls(part)
|
|
101
|
+
when :datetime
|
|
102
|
+
adapter.cast_to_datetime(part)
|
|
103
|
+
when :multi
|
|
104
|
+
adapter.convert_nulls(part, 0)
|
|
105
|
+
else
|
|
106
|
+
part
|
|
107
|
+
end
|
|
108
|
+
}.join(', ')
|
|
109
|
+
|
|
110
|
+
# clause = adapter.cast_to_datetime(clause) if type == :datetime
|
|
111
|
+
clause = adapter.crc(clause) if @crc
|
|
112
|
+
clause = adapter.concatenate(clause, separator) if concat_ws?
|
|
113
|
+
clause = adapter.group_concatenate(clause, separator) if is_many?
|
|
114
|
+
|
|
115
|
+
"#{clause} AS #{quote_column(unique_name)}"
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Translate this attribute's type into the Sphinx source setting used to
# declare it. Returns nil when the type has no mapping.
def type_to_config
  case type
  when :multi    then :sql_attr_multi
  when :datetime then :sql_attr_timestamp
  when :string   then :sql_attr_str2ordinal
  when :float    then :sql_attr_float
  when :boolean  then :sql_attr_bool
  when :integer  then :sql_attr_uint
  end
end
|
|
128
|
+
|
|
129
|
+
# True unless this is a multi-value attribute whose values Sphinx fetches
# through its own :query or :ranged_query source.
def include_as_association?
  return true unless type == :multi

  ![:query, :ranged_query].include?(query_source)
end
|
|
132
|
+
|
|
133
|
+
# The value written to the Sphinx configuration for this attribute.
#
# For every type except :multi this is simply the unique name. A
# multi-value attribute is declared as "uint NAME from ORIGIN", where
# ORIGIN is "field" when the values come from the main query, or the
# whitespace-collapsed result of source_value(offset, delta) otherwise.
#
def config_value(offset = nil, delta = false)
  return unique_name unless type == :multi

  origin = if include_as_association?
    "field"
  else
    source_value(offset, delta).gsub(/\s+/m, " ").strip
  end

  "uint #{unique_name} from #{origin}"
end
|
|
147
|
+
|
|
148
|
+
# Returns the type of the column. If that's not already set, it returns
|
|
149
|
+
# :multi if there's the possibility of more than one value, :string if
|
|
150
|
+
# there's more than one association, otherwise it figures out what the
|
|
151
|
+
# actual column's datatype is and returns that.
|
|
152
|
+
#
|
|
153
|
+
def type
|
|
154
|
+
@type ||= begin
|
|
155
|
+
base_type = case
|
|
156
|
+
when is_many_datetimes?
|
|
157
|
+
:datetime
|
|
158
|
+
when is_many?, is_many_ints?
|
|
159
|
+
:multi
|
|
160
|
+
when @associations.values.flatten.length > 1
|
|
161
|
+
:string
|
|
162
|
+
else
|
|
163
|
+
translated_type_from_database
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
if base_type == :string && @crc
|
|
167
|
+
base_type = :integer
|
|
168
|
+
else
|
|
169
|
+
@crc = false unless base_type == :multi && is_many_strings? && @crc
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
base_type
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
# Whether the attribute's type is one of :integer, :datetime or :boolean
# and the attribute is not a string (per is_string?).
def updatable?
  return false unless [:integer, :datetime, :boolean].include?(type)

  !is_string?
end
|
|
179
|
+
|
|
180
|
+
# Read this attribute's current value from a model instance.
#
# Walks the first column's association stack from the instance, calling
# each method in turn, then calls the column name on the final object.
# Returns nil as soon as any step of the stack yields nil, instead of
# raising NoMethodError on the missing association.
#
def live_value(instance)
  column = @columns.first
  object = instance

  column.__stack.each do |method|
    object = object.send(method)
    # A missing association means there is no value to read.
    return nil if object.nil?
  end

  object.send(column.__name)
end
|
|
186
|
+
|
|
187
|
+
def all_ints?
|
|
188
|
+
all_of_type?(:integer)
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
def all_datetimes?
|
|
192
|
+
all_of_type?(:datetime, :date, :timestamp)
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
def all_strings?
|
|
196
|
+
all_of_type?(:string, :text)
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
private
|
|
200
|
+
|
|
201
|
+
# Build the source description for a multi-value attribute that Sphinx
# loads separately from the main query.
#
# When is_string? is true the result is just the dasherized query source
# followed by the first column's name. Otherwise the SQL from query(offset)
# is used: for :ranged_query it gains the $start/$end clause and the range
# query is appended; for anything else it is emitted as a plain query. When
# delta is truthy, the delta condition is added to the SQL in either case.
#
def source_value(offset, delta)
  return "#{query_source.to_s.dasherize}; #{columns.first.__name}" if is_string?

  sql = query(offset)

  case query_source
  when :ranged_query
    sql += query_clause
    sql += " AND #{query_delta.strip}" if delta
    "ranged-query; #{sql}; #{range_query}"
  else
    sql += "WHERE #{query_delta.strip}" if delta
    "query; #{sql}"
  end
end
|
|
217
|
+
|
|
218
|
+
def query(offset)
|
|
219
|
+
base_assoc = base_association_for_mva
|
|
220
|
+
end_assoc = end_association_for_mva
|
|
221
|
+
raise "Could not determine SQL for MVA" if base_assoc.nil?
|
|
222
|
+
|
|
223
|
+
<<-SQL
|
|
224
|
+
SELECT #{foreign_key_for_mva base_assoc}
|
|
225
|
+
#{ThinkingSphinx.unique_id_expression(offset)} AS #{quote_column('id')},
|
|
226
|
+
#{primary_key_for_mva(end_assoc)} AS #{quote_column(unique_name)}
|
|
227
|
+
FROM #{quote_table_name base_assoc.table} #{association_joins}
|
|
228
|
+
SQL
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
def query_clause
|
|
232
|
+
foreign_key = foreign_key_for_mva base_association_for_mva
|
|
233
|
+
"WHERE #{foreign_key} >= $start AND #{foreign_key} <= $end"
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
def query_delta
|
|
237
|
+
foreign_key = foreign_key_for_mva base_association_for_mva
|
|
238
|
+
<<-SQL
|
|
239
|
+
#{foreign_key} IN (SELECT #{quote_column model.primary_key}
|
|
240
|
+
FROM #{model.quoted_table_name}
|
|
241
|
+
WHERE #{@source.index.delta_object.clause(model, true)})
|
|
242
|
+
SQL
|
|
243
|
+
end
|
|
244
|
+
|
|
245
|
+
def range_query
|
|
246
|
+
assoc = base_association_for_mva
|
|
247
|
+
foreign_key = foreign_key_for_mva assoc
|
|
248
|
+
"SELECT MIN(#{foreign_key}), MAX(#{foreign_key}) FROM #{quote_table_name assoc.table}"
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
def primary_key_for_mva(assoc)
|
|
252
|
+
quote_with_table(
|
|
253
|
+
assoc.table, assoc.primary_key_from_reflection || columns.first.__name
|
|
254
|
+
)
|
|
255
|
+
end
|
|
256
|
+
|
|
257
|
+
def foreign_key_for_mva(assoc)
|
|
258
|
+
quote_with_table assoc.table, assoc.reflection.primary_key_name
|
|
259
|
+
end
|
|
260
|
+
|
|
261
|
+
def end_association_for_mva
|
|
262
|
+
@association_for_mva ||= associations[columns.first].detect { |assoc|
|
|
263
|
+
assoc.has_column?(columns.first.__name)
|
|
264
|
+
}
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
# The association at the top of the parent chain that starts at
# end_association_for_mva. The result is memoized once it is non-nil.
#
# Returns nil when there is no end association, rather than raising
# NoMethodError on nil, so that callers can check for nil themselves.
#
def base_association_for_mva
  @first_association_for_mva ||= begin
    assoc = end_association_for_mva
    assoc = assoc.parent until assoc.nil? || assoc.parent.nil?
    assoc
  end
end
|
|
277
|
+
|
|
278
|
+
# Collect the join SQL for every association between the end association
# and the base association (the base itself is excluded), starting from the
# end association, and return the fragments joined by single spaces.
def association_joins
  current = end_association_for_mva
  root    = base_association_for_mva
  fragments = []

  until current == root
    fragments << current.to_sql
    current = current.parent
  end

  fragments.join(' ')
end
|
|
288
|
+
|
|
289
|
+
def is_many_ints?
|
|
290
|
+
concat_ws? && all_ints?
|
|
291
|
+
end
|
|
292
|
+
|
|
293
|
+
def is_many_datetimes?
|
|
294
|
+
is_many? && all_datetimes?
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
def is_many_strings?
|
|
298
|
+
is_many? && all_strings?
|
|
299
|
+
end
|
|
300
|
+
|
|
301
|
+
# Look up the database type of this attribute's column.
#
# The class inspected is the reflected class of the first association (if
# there is one), otherwise the model. Returns the type of the first of that
# class's columns whose name matches any of the attribute's column names,
# or nil when none match.
#
def type_from_database
  first_assoc = @associations.values.flatten.first
  klass       = first_assoc ? first_assoc.reflection.klass : @model

  # Build the list of wanted names once rather than per database column.
  wanted = @columns.collect { |c| c.__name.to_s }

  column = klass.columns.detect { |col| wanted.include?(col.name) }
  column.nil? ? nil : column.type
end
|
|
310
|
+
|
|
311
|
+
def translated_type_from_database
|
|
312
|
+
case type_from_db = type_from_database
|
|
313
|
+
when :datetime, :string, :float, :boolean, :integer
|
|
314
|
+
type_from_db
|
|
315
|
+
when :decimal
|
|
316
|
+
:float
|
|
317
|
+
when :timestamp, :date
|
|
318
|
+
:datetime
|
|
319
|
+
else
|
|
320
|
+
raise <<-MESSAGE
|
|
321
|
+
|
|
322
|
+
Cannot automatically map attribute #{unique_name} in #{@model.name} to an
|
|
323
|
+
equivalent Sphinx type (integer, float, boolean, datetime, string as ordinal).
|
|
324
|
+
You could try to explicitly convert the column's value in your define_index
|
|
325
|
+
block:
|
|
326
|
+
has "CAST(column AS INT)", :type => :integer, :as => :column
|
|
327
|
+
MESSAGE
|
|
328
|
+
end
|
|
329
|
+
end
|
|
330
|
+
|
|
331
|
+
# True when every column of this attribute maps, in every class it is read
# from, to a database column whose type is one of column_types.
#
# A column is read from the reflected classes of its associations, or from
# the model when it has none (including when it has no entry at all). A
# column that cannot be found in a class counts as a mismatch.
#
def all_of_type?(*column_types)
  @columns.all? do |col|
    name   = col.__name.to_s
    assocs = @associations[col] || []

    klasses = if assocs.empty?
      [@model]
    else
      assocs.collect { |assoc| assoc.reflection.klass }
    end

    klasses.all? do |klass|
      db_column = klass.columns.detect { |candidate| candidate.name == name }
      !db_column.nil? && column_types.include?(db_column.type)
    end
  end
end
|
|
341
|
+
end
|
|
342
|
+
end
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
require 'erb'
|
|
2
|
+
require 'singleton'
|
|
3
|
+
|
|
4
|
+
module ThinkingSphinx
|
|
5
|
+
# This class both keeps track of the configuration settings for Sphinx and
|
|
6
|
+
# also generates the resulting file for Sphinx to use.
|
|
7
|
+
#
|
|
8
|
+
# Here are the default settings, relative to RAILS_ROOT where relevant:
|
|
9
|
+
#
|
|
10
|
+
# config file:: config/#{environment}.sphinx.conf
|
|
11
|
+
# searchd log file:: log/searchd.log
|
|
12
|
+
# query log file:: log/searchd.query.log
|
|
13
|
+
# pid file:: log/searchd.#{environment}.pid
|
|
14
|
+
# searchd files:: db/sphinx/#{environment}/
|
|
15
|
+
# address:: 127.0.0.1
|
|
16
|
+
# port:: 3312
|
|
17
|
+
# allow star:: false
|
|
18
|
+
# min prefix length:: 1
|
|
19
|
+
# min infix length:: 1
|
|
20
|
+
# mem limit:: 64M
|
|
21
|
+
# max matches:: 1000
|
|
22
|
+
# morphology:: nil
|
|
23
|
+
# charset type:: utf-8
|
|
24
|
+
# charset table:: nil
|
|
25
|
+
# ignore chars:: nil
|
|
26
|
+
# html strip:: false
|
|
27
|
+
# html remove elements:: ''
|
|
28
|
+
# searchd_binary_name:: searchd
|
|
29
|
+
# indexer_binary_name:: indexer
|
|
30
|
+
#
|
|
31
|
+
# If you want to change these settings, create a YAML file at
|
|
32
|
+
# config/sphinx.yml with settings for each environment, in a similar
|
|
33
|
+
# fashion to database.yml - using the following keys: config_file,
|
|
34
|
+
# searchd_log_file, query_log_file, pid_file, searchd_file_path, port,
|
|
35
|
+
# allow_star, enable_star, min_prefix_len, min_infix_len, mem_limit,
|
|
36
|
+
# max_matches, morphology, charset_type, charset_table, ignore_chars,
|
|
37
|
+
# html_strip, html_remove_elements, delayed_job_priority,
|
|
38
|
+
# searchd_binary_name, indexer_binary_name.
|
|
39
|
+
#
|
|
40
|
+
# I think you've got the idea.
|
|
41
|
+
#
|
|
42
|
+
# Each setting in the YAML file is optional - so only put in the ones you
|
|
43
|
+
# want to change.
|
|
44
|
+
#
|
|
45
|
+
# Keep in mind, if for some particular reason you're using a version of
|
|
46
|
+
# Sphinx older than 0.9.8 r871 (that's prior to the proper 0.9.8 release),
|
|
47
|
+
# don't set allow_star to true.
|
|
48
|
+
#
|
|
49
|
+
class Configuration
|
|
50
|
+
include Singleton
|
|
51
|
+
|
|
52
|
+
SourceOptions = %w( mysql_connect_flags sql_range_step sql_query_pre
|
|
53
|
+
sql_query_post sql_ranged_throttle sql_query_post_index )
|
|
54
|
+
|
|
55
|
+
IndexOptions = %w( charset_table charset_type docinfo enable_star
|
|
56
|
+
exceptions html_index_attrs html_remove_elements html_strip ignore_chars
|
|
57
|
+
min_infix_len min_prefix_len min_word_len mlock morphology ngram_chars
|
|
58
|
+
ngram_len phrase_boundary phrase_boundary_step preopen stopwords
|
|
59
|
+
wordforms )
|
|
60
|
+
|
|
61
|
+
CustomOptions = %w( disable_range )
|
|
62
|
+
|
|
63
|
+
attr_accessor :config_file, :searchd_log_file, :query_log_file,
|
|
64
|
+
:pid_file, :searchd_file_path, :address, :port, :allow_star,
|
|
65
|
+
:database_yml_file, :app_root, :bin_path, :model_directories,
|
|
66
|
+
:delayed_job_priority, :searchd_binary_name, :indexer_binary_name
|
|
67
|
+
|
|
68
|
+
attr_accessor :source_options, :index_options
|
|
69
|
+
|
|
70
|
+
attr_reader :environment, :configuration
|
|
71
|
+
|
|
72
|
+
# Set up the configuration: reset applies the default settings and then
# parses config/sphinx.yml for the current environment.
#
# app_root is the fallback application root, defaulting to the current
# working directory. It is stored before calling reset so that it is
# actually used: reset only overrides it when RAILS_ROOT or Merb is
# defined, and previously the argument was discarded, leaving the root nil
# outside of those frameworks.
#
def initialize(app_root = Dir.pwd)
  self.app_root = app_root
  self.reset
end
|
|
78
|
+
|
|
79
|
+
def self.configure(&block)
|
|
80
|
+
yield instance
|
|
81
|
+
instance.reset(instance.app_root)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def reset(custom_app_root=nil)
|
|
85
|
+
if custom_app_root
|
|
86
|
+
self.app_root = custom_app_root
|
|
87
|
+
else
|
|
88
|
+
self.app_root = RAILS_ROOT if defined?(RAILS_ROOT)
|
|
89
|
+
self.app_root = Merb.root if defined?(Merb)
|
|
90
|
+
self.app_root ||= app_root
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
@configuration = Riddle::Configuration.new
|
|
94
|
+
@configuration.searchd.address = "127.0.0.1"
|
|
95
|
+
@configuration.searchd.port = 3312
|
|
96
|
+
@configuration.searchd.pid_file = "#{self.app_root}/log/searchd.#{environment}.pid"
|
|
97
|
+
@configuration.searchd.log = "#{self.app_root}/log/searchd.log"
|
|
98
|
+
@configuration.searchd.query_log = "#{self.app_root}/log/searchd.query.log"
|
|
99
|
+
|
|
100
|
+
self.database_yml_file = "#{self.app_root}/config/database.yml"
|
|
101
|
+
self.config_file = "#{self.app_root}/config/#{environment}.sphinx.conf"
|
|
102
|
+
self.searchd_file_path = "#{self.app_root}/db/sphinx/#{environment}"
|
|
103
|
+
self.allow_star = false
|
|
104
|
+
self.bin_path = ""
|
|
105
|
+
self.model_directories = ["#{app_root}/app/models/"] +
|
|
106
|
+
Dir.glob("#{app_root}/vendor/plugins/*/app/models/")
|
|
107
|
+
self.delayed_job_priority = 0
|
|
108
|
+
|
|
109
|
+
self.source_options = {}
|
|
110
|
+
self.index_options = {
|
|
111
|
+
:charset_type => "utf-8"
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
self.searchd_binary_name = "searchd"
|
|
115
|
+
self.indexer_binary_name = "indexer"
|
|
116
|
+
|
|
117
|
+
parse_config
|
|
118
|
+
|
|
119
|
+
self
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
def self.environment
|
|
123
|
+
@@environment ||= (
|
|
124
|
+
defined?(Merb) ? Merb.environment : ENV['RAILS_ENV']
|
|
125
|
+
) || "development"
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def environment
|
|
129
|
+
self.class.environment
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def controller
|
|
133
|
+
@controller ||= Riddle::Controller.new(@configuration, self.config_file)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Generate the config file for Sphinx by using all the settings defined and
|
|
137
|
+
# looping through all the models with indexes to build the relevant
|
|
138
|
+
# indexer and searchd configuration, and sources and indexes details.
|
|
139
|
+
#
|
|
140
|
+
# Write the rendered Sphinx configuration to file_path, defaulting to the
# configured config_file.
#
# Models are loaded first, then the index list is rebuilt from scratch out
# of every indexed model before rendering.
#
def build(file_path=nil)
  load_models
  file_path ||= "#{config_file}"

  @configuration.indexes.clear

  ThinkingSphinx.indexed_models.each_with_index do |model, model_index|
    @configuration.indexes.concat model.constantize.to_riddle(model_index)
  end

  # File.open always treats the argument as a path; Kernel#open would run
  # a path starting with "|" as a command.
  File.open(file_path, "w") do |file|
    file.write @configuration.render
  end
end
|
|
154
|
+
|
|
155
|
+
# Make sure all models are loaded - without reloading any that
|
|
156
|
+
# ActiveRecord::Base is already aware of (otherwise we start to hit some
|
|
157
|
+
# messy dependencies issues).
|
|
158
|
+
#
|
|
159
|
+
# Load every model file found under the model directories, skipping any
# model that ActiveRecord::Base already lists as a subclass.
#
# Does nothing when running under a Rails version above 2.1 with class
# caching enabled.
#
def load_models
  return if defined?(Rails) &&
    Rails::VERSION::STRING.to_f > 2.1 &&
    Rails.configuration.cache_classes

  self.model_directories.each do |base|
    # The directory is a literal path, so escape it before using it in the
    # pattern - otherwise characters such as "." or "+" change the match.
    path_pattern = /^#{Regexp.escape(base)}([\w_\/\\]+)\.rb/

    Dir["#{base}**/*.rb"].each do |file|
      model_name = file.gsub(path_pattern, '\1')

      # Class names are camelized, so compare against the camelized path;
      # comparing against the raw path never matches.
      camelized_model = model_name.camelize
      next if ::ActiveRecord::Base.send(:subclasses).detect { |model|
        model.name == camelized_model
      }

      begin
        model_name.camelize.constantize
      rescue LoadError
        # Drop the leading directories and try again with the bare name;
        # give up once there is nothing left to strip.
        model_name.gsub!(/.*[\/\\]/, '').nil? ? next : retry
      rescue NameError
        next
      rescue StandardError
        puts "Warning: Error loading #{file}"
      end
    end
  end
end
|
|
185
|
+
|
|
186
|
+
def address
|
|
187
|
+
@configuration.searchd.address
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
def address=(address)
|
|
191
|
+
@configuration.searchd.address = address
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
def port
|
|
195
|
+
@configuration.searchd.port
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
def port=(port)
|
|
199
|
+
@configuration.searchd.port = port
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
def pid_file
|
|
203
|
+
@configuration.searchd.pid_file
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
def pid_file=(pid_file)
|
|
207
|
+
@configuration.searchd.pid_file = pid_file
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
def searchd_log_file
|
|
211
|
+
@configuration.searchd.log
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
def searchd_log_file=(file)
|
|
215
|
+
@configuration.searchd.log = file
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def query_log_file
|
|
219
|
+
@configuration.searchd.query_log
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
def query_log_file=(file)
|
|
223
|
+
@configuration.searchd.query_log = file
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
# A new Riddle client for the configured address and port, with its
# max_matches taken from the searchd settings (1000 when that is unset).
def client
  connection = Riddle::Client.new(address, port)
  limit = configuration.searchd.max_matches
  connection.max_matches = limit ? limit : 1000
  connection
end
|
|
231
|
+
|
|
232
|
+
# A memoized hash from CRC32 value to model name, covering every indexed
# model and each class returned by Object.subclasses_of for that model.
def models_by_crc
  return @models_by_crc if @models_by_crc

  lookup = {}
  ThinkingSphinx.indexed_models.each do |model|
    lookup[model.constantize.to_crc32] = model
    Object.subclasses_of(model.constantize).each do |subclass|
      lookup[subclass.to_crc32] = subclass.name
    end
  end

  @models_by_crc = lookup
end
|
|
243
|
+
|
|
244
|
+
private
|
|
245
|
+
|
|
246
|
+
# Parse the config/sphinx.yml file - if it exists - then use the attribute
|
|
247
|
+
# accessors to set the appropriate values. Nothing too clever.
|
|
248
|
+
#
|
|
249
|
+
# Read config/sphinx.yml under the application root, if it exists, and
# apply the settings listed for the current environment.
#
# The file is run through ERB before being parsed as YAML. Each setting is
# offered to this object's own writer, to the source and index option
# hashes (filtered by the allowed option lists), and to the searchd and
# indexer sections. Afterwards bin_path is given a trailing slash, and
# allow_star forces the enable_star and min_prefix_len index options.
#
def parse_config
  path = "#{app_root}/config/sphinx.yml"
  # File.exists? no longer exists in current Ruby versions.
  return unless File.exist?(path)

  # An empty file parses to a non-Hash value; treat that as "no settings"
  # rather than failing on the environment lookup.
  settings = YAML::load(ERB.new(IO.read(path)).result)
  conf = settings.is_a?(Hash) ? settings[environment] : nil

  conf.each do |key,value|
    self.send("#{key}=", value) if self.respond_to?("#{key}=")

    set_sphinx_setting self.source_options, key, value, SourceOptions
    set_sphinx_setting self.index_options,  key, value, IndexOptions
    set_sphinx_setting self.index_options,  key, value, CustomOptions
    set_sphinx_setting @configuration.searchd, key, value
    set_sphinx_setting @configuration.indexer, key, value
  end unless conf.nil?

  # Only add the separator when it is missing, so a configured path that
  # already ends in a slash is left alone.
  unless self.bin_path.blank? || self.bin_path.end_with?('/')
    self.bin_path += '/'
  end

  if self.allow_star
    self.index_options[:enable_star]    = true
    self.index_options[:min_prefix_len] = 1
  end
end
|
|
272
|
+
|
|
273
|
+
# Apply one key/value setting to object.
#
# When object is a Hash, the value is stored under the symbolized key, but
# only if the key's string form is listed in allowed. For any other object
# the value is passed to the "key=" writer of the object, and of this
# configuration, wherever that writer exists.
#
# The writer itself is what gets checked: checking for the reader instead
# raised NoMethodError for any key naming a read-only method.
#
def set_sphinx_setting(object, key, value, allowed = [])
  if object.is_a?(Hash)
    object[key.to_sym] = value if allowed.include?(key.to_s)
  else
    setter = "#{key}="
    object.send(setter, value) if object.respond_to?(setter)
    send(setter, value)        if self.respond_to?(setter)
  end
end
|
|
281
|
+
end
|
|
282
|
+
end
|