sherpa99-thinking-sphinx 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENCE +20 -0
- data/README +107 -0
- data/README.textile +107 -0
- data/Rakefile +4 -0
- data/contribute.rb +328 -0
- data/cucumber.yml +1 -0
- data/features/a.rb +17 -0
- data/features/attribute_transformation.feature +22 -0
- data/features/datetime_deltas.feature +55 -0
- data/features/delayed_delta_indexing.feature +37 -0
- data/features/deleting_instances.feature +52 -0
- data/features/facets.feature +26 -0
- data/features/handling_edits.feature +67 -0
- data/features/retry_stale_indexes.feature +24 -0
- data/features/searching_across_models.feature +20 -0
- data/features/searching_by_model.feature +118 -0
- data/features/searching_with_find_arguments.feature +56 -0
- data/features/sphinx_detection.feature +16 -0
- data/features/step_definitions/alpha_steps.rb +3 -0
- data/features/step_definitions/beta_steps.rb +11 -0
- data/features/step_definitions/cat_steps.rb +3 -0
- data/features/step_definitions/common_steps.rb +154 -0
- data/features/step_definitions/datetime_delta_steps.rb +11 -0
- data/features/step_definitions/delayed_delta_indexing_steps.rb +7 -0
- data/features/step_definitions/facet_steps.rb +30 -0
- data/features/step_definitions/find_arguments_steps.rb +36 -0
- data/features/step_definitions/gamma_steps.rb +15 -0
- data/features/step_definitions/search_steps.rb +66 -0
- data/features/step_definitions/sphinx_steps.rb +23 -0
- data/features/support/db/active_record.rb +40 -0
- data/features/support/db/database.example.yml +4 -0
- data/features/support/db/migrations/create_alphas.rb +18 -0
- data/features/support/db/migrations/create_animals.rb +9 -0
- data/features/support/db/migrations/create_betas.rb +15 -0
- data/features/support/db/migrations/create_boxes.rb +13 -0
- data/features/support/db/migrations/create_comments.rb +13 -0
- data/features/support/db/migrations/create_delayed_betas.rb +28 -0
- data/features/support/db/migrations/create_developers.rb +39 -0
- data/features/support/db/migrations/create_gammas.rb +14 -0
- data/features/support/db/migrations/create_people.rb +1014 -0
- data/features/support/db/migrations/create_posts.rb +6 -0
- data/features/support/db/migrations/create_thetas.rb +16 -0
- data/features/support/db/mysql.rb +4 -0
- data/features/support/db/postgresql.rb +4 -0
- data/features/support/env.rb +6 -0
- data/features/support/models/alpha.rb +9 -0
- data/features/support/models/animal.rb +5 -0
- data/features/support/models/beta.rb +7 -0
- data/features/support/models/box.rb +8 -0
- data/features/support/models/cat.rb +3 -0
- data/features/support/models/comment.rb +3 -0
- data/features/support/models/delayed_beta.rb +7 -0
- data/features/support/models/developer.rb +8 -0
- data/features/support/models/gamma.rb +5 -0
- data/features/support/models/person.rb +8 -0
- data/features/support/models/post.rb +8 -0
- data/features/support/models/theta.rb +7 -0
- data/features/support/post_database.rb +37 -0
- data/features/support/z.rb +19 -0
- data/ginger_scenarios.rb +24 -0
- data/init.rb +12 -0
- data/lib/thinking_sphinx.rb +144 -0
- data/lib/thinking_sphinx/active_record.rb +245 -0
- data/lib/thinking_sphinx/active_record/delta.rb +74 -0
- data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
- data/lib/thinking_sphinx/active_record/search.rb +57 -0
- data/lib/thinking_sphinx/adapters/abstract_adapter.rb +34 -0
- data/lib/thinking_sphinx/adapters/mysql_adapter.rb +53 -0
- data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +129 -0
- data/lib/thinking_sphinx/association.rb +144 -0
- data/lib/thinking_sphinx/attribute.rb +258 -0
- data/lib/thinking_sphinx/collection.rb +142 -0
- data/lib/thinking_sphinx/configuration.rb +236 -0
- data/lib/thinking_sphinx/core/string.rb +22 -0
- data/lib/thinking_sphinx/deltas.rb +22 -0
- data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
- data/lib/thinking_sphinx/deltas/default_delta.rb +65 -0
- data/lib/thinking_sphinx/deltas/delayed_delta.rb +25 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
- data/lib/thinking_sphinx/facet.rb +58 -0
- data/lib/thinking_sphinx/facet_collection.rb +44 -0
- data/lib/thinking_sphinx/field.rb +172 -0
- data/lib/thinking_sphinx/index.rb +414 -0
- data/lib/thinking_sphinx/index/builder.rb +233 -0
- data/lib/thinking_sphinx/index/faux_column.rb +110 -0
- data/lib/thinking_sphinx/rails_additions.rb +133 -0
- data/lib/thinking_sphinx/search.rb +638 -0
- data/lib/thinking_sphinx/tasks.rb +128 -0
- data/rails/init.rb +6 -0
- data/spec/fixtures/data.sql +32 -0
- data/spec/fixtures/database.yml.default +3 -0
- data/spec/fixtures/models.rb +81 -0
- data/spec/fixtures/structure.sql +84 -0
- data/spec/spec_helper.rb +54 -0
- data/spec/sphinx_helper.rb +109 -0
- data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
- data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
- data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
- data/spec/unit/thinking_sphinx/active_record_spec.rb +256 -0
- data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
- data/spec/unit/thinking_sphinx/attribute_spec.rb +212 -0
- data/spec/unit/thinking_sphinx/collection_spec.rb +14 -0
- data/spec/unit/thinking_sphinx/configuration_spec.rb +136 -0
- data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
- data/spec/unit/thinking_sphinx/field_spec.rb +145 -0
- data/spec/unit/thinking_sphinx/index/builder_spec.rb +5 -0
- data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
- data/spec/unit/thinking_sphinx/index_spec.rb +54 -0
- data/spec/unit/thinking_sphinx/search_spec.rb +59 -0
- data/spec/unit/thinking_sphinx_spec.rb +129 -0
- data/tasks/distribution.rb +48 -0
- data/tasks/rails.rake +1 -0
- data/tasks/testing.rb +86 -0
- data/thinking-sphinx.gemspec +232 -0
- data/vendor/after_commit/LICENSE +20 -0
- data/vendor/after_commit/README +16 -0
- data/vendor/after_commit/Rakefile +22 -0
- data/vendor/after_commit/init.rb +5 -0
- data/vendor/after_commit/lib/after_commit.rb +42 -0
- data/vendor/after_commit/lib/after_commit/active_record.rb +91 -0
- data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
- data/vendor/after_commit/test/after_commit_test.rb +53 -0
- data/vendor/delayed_job/lib/delayed/job.rb +251 -0
- data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
- data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
- data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
- data/vendor/riddle/lib/riddle.rb +30 -0
- data/vendor/riddle/lib/riddle/client.rb +619 -0
- data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
- data/vendor/riddle/lib/riddle/client/message.rb +65 -0
- data/vendor/riddle/lib/riddle/client/response.rb +84 -0
- data/vendor/riddle/lib/riddle/configuration.rb +33 -0
- data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
- data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
- data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
- data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
- data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
- data/vendor/riddle/lib/riddle/configuration/section.rb +37 -0
- data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
- data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
- data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
- data/vendor/riddle/lib/riddle/controller.rb +44 -0
- metadata +248 -0
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
module ThinkingSphinx
  # Attributes - eternally useful when it comes to filtering, sorting or
  # grouping. This class isn't really useful to you unless you're hacking
  # around with the internals of Thinking Sphinx - but hey, don't let that
  # stop you.
  #
  # One key thing to remember - if you're using the attribute manually to
  # generate SQL statements, you'll need to set the base model, and all the
  # associations. Which can get messy. Use Index.link!, it really helps.
  #
  class Attribute
    attr_accessor :alias, :columns, :associations, :model, :faceted

    # To create a new attribute, you'll need to pass in either a single Column
    # or an array of them, and some (optional) options.
    #
    # Valid options are:
    # - :as    => :alias_name
    # - :type  => :attribute_type
    # - :facet => true (makes #to_facet return a Facet for this attribute)
    #
    # Alias is only required in three circumstances: when there's
    # another attribute or field with the same name, when the column name is
    # 'id', or when there's more than one column.
    #
    # Type is not required, unless you want to force a column to be a certain
    # type (but keep in mind the value will not be CASTed in the SQL
    # statements). The only time you really need to use this is when the type
    # can't be figured out by the column - ie: when not actually using a
    # database column as your source.
    #
    # Example usage:
    #
    #   Attribute.new(
    #     Column.new(:created_at)
    #   )
    #
    #   Attribute.new(
    #     Column.new(:posts, :id),
    #     :as => :post_ids
    #   )
    #
    #   Attribute.new(
    #     [Column.new(:pages, :id), Column.new(:articles, :id)],
    #     :as => :content_ids
    #   )
    #
    #   Attribute.new(
    #     Column.new("NOW()"),
    #     :as   => :indexed_at,
    #     :type => :datetime
    #   )
    #
    # If you're creating attributes for latitude and longitude, don't forget
    # that Sphinx expects these values to be in radians.
    #
    # Raises a RuntimeError when no columns are given, or when any of the
    # given objects does not respond to __stack.
    #
    def initialize(columns, options = {})
      @columns      = Array(columns)
      @associations = {}

      raise "Cannot define a field with no columns. Maybe you are trying to index a field with a reserved name (id, name). You can fix this error by using a symbol rather than a bare name (:id instead of id)." if @columns.empty? || @columns.any? { |column| !column.respond_to?(:__stack) }

      @alias   = options[:as]
      @type    = options[:type]
      @faceted = options[:facet]
    end

    # Get the part of the SELECT clause related to this attribute. Don't forget
    # to set your model and associations first though.
    #
    # This will concatenate strings and arrays of integers, and convert
    # datetimes to timestamps, as needed.
    #
    def to_select_sql
      clause = @columns.collect { |column|
        column_with_prefix(column)
      }.join(', ')

      # Integer collections are comma separated, anything else uses spaces.
      separator = all_ints? ? ',' : ' '

      clause = adapter.concatenate(clause, separator)       if concat_ws?
      clause = adapter.group_concatenate(clause, separator) if is_many?
      clause = adapter.cast_to_datetime(clause)             if type == :datetime
      clause = adapter.convert_nulls(clause)                if type == :string

      "#{clause} AS #{quote_column(unique_name)}"
    end

    # Get the part of the GROUP BY clause related to this attribute - if one is
    # needed. If not, all you'll get back is nil. The latter will happen if
    # there isn't actually a real column to get data from, or if there's
    # multiple data values (read: a has_many or has_and_belongs_to_many
    # association), or if ThinkingSphinx.use_group_by_shortcut? is true.
    # Otherwise an array of prefixed column references is returned.
    #
    def to_group_sql
      case
      when is_many?, is_string?, ThinkingSphinx.use_group_by_shortcut?
        nil
      else
        @columns.collect { |column|
          column_with_prefix(column)
        }
      end
    end

    # The Sphinx source setting name that matches this attribute's type, or
    # nil when the type has no entry in the mapping.
    #
    def type_to_config
      {
        :multi    => :sql_attr_multi,
        :datetime => :sql_attr_timestamp,
        :string   => :sql_attr_str2ordinal,
        :float    => :sql_attr_float,
        :boolean  => :sql_attr_bool,
        :integer  => :sql_attr_uint
      }[type]
    end

    # The value to go with the setting from type_to_config: multi-value
    # attributes get the "uint NAME from field" form, everything else is just
    # the unique name.
    #
    def config_value
      if type == :multi
        "uint #{unique_name} from field"
      else
        unique_name
      end
    end

    # Returns the unique name of the attribute - which is either the alias of
    # the attribute, or the name of the only column - if there is only one. If
    # there isn't, there should be an alias. Else things probably won't work.
    # Consider yourself warned.
    #
    def unique_name
      if @columns.length == 1
        @alias || @columns.first.__name
      else
        @alias
      end
    end

    # Returns the type of the column. If that's not already set, it returns
    # :multi if there's the possibility of more than one value, :string if
    # there's more than one association, otherwise it figures out what the
    # actual column's datatype is and returns that. The result is memoized.
    #
    # Raises a RuntimeError if the database column type cannot be mapped to a
    # Sphinx type (see translated_type_from_database).
    #
    def type
      @type ||= case
      when is_many?, is_many_ints?
        :multi
      when @associations.values.flatten.length > 1
        :string
      else
        translated_type_from_database
      end
    end

    # Returns a ThinkingSphinx::Facet wrapping this attribute, or nil if the
    # attribute was not flagged with :facet.
    #
    def to_facet
      return nil unless @faceted

      ThinkingSphinx::Facet.new(self)
    end

    private

    # The database adapter provided by the model (memoized).
    #
    def adapter
      @adapter ||= @model.sphinx_database_adapter
    end

    # Quotes a column name using the model's database connection.
    #
    def quote_column(column)
      @model.connection.quote_column_name(column)
    end

    # Indication of whether the columns should be concatenated with a space
    # between each value. True if there's either multiple sources or multiple
    # associations.
    #
    def concat_ws?
      multiple_associations? || @columns.length > 1
    end

    # Checks whether any column requires multiple associations (which only
    # happens for polymorphic situations).
    #
    def multiple_associations?
      associations.any? { |col, assocs| assocs.length > 1 }
    end

    # Builds a column reference tied to the appropriate associations. This
    # dives into the associations hash and their corresponding joins to
    # figure out how to correctly reference a column in SQL.
    #
    def column_with_prefix(column)
      if column.is_string?
        # Raw SQL snippets are used exactly as given.
        column.__name
      elsif associations[column].empty?
        "#{@model.quoted_table_name}.#{quote_column(column.__name)}"
      else
        associations[column].collect { |assoc|
          # Associations without the column contribute nothing.
          next unless assoc.has_column?(column.__name)

          "#{@model.connection.quote_table_name(assoc.join.aliased_table_name)}" +
          ".#{quote_column(column.__name)}"
        }.compact.join(', ')
      end
    end

    # Could there be more than one value related to the parent record? If so,
    # then this will return true. If not, false. It's that simple.
    #
    def is_many?
      associations.values.flatten.any? { |assoc| assoc.is_many? }
    end

    # True when several values are concatenated and all of them are integers.
    #
    def is_many_ints?
      concat_ws? && all_ints?
    end

    # Returns true if all of the columns are string values, instead of database
    # column references.
    #
    def is_string?
      columns.all? { |col| col.is_string? }
    end

    # True when every column maps to an integer database column in every
    # class it is read from (the model itself, or the classes of the column's
    # associations).
    #
    def all_ints?
      @columns.all? { |col|
        klasses = @associations[col].empty? ? [@model] :
          @associations[col].collect { |assoc| assoc.reflection.klass }
        klasses.all? { |klass|
          db_column = klass.columns.detect { |candidate|
            candidate.name == col.__name.to_s
          }
          !db_column.nil? && db_column.type == :integer
        }
      }
    end

    # Looks up the type of the first database column whose name matches one
    # of this attribute's columns. The class inspected is the first
    # association's class if there is one, otherwise the model. Returns nil
    # when no database column matches.
    #
    def type_from_database
      first_association = @associations.values.flatten.first
      klass = first_association ? first_association.reflection.klass : @model

      names     = @columns.collect { |c| c.__name.to_s }
      db_column = klass.columns.detect { |col| names.include? col.name }

      db_column.nil? ? nil : db_column.type
    end

    # Translates the database column type into one of the attribute types
    # known to type_to_config. Raises a RuntimeError with some advice when
    # the type cannot be mapped, or when no database column was found at all.
    #
    def translated_type_from_database
      case type_from_db = type_from_database
      when :datetime, :string, :float, :boolean, :integer
        type_from_db
      when :decimal
        :float
      when :timestamp, :date
        :datetime
      else
        type_from_db ||= "(no matching database column found)"
        raise <<-MESSAGE

Cannot automatically map column type #{type_from_db} to an equivalent Sphinx
type (integer, float, boolean, datetime, string as ordinal). You could try to
explicitly convert the column's value in your define_index block:
has "CAST(column AS INT)", :type => :integer, :as => :column
        MESSAGE
      end
    end
  end
end
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
module ThinkingSphinx
  # An Array of search results (model instances or ids) that also carries
  # pagination details and the raw Sphinx result hash, so per-match data such
  # as weights and attribute values can be iterated alongside the results.
  #
  class Collection < ::Array
    attr_reader   :total_entries, :total_pages, :current_page, :per_page
    attr_accessor :results

    # Compatibility with older versions of will_paginate
    alias_method :page_count, :total_pages

    # page::          the current page number
    # per_page::      the number of results per page
    # entries::       the number of entries used to calculate the page count
    # total_entries:: the count exposed through #total_entries
    #
    def initialize(page, per_page, entries, total_entries)
      @current_page, @per_page, @total_entries = page, per_page, total_entries

      @total_pages = (entries / @per_page.to_f).ceil
    end

    # Builds a collection holding the "sphinx_internal_id" attribute of each
    # match. Page counts come from results[:total] and the total entries from
    # results[:total_found] (both default to 0). The options argument is not
    # used here.
    #
    def self.ids_from_results(results, page, limit, options)
      collection = self.new(page, limit,
        results[:total] || 0, results[:total_found] || 0
      )
      collection.results = results
      collection.replace results[:matches].collect { |match|
        match[:attributes]["sphinx_internal_id"]
      }
      collection
    end

    # Builds a collection holding the model instances for each match - see
    # instances_from_matches for how the options are used.
    #
    def self.create_from_results(results, page, limit, options)
      collection = self.new(page, limit,
        results[:total] || 0, results[:total_found] || 0
      )
      collection.results = results
      collection.replace instances_from_matches(results[:matches], options)
      collection
    end

    # Turns matches into instances, using options[:class] if it is set, or
    # working out the class of each match from its "class_crc" attribute.
    #
    def self.instances_from_matches(matches, options = {})
      if klass = options[:class]
        instances_from_class klass, matches, options
      else
        instances_from_classes matches, options
      end
    end

    # Finds the instances of klass for the given matches with a single find
    # call, returning them in the order of the matches. A match without a
    # corresponding record leaves a nil in its place.
    #
    # The :include and :select options fall back to the values in the class's
    # sphinx_index_options.
    #
    def self.instances_from_class(klass, matches, options = {})
      index_options = klass.sphinx_index_options

      ids = matches.collect { |match| match[:attributes]["sphinx_internal_id"] }
      instances = ids.length > 0 ? klass.find(
        :all,
        :conditions => {klass.primary_key.to_sym => ids},
        :include    => (options[:include] || index_options[:include]),
        :select     => (options[:select]  || index_options[:select])
      ) : []

      # Raise an exception if we find records in Sphinx but not in the DB, so
      # the search method can retry without them. See
      # ThinkingSphinx::Search.retry_search_on_stale_index.
      if options[:raise_on_stale] && instances.length < ids.length
        stale_ids = ids - instances.map { |i| i.id }
        raise StaleIdsException, stale_ids
      end

      # Put the instances back into the order Sphinx returned the matches in.
      instances_by_id = index_by_id(instances)
      ids.collect { |obj_id| instances_by_id[obj_id] }
    end

    # Group results by class and call #find(:all) once for each group to reduce
    # the number of #find's in multi-model searches. Instances are returned in
    # the order of the matches, with nil for matches that have no record.
    #
    def self.instances_from_classes(matches, options = {})
      groups = matches.group_by { |match| match[:attributes]["class_crc"] }

      # class crc => { id => instance }. Missing records come back from
      # instances_from_class as nil and are skipped while indexing, so they
      # are never asked for their id.
      instances_by_crc = {}
      groups.each do |crc, group|
        instances_by_crc[crc] = index_by_id(
          instances_from_class(class_from_crc(crc), group, options)
        )
      end

      matches.collect do |match|
        attributes = match[:attributes]
        instances_by_crc[attributes["class_crc"]][attributes["sphinx_internal_id"]]
      end
    end

    # Returns the class matching the given crc value. The crc lookup table is
    # built once from ThinkingSphinx.indexed_models and their subclasses, and
    # then cached.
    #
    def self.class_from_crc(crc)
      @@models_by_crc ||= ThinkingSphinx.indexed_models.inject({}) do |hash, model|
        hash[model.constantize.to_crc32] = model
        model.constantize.subclasses.each { |subclass|
          hash[subclass.to_crc32] = subclass.name
        }
        hash
      end
      @@models_by_crc[crc].constantize
    end

    # Maps each instance's id to the instance. The first instance wins when
    # an id occurs more than once, and nil entries are ignored.
    #
    def self.index_by_id(instances)
      instances.inject({}) do |hash, instance|
        unless instance.nil? || hash.has_key?(instance.id)
          hash[instance.id] = instance
        end
        hash
      end
    end
    private_class_method :index_by_id

    # The number of the previous page, or nil if this is the first page.
    #
    def previous_page
      current_page > 1 ? (current_page - 1) : nil
    end

    # The number of the next page, or nil if this is the last page.
    #
    def next_page
      current_page < total_pages ? (current_page + 1) : nil
    end

    # The number of results that come before the current page.
    #
    def offset
      (current_page - 1) * @per_page
    end

    # Handles each_with_<attribute> calls by passing them on to
    # each_with_attribute. Anything else goes to the default behaviour.
    #
    def method_missing(method, *args, &block)
      name = method.to_s
      return super unless name =~ /^each_with_/

      each_with_attribute name.gsub(/^each_with_/, ''), &block
    end

    # Lets respond_to? know about the each_with_<attribute> methods handled
    # by method_missing.
    #
    def respond_to_missing?(method, include_private = false)
      !!(method.to_s =~ /^each_with_/) || super
    end

    # Yields each result along with the "@groupby" and "@count" attributes of
    # its match.
    #
    def each_with_groupby_and_count(&block)
      results[:matches].each_with_index do |match, index|
        yield self[index], match[:attributes]["@groupby"], match[:attributes]["@count"]
      end
    end

    # Yields each result along with the given attribute of its match, falling
    # back to the attribute with an @ prefix.
    #
    def each_with_attribute(attribute, &block)
      results[:matches].each_with_index do |match, index|
        yield self[index], (match[:attributes][attribute] || match[:attributes]["@#{attribute}"])
      end
    end

    # Yields each result along with the weight of its match.
    #
    def each_with_weighting(&block)
      results[:matches].each_with_index do |match, index|
        yield self[index], match[:weight]
      end
    end

    # Like inject, but the block receives the memo, the result, and the
    # "@groupby" and "@count" attributes of the result's match.
    #
    def inject_with_groupby_and_count(initial = nil, &block)
      index = -1
      results[:matches].inject(initial) do |memo, match|
        index += 1
        yield memo, self[index], match[:attributes]["@groupby"], match[:attributes]["@count"]
      end
    end
  end
end
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
require 'erb'
|
|
2
|
+
require 'singleton'
|
|
3
|
+
|
|
4
|
+
module ThinkingSphinx
  # This class both keeps track of the configuration settings for Sphinx and
  # also generates the resulting file for Sphinx to use.
  #
  # Here are the defaults set by this class, relative to the application root
  # where relevant:
  #
  # config file::           config/#{environment}.sphinx.conf
  # searchd log file::      log/searchd.log
  # query log file::        log/searchd.query.log
  # pid file::              log/searchd.#{environment}.pid
  # searchd files::         db/sphinx/#{environment}/
  # address::               127.0.0.1
  # port::                  3312
  # allow star::            false
  # morphology::            stem_en
  # charset type::          utf-8
  #
  # Any other Sphinx setting is not assigned a value by this class.
  #
  # If you want to change these settings, create a YAML file at
  # config/sphinx.yml with settings for each environment, in a similar
  # fashion to database.yml - using the following keys: config_file,
  # searchd_log_file, query_log_file, pid_file, searchd_file_path, port,
  # allow_star, enable_star, min_prefix_len, min_infix_len, mem_limit,
  # max_matches, morphology, charset_type, charset_table, ignore_chars,
  # html_strip, html_remove_elements. I think you've got the idea.
  #
  # Each setting in the YAML file is optional - so only put in the ones you
  # want to change.
  #
  # Keep in mind, if for some particular reason you're using a version of
  # Sphinx older than 0.9.8 r871 (that's prior to the proper 0.9.8 release),
  # don't set allow_star to true.
  #
  class Configuration
    include Singleton

    SourceOptions = %w( mysql_connect_flags sql_range_step sql_query_pre
      sql_query_post sql_ranged_throttle sql_query_post_index )

    IndexOptions  = %w( charset_table charset_type docinfo enable_star
      exceptions html_index_attrs html_remove_elements html_strip ignore_chars
      min_infix_len min_prefix_len min_word_len mlock morphology ngram_chars
      ngram_len phrase_boundary phrase_boundary_step preopen stopwords
      wordforms )

    # address, port, pid_file, searchd_log_file and query_log_file are not
    # listed here as they are defined further down, delegating to the searchd
    # section of the Riddle configuration.
    attr_accessor :config_file, :searchd_file_path, :allow_star,
      :database_yml_file, :app_root, :bin_path, :model_directories

    attr_accessor :source_options, :index_options

    attr_reader :configuration

    # Load in the configuration settings - this will look for config/sphinx.yml
    # and parse it according to the current environment.
    #
    # The app_root argument is the fallback application root, used when
    # neither RAILS_ROOT nor Merb is defined.
    #
    def initialize(app_root = Dir.pwd)
      @app_root = app_root
      self.reset
    end

    # Sets everything back to the defaults, and then applies the settings
    # from config/sphinx.yml again. Returns the configuration object itself.
    #
    def reset
      self.app_root   = RAILS_ROOT if defined?(RAILS_ROOT)
      self.app_root   = Merb.root  if defined?(Merb)
      # Keep any root that has already been set, else use the working directory.
      self.app_root ||= Dir.pwd

      # A memoized controller would still refer to the previous Riddle
      # configuration, so it gets rebuilt on the next call to #controller.
      @controller = nil

      @configuration = Riddle::Configuration.new
      @configuration.searchd.address   = "127.0.0.1"
      @configuration.searchd.port      = 3312
      @configuration.searchd.pid_file  = "#{self.app_root}/log/searchd.#{environment}.pid"
      @configuration.searchd.log       = "#{self.app_root}/log/searchd.log"
      @configuration.searchd.query_log = "#{self.app_root}/log/searchd.query.log"

      self.database_yml_file = "#{self.app_root}/config/database.yml"
      self.config_file       = "#{self.app_root}/config/#{environment}.sphinx.conf"
      self.searchd_file_path = "#{self.app_root}/db/sphinx/#{environment}"
      self.allow_star        = false
      self.bin_path          = ""
      self.model_directories = ["#{app_root}/app/models/"]

      self.source_options = {}
      self.index_options  = {
        :charset_type => "utf-8",
        :morphology   => "stem_en"
      }

      parse_config

      self
    end

    # The current environment: Merb's environment if Merb is defined,
    # otherwise ENV['RAILS_ENV'], falling back to "development". The value is
    # cached once it has been determined.
    #
    def self.environment
      @@environment ||= (
        defined?(Merb) ? Merb.environment : ENV['RAILS_ENV']
      ) || "development"
    end

    def environment
      self.class.environment
    end

    # The Riddle controller for the current configuration and config file.
    #
    def controller
      @controller ||= Riddle::Controller.new(@configuration, self.config_file)
    end

    # Generate the config file for Sphinx by using all the settings defined and
    # looping through all the models with indexes to build the relevant
    # indexer and searchd configuration, and sources and indexes details.
    #
    # The file is written to file_path, which defaults to config_file.
    #
    def build(file_path=nil)
      load_models
      file_path ||= "#{self.config_file}"

      @configuration.indexes.clear

      ThinkingSphinx.indexed_models.each_with_index do |model, model_index|
        @configuration.indexes.concat model.constantize.to_riddle(model_index)
      end

      File.open(file_path, "w") do |file|
        file.write @configuration.render
      end
    end

    # Make sure all models are loaded - without reloading any that
    # ActiveRecord::Base is already aware of (otherwise we start to hit some
    # messy dependencies issues).
    #
    def load_models
      self.model_directories.each do |base|
        # The directory is escaped as it may well contain characters that
        # mean something in a regular expression.
        pattern = /^#{Regexp.escape(base)}([\w_\/\\]+)\.rb/

        Dir["#{base}**/*.rb"].each do |file|
          model_name = file.gsub(pattern, '\1')

          # Class names are camelized (Admin::User), file paths are not
          # (admin/user) - so the path needs converting before comparing.
          next if ::ActiveRecord::Base.send(:subclasses).detect { |model|
            model.name == model_name.camelize
          }

          begin
            model_name.camelize.constantize
          rescue LoadError
            # Try once more without the directories (app/models/admin/user.rb
            # may define User instead of Admin::User). gsub! returns nil when
            # there's nothing left to remove, which stops the retrying.
            if model_name.gsub!(/.*[\/\\]/, '')
              retry
            else
              next
            end
          rescue NameError
            next
          end
        end
      end
    end

    # The following settings live in the searchd section of the Riddle
    # configuration.

    def address
      @configuration.searchd.address
    end

    def address=(address)
      @configuration.searchd.address = address
    end

    def port
      @configuration.searchd.port
    end

    def port=(port)
      @configuration.searchd.port = port
    end

    def pid_file
      @configuration.searchd.pid_file
    end

    def pid_file=(pid_file)
      @configuration.searchd.pid_file = pid_file
    end

    def searchd_log_file
      @configuration.searchd.log
    end

    def searchd_log_file=(file)
      @configuration.searchd.log = file
    end

    def query_log_file
      @configuration.searchd.query_log
    end

    def query_log_file=(file)
      @configuration.searchd.query_log = file
    end

    private

    # Parse the config/sphinx.yml file - if it exists - then use the attribute
    # accessors to set the appropriate values. Nothing too clever.
    #
    def parse_config
      path = "#{app_root}/config/sphinx.yml"
      return unless File.exist?(path)

      # The file is run through ERB before being parsed as YAML. An empty
      # file doesn't parse to a hash, in which case there's nothing to apply.
      settings = YAML::load(ERB.new(IO.read(path)).result)
      conf     = settings.is_a?(Hash) ? settings[environment] : nil

      conf.each do |key, value|
        self.send("#{key}=", value) if self.respond_to?("#{key}=")

        set_sphinx_setting self.source_options, key, value, SourceOptions
        set_sphinx_setting self.index_options,  key, value, IndexOptions
        set_sphinx_setting @configuration.searchd, key, value
        set_sphinx_setting @configuration.indexer, key, value
      end unless conf.nil?

      # A bin path of nil or nothing but whitespace is left alone.
      self.bin_path += '/' unless self.bin_path.to_s.strip.empty?

      if self.allow_star
        self.index_options[:enable_star]    = true
        self.index_options[:min_prefix_len] = 1
      end
    end

    # Applies a setting to the given object. Hashes only accept the keys
    # listed in allowed. Any other object - and this configuration as well -
    # gets the value if it has a public setter for the key.
    #
    def set_sphinx_setting(object, key, value, allowed = {})
      if object.is_a?(Hash)
        object[key.to_sym] = value if allowed.include?(key.to_s)
      else
        # Check for the setter that's about to be called. methods returns
        # symbols from Ruby 1.9 onwards, so methods.include?("name") can't be
        # relied upon.
        object.send("#{key}=", value) if object.respond_to?("#{key}=")
        send("#{key}=", value)        if respond_to?("#{key}=")
      end
    end
  end
end
|