sherpa99-thinking-sphinx 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. data/LICENCE +20 -0
  2. data/README +107 -0
  3. data/README.textile +107 -0
  4. data/Rakefile +4 -0
  5. data/contribute.rb +328 -0
  6. data/cucumber.yml +1 -0
  7. data/features/a.rb +17 -0
  8. data/features/attribute_transformation.feature +22 -0
  9. data/features/datetime_deltas.feature +55 -0
  10. data/features/delayed_delta_indexing.feature +37 -0
  11. data/features/deleting_instances.feature +52 -0
  12. data/features/facets.feature +26 -0
  13. data/features/handling_edits.feature +67 -0
  14. data/features/retry_stale_indexes.feature +24 -0
  15. data/features/searching_across_models.feature +20 -0
  16. data/features/searching_by_model.feature +118 -0
  17. data/features/searching_with_find_arguments.feature +56 -0
  18. data/features/sphinx_detection.feature +16 -0
  19. data/features/step_definitions/alpha_steps.rb +3 -0
  20. data/features/step_definitions/beta_steps.rb +11 -0
  21. data/features/step_definitions/cat_steps.rb +3 -0
  22. data/features/step_definitions/common_steps.rb +154 -0
  23. data/features/step_definitions/datetime_delta_steps.rb +11 -0
  24. data/features/step_definitions/delayed_delta_indexing_steps.rb +7 -0
  25. data/features/step_definitions/facet_steps.rb +30 -0
  26. data/features/step_definitions/find_arguments_steps.rb +36 -0
  27. data/features/step_definitions/gamma_steps.rb +15 -0
  28. data/features/step_definitions/search_steps.rb +66 -0
  29. data/features/step_definitions/sphinx_steps.rb +23 -0
  30. data/features/support/db/active_record.rb +40 -0
  31. data/features/support/db/database.example.yml +4 -0
  32. data/features/support/db/migrations/create_alphas.rb +18 -0
  33. data/features/support/db/migrations/create_animals.rb +9 -0
  34. data/features/support/db/migrations/create_betas.rb +15 -0
  35. data/features/support/db/migrations/create_boxes.rb +13 -0
  36. data/features/support/db/migrations/create_comments.rb +13 -0
  37. data/features/support/db/migrations/create_delayed_betas.rb +28 -0
  38. data/features/support/db/migrations/create_developers.rb +39 -0
  39. data/features/support/db/migrations/create_gammas.rb +14 -0
  40. data/features/support/db/migrations/create_people.rb +1014 -0
  41. data/features/support/db/migrations/create_posts.rb +6 -0
  42. data/features/support/db/migrations/create_thetas.rb +16 -0
  43. data/features/support/db/mysql.rb +4 -0
  44. data/features/support/db/postgresql.rb +4 -0
  45. data/features/support/env.rb +6 -0
  46. data/features/support/models/alpha.rb +9 -0
  47. data/features/support/models/animal.rb +5 -0
  48. data/features/support/models/beta.rb +7 -0
  49. data/features/support/models/box.rb +8 -0
  50. data/features/support/models/cat.rb +3 -0
  51. data/features/support/models/comment.rb +3 -0
  52. data/features/support/models/delayed_beta.rb +7 -0
  53. data/features/support/models/developer.rb +8 -0
  54. data/features/support/models/gamma.rb +5 -0
  55. data/features/support/models/person.rb +8 -0
  56. data/features/support/models/post.rb +8 -0
  57. data/features/support/models/theta.rb +7 -0
  58. data/features/support/post_database.rb +37 -0
  59. data/features/support/z.rb +19 -0
  60. data/ginger_scenarios.rb +24 -0
  61. data/init.rb +12 -0
  62. data/lib/thinking_sphinx.rb +144 -0
  63. data/lib/thinking_sphinx/active_record.rb +245 -0
  64. data/lib/thinking_sphinx/active_record/delta.rb +74 -0
  65. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  66. data/lib/thinking_sphinx/active_record/search.rb +57 -0
  67. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +34 -0
  68. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +53 -0
  69. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +129 -0
  70. data/lib/thinking_sphinx/association.rb +144 -0
  71. data/lib/thinking_sphinx/attribute.rb +258 -0
  72. data/lib/thinking_sphinx/collection.rb +142 -0
  73. data/lib/thinking_sphinx/configuration.rb +236 -0
  74. data/lib/thinking_sphinx/core/string.rb +22 -0
  75. data/lib/thinking_sphinx/deltas.rb +22 -0
  76. data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
  77. data/lib/thinking_sphinx/deltas/default_delta.rb +65 -0
  78. data/lib/thinking_sphinx/deltas/delayed_delta.rb +25 -0
  79. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
  80. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
  81. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
  82. data/lib/thinking_sphinx/facet.rb +58 -0
  83. data/lib/thinking_sphinx/facet_collection.rb +44 -0
  84. data/lib/thinking_sphinx/field.rb +172 -0
  85. data/lib/thinking_sphinx/index.rb +414 -0
  86. data/lib/thinking_sphinx/index/builder.rb +233 -0
  87. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  88. data/lib/thinking_sphinx/rails_additions.rb +133 -0
  89. data/lib/thinking_sphinx/search.rb +638 -0
  90. data/lib/thinking_sphinx/tasks.rb +128 -0
  91. data/rails/init.rb +6 -0
  92. data/spec/fixtures/data.sql +32 -0
  93. data/spec/fixtures/database.yml.default +3 -0
  94. data/spec/fixtures/models.rb +81 -0
  95. data/spec/fixtures/structure.sql +84 -0
  96. data/spec/spec_helper.rb +54 -0
  97. data/spec/sphinx_helper.rb +109 -0
  98. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
  99. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  100. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  101. data/spec/unit/thinking_sphinx/active_record_spec.rb +256 -0
  102. data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
  103. data/spec/unit/thinking_sphinx/attribute_spec.rb +212 -0
  104. data/spec/unit/thinking_sphinx/collection_spec.rb +14 -0
  105. data/spec/unit/thinking_sphinx/configuration_spec.rb +136 -0
  106. data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
  107. data/spec/unit/thinking_sphinx/field_spec.rb +145 -0
  108. data/spec/unit/thinking_sphinx/index/builder_spec.rb +5 -0
  109. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
  110. data/spec/unit/thinking_sphinx/index_spec.rb +54 -0
  111. data/spec/unit/thinking_sphinx/search_spec.rb +59 -0
  112. data/spec/unit/thinking_sphinx_spec.rb +129 -0
  113. data/tasks/distribution.rb +48 -0
  114. data/tasks/rails.rake +1 -0
  115. data/tasks/testing.rb +86 -0
  116. data/thinking-sphinx.gemspec +232 -0
  117. data/vendor/after_commit/LICENSE +20 -0
  118. data/vendor/after_commit/README +16 -0
  119. data/vendor/after_commit/Rakefile +22 -0
  120. data/vendor/after_commit/init.rb +5 -0
  121. data/vendor/after_commit/lib/after_commit.rb +42 -0
  122. data/vendor/after_commit/lib/after_commit/active_record.rb +91 -0
  123. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
  124. data/vendor/after_commit/test/after_commit_test.rb +53 -0
  125. data/vendor/delayed_job/lib/delayed/job.rb +251 -0
  126. data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
  127. data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
  128. data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
  129. data/vendor/riddle/lib/riddle.rb +30 -0
  130. data/vendor/riddle/lib/riddle/client.rb +619 -0
  131. data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
  132. data/vendor/riddle/lib/riddle/client/message.rb +65 -0
  133. data/vendor/riddle/lib/riddle/client/response.rb +84 -0
  134. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  135. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
  136. data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
  137. data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
  138. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  139. data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
  140. data/vendor/riddle/lib/riddle/configuration/section.rb +37 -0
  141. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  142. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
  143. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
  144. data/vendor/riddle/lib/riddle/controller.rb +44 -0
  145. metadata +248 -0
@@ -0,0 +1,110 @@
1
+ module ThinkingSphinx
2
+ class Index
3
+ # Instances of this class represent database columns and the stack of
4
+ # associations that lead from the base model to them.
5
+ #
6
+ # The name and stack are accessible through methods starting with __ to
7
+ # avoid conflicting with the method_missing calls that build the stack.
8
+ #
9
class FauxColumn
  # Create a new column with a pre-defined stack. The *last* element of the
  # given arguments is popped off to become the name; whatever remains (if
  # anything) is the stack of associations leading to that column.
  #
  #   FauxColumn.new(:a, :b, :c)  # name :c, stack [:a, :b]
  #
  def initialize(*stack)
    @name  = stack.pop
    @stack = stack
  end

  # Converts the argument into FauxColumn form:
  #
  # * a Symbol or String becomes a new FauxColumn with that name,
  # * an Array is coerced element by element (recursively),
  # * a FauxColumn is returned untouched,
  # * anything else yields nil.
  #
  def self.coerce(columns)
    case columns
    when Symbol, String
      FauxColumn.new(columns)
    when Array
      columns.collect { |col| FauxColumn.coerce(col) }
    when FauxColumn
      columns
    else
      nil
    end
  end

  # Can't use normal method name, as that could be an association or
  # column name.
  #
  def __name
    @name
  end

  # Can't use normal method name, as that could be an association or
  # column name.
  #
  def __stack
    @stack
  end

  # Returns true if the stack is empty *and* if the name is a string -
  # which is an indication of raw SQL, as opposed to a value from a
  # table's column.
  #
  def is_string?
    @name.is_a?(String) && @stack.empty?
  end

  # This handles any 'invalid' method calls and sets them as the name,
  # pushing the previous name onto the stack. The object returns itself.
  #
  # If there's a single argument, it becomes the name, and the method
  # symbol goes onto the stack as well. Multiple arguments mean an array of
  # new columns is returned, one per argument, each with the (updated)
  # stack of this column and the argument as its name. Note that the
  # receiver is modified in every case, including the multi-argument one.
  #
  # Easier to explain with examples:
  #
  #   col = FauxColumn.new :a, :b, :c
  #   col.__name  #=> :c
  #   col.__stack #=> [:a, :b]
  #
  #   col.whatever #=> col
  #   col.__name  #=> :whatever
  #   col.__stack #=> [:a, :b, :c]
  #
  #   col.something(:id) #=> col
  #   col.__name  #=> :id
  #   col.__stack #=> [:a, :b, :c, :whatever, :something]
  #
  #   cols = col.short(:x, :y, :z)
  #   cols[0].__name  #=> :x
  #   cols[0].__stack #=> [:a, :b, :c, :whatever, :something, :id, :short]
  #   cols[1].__name  #=> :y
  #   cols[1].__stack #=> [:a, :b, :c, :whatever, :something, :id, :short]
  #   cols[2].__name  #=> :z
  #   cols[2].__stack #=> [:a, :b, :c, :whatever, :something, :id, :short]
  #
  # Also, this allows method chaining to build up a relevant stack:
  #
  #   col = FauxColumn.new :a, :b
  #   col.__name  #=> :b
  #   col.__stack #=> [:a]
  #
  #   col.one.two.three #=> col
  #   col.__name  #=> :three
  #   col.__stack #=> [:a, :b, :one, :two]
  #
  def method_missing(method, *args)
    @stack << @name
    @name   = method

    if args.empty?
      self
    elsif args.length == 1
      method_missing(args.first)
    else
      # The constructor takes the stack as separate arguments, so the
      # combined array has to be splatted. Passing it as one Array would
      # make the whole array the name and leave the stack empty.
      args.collect { |arg|
        FauxColumn.new(*(@stack + [@name, arg]))
      }
    end
  end
end
109
+ end
110
+ end
@@ -0,0 +1,133 @@
1
# Set is used by HashExcept#except; require it here so the method does not
# depend on another library having loaded it first.
require 'set'

module ThinkingSphinx
  # Fallback implementations of Hash#except and Hash#except! for
  # environments where Hash does not already provide them.
  module HashExcept
    # Returns a new hash without the given keys. If the receiver responds
    # to convert_key, each key is passed through it before comparison.
    def except(*keys)
      rejected = Set.new(respond_to?(:convert_key) ? keys.map { |key| convert_key(key) } : keys)
      reject { |key,| rejected.include?(key) }
    end

    # Removes the given keys from the hash in place and returns the hash.
    def except!(*keys)
      replace(except(*keys))
    end
  end
end

# Module#instance_methods returns Symbols on Ruby 1.9+, so comparing against
# the String "except" never matches there. method_defined? works with either
# representation. Both methods are checked because a Hash may provide except
# without except!; methods defined directly on Hash still take precedence
# over the ones from this included module.
Hash.send(
  :include, ThinkingSphinx::HashExcept
) unless Hash.method_defined?(:except) && Hash.method_defined?(:except!)
19
+
20
module ThinkingSphinx
  # Fallback implementation of Array#extract_options! for environments
  # where Array does not already provide it.
  module ArrayExtractOptions
    # Removes and returns the last element if it is a Hash; otherwise
    # leaves the array untouched and returns an empty Hash.
    def extract_options!
      last.is_a?(::Hash) ? pop : {}
    end
  end
end

# Module#instance_methods returns Symbols on Ruby 1.9+, so a String
# comparison never matches there; method_defined? accepts either form.
Array.send(
  :include, ThinkingSphinx::ArrayExtractOptions
) unless Array.method_defined?(:extract_options!)
31
+
32
module ThinkingSphinx
  # Fallback quote_table_name for connection adapters that do not define
  # one themselves.
  module AbstractQuotedTableName
    # Quotes a table name by delegating to the adapter's
    # quote_column_name.
    def quote_table_name(name)
      quote_column_name(name)
    end
  end
end

# Module#instance_methods returns Symbols on Ruby 1.9+, so comparing against
# the String "quote_table_name" never matches there. method_defined? gives
# the intended answer on every Ruby version.
ActiveRecord::ConnectionAdapters::AbstractAdapter.send(
  :include, ThinkingSphinx::AbstractQuotedTableName
) unless ActiveRecord::ConnectionAdapters::AbstractAdapter.method_defined?(:quote_table_name)
43
+
44
module ThinkingSphinx
  # Fallback quote_table_name for the MySQL adapter.
  module MysqlQuotedTableName
    # Quotes a table name via quote_column_name, then replaces each '.'
    # with '`.`' so every dot-separated part ends up individually
    # backtick-quoted.
    def quote_table_name(name) #:nodoc:
      quote_column_name(name).gsub('.', '`.`')
    end
  end
end

# Module#constants and Module#instance_methods return Symbols on Ruby 1.9+,
# so the names are compared via to_s / method_defined? to behave the same on
# every Ruby version (String comparisons silently never matched on 1.9+).
#
# NOTE(review): method_defined? (like instance_methods) also sees methods
# inherited from AbstractAdapter - confirm that skipping the MySQL-specific
# version in that case is intended.
if ActiveRecord::ConnectionAdapters.constants.any? { |const| const.to_s == "MysqlAdapter" }
  ActiveRecord::ConnectionAdapters::MysqlAdapter.send(
    :include, ThinkingSphinx::MysqlQuotedTableName
  ) unless ActiveRecord::ConnectionAdapters::MysqlAdapter.method_defined?(:quote_table_name)
end
57
+
58
module ThinkingSphinx
  # Class-level fallback for models whose ActiveRecord::Base does not
  # respond to quoted_table_name.
  module ActiveRecordQuotedName
    # Returns the model's table name, quoted by its connection.
    def quoted_table_name
      connection.quote_table_name(table_name)
    end
  end
end

# Only extend ActiveRecord::Base when it lacks the method already.
unless ActiveRecord::Base.respond_to?("quoted_table_name")
  ActiveRecord::Base.extend(ThinkingSphinx::ActiveRecordQuotedName)
end
69
+
70
module ThinkingSphinx
  # Class-level fallback for ActiveRecord::Base when it does not respond
  # to store_full_sti_class.
  module ActiveRecordStoreFullSTIClass
    # Always false in this fallback.
    def store_full_sti_class
      false
    end
  end
end

# Only extend ActiveRecord::Base when it lacks the method already.
unless ActiveRecord::Base.respond_to?(:store_full_sti_class)
  ActiveRecord::Base.extend(ThinkingSphinx::ActiveRecordStoreFullSTIClass)
end
81
+
82
module ThinkingSphinx
  # Fallback cattr_reader / cattr_writer / cattr_accessor macros, for use
  # when classes do not already provide them. Each attribute is backed by a
  # class variable (@@name) on the class the macro is called from.
  module ClassAttributeMethods
    # Defines class-level and instance-level readers for each given name.
    # The backing class variable is initialised to nil unless it is already
    # defined. Hash arguments (e.g. options passed through from
    # cattr_accessor) are ignored.
    def cattr_reader(*syms)
      syms.flatten.each do |sym|
        next if sym.is_a?(Hash)
        # __LINE__ + 1 so that backtraces point at the heredoc body.
        class_eval(<<-EOS, __FILE__, __LINE__ + 1)
          unless defined? @@#{sym}
            @@#{sym} = nil
          end

          def self.#{sym}
            @@#{sym}
          end

          def #{sym}
            @@#{sym}
          end
        EOS
      end
    end

    # Defines a class-level writer for each given name, plus an
    # instance-level writer unless a trailing options hash contains
    # :instance_writer => false.
    def cattr_writer(*syms)
      # Pull a trailing options hash off directly, so this macro does not
      # depend on Array#extract_options! having been patched in elsewhere.
      options = syms.last.is_a?(::Hash) ? syms.pop : {}
      syms.flatten.each do |sym|
        instance_writer = if options[:instance_writer] == false
          ""
        else
          "def #{sym}=(obj)\n  @@#{sym} = obj\nend"
        end

        class_eval(<<-EOS, __FILE__, __LINE__ + 1)
          unless defined? @@#{sym}
            @@#{sym} = nil
          end

          def self.#{sym}=(obj)
            @@#{sym} = obj
          end

          #{instance_writer}
        EOS
      end
    end

    # Defines both readers and writers for each given name; any trailing
    # options hash is honoured by cattr_writer.
    def cattr_accessor(*syms)
      cattr_reader(*syms)
      cattr_writer(*syms)
    end
  end
end

# The macros have to be instance methods of Class to be callable from
# ordinary class bodies. Class.extend would only add them to the Class
# object itself, so the module is included instead (Class, being an
# instance of Class, still gains them too).
Class.send(
  :include, ThinkingSphinx::ClassAttributeMethods
) unless Class.method_defined?(:cattr_reader)
@@ -0,0 +1,638 @@
1
+ module ThinkingSphinx
2
+ # Once you've got those indexes in and built, this is the stuff that
3
+ # matters - how to search! This class provides a generic search
4
+ # interface - which you can use to search all your indexed models at once.
5
+ # Most times, you will just want a specific model's results - the search and
6
+ # search_for_ids methods will do the job in exactly the same manner when
7
+ # called from a model.
8
+ #
9
+ class Search
10
+ class << self
11
+ # Searches for results that match the parameters provided. Will only
12
+ # return the ids for the matching objects. See #search for syntax
13
+ # examples.
14
+ #
15
+ # Note that this only searches the Sphinx index, with no ActiveRecord
16
+ # queries. Thus, if your index is not in sync with the database, this
17
+ # method may return ids that no longer exist there.
18
+ #
19
# Runs the search and returns only the ids of the matches, built by
# ThinkingSphinx::Collection.ids_from_results from the raw Sphinx results.
# Accepts the same arguments as #search.
def search_for_ids(*args)
  # search_results pops the options hash off the array it receives, so it
  # is given a copy and the options are extracted from args afterwards.
  sphinx_results, sphinx_client = search_results(*args.clone)

  opts         = args.extract_options!
  current_page = (opts[:page] || 1).to_i

  ThinkingSphinx::Collection.ids_from_results(
    sphinx_results, current_page, sphinx_client.limit, opts
  )
end
27
+
28
+ # Searches through the Sphinx indexes for relevant matches. There's
29
+ # various ways to search, sort, group and filter - which are covered
30
+ # below.
31
+ #
32
+ # Also, if you have WillPaginate installed, the search method can be used
33
+ # just like paginate. The same parameters - :page and :per_page - work as
34
+ # expected, and the returned result set can be used by the will_paginate
35
+ # helper.
36
+ #
37
+ # == Basic Searching
38
+ #
39
+ # The simplest way of searching is straight text.
40
+ #
41
+ # ThinkingSphinx::Search.search "pat"
42
+ # ThinkingSphinx::Search.search "google"
43
+ # User.search "pat", :page => (params[:page] || 1)
44
+ # Article.search "relevant news issue of the day"
45
+ #
46
+ # If you specify :include, like in an #find call, this will be respected
47
+ # when loading the relevant models from the search results.
48
+ #
49
+ # User.search "pat", :include => :posts
50
+ #
51
+ # == Match Modes
52
+ #
53
+ # Sphinx supports 5 different matching modes. By default Thinking Sphinx
54
+ # uses :all, which unsurprisingly requires all the supplied search terms
55
+ # to match a result.
56
+ #
57
+ # Alternative modes include:
58
+ #
59
+ # User.search "pat allan", :match_mode => :any
60
+ # User.search "pat allan", :match_mode => :phrase
61
+ # User.search "pat | allan", :match_mode => :boolean
62
+ # User.search "@name pat | @username pat", :match_mode => :extended
63
+ #
64
+ # Any will find results with any of the search terms. Phrase treats the search
65
+ # terms as a single phrase instead of individual words. Boolean and extended allow
66
+ # for more complex query syntax, refer to the sphinx documentation for further
67
+ # details.
68
+ #
69
+ # == Weighting
70
+ #
71
+ # Sphinx has support for weighting, where matches in one field can be considered
72
+ # more important than in another. Weights are integers, with 1 as the default.
73
+ # They can be set per-search like this:
74
+ #
75
+ # User.search "pat allan", :field_weights => { :alias => 4, :aka => 2 }
76
+ #
77
+ # If you're searching multiple models, you can set per-index weights:
78
+ #
79
+ # ThinkingSphinx::Search.search "pat", :index_weights => { User => 10 }
80
+ #
81
+ # See http://sphinxsearch.com/doc.html#weighting for further details.
82
+ #
83
+ # == Searching by Fields
84
+ #
85
+ # If you want to step it up a level, you can limit your search terms to
86
+ # specific fields:
87
+ #
88
+ # User.search :conditions => {:name => "pat"}
89
+ #
90
+ # This uses Sphinx's extended match mode, unless you specify a different
91
+ # match mode explicitly (but then this way of searching won't work). Also
92
+ # note that you don't need to put in a search string.
93
+ #
94
+ # == Searching by Attributes
95
+ #
96
+ # Also known as filters, you can limit your searches to documents that
97
+ # have specific values for their attributes. There are two ways to do
98
+ # this. The first is one that works in all scenarios - using the :with
99
+ # option.
100
+ #
101
+ # ThinkingSphinx::Search.search :with => {:parent_id => 10}
102
+ #
103
+ # The second is only viable if you're searching with a specific model
104
+ # (not multi-model searching). With a single model, Thinking Sphinx
105
+ # can figure out what attributes and fields are available, so you can
106
+ # put it all in the :conditions hash, and it will sort it out.
107
+ #
108
+ # Node.search :conditions => {:parent_id => 10}
109
+ #
110
+ # Filters can be single values, arrays of values, or ranges.
111
+ #
112
+ # Article.search "East Timor", :conditions => {:rating => 3..5}
113
+ #
114
+ # == Excluding by Attributes
115
+ #
116
+ # Sphinx also supports negative filtering - where the filters are of
117
+ # attribute values to exclude. This is done with the :without option:
118
+ #
119
+ # User.search :without => {:role_id => 1}
120
+ #
121
+ # == Excluding by Primary Key
122
+ #
123
+ # There is a shortcut to exclude records by their ActiveRecord primary key:
124
+ #
125
+ # User.search :without_ids => 1
126
+ #
127
+ # Pass an array or a single value.
128
+ #
129
+ # The primary key must be an integer as a negative filter is used. Note
130
+ # that for multi-model search, an id may occur in more than one model.
131
+ #
132
+ # == Infix (Star) Searching
133
+ #
134
+ # By default, Sphinx uses English stemming, e.g. matching "shoes" if you
135
+ # search for "shoe". It won't find "Melbourne" if you search for
136
+ # "elbourn", though.
137
+ #
138
+ # Enable infix searching by something like this in config/sphinx.yml:
139
+ #
140
+ # development:
141
+ # enable_star: 1
142
+ # min_infix_length: 2
143
+ #
144
+ # Note that this will make indexing take longer.
145
+ #
146
+ # With those settings (and after reindexing), wildcard asterisks can be used
147
+ # in queries:
148
+ #
149
+ # Location.search "*elbourn*"
150
+ #
151
+ # To automatically add asterisks around every token (but not operators),
152
+ # pass the :star option:
153
+ #
154
+ # Location.search "elbourn -ustrali", :star => true, :match_mode => :boolean
155
+ #
156
+ # This would become "*elbourn* -*ustrali*". The :star option only adds the
157
+ # asterisks. You need to make the config/sphinx.yml changes yourself.
158
+ #
159
+ # By default, the tokens are assumed to match the regular expression /\w+/u.
160
+ # If you've modified the charset_table, pass another regular expression, e.g.
161
+ #
162
+ # User.search("oo@bar.c", :star => /[\w@.]+/u)
163
+ #
164
+ # to search for "*oo@bar.c*" and not "*oo*@*bar*.*c*".
165
+ #
166
+ # == Sorting
167
+ #
168
+ # Sphinx can only sort by attributes, so generally you will need to avoid
169
+ # using field names in your :order option. However, if you're searching
170
+ # on a single model, and have specified some fields as sortable, you can
171
+ # use those field names and Thinking Sphinx will interpret accordingly.
172
+ # Remember: this will only happen for single-model searches, and only
173
+ # through the :order option.
174
+ #
175
+ # Location.search "Melbourne", :order => :state
176
+ # User.search :conditions => {:role_id => 2}, :order => "name ASC"
177
+ #
178
+ # Keep in mind that if you use a string, you *must* specify the direction
179
+ # (ASC or DESC) else Sphinx won't return any results. If you use a symbol
180
+ # then Thinking Sphinx assumes ASC, but if you wish to state otherwise,
181
+ # use the :sort_mode option:
182
+ #
183
+ # Location.search "Melbourne", :order => :state, :sort_mode => :desc
184
+ #
185
+ # Of course, there are other sort modes - check out the Sphinx
186
+ # documentation[http://sphinxsearch.com/doc.html] for that level of
187
+ # detail though.
188
+ #
189
+ # == Grouping
190
+ #
191
+ # For this you can use the group_by, group_clause and group_function
192
+ # options - which are all directly linked to Sphinx's expectations. No
193
+ # magic from Thinking Sphinx. It can get a little tricky, so make sure
194
+ # you read all the relevant
195
+ # documentation[http://sphinxsearch.com/doc.html#clustering] first.
196
+ #
197
+ # Yes this section will be expanded, but this is a start.
198
+ #
199
+ # == Geo/Location Searching
200
+ #
201
+ # Sphinx - and therefore Thinking Sphinx - has the facility to search
202
+ # around a geographical point, using a given latitude and longitude. To
203
+ # take advantage of this, you will need to have both of those values in
204
+ # attributes. To search with that point, you can then use one of the
205
+ # following syntax examples:
206
+ #
207
+ # Address.search "Melbourne", :geo => [1.4, -2.217], :order => "@geodist asc"
208
+ # Address.search "Australia", :geo => [-0.55, 3.108], :order => "@geodist asc"
209
+ # :latitude_attr => "latit", :longitude_attr => "longit"
210
+ #
211
+ # The first example applies when your latitude and longitude attributes
212
+ # are named any of lat, latitude, lon, long or longitude. If that's not
213
+ # the case, you will need to explicitly state them in your search, _or_
214
+ # you can do so in your model:
215
+ #
216
+ # define_index do
217
+ # has :latit # Float column, stored in radians
218
+ # has :longit # Float column, stored in radians
219
+ #
220
+ # set_property :latitude_attr => "latit"
221
+ # set_property :longitude_attr => "longit"
222
+ # end
223
+ #
224
+ # Now, geo-location searching really only has an effect if you have a
225
+ # filter, sort or grouping clause related to it - otherwise it's just a
226
+ # normal search, and _will not_ return a distance value otherwise. To
227
+ # make use of the positioning difference, use the special attribute
228
+ # "@geodist" in any of your filters or sorting or grouping clauses.
229
+ #
230
+ # And don't forget - both the latitude and longitude you use in your
231
+ # search, and the values in your indexes, need to be stored as a float in radians,
232
+ # _not_ degrees. Keep in mind that if you do this conversion in SQL
233
+ # you will need to explicitly declare a column type of :float.
234
+ #
235
+ # define_index do
236
+ # has 'RADIANS(lat)', :as => :lat, :type => :float
237
+ # # ...
238
+ # end
239
+ #
240
+ # Once you've got your results set, you can access the distances as
241
+ # follows:
242
+ #
243
+ # @results.each_with_geodist do |result, distance|
244
+ # # ...
245
+ # end
246
+ #
247
+ # The distance value is returned as a float, representing the distance in
248
+ # metres.
249
+ #
250
+ # == Handling a Stale Index
251
+ #
252
+ # Especially if you don't use delta indexing, you risk having records in the
253
+ # Sphinx index that are no longer in the database. By default, those will simply
254
+ # come back as nils:
255
+ #
256
+ # >> pat_user.delete
257
+ # >> User.search("pat")
258
+ # Sphinx Result: [1,2]
259
+ # => [nil, <#User id: 2>]
260
+ #
261
+ # (If you search across multiple models, you'll get ActiveRecord::RecordNotFound.)
262
+ #
263
+ # You can simply Array#compact these results or handle the nils in some other way, but
264
+ # Sphinx will still report two results, and the missing records may upset your layout.
265
+ #
266
+ # If you pass :retry_stale => true to a single-model search, missing records will
267
+ # cause Thinking Sphinx to retry the query but excluding those records. Since search
268
+ # is paginated, the new search could potentially include missing records as well, so by
269
+ # default Thinking Sphinx will retry three times. Pass :retry_stale => 5 to retry five
270
+ # times, and so on. If there are still missing ids on the last retry, they are
271
+ # shown as nils.
272
+ #
273
# Runs the search described by the given terms and options, and returns
# the collection built by ThinkingSphinx::Collection.create_from_results.
# Any error reported in the Sphinx results is written to the ActiveRecord
# logger. The whole query is wrapped in retry_search_on_stale_index, so it
# may be re-run according to the :retry_stale option.
def search(*args)
  query   = args.clone  # an array
  options = query.extract_options!

  retry_search_on_stale_index(query, options) do
    # options may have been altered between attempts (e.g. :without_ids),
    # so it is re-appended to the query terms on every run.
    results, client = search_results(*(query + [options]))

    ::ActiveRecord::Base.logger.error(
      "Sphinx Error: #{results[:error]}"
    ) if results[:error]

    page = options[:page] ? options[:page].to_i : 1

    ThinkingSphinx::Collection.create_from_results(results, page, client.limit, options)
  end
end
290
+
291
# Calls the given block, retrying it a limited number of times whenever it
# raises StaleIdsException. Returns the block's result.
#
# The number of retries comes from options[:retry_stale]: true means three
# retries, nil/false means none, anything else is converted with to_i.
#
# Before each attempt, options[:raise_on_stale] is set to whether any
# retries remain. After each StaleIdsException, the exception's ids are
# merged into options[:without_ids], so the next attempt can exclude them.
#
# The query argument is not used by this method itself.
def retry_search_on_stale_index(query, options, &block)
  stale_ids = []
  # `when x then y` rather than the `when x: y` form, which only Ruby 1.8
  # accepts and which is a syntax error from Ruby 1.9 onwards.
  stale_retries_left = case options[:retry_stale]
    when true       then 3 # default to three retries
    when nil, false then 0 # no retries
    else options[:retry_stale].to_i
  end
  begin
    # Passing this in an option so Collection.create_from_results can see it.
    # It should only raise on stale records if there are any retries left.
    options[:raise_on_stale] = stale_retries_left > 0
    block.call
  # If ThinkingSphinx::Collection.create_from_results found records in Sphinx but not
  # in the DB and the :raise_on_stale option is set, this exception is raised. We retry
  # a limited number of times, excluding the stale ids from the search.
  rescue StaleIdsException => e
    stale_retries_left -= 1

    stale_ids |= e.ids # For logging
    options[:without_ids] = Array(options[:without_ids]) | e.ids # Actual exclusion

    tries = stale_retries_left
    ::ActiveRecord::Base.logger.debug("Sphinx Stale Ids (%s %s left): %s" % [
      tries, (tries==1 ? 'try' : 'tries'), stale_ids.join(', ')
    ])

    retry
  end
end
320
+
321
# Runs the search and returns the :total_found value from the Sphinx
# results, or 0 when that value is missing.
def count(*args)
  sphinx_results = search_results(*args.clone).first
  sphinx_results[:total_found] || 0
end
325
+
326
+ # Checks if a document with the given id exists within a specific index.
327
+ # Expected parameters:
328
+ #
329
+ # - ID of the document
330
+ # - Index to check within
331
+ # - Options hash (defaults to {})
332
+ #
333
+ # Example:
334
+ #
335
+ # ThinkingSphinx::Search.search_for_id(10, "user_core", :class => User)
336
+ #
337
# Returns true when the query yields at least one match within an id range
# narrowed to the single given document id. Positional arguments are the
# document id and the index to query; a trailing hash supplies the options.
#
# Raises ThinkingSphinx::ConnectionError if the connection is refused.
def search_for_id(*args)
  opts        = args.extract_options!
  sphinx      = client_from_options(opts)
  document_id = args.first

  conditions_query, condition_filters = search_conditions(
    opts[:class], opts[:conditions] || {}
  )
  sphinx.filters   += condition_filters
  sphinx.match_mode = :extended unless conditions_query.empty?
  # A range of exactly one id.
  sphinx.id_range   = document_id..document_id

  begin
    matches = sphinx.query(conditions_query, args[1])[:matches]
    return matches.length > 0
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end
end
354
+
355
# Builds a ThinkingSphinx::FacetCollection by running one grouped search
# per facet of options[:class] (grouping by the facet's attribute name with
# the :attr group function) and adding each result set to the collection.
def facets(*args)
  # The collection is handed the args array before the options hash is
  # popped off it below.
  collection    = ThinkingSphinx::FacetCollection.new(args)
  facet_options = args.extract_options!.clone
  facet_options.merge!(:group_function => :attr)

  facet_options[:class].sphinx_facets.each do |facet|
    facet_options[:group_by] = facet.attribute_name
    collection.add_from_results(facet, search(*(args + [facet_options])))
  end

  collection
end
366
+
367
+ private
368
+
369
+ # This method handles the common search functionality, and returns both
370
+ # the result hash and the client. Not super elegant, but it'll do for
371
+ # the moment.
372
+ #
373
# Common search functionality: builds a client from the options, assembles
# the query string, applies condition filters, sorting and pagination, then
# runs the query. Returns a two-element array of the result hash and the
# client.
#
# Raises ThinkingSphinx::ConnectionError if the connection is refused.
def search_results(*args)
  opts   = args.extract_options!
  terms  = args.join(' ')
  sphinx = client_from_options(opts)

  terms = star_query(terms, opts[:star]) if opts[:star]

  conditions_query, condition_filters = search_conditions(
    opts[:class], opts[:conditions] || {}
  )
  sphinx.filters   += condition_filters
  sphinx.match_mode = :extended unless conditions_query.empty?

  # Stripped because "" and " " are not equivalent queries.
  full_query = [terms, conditions_query].join(' ').strip

  set_sort_options!(sphinx, opts)

  sphinx.limit  = opts[:per_page].to_i if opts[:per_page]
  page_number   = opts[:page] ? opts[:page].to_i : 1
  sphinx.offset = (page_number - 1) * sphinx.limit

  found = begin
    ::ActiveRecord::Base.logger.debug "Sphinx: #{full_query}"
    response  = sphinx.query(full_query)
    match_ids = response[:matches].collect { |match|
      match[:attributes]["sphinx_internal_id"]
    }
    ::ActiveRecord::Base.logger.debug "Sphinx Result: #{match_ids.inspect}"
    response
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end

  [found, sphinx]
end
404
+
405
+ # Set all the appropriate settings for the client, using the provided
406
+ # options hash.
407
+ #
408
# Returns a new Riddle::Client pointed at the configured address and port.
# Each client setting is taken from the options hash first, then from the
# class's index options, then from the client's own default. The
# sphinx_deleted filter is always added; class, :with, :without, :with_all
# and :without_ids filters are added when the matching option is present.
#
# Note that the options hash is updated in place: :max_matches may be
# defaulted, :index_weights is normalised, and :classes is set when :class
# is given.
#
def client_from_options(options = {})
  config = ThinkingSphinx::Configuration.instance
  client = Riddle::Client.new config.address, config.port
  klass  = options[:class]
  index_options = klass ? klass.sphinx_index_options : {}

  # The Riddle default is per-query max_matches=1000. If we set the
  # per-server max to a smaller value in sphinx.yml, we need to override
  # the Riddle default or else we get search errors like
  # "per-query max_matches=1000 out of bounds (per-server max_matches=200)"
  per_server_max_matches = config.configuration.searchd.max_matches
  options[:max_matches] ||= per_server_max_matches if per_server_max_matches

  # Turn :index_weights => { "foo" => 2, User => 1 }
  # into :index_weights => { "foo" => 2, "user_core" => 1, "user_delta" => 1 }
  weights = options[:index_weights]
  if weights
    options[:index_weights] = weights.inject({}) do |hash, (index, weight)|
      if index.is_a?(Class)
        name = ThinkingSphinx::Index.name(index)
        hash["#{name}_core"]  = weight
        hash["#{name}_delta"] = weight
      else
        hash[index] = weight
      end
      hash
    end
  end

  [
    :max_matches, :match_mode, :sort_mode, :sort_by, :id_range,
    :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
    :retry_count, :retry_delay, :index_weights, :rank_mode,
    :max_query_time, :field_weights, :filters, :anchor, :limit
  ].each do |key|
    client.send(
      "#{key}=",
      options[key] || index_options[key] || client.send(key)
    )
  end

  options[:classes] = [klass] if klass

  client.anchor = anchor_conditions(klass, options) || {} if client.anchor.empty?

  # From here on filters are added with += rather than <<. After the loop
  # above, client.filters may be the very array supplied through
  # options[:filters] or the index options; appending in place would
  # modify that array for the caller and for later searches.
  client.filters += [
    Riddle::Client::Filter.new("sphinx_deleted", [0])
  ]

  # class filters
  client.filters += [
    Riddle::Client::Filter.new(
      "class_crc", options[:classes].collect { |k| k.to_crc32s }.flatten
    )
  ] if options[:classes]

  # normal attribute filters
  client.filters += options[:with].collect { |attr, val|
    Riddle::Client::Filter.new attr.to_s, filter_value(val)
  } if options[:with]

  # exclusive attribute filters
  client.filters += options[:without].collect { |attr, val|
    Riddle::Client::Filter.new attr.to_s, filter_value(val), true
  } if options[:without]

  # every-match attribute filters
  client.filters += options[:with_all].collect { |attr, vals|
    Array(vals).collect { |val|
      Riddle::Client::Filter.new attr.to_s, filter_value(val)
    }
  }.flatten if options[:with_all]

  # exclusive attribute filter on primary key
  client.filters += Array(options[:without_ids]).collect { |id|
    Riddle::Client::Filter.new 'sphinx_internal_id', filter_value(id), true
  } if options[:without_ids]

  client
end
486
+
487
# Wraps each bare token of the query in "*" wildcards and returns the new
# string. A token is left untouched when it directly follows a search
# operator, when it is a double-quoted phrase, or when it already has a
# star on either side.
#
# custom_token - optional Regexp describing what counts as a token; any
#                other value falls back to /\w+/u.
#
def star_query(query, custom_token = nil)
  token = /\w+/u
  token = custom_token if custom_token.is_a?(Regexp)

  query.gsub(/("#{token}(.*?#{token})?"|(?![!-])#{token})/u) do
    # Capture the match globals first: the match call below overwrites them.
    before, word, after = $`, $&, $'

    # E.g. "@foo", "/2", "~3", but not as part of a token
    next word if before.match(%r{(\W|^)[@~/]\Z})
    # E.g. "foo bar", with quotes
    next word if word[0, 1] == '"' && word[-1, 1] == '"'
    # Already starred on one side
    next word if before[-1, 1] == "*" || after[0, 1] == "*"

    "*#{word}*"
  end
end
502
+
503
# Converts a filter value into the form handed to Riddle::Client::Filter:
# a Range is kept as a Range and everything else becomes an Array. Time
# values are converted to integer timestamps wherever they appear (as
# range ends, as array elements, or on their own).
#
def filter_value(value)
  case value
  when Range
    value.first.is_a?(Time) ? (timestamp(value.first)..timestamp(value.last)) : value
  when Array
    value.collect { |val| val.is_a?(Time) ? timestamp(val) : val }
  else
    # A lone Time must be handled before Array(): Time responds to to_a,
    # so Array(time) would yield [sec, min, hour, ...] rather than a
    # single-element array.
    value.is_a?(Time) ? [timestamp(value)] : Array(value)
  end
end
513
+
514
+ # Returns the integer timestamp for a Time object.
515
+ #
516
+ # If using Rails 2.1+, need to handle timezones to translate them back to
517
+ # UTC, as that's what datetimes will be stored as by MySQL.
518
+ #
519
+ # in_time_zone is a method that was added for the timezone support in
520
+ # Rails 2.1, which is why it's used for testing. I'm sure there's better
521
+ # ways, but this does the job.
522
+ #
523
# Returns an Integer. The value passed in is never modified.
#
def timestamp(value)
  return value.to_i unless value.respond_to?(:in_time_zone)

  # Time#utc converts its receiver to UTC in place, so call it on a copy
  # to leave the caller's object in its original zone.
  value.dup.utc.to_i
end
526
+
527
+ # Translate field and attribute conditions to the relevant search string
528
+ # and filters.
529
+ #
530
# Returns a two-element array: the extra query string (field conditions
# written as "@field value" and joined with spaces) and the list of
# Riddle filters built from conditions whose key names an attribute of
# klass. With no klass, every condition is treated as a field.
#
def search_conditions(klass, conditions={})
  attributes = []
  if klass
    attributes = klass.sphinx_indexes.collect { |index|
      index.attributes.collect { |attrib| attrib.unique_name }
    }.flatten
  end

  terms   = []
  filters = []

  conditions.each do |key, val|
    unless attributes.include?(key.to_sym)
      terms << "@#{key} #{val}"
      next
    end

    filters << Riddle::Client::Filter.new(key.to_s, filter_value(val))
  end

  [terms.join(' '), filters]
end
550
+
551
+ # Return the appropriate latitude and longitude values, depending on
552
+ # whether the relevant attributes have been defined, and also whether
553
+ # there's actually any values.
554
+ #
555
# Returns a hash with :latitude_attribute, :latitude, :longitude_attribute
# and :longitude, or nil unless both a latitude and a longitude value are
# available.
#
# Attribute names come from options[:latitude_attr] / [:longitude_attr]
# when given, otherwise from the index options, otherwise from the first
# conventional attribute name defined on the indexes (lat, latitude /
# lng, lon, long, longitude). Values come from options[:geo] (first and
# last element) when given, otherwise from options[:lat] and [:lon].
#
def anchor_conditions(klass, options)
  indexes = klass ? klass.sphinx_indexes : []

  attributes = indexes.collect { |index|
    index.attributes.collect { |attrib| attrib.unique_name }
  }.flatten

  lat_attr = indexes.collect { |index| index.options[:latitude_attr]  }.compact.first
  lon_attr = indexes.collect { |index| index.options[:longitude_attr] }.compact.first

  # Explicit options override whatever the indexes declare.
  lat_attr = options[:latitude_attr]  if options[:latitude_attr]
  lon_attr = options[:longitude_attr] if options[:longitude_attr]

  # Fall back to the first conventional attribute name that exists.
  [:lat, :latitude].each do |name|
    lat_attr ||= name if attributes.include?(name)
  end
  [:lng, :lon, :long, :longitude].each do |name|
    lon_attr ||= name if attributes.include?(name)
  end

  lat, lon = options[:lat], options[:lon]
  lat, lon = options[:geo].first, options[:geo].last if options[:geo]

  return nil unless lat && lon

  {
    :latitude_attribute  => lat_attr.to_s,
    :latitude            => lat,
    :longitude_attribute => lon_attr.to_s,
    :longitude           => lon
  }
end
593
+
594
+ # Set the sort options using the :order key as well as the appropriate
595
+ # Riddle settings.
596
+ #
597
# Updates client.sort_mode and client.sort_by in place.
#
# The order is options[:order], falling back to the class's index options.
# A Symbol order sorts by that attribute (or by "<field>_sort" when the
# symbol names a field) and switches a :relevance or unset sort mode to
# :attr_asc. A String order switches to :extended mode with field names
# translated to their sortable attributes. Any other order changes
# nothing. Finally the shorthand modes :asc and :desc are expanded to
# :attr_asc and :attr_desc.
#
def set_sort_options!(client, options)
  klass = options[:class]

  if klass
    fields = klass.sphinx_indexes.collect { |index|
      index.fields.collect { |field| field.unique_name }
    }.flatten
    index_options = klass.sphinx_index_options
  else
    fields        = []
    index_options = {}
  end

  order = options[:order] || index_options[:order]

  if order.is_a?(Symbol)
    if client.sort_mode == :relevance || client.sort_mode.nil?
      client.sort_mode = :attr_asc
    end
    sort_name = order.to_s
    sort_name = sort_name.concat("_sort") if fields.include?(order)
    client.sort_by = sort_name
  elsif order.is_a?(String)
    client.sort_mode = :extended
    client.sort_by   = sorted_fields_to_attributes(order, fields)
  end

  # Expand the shorthand sort modes.
  case client.sort_mode
  when :asc  then client.sort_mode = :attr_asc
  when :desc then client.sort_mode = :attr_desc
  end
end
623
+
624
+ # Search through a collection of fields and translate any appearances
625
+ # of them in a string to their attribute equivalent for sorting.
626
+ #
627
# Returns a new string in which every whole-word occurrence of a field
# name (at the start of the string or after whitespace, and followed by
# an optional comma plus whitespace, or by the end of the string) has
# "_sort" appended. The string passed in is left untouched, so frozen
# strings and order strings shared between searches are safe to pass.
#
def sorted_fields_to_attributes(string, fields)
  fields.inject(string.dup) do |result, field|
    name = field.to_s
    # Escape the name so it is always matched literally.
    result.gsub(/(^|\s)#{Regexp.escape(name)}(,?\s|$)/) do
      "#{$1}#{name}_sort#{$2}"
    end
  end
end
636
+ end
637
+ end
638
+ end