dbldots_oedipus 0.0.16

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. data/.gitignore +10 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE +20 -0
  5. data/README.md +435 -0
  6. data/Rakefile +26 -0
  7. data/ext/oedipus/extconf.rb +72 -0
  8. data/ext/oedipus/lexing.c +96 -0
  9. data/ext/oedipus/lexing.h +20 -0
  10. data/ext/oedipus/oedipus.c +339 -0
  11. data/ext/oedipus/oedipus.h +58 -0
  12. data/lib/oedipus.rb +40 -0
  13. data/lib/oedipus/comparison.rb +88 -0
  14. data/lib/oedipus/comparison/between.rb +21 -0
  15. data/lib/oedipus/comparison/equal.rb +21 -0
  16. data/lib/oedipus/comparison/gt.rb +21 -0
  17. data/lib/oedipus/comparison/gte.rb +21 -0
  18. data/lib/oedipus/comparison/in.rb +21 -0
  19. data/lib/oedipus/comparison/lt.rb +21 -0
  20. data/lib/oedipus/comparison/lte.rb +21 -0
  21. data/lib/oedipus/comparison/not.rb +25 -0
  22. data/lib/oedipus/comparison/not_equal.rb +21 -0
  23. data/lib/oedipus/comparison/not_in.rb +21 -0
  24. data/lib/oedipus/comparison/outside.rb +21 -0
  25. data/lib/oedipus/comparison/shortcuts.rb +144 -0
  26. data/lib/oedipus/connection.rb +124 -0
  27. data/lib/oedipus/connection/pool.rb +133 -0
  28. data/lib/oedipus/connection/registry.rb +56 -0
  29. data/lib/oedipus/connection_error.rb +14 -0
  30. data/lib/oedipus/index.rb +320 -0
  31. data/lib/oedipus/query_builder.rb +185 -0
  32. data/lib/oedipus/rspec/test_rig.rb +132 -0
  33. data/lib/oedipus/version.rb +12 -0
  34. data/oedipus.gemspec +42 -0
  35. data/spec/data/.gitkeep +0 -0
  36. data/spec/integration/connection/registry_spec.rb +50 -0
  37. data/spec/integration/connection_spec.rb +156 -0
  38. data/spec/integration/index_spec.rb +442 -0
  39. data/spec/spec_helper.rb +16 -0
  40. data/spec/unit/comparison/between_spec.rb +36 -0
  41. data/spec/unit/comparison/equal_spec.rb +22 -0
  42. data/spec/unit/comparison/gt_spec.rb +22 -0
  43. data/spec/unit/comparison/gte_spec.rb +22 -0
  44. data/spec/unit/comparison/in_spec.rb +22 -0
  45. data/spec/unit/comparison/lt_spec.rb +22 -0
  46. data/spec/unit/comparison/lte_spec.rb +22 -0
  47. data/spec/unit/comparison/not_equal_spec.rb +22 -0
  48. data/spec/unit/comparison/not_in_spec.rb +22 -0
  49. data/spec/unit/comparison/not_spec.rb +37 -0
  50. data/spec/unit/comparison/outside_spec.rb +36 -0
  51. data/spec/unit/comparison/shortcuts_spec.rb +125 -0
  52. data/spec/unit/comparison_spec.rb +109 -0
  53. data/spec/unit/query_builder_spec.rb +205 -0
  54. metadata +164 -0
@@ -0,0 +1,124 @@
1
+ # encoding: utf-8
2
+
3
+ ##
4
+ # Oedipus Sphinx 2 Search.
5
+ # Copyright © 2012 Chris Corbyn.
6
+ #
7
+ # See LICENSE file for details.
8
+ ##
9
+
10
module Oedipus
  # Provides an interface for talking to SphinxQL.
  #
  # Currently this class wraps a native mysql extension.
  class Connection
    # @return [Hash] the normalized options this connection was built with
    attr_reader :options

    # Instantiate a new Connection to a SphinxQL host.
    #
    # @param [String, Hash] options
    #   either a 'hostname:port' string, or a Hash containing :host and :port.
    #   The Hash form additionally honours:
    #   * :pool_size - maximum number of pooled connections (default 8)
    #   * :ttl       - value handed to the pool as its :ttl (default 60)
    #   * :verify    - pass false to skip the connectivity check on creation
    #
    # Unless verification is disabled, a connection is acquired from the pool
    # during initialization.
    #
    # The underlying implementation uses a thread-safe connection pool.
    def initialize(options)
      @options =
        if options.kind_of?(String)
          Hash[[:host, :port].zip(options.split(":"))]
        else
          # Work on a copy so the caller's Hash is never mutated.
          options.dup
        end

      # The port arrives as a String in the 'hostname:port' form.
      @options[:port] = @options[:port].to_i

      @pool = Pool.new(
        host: @options[:host],
        port: @options[:port],
        size: @options.fetch(:pool_size, 8),
        ttl:  @options.fetch(:ttl, 60)
      )

      assert_valid_pool unless @options[:verify] == false
    end

    # Access a specific index for querying.
    #
    # @param [String] index_name
    #   the name of an existing index in Sphinx
    #
    # @return [Index]
    #   an index that can be queried
    def [](index_name)
      Index.new(index_name, self)
    end

    alias_method :index, :[]

    # Execute one or more queries in a batch.
    #
    # Queries should be separated by semicolons.
    # Results are returned in a 2-dimensional array.
    #
    # @param [String] sql
    #   one or more SphinxQL statements, separated by semicolons
    #
    # @param [Object...] bind_values
    #   values to be substituted in place of '?' in the query
    #
    # @return [Array]
    #   an array of arrays, containing the returned records
    #
    # Note that SphinxQL does not support prepared statements.
    def multi_query(sql, *bind_values)
      @pool.acquire { |conn| conn.query(sql, *bind_values) }
    end

    # Execute a single read query.
    #
    # @param [String] sql
    #   a single SphinxQL statement
    #
    # @param [Object...] bind_values
    #   values to be substituted in place of '?' in the query
    #
    # @return [Array]
    #   an array of Hashes containing the matched records
    #
    # Note that SphinxQL does not support prepared statements.
    def query(sql, *bind_values)
      # The pooled connection returns one result set per statement; only the
      # first is relevant for a single statement.
      @pool.acquire { |conn| conn.query(sql, *bind_values).first }
    end

    # Execute a non-read query.
    #
    # @param [String] sql
    #   a SphinxQL query, such as INSERT or REPLACE
    #
    # @param [Object...] bind_values
    #   values to be substituted in place of '?' in the query
    #
    # @return [Integer]
    #   the number of affected rows
    #
    # Note that SphinxQL does not support prepared statements.
    def execute(sql, *bind_values)
      @pool.acquire { |conn| conn.execute(sql, *bind_values) }
    end

    # Disconnect from the remote host.
    #
    # There is no need to explicitly re-connect after invoking this;
    # connections are re-established as needed.
    def close
      @pool.dispose
    end

    private

    # Acquires (and immediately releases) one pooled connection, so that any
    # error raised while acquiring it surfaces at construction time.
    def assert_valid_pool
      @pool.acquire { nil }
    end
  end
end
@@ -0,0 +1,133 @@
1
+ # encoding: utf-8
2
+
3
+ ##
4
+ # Oedipus Sphinx 2 Search.
5
+ # Copyright © 2012 Chris Corbyn.
6
+ #
7
+ # See LICENSE file for details.
8
+ ##
9
+
10
+ require "thread"
11
+
12
module Oedipus
  class Connection
    # Provides a thread-safe pool of connections, with a specified TTL.
    class Pool
      # Seconds the sweeper thread sleeps between two expiry passes.
      SWEEP_INTERVAL = 15

      # Initialize a new connection pool with the given options.
      #
      # A background sweeper thread is started as part of initialization.
      #
      # @param [Hash] options
      #   configuration for the pool
      #
      # @option options [String] :host
      #   the host to use when allocating new connections
      #
      # @option options [Integer] :port
      #   the port to use when allocating new connections
      #
      # @option options [Integer] :size
      #   the maximum number of connections (defaults to 8)
      #
      # @option options [Integer] :ttl
      #   the length of time, in seconds, for which any given connection
      #   should live (defaults to 60)
      def initialize(options)
        @host = options[:host]
        @port = options[:port]

        @size = options.fetch(:size, 8)
        @ttl  = options.fetch(:ttl, 60)

        @available = []  # idle connections, most recently released last
        @used      = {}  # connections currently checked out
        @expiries  = {}  # connection => Time after which it may be swept
        @condition = ConditionVariable.new
        @lock      = Mutex.new

        sweeper
      end

      # Acquire a connection from the pool, for the duration of a block.
      #
      # The release of the connection is done automatically. When the pool is
      # exhausted, the caller blocks until another thread releases a
      # connection.
      #
      # @yield [Oedipus::Mysql]
      #   a connection object
      def acquire
        instance = nil

        until instance
          @lock.synchronize do
            if (instance = @available.pop)
              @used[instance] = instance
            elsif @size > (@available.size + @used.size)
              instance = new_instance
            else
              # Pool exhausted: sleep until #release broadcasts, then retry.
              @condition.wait(@lock)
            end
          end
        end

        yield instance
      ensure
        # Also runs (with nil) if creating the connection raised; #release
        # tolerates that.
        release(instance)
      end

      # Dispose all connections in the pool.
      #
      # Waits until all connections have finished processing current queries
      # and then releases them.
      def dispose
        loop do
          @lock.synchronize do
            while (instance = @available.pop)
              # Forget the expiry as well, so closed connections are not kept
              # referenced by the pool.
              @expiries.delete(instance)
              instance.close
            end

            @condition.wait(@lock) unless @used.empty?
          end

          break if empty?
        end
      end

      # Returns true if the pool is currently empty.
      #
      # @return [Boolean]
      #   true if no connections are pooled, false otherwise
      def empty?
        @lock.synchronize { @used.empty? && @available.empty? }
      end

      private

      # Hand a connection back to the pool and wake up every waiting thread.
      def release(instance)
        @lock.synchronize do
          @available << @used.delete(instance) if instance
          @condition.broadcast
        end
      end

      # Open a new connection, registered as in-use with a fresh expiry.
      # Must be called while holding @lock.
      def new_instance
        Oedipus::Mysql.new(@host, @port).tap do |instance|
          @used[instance]     = instance
          @expiries[instance] = Time.now + @ttl
        end
      end

      # Close connections past their ttl (runs in a new Thread)
      def sweeper
        Thread.new do
          loop do
            sleep SWEEP_INTERVAL
            sweep_expired
          end
        end
      end

      # Close and forget every idle connection whose expiry has passed.
      #
      # The idle list is split first and only then modified: deleting from an
      # Array while iterating over it skips the element following each
      # deletion, which would leave expired connections in the pool.
      def sweep_expired
        @lock.synchronize do
          now = Time.now
          expired, live = @available.partition { |instance| @expiries[instance] < now }

          @available.replace(live)

          expired.each do |instance|
            @expiries.delete(instance)
            instance.close
          end
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # encoding: utf-8
2
+
3
+ ##
4
+ # Oedipus Sphinx 2 Search.
5
+ # Copyright © 2012 Chris Corbyn.
6
+ #
7
+ # See LICENSE file for details.
8
+ ##
9
+
10
module Oedipus
  class Connection
    # Keeps a table of named connections on whatever object mixes it in.
    module Registry
      # Connect to Sphinx running SphinxQL.
      #
      # A new Connection is built on every call and stored under +key+,
      # taking the place of any connection stored there before; it can be
      # looked up again later through #connection.
      #
      # @example
      #   c = Oedipus.connect("127.0.0.1:9306")
      #   c = Oedipus.connect(host: "127.0.0.1", port: 9306)
      #   c = Oedipus.connect("127.0.0.1:9306", :dist_host)
      #
      # @param [String, Hash] options
      #   a 'hostname:port' string, or
      #   a Hash with :host and :port keys
      #
      # @param [Object] key
      #   an optional name for the connection
      #
      # @return [Connection]
      #   a client connected to SphinxQL
      def connect(options, key = :default)
        Connection.new(options).tap { |conn| connections[key] = conn }
      end

      # Lookup an already connected connection.
      #
      # @example
      #   c = Oedipus.connection
      #   c = Oedipus.connection(:dist_host)
      #
      # @param [Object] key
      #   an optional name for the connection
      #
      # @return [Connection]
      #   a client connected to SphinxQL
      #
      # @raise [ArgumentError]
      #   if nothing has been stored under +key+
      def connection(key = :default)
        connections.fetch(key) do
          raise ArgumentError, "Connection #{key} is not defined"
        end
      end

      # The table of stored connections, created on first use.
      #
      # @return [Hash]
      #   a key => Connection mapping
      def connections
        @connections ||= {}
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # encoding: utf-8
2
+
3
+ ##
4
+ # Oedipus Sphinx 2 Search.
5
+ # Copyright © 2012 Chris Corbyn.
6
+ #
7
+ # See LICENSE file for details.
8
+ ##
9
+
10
module Oedipus
  # Error type for failures reported by Sphinx; rescuable as a RuntimeError.
  class ConnectionError < RuntimeError; end
end
@@ -0,0 +1,320 @@
1
+ # encoding: utf-8
2
+
3
+ ##
4
+ # Oedipus Sphinx 2 Search.
5
+ # Copyright © 2012 Chris Corbyn.
6
+ #
7
+ # See LICENSE file for details.
8
+ ##
9
+
10
module Oedipus
  # Representation of a search index for querying.
  class Index
    # @return [Symbol] the name of the index
    attr_reader :name

    # Initialize the index named +name+ on the connection +conn+.
    #
    # @param [String, Symbol] name
    #   the name of an existing index in sphinx
    #
    # @param [Connection] conn
    #   an instance of Oedipus::Connection for querying
    def initialize(name, conn)
      @name    = name.to_sym
      @conn    = conn
      @builder = QueryBuilder.new(name)
    end

    # Insert the record with the ID +id+.
    #
    # @example
    #   index.insert(42, title: "example", views: 22)
    #
    # @param [Integer] id
    #   the unique ID of the document in the index
    #
    # @param [Hash] hash
    #   a symbol-keyed hash of data to insert
    #
    # @return [Integer]
    #   the number of rows inserted (currently always 1)
    def insert(id, hash)
      @conn.execute(*@builder.insert(id, hash))
    end

    # Update the record with the ID +id+.
    #
    # @example
    #   index.update(42, views: 25)
    #
    # @param [Integer] id
    #   the unique ID of the document in the index
    #
    # @param [Hash] hash
    #   a symbol-keyed hash of data to set
    #
    # @return [Integer]
    #   the number of rows updated (1 or 0)
    def update(id, hash)
      @conn.execute(*@builder.update(id, hash))
    end

    # Completely replace the record with the ID +id+.
    #
    # @example
    #   index.replace(42, title: "New title", views: 25)
    #
    # @param [Integer] id
    #   the unique ID of the document in the index
    #
    # @param [Hash] hash
    #   a symbol-keyed hash of data to insert
    #
    # @return [Integer]
    #   the number of rows inserted (currently always 1)
    def replace(id, hash)
      @conn.execute(*@builder.replace(id, hash))
    end

    # Delete the record with the ID +id+.
    #
    # @example
    #   index.delete(42)
    #
    # @param [Integer] id
    #   the unique ID of the document in the index
    #
    # @return [Integer]
    #   the number of rows deleted (currently always 1 or 0)
    def delete(id)
      @conn.execute(*@builder.delete(id))
    end

    # Fetch a single document by its ID.
    #
    # Returns the Hash of attributes if found, otherwise nil.
    #
    # @param [Integer] id
    #   the ID of the document
    #
    # @return [Hash, nil]
    #   the attributes of the record
    def fetch(id)
      search(id: id)[:records].first
    end

    # Perform a search on the index.
    #
    # Either one or two arguments may be passed, with either one being mutually
    # optional.
    #
    # @example Fulltext search
    #   index.search("cats AND dogs")
    #
    # @example Fulltext search with attribute filters
    #   index.search("cats AND dogs", author_id: 57)
    #
    # @example Attribute search only
    #   index.search(author_id: 57)
    #
    # When performing a faceted search, the base query is inherited by each facet, which
    # may override (or refine) the query.
    #
    # The results returned include a :facets key, containing the results for each facet.
    #
    # @example Performing a faceted search
    #   index.search(
    #     "cats | dogs",
    #     category_id: 7,
    #     facets: {
    #       popular: {views: Oedipus.gt(150)},
    #       recent:  {published_at: Oedipus.gt(Time.now.to_i - 7 * 86400)}
    #     }
    #   )
    #
    # To perform an n-dimensional faceted search, add a :facets option to each
    # facet. Each facet will inherit from its immediate parent, which inherits
    # from its parent, up to the root query.
    #
    # @example Performing a n-dimensional faceted search
    #   index.search(
    #     "cats | dogs",
    #     facets: {
    #       popular: {
    #         views: Oedipus.gte(1000),
    #         facets: {
    #           in_title: "@title (%{query})"
    #         }
    #       }
    #     }
    #   )
    #
    # The results in a n-dimensional faceted search are returned with each set
    # of facet results in turn containing a :facets element.
    #
    # @param [String] query
    #   a fulltext query
    #
    # @param [Hash] options
    #   attribute filters, limits, sorting, facets and other options
    #
    # @option [Hash] facets
    #   variations on the main search to return nested in the result
    #
    # @option [Array] attrs
    #   attributes to fetch from the index, either as Symbols, or SphinxQL fragments
    #
    # @option [Hash] order
    #   an attr => direction mapping of sort orders
    #
    # @option [Integer] limit
    #   a limit to apply, defaults to 20 inside Sphinx itself
    #
    # @option [Integer] offset
    #   an offset to apply, defaults to 0
    #
    # @option [Object] everything_else
    #   all additional options are taken to be attribute filters
    #
    # @return [Hash]
    #   a Hash containing meta data, with the records in :records, and if any
    #   facets were included, the facets inside the :facets Hash
    def search(*args)
      expand_facet_tree(multi_search(deep_merge_facets(args)))
    end

    # Perform a faceted search on the index, using a base query and one or more facets.
    #
    # This method is deprecated and will be removed in version 1.0. Use #search instead.
    #
    # @deprecated
    #
    # @see #search
    def faceted_search(*args)
      search(*args)
    end

    # Perform a batch search on the index.
    #
    # A Hash of queries is passed, whose keys are used to collate the results in
    # the return value.
    #
    # Each query may either be a string (fulltext search), a Hash (attribute search)
    # or an array containing both. In other words, the same arguments accepted by
    # the #search method.
    #
    # @example
    #   index.multi_search(
    #     cat_results: ["cats", { author_id: 57 }],
    #     dog_results: ["dogs", { author_id: 57 }]
    #   )
    #
    # @param [Hash] queries
    #   a hash whose keys map to queries
    #
    # @return [Hash]
    #   a Hash whose keys map 1:1 with the input Hash, each element containing the
    #   same results as those returned by the #search method.
    #
    # @raise [ArgumentError]
    #   if +queries+ is not a Hash, or one of its queries is malformed
    def multi_search(queries)
      unless queries.kind_of?(Hash)
        raise ArgumentError, "Argument must be a Hash of named queries (#{queries.class} given)"
      end

      queries.each_with_object({}) do |(key, args), result|
        str, *values = @builder.select(*extract_query_data(args))

        # NOTE(review): SHOW META describes the statement last run on the
        # connection that serves it. These are two separate @conn.query calls;
        # confirm both are guaranteed to reach the same underlying connection
        # when the Connection is shared between threads.
        records = @conn.query("#{str};", *values)
        meta    = @conn.query("SHOW META;")

        result[key] = meta_to_hash(meta).tap do |r|
          r[:records] = records.map { |row| symbolize_keys(row) }
        end
      end
    end

    private

    # Copy +row+ into a new Hash whose keys have been converted with #to_sym.
    def symbolize_keys(row)
      row.each_with_object({}) { |(k, v), out| out[k.to_sym] = v }
    end

    # Convert the rows of a SHOW META result into a symbol-keyed Hash.
    #
    # The first two values of each row are taken as the variable name and its
    # value. "total" and "total_found" become Integers, "time" a Float. The
    # per-keyword docs[N], hits[N] and keyword[N] rows are collected in row
    # order and, when all three are present, :docs and :hits are re-keyed by
    # keyword. Any other variable is stored unchanged.
    def meta_to_hash(meta)
      hash = {}

      meta.each do |m|
        n, v = m.values
        case n
        when "total_found", "total" then hash[n.to_sym] = v.to_i
        when "time"                 then hash[:time] = v.to_f
        when /\Adocs\[\d+\]\Z/      then (hash[:docs] ||= []) << v.to_i
        when /\Ahits\[\d+\]\Z/      then (hash[:hits] ||= []) << v.to_i
        when /\Akeyword\[\d+\]\Z/   then (hash[:keywords] ||= []) << v
        else                             hash[n.to_sym] = v
        end
      end

      if hash.key?(:docs) && hash.key?(:hits) && hash.key?(:keywords)
        hash[:docs] = Hash[hash[:keywords].zip(hash[:docs])]
        hash[:hits] = Hash[hash[:keywords].zip(hash[:hits])]
      end

      hash
    end

    # Normalize query arguments into a [query_string, options_hash] pair.
    #
    # +args+ may be a String, a Hash, or an Array of one or two elements.
    # When only a Hash is given, +default_query+ is used as the query string.
    # The options Hash is always a copy, so callers may mutate it freely.
    #
    # @raise [ArgumentError]
    #   on a wrong number of elements or an unsupported first element
    def extract_query_data(args, default_query = "")
      args = [args] unless Array === args

      unless (1..2) === args.size
        raise ArgumentError, "Wrong number of query arguments (#{args.size} for 1..2)"
      end

      case args[0]
      when String then [args[0], args.fetch(1, {}).dup]
      when Hash   then [default_query, args[0].dup]
      else raise ArgumentError, "Invalid query argument type #{args.first.class}"
      end
    end

    # Rebuild the nested result from the flat, path-keyed results.
    #
    # Each key of +result+ is an Array of facet names (empty for the root
    # query); its results are merged in under a chain of :facets Hashes.
    def expand_facet_tree(result)
      tree = {}

      result.each do |path, data|
        node = path.inject(tree) do |parent, facet_name|
          (parent[:facets] ||= {})[facet_name] ||= {}
        end

        node.merge!(data)
      end

      tree
    end

    # Flatten a (possibly nested) faceted query into a Hash of
    # path => [query, options], where path is the Array of facet names leading
    # to that query (empty for the root).
    #
    # Every facet inherits the options of its parent and may use the
    # "%{query}" placeholder to embed the parent's query string.
    def deep_merge_facets(base_args, list = {}, path = [])
      base_query, base_options = extract_query_data(base_args)

      # :facets is structural, not an attribute filter; it must not be
      # inherited by the facets themselves.
      facets = base_options.delete(:facets)

      list[path] = [base_query, base_options]

      unless facets.nil?
        facets.each do |facet_name, facet_args|
          facet_query, facet_options = extract_query_data(facet_args, base_query)

          deep_merge_facets(
            [
              # Block form: the parent query is inserted verbatim. As a plain
              # replacement String, sequences such as "\1" or "\\" inside the
              # query would be interpreted as back-references.
              facet_query.gsub("%{query}") { base_query },
              base_options.merge(facet_options)
            ],
            list,
            path + [facet_name]
          )
        end
      end

      list
    end
  end
end