dbldots_oedipus 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. data/.gitignore +10 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE +20 -0
  5. data/README.md +435 -0
  6. data/Rakefile +26 -0
  7. data/ext/oedipus/extconf.rb +72 -0
  8. data/ext/oedipus/lexing.c +96 -0
  9. data/ext/oedipus/lexing.h +20 -0
  10. data/ext/oedipus/oedipus.c +339 -0
  11. data/ext/oedipus/oedipus.h +58 -0
  12. data/lib/oedipus.rb +40 -0
  13. data/lib/oedipus/comparison.rb +88 -0
  14. data/lib/oedipus/comparison/between.rb +21 -0
  15. data/lib/oedipus/comparison/equal.rb +21 -0
  16. data/lib/oedipus/comparison/gt.rb +21 -0
  17. data/lib/oedipus/comparison/gte.rb +21 -0
  18. data/lib/oedipus/comparison/in.rb +21 -0
  19. data/lib/oedipus/comparison/lt.rb +21 -0
  20. data/lib/oedipus/comparison/lte.rb +21 -0
  21. data/lib/oedipus/comparison/not.rb +25 -0
  22. data/lib/oedipus/comparison/not_equal.rb +21 -0
  23. data/lib/oedipus/comparison/not_in.rb +21 -0
  24. data/lib/oedipus/comparison/outside.rb +21 -0
  25. data/lib/oedipus/comparison/shortcuts.rb +144 -0
  26. data/lib/oedipus/connection.rb +124 -0
  27. data/lib/oedipus/connection/pool.rb +133 -0
  28. data/lib/oedipus/connection/registry.rb +56 -0
  29. data/lib/oedipus/connection_error.rb +14 -0
  30. data/lib/oedipus/index.rb +320 -0
  31. data/lib/oedipus/query_builder.rb +185 -0
  32. data/lib/oedipus/rspec/test_rig.rb +132 -0
  33. data/lib/oedipus/version.rb +12 -0
  34. data/oedipus.gemspec +42 -0
  35. data/spec/data/.gitkeep +0 -0
  36. data/spec/integration/connection/registry_spec.rb +50 -0
  37. data/spec/integration/connection_spec.rb +156 -0
  38. data/spec/integration/index_spec.rb +442 -0
  39. data/spec/spec_helper.rb +16 -0
  40. data/spec/unit/comparison/between_spec.rb +36 -0
  41. data/spec/unit/comparison/equal_spec.rb +22 -0
  42. data/spec/unit/comparison/gt_spec.rb +22 -0
  43. data/spec/unit/comparison/gte_spec.rb +22 -0
  44. data/spec/unit/comparison/in_spec.rb +22 -0
  45. data/spec/unit/comparison/lt_spec.rb +22 -0
  46. data/spec/unit/comparison/lte_spec.rb +22 -0
  47. data/spec/unit/comparison/not_equal_spec.rb +22 -0
  48. data/spec/unit/comparison/not_in_spec.rb +22 -0
  49. data/spec/unit/comparison/not_spec.rb +37 -0
  50. data/spec/unit/comparison/outside_spec.rb +36 -0
  51. data/spec/unit/comparison/shortcuts_spec.rb +125 -0
  52. data/spec/unit/comparison_spec.rb +109 -0
  53. data/spec/unit/query_builder_spec.rb +205 -0
  54. metadata +164 -0
@@ -0,0 +1,124 @@
1
+ # encoding: utf-8
2
+
3
+ ##
4
+ # Oedipus Sphinx 2 Search.
5
+ # Copyright © 2012 Chris Corbyn.
6
+ #
7
+ # See LICENSE file for details.
8
+ ##
9
+
10
module Oedipus
  # Provides an interface for talking to SphinxQL.
  #
  # Every statement is run on a connection borrowed from an internal
  # Connection::Pool for the duration of that single call.
  class Connection
    attr_reader :options

    # Instantiate a new Connection to a SphinxQL host.
    #
    # @param [String, Hash] options
    #   either a 'hostname:port' string, or a Hash containing :host and :port.
    #   A Hash may additionally carry :pool_size (maximum number of pooled
    #   connections, 8 when absent) and :verify (pass false to skip the
    #   initial check described below).
    #
    # The :port value is always normalised with #to_i before it is stored.
    #
    # Unless :verify is false, one connection is acquired from the pool
    # (and immediately released) before the constructor returns.
    def initialize(options)
      settings =
        case options
        when String then Hash[[:host, :port].zip(options.split(":"))]
        else options.dup
        end
      settings[:port] = settings[:port].to_i
      @options = settings

      @pool = Pool.new(
        host: settings[:host],
        port: settings[:port],
        size: settings.fetch(:pool_size, 8),
        ttl:  60
      )

      assert_valid_pool unless settings[:verify] == false
    end

    # Access a specific index for querying.
    #
    # @param [String] index_name
    #   the name of an existing index in Sphinx
    #
    # @return [Index]
    #   an index bound to this connection
    def [](index_name)
      Index.new(index_name, self)
    end

    alias_method :index, :[]

    # Execute one or more queries in a batch.
    #
    # Queries should be separated by semicolons.
    #
    # @param [String] sql
    #   one or more SphinxQL statements, separated by semicolons
    #
    # @param [Object...] bind_values
    #   values to be substituted in place of '?' in the query
    #
    # @return [Array]
    #   whatever the pooled connection's #query returns; documented upstream
    #   as an array of arrays containing the returned records
    #
    # Note that SphinxQL does not support prepared statements.
    def multi_query(sql, *bind_values)
      @pool.acquire do |conn|
        conn.query(sql, *bind_values)
      end
    end

    # Execute a single read query.
    #
    # @param [String] sql
    #   a single SphinxQL statement
    #
    # @param [Object...] bind_values
    #   values to be substituted in place of '?' in the query
    #
    # @return [Array]
    #   the first element of the pooled connection's #query result;
    #   documented upstream as an array of Hashes of matched records
    #
    # Note that SphinxQL does not support prepared statements.
    def query(sql, *bind_values)
      @pool.acquire do |conn|
        result_sets = conn.query(sql, *bind_values)
        result_sets.first
      end
    end

    # Execute a non-read query.
    #
    # @param [String] sql
    #   a SphinxQL query, such as INSERT or REPLACE
    #
    # @param [Object...] bind_values
    #   values to be substituted in place of '?' in the query
    #
    # @return [Fixnum]
    #   the value returned by the pooled connection's #execute; documented
    #   upstream as the number of affected rows
    #
    # Note that SphinxQL does not support prepared statements.
    def execute(sql, *bind_values)
      @pool.acquire do |conn|
        conn.execute(sql, *bind_values)
      end
    end

    # Disconnect from the remote host by disposing of the pool.
    #
    # There is no need to explicitly re-connect after invoking this;
    # the pool allocates connections again as they are requested.
    def close
      @pool.dispose
    end

    private

    # Borrow one connection and hand it straight back; any error raised
    # while acquiring it propagates to the caller.
    def assert_valid_pool
      @pool.acquire { |_conn| nil }
    end
  end
end
@@ -0,0 +1,133 @@
1
+ # encoding: utf-8
2
+
3
+ ##
4
+ # Oedipus Sphinx 2 Search.
5
+ # Copyright © 2012 Chris Corbyn.
6
+ #
7
+ # See LICENSE file for details.
8
+ ##
9
+
10
+ require "thread"
11
+
12
module Oedipus
  class Connection
    # Provides a pool of Oedipus::Mysql connections with a specified TTL.
    #
    # All bookkeeping (@available, @used, @expiries) is guarded by a single
    # Mutex; callers that find the pool exhausted wait on a
    # ConditionVariable until a connection is released.
    class Pool
      # Initialize a new connection pool with the given options.
      #
      # A background thread is started that periodically closes idle
      # connections whose TTL has elapsed.
      #
      # @param [Hash] options
      #   configuration for the pool
      #
      # @option [String] host
      #   the host to use when allocating new connections
      #
      # @option [Fixnum] port
      #   the port to use when allocating new connections
      #
      # @option [Fixnum] size
      #   the maximum number of connections (defaults to 8)
      #
      # @option [Fixnum] ttl
      #   the length of time, in seconds, for which any given connection
      #   should live (defaults to 60)
      #
      # @option [Numeric] sweep_interval
      #   seconds to sleep between sweeps for expired connections
      #   (defaults to 15); should be a positive number
      def initialize(options)
        @host = options[:host]
        @port = options[:port]

        @size           = options.fetch(:size, 8)
        @ttl            = options.fetch(:ttl, 60)
        @sweep_interval = options.fetch(:sweep_interval, 15)

        @available = []
        @used      = {}
        @expiries  = {}
        @condition = ConditionVariable.new
        @lock      = Mutex.new

        sweeper
      end

      # Acquire a connection from the pool, for the duration of a block.
      #
      # An idle connection is reused when there is one; otherwise a new one
      # is created while the pool is below its maximum size; otherwise the
      # caller waits until another caller releases a connection.
      #
      # The release of the connection is done automatically, even when the
      # block raises.
      #
      # @yieldparam [Oedipus::Mysql] instance
      #   a connection object
      #
      # @return [Object]
      #   the value returned by the block
      def acquire
        instance = nil

        until instance
          @lock.synchronize do
            if (instance = @available.pop)
              @used[instance] = instance
            elsif @size > (@available.size + @used.size)
              instance = new_instance
            else
              # Pool exhausted: sleep until #release broadcasts, then retry.
              @condition.wait(@lock)
            end
          end
        end

        yield instance
      ensure
        # instance is nil when allocation itself raised; release copes.
        release(instance)
      end

      # Dispose all connections in the pool.
      #
      # Closes every idle connection, then waits for connections that are
      # still in use to be released and closes those too.
      def dispose
        loop do
          @lock.synchronize do
            while (instance = @available.pop)
              # Drop the expiry entry too, so closed connections are not
              # kept referenced by the pool.
              @expiries.delete(instance)
              instance.close
            end

            @condition.wait(@lock) if @used.size > 0
          end

          break if empty?
        end
      end

      # Returns true if the pool is currently empty.
      #
      # @return [Boolean]
      #   true if no connections are pooled, false otherwise
      def empty?
        @lock.synchronize { @used.size == 0 && @available.size == 0 }
      end

      private

      # Move +instance+ from the in-use set back to the idle list and wake
      # every thread waiting in #acquire or #dispose.
      def release(instance)
        @lock.synchronize do
          returned = @used.delete(instance) if instance
          # Never push nil onto the idle list.
          @available << returned if returned
          @condition.broadcast
        end
      end

      # Allocate a connection, mark it in use and record its expiry time.
      # Called with @lock held.
      def new_instance
        Oedipus::Mysql.new(@host, @port).tap do |instance|
          @used[instance] = instance
          @expiries[instance] = Time.now + @ttl
        end
      end

      # Close connections past their ttl (runs in a new Thread)
      def sweeper
        Thread.new do
          loop do
            sleep @sweep_interval
            sweep
          end
        end
      end

      # Close and forget every idle connection whose expiry time has passed.
      #
      # The idle list is partitioned first and only then modified, so no
      # element is skipped (deleting from an Array while iterating it with
      # #each skips the element following each deletion).
      def sweep
        @lock.synchronize do
          now = Time.now
          stale, fresh = @available.partition do |instance|
            expiry = @expiries[instance]
            expiry && expiry < now
          end

          @available.replace(fresh)
          stale.each do |instance|
            @expiries.delete(instance)
            instance.close
          end
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # encoding: utf-8
2
+
3
+ ##
4
+ # Oedipus Sphinx 2 Search.
5
+ # Copyright © 2012 Chris Corbyn.
6
+ #
7
+ # See LICENSE file for details.
8
+ ##
9
+
10
module Oedipus
  class Connection
    # Keeps named Connection objects so they can be looked up later.
    #
    # The registry state lives in an instance variable of whatever object
    # this module's methods are invoked on.
    module Registry
      # Connect to Sphinx running SphinxQL and remember the connection.
      #
      # A new Connection is built on every call and stored under +key+,
      # replacing whatever was previously stored under that key.
      #
      # @example
      #   c = Oedipus.connect("127.0.0.1:9306")
      #   c = Oedipus.connect(host: "127.0.0.1", port: 9306)
      #   c = Oedipus.connect("127.0.0.1:9306", :dist_host)
      #
      # @param [String|Hash] options
      #   a 'hostname:port' string, or
      #   a Hash with :host and :port keys
      #
      # @param [Object] key
      #   an optional name for the connection
      #
      # @return [Connection]
      #   the newly created connection
      def connect(options, key = :default)
        connections.store(key, Connection.new(options))
      end

      # Lookup a connection previously created with #connect.
      #
      # @example
      #   c = Oedipus.connection
      #   c = Oedipus.connection(:dist_host)
      #
      # @param [Object] key
      #   an optional name for the connection
      #
      # @return [Connection]
      #   the connection stored under +key+
      #
      # @raise [ArgumentError]
      #   if nothing is stored under +key+
      def connection(key = :default)
        connections.fetch(key) do
          raise ArgumentError, "Connection #{key} is not defined"
        end
      end

      # The Hash of key => Connection, created on first access.
      #
      # @return [Hash]
      def connections
        return @connections if @connections

        @connections = {}
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # encoding: utf-8
2
+
3
+ ##
4
+ # Oedipus Sphinx 2 Search.
5
+ # Copyright © 2012 Chris Corbyn.
6
+ #
7
+ # See LICENSE file for details.
8
+ ##
9
+
10
module Oedipus
  # Error class used for any failure coming from Sphinx.
  class ConnectionError < RuntimeError; end
end
@@ -0,0 +1,320 @@
1
+ # encoding: utf-8
2
+
3
+ ##
4
+ # Oedipus Sphinx 2 Search.
5
+ # Copyright © 2012 Chris Corbyn.
6
+ #
7
+ # See LICENSE file for details.
8
+ ##
9
+
10
module Oedipus
  # Representation of a search index for querying.
  class Index
    attr_reader :name

    # Initialize the index named +name+ on the connection +conn+.
    #
    # @param [Symbol] name
    #   the name of an existing index in sphinx
    #
    # @param [Connection] conn
    #   an instance of Oedipus::Connection for querying
    def initialize(name, conn)
      @name    = name.to_sym
      @conn    = conn
      @builder = QueryBuilder.new(name)
    end

    # Insert the record with the ID +id+.
    #
    # @example
    #   index.insert(42, title: "example", views: 22)
    #
    # @param [Integer] id
    #   the unique ID of the document in the index
    #
    # @param [Hash] hash
    #   a symbol-keyed hash of data to insert
    #
    # @return [Fixnum]
    #   the number of rows inserted (currently always 1)
    def insert(id, hash)
      @conn.execute(*@builder.insert(id, hash))
    end

    # Update the record with the ID +id+.
    #
    # @example
    #   index.update(42, views: 25)
    #
    # @param [Integer] id
    #   the unique ID of the document in the index
    #
    # @param [Hash] hash
    #   a symbol-keyed hash of data to set
    #
    # @return [Fixnum]
    #   the number of rows updated (1 or 0)
    def update(id, hash)
      @conn.execute(*@builder.update(id, hash))
    end

    # Completely replace the record with the ID +id+.
    #
    # @example
    #   index.replace(42, title: "New title", views: 25)
    #
    # @param [Integer] id
    #   the unique ID of the document in the index
    #
    # @param [Hash] hash
    #   a symbol-keyed hash of data to insert
    #
    # @return [Fixnum]
    #   the number of rows inserted (currently always 1)
    def replace(id, hash)
      @conn.execute(*@builder.replace(id, hash))
    end

    # Delete the record with the ID +id+.
    #
    # @example
    #   index.delete(42)
    #
    # @param [Integer] id
    #   the unique ID of the document in the index
    #
    # @return [Fixnum]
    #   the number of rows deleted (currently always 1 or 0)
    def delete(id)
      @conn.execute(*@builder.delete(id))
    end

    # Fetch a single document by its ID.
    #
    # Returns the Hash of attributes if found, otherwise nil.
    #
    # @param [Fixnum] id
    #   the ID of the document
    #
    # @return [Hash]
    #   the attributes of the record
    def fetch(id)
      search(id: id)[:records].first
    end

    # Perform a search on the index.
    #
    # One or two arguments may be passed: a fulltext query String, a Hash
    # of options, or both (String first).
    #
    # @example Fulltext search
    #   index.search("cats AND dogs")
    #
    # @example Fulltext search with attribute filters
    #   index.search("cats AND dogs", author_id: 57)
    #
    # @example Attribute search only
    #   index.search(author_id: 57)
    #
    # When performing a faceted search, the base query is inherited by each facet, which
    # may override (or refine) the query.
    #
    # The results returned include a :facets key, containing the results for each facet.
    #
    # @example Performing a faceted search
    #   index.search(
    #     "cats | dogs",
    #     category_id: 7,
    #     facets: {
    #       popular: {views:        Oedipus.gt(150)},
    #       recent:  {published_at: Oedipus.gt(Time.now.to_i - 7 * 86400)}
    #     }
    #   )
    #
    # To perform an n-dimensional faceted search, add a :facets option to each
    # facet. Each facet will inherit from its immediate parent, which inherits
    # from its parent, up to the root query.
    #
    # A facet's query String may contain the placeholder %{query}, which is
    # replaced verbatim by the parent's fulltext query.
    #
    # @example Performing a n-dimensional faceted search
    #   index.search(
    #     "cats | dogs",
    #     facets: {
    #       popular: {
    #         views:  Oedipus.gte(1000),
    #         facets: {
    #           in_title: "@title (%{query})"
    #         }
    #       }
    #     }
    #   )
    #
    # The results in a n-dimensional faceted search are returned with each set
    # of facet results in turn containing a :facets element.
    #
    # @param [String] query
    #   a fulltext query
    #
    # @param [Hash] options
    #   attribute filters, limits, sorting, facets and other options
    #
    # @option [Hash] facets
    #   variations on the main search to return nested in the result
    #
    # @option [Array] attrs
    #   attributes to fetch from the index, either as Symbols, or SphinxQL fragments
    #
    # @option [Hash] order
    #   an attr => direction mapping of sort orders
    #
    # @option [Fixnum] limit
    #   a limit to apply, defaults to 20 inside Sphinx itself
    #
    # @option [Fixnum] offset
    #   an offset to apply, defaults to 0
    #
    # @option [Object] everything_else
    #   all additional options are taken to be attribute filters
    #
    # @return [Hash]
    #   a Hash containing meta data, with the records in :records, and if any
    #   facets were included, the facets inside the :facets Hash
    #
    # @raise [ArgumentError]
    #   if the arguments are not one or two values starting with a String or Hash
    def search(*args)
      expand_facet_tree(multi_search(deep_merge_facets(args)))
    end

    # Perform a faceted search on the index, using a base query and one or more facets.
    #
    # This method is deprecated and will be removed in version 1.0. Use #search instead.
    #
    # @deprecated
    #
    # @see #search
    def faceted_search(*args)
      search(*args)
    end

    # Perform a batch search on the index.
    #
    # A Hash of queries is passed, whose keys are used to collate the results in
    # the return value.
    #
    # Each query may either be a string (fulltext search), a Hash (attribute search)
    # or an array containing both. In other words, the same arguments accepted by
    # the #search method.
    #
    # @example
    #   index.multi_search(
    #     cat_results: ["cats", { author_id: 57 }],
    #     dog_results: ["dogs", { author_id: 57 }]
    #   )
    #
    # @param [Hash] queries
    #   a hash whose keys map to queries
    #
    # @return [Hash]
    #   a Hash whose keys map 1:1 with the input Hash, each element containing
    #   the parsed SHOW META values plus the matched rows (with symbolized
    #   keys) under :records
    #
    # @raise [ArgumentError]
    #   if +queries+ is not a Hash
    def multi_search(queries)
      unless queries.kind_of?(Hash)
        raise ArgumentError, "Argument must be a Hash of named queries (#{queries.class} given)"
      end

      queries.each_with_object({}) do |(key, args), results|
        sql, *values = @builder.select(*extract_query_data(args))

        # NOTE(review): the SELECT and its SHOW META are two separate calls
        # on @conn; if @conn can serve them on different underlying
        # connections the meta may not belong to this SELECT -- confirm.
        records = @conn.query("#{sql};", *values)
        meta    = @conn.query("SHOW META;")

        results[key] = meta_to_hash(meta).tap do |summary|
          summary[:records] = records.map do |row|
            row.each_with_object({}) { |(column, value), out| out[column.to_sym] = value }
          end
        end
      end
    end

    private

    # Convert SHOW META rows into a symbol-keyed Hash.
    #
    # Each row is expected to yield a name and a value from Hash#values, in
    # that order. total/total_found become Integers, time becomes a Float,
    # and the indexed docs[n]/hits[n]/keyword[n] entries are collected and,
    # when all three are present, :docs and :hits are re-keyed by keyword.
    def meta_to_hash(meta)
      summary = {}

      meta.each do |row|
        name, value = row.values
        case name
        when "total_found", "total" then summary[name.to_sym] = value.to_i
        when "time"                 then summary[:time]       = value.to_f
        when /\Adocs\[\d+\]\Z/      then (summary[:docs]     ||= []) << value.to_i
        when /\Ahits\[\d+\]\Z/      then (summary[:hits]     ||= []) << value.to_i
        when /\Akeyword\[\d+\]\Z/   then (summary[:keywords] ||= []) << value
        else summary[name.to_sym] = value
        end
      end

      if summary.key?(:docs) && summary.key?(:hits) && summary.key?(:keywords)
        keywords = summary[:keywords]
        summary[:docs] = Hash[keywords.zip(summary[:docs])]
        summary[:hits] = Hash[keywords.zip(summary[:hits])]
      end

      summary
    end

    # Normalise search arguments into a [query, options] pair.
    #
    # +args+ may be a String, a Hash, or an Array of one or two elements.
    # The options Hash is always a copy, so callers' Hashes are not mutated.
    # When only a Hash is given, +default_query+ is used as the query.
    def extract_query_data(args, default_query = "")
      args = [args] unless Array === args

      unless (1..2) === args.size
        raise ArgumentError, "Wrong number of query arguments (#{args.size} for 1..2)"
      end

      case args[0]
      when String then [args[0], args.fetch(1, {}).dup]
      when Hash   then [default_query, args[0].dup]
      else raise ArgumentError, "Invalid query argument type #{args.first.class}"
      end
    end

    # Turn the flat, path-keyed result of #multi_search into a nested tree.
    #
    # Keys are Arrays of facet names ([] for the root query); each name in
    # a path descends one level through a :facets Hash.
    def expand_facet_tree(result)
      {}.tap do |tree|
        result.each do |path, data|
          node = path.inject(tree) do |parent, facet_name|
            (parent[:facets] ||= {})[facet_name] ||= {}
          end

          node.merge!(data)
        end
      end
    end

    # Flatten a query and its (possibly nested) :facets into a Hash that
    # maps each facet path (an Array of names) to [query, options].
    #
    # Each facet inherits the options of its parent, with its own options
    # taking precedence, and may embed the parent query via %{query}.
    def deep_merge_facets(base_args, list = {}, path = [])
      base_query, base_options = extract_query_data(base_args)

      facets = base_options.delete(:facets)

      list.merge!(path => [base_query, base_options])

      unless facets.nil?
        facets.each do |facet_name, facet_args|
          facet_query, facet_options = extract_query_data(facet_args, base_query)

          deep_merge_facets(
            [
              # Block form: with a String replacement, gsub would interpret
              # backslash sequences (\', \0, \\) inside base_query, which
              # corrupts queries containing SphinxQL escapes.
              facet_query.gsub("%{query}") { base_query },
              base_options.merge(facet_options)
            ],
            list,
            path.dup << facet_name
          )
        end
      end

      list
    end
  end
end