delsolr 0.1.6 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/{README.txt → README.rdoc} +0 -0
- data/Rakefile +9 -0
- data/lib/delsolr.rb +7 -278
- data/lib/delsolr/client.rb +301 -0
- data/lib/delsolr/{configuration.rb → client/configuration.rb} +0 -0
- data/lib/delsolr/{query_builder.rb → client/query_builder.rb} +1 -1
- data/lib/delsolr/{response.rb → client/response.rb} +7 -7
- data/lib/delsolr/document.rb +12 -22
- data/lib/delsolr/extensions.rb +15 -1
- metadata +65 -27
- data/test/test_client.rb +0 -268
- data/test/test_helper.rb +0 -2
- data/test/test_query_builder.rb +0 -179
- data/test/test_response.rb +0 -145
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4de05b98f6f01c63e8aac3747b3db1f5ec80095b
|
4
|
+
data.tar.gz: 0884ee4981d08641891f106121171a3593f034ab
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 714d83d32d936eaf17f8204dc67028c4451d704e037be49f325e8b2e8e74c68ce2ea72c5d08e3e92f62c940956d80c01d6a613be1d69231df86ea8f6141fe0b4
|
7
|
+
data.tar.gz: 29460a4feb7892d5d55445c2e8846fe64c14f820e909ea8c4378bcf10b713f716e859d2216091fa457e53bff2518e8ac7eae618be1bfaf80f266216cd10889ad
|
data/{README.txt → README.rdoc}
RENAMED
File without changes
|
data/Rakefile
ADDED
data/lib/delsolr.rb
CHANGED
@@ -1,289 +1,18 @@
|
|
1
1
|
#
|
2
2
|
# DelSolr
|
3
|
-
#
|
3
|
+
#
|
4
4
|
# ben@avvo.com 9.1.2008
|
5
5
|
#
|
6
6
|
# see README.rdoc
|
7
7
|
#
|
8
8
|
|
9
|
-
|
10
|
-
require 'net/http'
|
11
|
-
|
9
|
+
require 'faraday'
|
10
|
+
require 'json'
|
12
11
|
require 'digest/md5'
|
13
12
|
|
14
|
-
require File.join(File.dirname(__FILE__), 'delsolr', 'response')
|
15
|
-
require File.join(File.dirname(__FILE__), 'delsolr', 'configuration')
|
16
|
-
require File.join(File.dirname(__FILE__), 'delsolr', 'query_builder')
|
17
|
-
require File.join(File.dirname(__FILE__), 'delsolr', 'document')
|
18
|
-
require File.join(File.dirname(__FILE__), 'delsolr', 'extensions')
|
19
|
-
|
13
|
+
require 'delsolr/extensions'
|
20
14
|
|
21
15
|
module DelSolr
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
attr_reader :configuration, :logger
|
26
|
-
|
27
|
-
#
|
28
|
-
# [<b><tt>:server</tt></b>]
|
29
|
-
# the server you want to connect to
|
30
|
-
#
|
31
|
-
# [<b><tt>:port</tt></b>]
|
32
|
-
# the port you want to connect to
|
33
|
-
#
|
34
|
-
# [<b><tt>:cache</tt></b>]
|
35
|
-
# (optional) a cache instance (any object that supports get and set)
|
36
|
-
#
|
37
|
-
# [<b><tt>:shortcuts</tt></b>]
|
38
|
-
# (optional) a list of values in the doc fields to generate short cuts for (ie: [:scores, :id], you will be able to call <tt>rsp.scores</tt> and have it return an array of scores, likewise for <tt>ids</tt>.) Defaults to [:id, :unique_id, :score]
|
39
|
-
#
|
40
|
-
# [<b><tt>:path</tt></b>]
|
41
|
-
# (optional) the path of the solr install (defaults to "/solr")
|
42
|
-
#
|
43
|
-
# [<b><tt>:logger</tt></b>]
|
44
|
-
# (optional) Log4r logger object
|
45
|
-
def initialize(options = {})
|
46
|
-
@configuration = DelSolr::Client::Configuration.new(options[:server], options[:port], options[:timeout], options[:path])
|
47
|
-
@cache = options[:cache]
|
48
|
-
@logger = options[:logger]
|
49
|
-
@shortcuts = options[:shortcuts]
|
50
|
-
end
|
51
|
-
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# <tt>request_handler</tt> - type of query to perform (should match up w/ request handlers defined in solrconfig.xml)
|
55
|
-
#
|
56
|
-
#
|
57
|
-
# options
|
58
|
-
#
|
59
|
-
# [<b><tt>:query</tt></b>]
|
60
|
-
# (required) effectively the 'q' param in the solr URL. The treatment of <tt>:query</tt> depends on the type
|
61
|
-
# of request handler you are using. The supported values are Strings and Hashes. Any valid Lucene query string is acceptable
|
62
|
-
# (ie: :query => "brand:apple" and :query => "apply" are both valid). If given a Hash delsolr will build the appropriate
|
63
|
-
# query string given a hash of fieldnames => values. For instance, the following two queries are effectively the
|
64
|
-
# same. Both will end up passing "brand:apple" as the 'q' param to solr.
|
65
|
-
#
|
66
|
-
# c.query('standard', :query => {:brand => 'apple'})
|
67
|
-
# c.query('standard', :query => "brand:apple")
|
68
|
-
#
|
69
|
-
# [<b><tt>:filters</tt></b>]
|
70
|
-
# (optional)array, string, or hash of additional filters to apply. Filters end up in the 'fq' param in the solr query URL.
|
71
|
-
# The value can be a String, Array of Strings, or Hash. The following are all equivalent.
|
72
|
-
#
|
73
|
-
# c.query('standard', :query => 'abc', :filters => {:instock => true})
|
74
|
-
# c.query('standard', :query => 'abc', :filters => "instock:true")
|
75
|
-
# c.query('standard', :query => 'abc', :filters => ["instock:true"])
|
76
|
-
#
|
77
|
-
# as are the following
|
78
|
-
#
|
79
|
-
# c.query('standard', :query => 'abc', :filters => {:instock => true, :onsale => true})
|
80
|
-
# c.query('standard', :query => 'abc', :filters => ["instock:true", "onsale:true"])
|
81
|
-
#
|
82
|
-
#
|
83
|
-
# [<b><tt>:facets</tt></b>]
|
84
|
-
# (optional) array of hashes for all the facet params (ie: {:field => 'instock_b', :limit => 15, :mincount => 5})
|
85
|
-
#
|
86
|
-
# <em>Faceting by field...</em>
|
87
|
-
#
|
88
|
-
# c.query('standard', :query => 'abc', :facets => [{:field => 'brand', :limit => 15, :mincount => 5}])
|
89
|
-
#
|
90
|
-
# ...will request counts for the 'brand' field name that have a minimum of 5 documents, returning
|
91
|
-
# a max/limit of 15. The counts for this facet can be pulled from the response like so:
|
92
|
-
#
|
93
|
-
# rsp.facet_field_count('brand', 'Apple') => 17 # returns count as fixnum
|
94
|
-
#
|
95
|
-
# The list of values for this facet can be pulled from the response like so:
|
96
|
-
#
|
97
|
-
# rsp.facet_field_values('brand') => ['Apple', 'Microsoft', 'Dell'] # returns an array of strings
|
98
|
-
#
|
99
|
-
# <em>Faceting by query...</em>
|
100
|
-
#
|
101
|
-
# c.query('standard', :query => 'abc',
|
102
|
-
# :facets => [{:query => {:city => 'seattle', :instock => true},
|
103
|
-
# :prefix => {:key => 'seattle_instock'}}])
|
104
|
-
#
|
105
|
-
# ...will request counts for the number of documents where "seattle" matches on the "city" field and "instock" is set to true.
|
106
|
-
# Faceting by query requires you to assign a name to the facet so the counts can easily be fetched from the response. Solr
|
107
|
-
# resolves facet queries to count by the actual facet query string, which can be cumbersome. The delsolr response object maintains
|
108
|
-
# a mapping of query name => query string for you so your application only needs to remember the query name.
|
109
|
-
#
|
110
|
-
# The count for this facet query can be pulled like so:
|
111
|
-
#
|
112
|
-
# rsp.facet_query_count_by_key('seattle_instock').
|
113
|
-
#
|
114
|
-
# [<b><tt>:sorts</tt></b>]
|
115
|
-
# (optional) array or string of sorts in Lucene syntax (<fieldname> <asc/desc>)
|
116
|
-
#
|
117
|
-
# c.query('standard', :query => 'abc', :sort => "product_name asc")
|
118
|
-
#
|
119
|
-
#
|
120
|
-
# [<b><tt>:limit</tt></b>]
|
121
|
-
# (optional) number to return (defaults to 10). (becomes the 'rows' param in the solr URL)
|
122
|
-
#
|
123
|
-
# c.query('standard', :query => 'abc', :limit => 100)
|
124
|
-
#
|
125
|
-
# [<b><tt>:offset</tt></b>]
|
126
|
-
# (optional) offset (defaults to 0, becomes the 'start' param in the solr URL)
|
127
|
-
#
|
128
|
-
# c.query('standard', :query => 'abc', :offset => 40)
|
129
|
-
#
|
130
|
-
# [<b><tt>:enable_caching</tt></b>]
|
131
|
-
# (optional) switch to control whether or not to use the cache (for fetching or setting) for the current query. Only works if a cache store was passed to the constructor.
|
132
|
-
#
|
133
|
-
# c = DelSolr::Client.new(:server => 'solr1', :port => 8983, :cache => SomeCacheStore.new)
|
134
|
-
# c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true)
|
135
|
-
# c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true) # this one should hit the cache
|
136
|
-
#
|
137
|
-
# Cache keys are created from MD5's of the solr URL that is generated.
|
138
|
-
#
|
139
|
-
# [<b><tt>:boost</tt></b>] becomes the 'bq' param which is used for query time boosting
|
140
|
-
# [<b><tt>:fields</tt></b>] becomes the 'fl' param which decides which fields to return. Defaults to 'id,unique_id,score'
|
141
|
-
#
|
142
|
-
# NOTE: Any unrecognized options will be passed along as URL params in the solr request URL. This allows you to access solr features
|
143
|
-
# which are unsupported by DelSolr.
|
144
|
-
#
|
145
|
-
# Returns a DelSolr::Client::Response instance
|
146
|
-
def query(request_handler, opts = {})
|
147
|
-
|
148
|
-
raise "request_handler must be supplied" if request_handler.blank?
|
149
|
-
|
150
|
-
enable_caching = opts.delete(:enable_caching) && !@cache.nil?
|
151
|
-
ttl = opts.delete(:ttl) || 1.hours
|
152
|
-
|
153
|
-
query_builder = DelSolr::Client::QueryBuilder.new(request_handler, opts)
|
154
|
-
|
155
|
-
# it's important that the QueryBuilder returns strings in a deterministic fashion
|
156
|
-
# so that the cache keys will match for the same query.
|
157
|
-
cache_key = Digest::MD5.hexdigest(query_builder.request_string)
|
158
|
-
from_cache = false
|
159
|
-
|
160
|
-
# if we're caching, first try looking in the cache
|
161
|
-
if enable_caching
|
162
|
-
t1 = Time.now
|
163
|
-
body = @cache.get(cache_key) rescue body = nil
|
164
|
-
from_cache = true unless body.blank?
|
165
|
-
cache_time = (Time.now - t1).to_i * 1000 # retrieval time from the cache in ms
|
166
|
-
end
|
167
|
-
|
168
|
-
if body.blank? # cache miss (or wasn't enabled)
|
169
|
-
header, body = connection.post("#{configuration.path}/select", query_builder.request_string)
|
170
|
-
# We get UTF-8 from Solr back, make sure the string knows about it
|
171
|
-
# when running on Ruby >= 1.9
|
172
|
-
if body.respond_to?(:force_encoding)
|
173
|
-
body.force_encoding("UTF-8")
|
174
|
-
end
|
175
|
-
|
176
|
-
# add to the cache if caching
|
177
|
-
if enable_caching
|
178
|
-
begin
|
179
|
-
@cache.set(cache_key, body, ttl)
|
180
|
-
rescue
|
181
|
-
end
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
|
-
response = DelSolr::Client::Response.new(body, query_builder, :logger => logger, :from_cache => from_cache, :shortcuts => @shortcuts)
|
186
|
-
if logger
|
187
|
-
if response && response.success?
|
188
|
-
response_stat_string = "#{from_cache ? cache_time : response.qtime},#{response.total},"
|
189
|
-
end
|
190
|
-
logger.info "#{from_cache ? 'C' : 'S'},#{response_stat_string}http://#{configuration.full_path}/select?#{response.request_url}"
|
191
|
-
end
|
192
|
-
response
|
193
|
-
# If we error, just return nil and let the client decide what to do
|
194
|
-
rescue StandardError
|
195
|
-
logger.info "http://#{configuration.full_path}#{query_builder.request_string}" if logger && configuration && query_builder
|
196
|
-
return nil
|
197
|
-
end
|
198
|
-
|
199
|
-
# Adds a document to the buffer to be posted to solr (NOTE: does not perform the actual post)
|
200
|
-
#
|
201
|
-
# [<b><tt>docs</tt></b>]
|
202
|
-
# docs must be a DelSolr::Document or array of instances. See DelSolr::Document for how to setup a document
|
203
|
-
def update(docs)
|
204
|
-
self.pending_documents.push(*Array(docs))
|
205
|
-
true
|
206
|
-
end
|
207
|
-
|
208
|
-
# Exactly like <tt>update</tt>, but performs the post immediately. Use <tt>update</tt> if you wish to batch document updates.
|
209
|
-
def update!(docs)
|
210
|
-
update(docs) && post_update!
|
211
|
-
end
|
212
|
-
|
213
|
-
# Calls <tt>update!</tt> on the docs and then <tt>commit!</tt>
|
214
|
-
def update_and_commit!(docs)
|
215
|
-
update!(docs) && commit!
|
216
|
-
end
|
217
|
-
|
218
|
-
# posts the buffer created by <tt>update</tt> to solr
|
219
|
-
def post_update!
|
220
|
-
h,b = post(prepare_update_xml())
|
221
|
-
success?(b)
|
222
|
-
end
|
223
|
-
|
224
|
-
# deletes <tt>unique_id</tt> from the index
|
225
|
-
def delete(unique_id)
|
226
|
-
h,b = post("<delete><id>#{unique_id}</id></delete>")
|
227
|
-
success?(b)
|
228
|
-
end
|
229
|
-
|
230
|
-
# not implemented
|
231
|
-
def delete_by_query(query)
|
232
|
-
raise 'not implemented yet :('
|
233
|
-
end
|
234
|
-
|
235
|
-
# commits all pending adds/deletes
|
236
|
-
def commit!
|
237
|
-
h,b = post("<commit/>")
|
238
|
-
success?(b)
|
239
|
-
end
|
240
|
-
|
241
|
-
# posts the optimize directive to solr
|
242
|
-
def optimize!
|
243
|
-
h,b = post("<optimize/>")
|
244
|
-
success?(b)
|
245
|
-
end
|
246
|
-
|
247
|
-
# accessor to the connection instance
|
248
|
-
def connection
|
249
|
-
@connection ||= begin
|
250
|
-
c = Net::HTTP.new(configuration.server, configuration.port)
|
251
|
-
c.read_timeout = configuration.timeout
|
252
|
-
raise "Failed to connect to #{configuration.server}:#{configuration.port}" if c.nil?
|
253
|
-
c
|
254
|
-
end
|
255
|
-
end
|
256
|
-
|
257
|
-
# clears out the connection so a new one will be created
|
258
|
-
def reset_connection!
|
259
|
-
@connection = nil
|
260
|
-
end
|
261
|
-
|
262
|
-
# returns the array of documents that are waiting to be posted to solr
|
263
|
-
def pending_documents
|
264
|
-
@pending_documents ||= []
|
265
|
-
end
|
266
|
-
|
267
|
-
private
|
268
|
-
|
269
|
-
# returns the update xml buffer
|
270
|
-
def prepare_update_xml
|
271
|
-
r = ["<add>\n"]
|
272
|
-
# copy and clear pending docs
|
273
|
-
working_docs, @pending_documents = @pending_documents, nil
|
274
|
-
working_docs.each { |doc| r << doc.xml }
|
275
|
-
r << "\n</add>\n"
|
276
|
-
r.join # not sure, but I think Array#join is faster than String#<< for large buffers
|
277
|
-
end
|
278
|
-
|
279
|
-
# helper for posting data to solr
|
280
|
-
def post(buffer)
|
281
|
-
connection.post("#{configuration.path}/update", buffer, {'Content-type' => 'text/xml;charset=utf-8'})
|
282
|
-
end
|
283
|
-
|
284
|
-
def success?(response_body)
|
285
|
-
response_body == '<result status="0"></result>'
|
286
|
-
end
|
287
|
-
|
288
|
-
end
|
289
|
-
end
|
16
|
+
autoload :Client, 'delsolr/client'
|
17
|
+
autoload :Document, 'delsolr/document'
|
18
|
+
end
|
@@ -0,0 +1,301 @@
|
|
1
|
+
module DelSolr
|
2
|
+
|
3
|
+
class Client
|
4
|
+
autoload :Configuration, "delsolr/client/configuration"
|
5
|
+
autoload :QueryBuilder, "delsolr/client/query_builder"
|
6
|
+
autoload :Response, "delsolr/client/response"
|
7
|
+
|
8
|
+
attr_reader :configuration, :logger
|
9
|
+
|
10
|
+
#
|
11
|
+
# [<b><tt>:server</tt></b>]
|
12
|
+
# the server you want to connect to
|
13
|
+
#
|
14
|
+
# [<b><tt>:port</tt></b>]
|
15
|
+
# the port you want to connect to
|
16
|
+
#
|
17
|
+
# [<b><tt>:cache</tt></b>]
|
18
|
+
# (optional) a cache instance (any object that supports get and set)
|
19
|
+
#
|
20
|
+
# [<b><tt>:shortcuts</tt></b>]
|
21
|
+
# (optional) a list of values in the doc fields to generate short cuts for (ie: [:scores, :id], you will be able to call <tt>rsp.scores</tt> and have it return an array of scores, likewise for <tt>ids</tt>.) Defaults to [:id, :unique_id, :score]
|
22
|
+
#
|
23
|
+
# [<b><tt>:path</tt></b>]
|
24
|
+
# (optional) the path of the solr install (defaults to "/solr")
|
25
|
+
#
|
26
|
+
# [<b><tt>:logger</tt></b>]
|
27
|
+
# (optional) Log4r logger object
|
28
|
+
def initialize(options = {}, &connection_block)
|
29
|
+
@configuration = DelSolr::Client::Configuration.new(options[:server], options[:port], options[:timeout], options[:path])
|
30
|
+
@cache = options[:cache]
|
31
|
+
@logger = options[:logger]
|
32
|
+
@shortcuts = options[:shortcuts]
|
33
|
+
setup_connection(&connection_block) if connection_block
|
34
|
+
end
|
35
|
+
|
36
|
+
#
|
37
|
+
#
|
38
|
+
# <tt>request_handler</tt> - type of query to perform (should match up w/ request handlers defined in solrconfig.xml)
|
39
|
+
#
|
40
|
+
#
|
41
|
+
# options
|
42
|
+
#
|
43
|
+
# [<b><tt>:query</tt></b>]
|
44
|
+
# (required) effectively the 'q' param in the solr URL. The treatment of <tt>:query</tt> depends on the type
|
45
|
+
# of request handler you are using. The supported values are Strings and Hashes. Any valid Lucene query string is acceptable
|
46
|
+
# (ie: :query => "brand:apple" and :query => "apply" are both valid). If given a Hash delsolr will build the appropriate
|
47
|
+
# query string given a hash of fieldnames => values. For instance, the following two queries are effectively the
|
48
|
+
# same. Both will end up passing "brand:apple" as the 'q' param to solr.
|
49
|
+
#
|
50
|
+
# c.query('standard', :query => {:brand => 'apple'})
|
51
|
+
# c.query('standard', :query => "brand:apple")
|
52
|
+
#
|
53
|
+
# [<b><tt>:filters</tt></b>]
|
54
|
+
# (optional)array, string, or hash of additional filters to apply. Filters end up in the 'fq' param in the solr query URL.
|
55
|
+
# The value can be a String, Array of Strings, or Hash. The following are all equivalent.
|
56
|
+
#
|
57
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true})
|
58
|
+
# c.query('standard', :query => 'abc', :filters => "instock:true")
|
59
|
+
# c.query('standard', :query => 'abc', :filters => ["instock:true"])
|
60
|
+
#
|
61
|
+
# as are the following
|
62
|
+
#
|
63
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true, :onsale => true})
|
64
|
+
# c.query('standard', :query => 'abc', :filters => ["instock:true", "onsale:true"])
|
65
|
+
#
|
66
|
+
#
|
67
|
+
# [<b><tt>:facets</tt></b>]
|
68
|
+
# (optional) array of hashes for all the facet params (ie: {:field => 'instock_b', :limit => 15, :mincount => 5})
|
69
|
+
#
|
70
|
+
# <em>Faceting by field...</em>
|
71
|
+
#
|
72
|
+
# c.query('standard', :query => 'abc', :facets => [{:field => 'brand', :limit => 15, :mincount => 5}])
|
73
|
+
#
|
74
|
+
# ...will request counts for the 'brand' field name that have a minimum of 5 documents, returning
|
75
|
+
# a max/limit of 15. The counts for this facet can be pulled from the response like so:
|
76
|
+
#
|
77
|
+
# rsp.facet_field_count('brand', 'Apple') => 17 # returns count as fixnum
|
78
|
+
#
|
79
|
+
# The list of values for this facet can be pulled from the response like so:
|
80
|
+
#
|
81
|
+
# rsp.facet_field_values('brand') => ['Apple', 'Microsoft', 'Dell'] # returns an array of strings
|
82
|
+
#
|
83
|
+
# <em>Faceting by query...</em>
|
84
|
+
#
|
85
|
+
# c.query('standard', :query => 'abc',
|
86
|
+
# :facets => [{:query => {:city => 'seattle', :instock => true},
|
87
|
+
# :prefix => {:key => 'seattle_instock'}}])
|
88
|
+
#
|
89
|
+
# ...will request counts for the number of documents where "seattle" matches on the "city" field and "instock" is set to true.
|
90
|
+
# Faceting by query requires you to assign a name to the facet so the counts can easily be fetched from the response. Solr
|
91
|
+
# resolves facet queries to count by the actual facet query string, which can be cumbersome. The delsolr response object maintains
|
92
|
+
# a mapping of query name => query string for you so your application only needs to remember the query name.
|
93
|
+
#
|
94
|
+
# The count for this facet query can be pulled like so:
|
95
|
+
#
|
96
|
+
# rsp.facet_query_count_by_key('seattle_instock').
|
97
|
+
#
|
98
|
+
# [<b><tt>:sorts</tt></b>]
|
99
|
+
# (optional) array or string of sorts in Lucene syntax (<fieldname> <asc/desc>)
|
100
|
+
#
|
101
|
+
# c.query('standard', :query => 'abc', :sort => "product_name asc")
|
102
|
+
#
|
103
|
+
#
|
104
|
+
# [<b><tt>:limit</tt></b>]
|
105
|
+
# (optional) number to return (defaults to 10). (becomes the 'rows' param in the solr URL)
|
106
|
+
#
|
107
|
+
# c.query('standard', :query => 'abc', :limit => 100)
|
108
|
+
#
|
109
|
+
# [<b><tt>:offset</tt></b>]
|
110
|
+
# (optional) offset (defaults to 0, becomes the 'start' param in the solr URL)
|
111
|
+
#
|
112
|
+
# c.query('standard', :query => 'abc', :offset => 40)
|
113
|
+
#
|
114
|
+
# [<b><tt>:enable_caching</tt></b>]
|
115
|
+
# (optional) switch to control whether or not to use the cache (for fetching or setting) for the current query. Only works if a cache store was passed to the constructor.
|
116
|
+
#
|
117
|
+
# c = DelSolr::Client.new(:server => 'solr1', :port => 8983, :cache => SomeCacheStore.new)
|
118
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true)
|
119
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true) # this one should hit the cache
|
120
|
+
#
|
121
|
+
# Cache keys are created from MD5's of the solr URL that is generated.
|
122
|
+
#
|
123
|
+
# [<b><tt>:boost</tt></b>] becomes the 'bq' param which is used for query time boosting
|
124
|
+
# [<b><tt>:fields</tt></b>] becomes the 'fl' param which decides which fields to return. Defaults to 'id,unique_id,score'
|
125
|
+
#
|
126
|
+
# NOTE: Any unrecognized options will be passed along as URL params in the solr request URL. This allows you to access solr features
|
127
|
+
# which are unsupported by DelSolr.
|
128
|
+
#
|
129
|
+
# Returns a DelSolr::Client::Response instance
|
130
|
+
def query(request_handler, opts = {})
|
131
|
+
|
132
|
+
raise "request_handler must be supplied" if request_handler.blank?
|
133
|
+
|
134
|
+
enable_caching = opts.delete(:enable_caching) && !@cache.nil?
|
135
|
+
ttl = opts.delete(:ttl) || 1.hours
|
136
|
+
|
137
|
+
query_builder = DelSolr::Client::QueryBuilder.new(request_handler, opts)
|
138
|
+
|
139
|
+
# it's important that the QueryBuilder returns strings in a deterministic fashion
|
140
|
+
# so that the cache keys will match for the same query.
|
141
|
+
cache_key = Digest::MD5.hexdigest(query_builder.request_string)
|
142
|
+
from_cache = false
|
143
|
+
|
144
|
+
# if we're caching, first try looking in the cache
|
145
|
+
if enable_caching
|
146
|
+
t1 = Time.now
|
147
|
+
body = @cache.get(cache_key) rescue body = nil
|
148
|
+
from_cache = true unless body.blank?
|
149
|
+
cache_time = (Time.now - t1).to_i * 1000 # retrieval time from the cache in ms
|
150
|
+
end
|
151
|
+
|
152
|
+
if body.blank? # cache miss (or wasn't enabled)
|
153
|
+
response = connection.post("#{configuration.path}/select", query_builder.request_string)
|
154
|
+
body = response.body
|
155
|
+
|
156
|
+
# We get UTF-8 from Solr back, make sure the string knows about it
|
157
|
+
# when running on Ruby >= 1.9
|
158
|
+
if body.respond_to?(:force_encoding)
|
159
|
+
body.force_encoding("UTF-8")
|
160
|
+
end
|
161
|
+
|
162
|
+
end
|
163
|
+
|
164
|
+
response = DelSolr::Client::Response.new(body, query_builder, :logger => logger, :from_cache => from_cache, :shortcuts => @shortcuts)
|
165
|
+
|
166
|
+
url = "http://#{configuration.full_path}/select?#{query_builder.request_string}"
|
167
|
+
if response && response.success?
|
168
|
+
log_query_success(url, response, from_cache, (from_cache ? cache_time : response.qtime))
|
169
|
+
else
|
170
|
+
# The response from solr will already be logged, but we should also
|
171
|
+
# log the full url to make debugging easier
|
172
|
+
log_query_error(url)
|
173
|
+
end
|
174
|
+
|
175
|
+
# Cache successful responses that don't come from the cache
|
176
|
+
if response && response.success? && enable_caching && !from_cache
|
177
|
+
# add to the cache if caching
|
178
|
+
@cache.set(cache_key, body, ttl)
|
179
|
+
end
|
180
|
+
|
181
|
+
response
|
182
|
+
end
|
183
|
+
|
184
|
+
# Adds a document to the buffer to be posted to solr (NOTE: does not perform the actual post)
|
185
|
+
#
|
186
|
+
# [<b><tt>docs</tt></b>]
|
187
|
+
# docs must be a DelSolr::Document or array of instances. See DelSolr::Document for how to setup a document
|
188
|
+
def update(docs)
|
189
|
+
self.pending_documents.push(*Array(docs))
|
190
|
+
true
|
191
|
+
end
|
192
|
+
|
193
|
+
# Exactly like <tt>update</tt>, but performs the post immediately. Use <tt>update</tt> if you wish to batch document updates.
|
194
|
+
def update!(docs, options = {})
|
195
|
+
update(docs) && post_update!(options)
|
196
|
+
end
|
197
|
+
|
198
|
+
# Calls <tt>update!</tt> on the docs and then <tt>commit!</tt>
|
199
|
+
def update_and_commit!(docs, options = {})
|
200
|
+
update!(docs, options) && commit!
|
201
|
+
end
|
202
|
+
|
203
|
+
# posts the buffer created by <tt>update</tt> to solr
|
204
|
+
def post_update!(options = {})
|
205
|
+
rsp = post(prepare_update_xml(options))
|
206
|
+
success?(rsp.body) or log_error(rsp.body)
|
207
|
+
end
|
208
|
+
|
209
|
+
# deletes <tt>unique_id</tt> from the index
|
210
|
+
def delete(unique_id)
|
211
|
+
rsp = post("<delete><id>#{unique_id}</id></delete>")
|
212
|
+
success?(rsp.body) or log_error(rsp.body)
|
213
|
+
end
|
214
|
+
|
215
|
+
# deletes documents matching <tt>query</tt> from the index
|
216
|
+
def delete_by_query(query)
|
217
|
+
rsp = post("<delete><query>#{query}</query></delete>")
|
218
|
+
success?(rsp.body) or log_error(rsp.body)
|
219
|
+
end
|
220
|
+
|
221
|
+
# commits all pending adds/deletes
|
222
|
+
def commit!
|
223
|
+
rsp = post("<commit/>")
|
224
|
+
success?(rsp.body) or log_error(rsp.body)
|
225
|
+
end
|
226
|
+
|
227
|
+
# posts the optimize directive to solr
|
228
|
+
def optimize!
|
229
|
+
rsp = post("<optimize/>")
|
230
|
+
success?(rsp.body) or log_error(rsp.body)
|
231
|
+
end
|
232
|
+
|
233
|
+
def setup_connection(&connection_block)
|
234
|
+
@connection_block = connection_block
|
235
|
+
end
|
236
|
+
|
237
|
+
# accessor to the connection instance
|
238
|
+
def connection
|
239
|
+
@connection ||= begin
|
240
|
+
Faraday.new(:url => "http://#{configuration.server}:#{configuration.port}", :timeout => configuration.timeout, &connection_block)
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
244
|
+
# clears out the connection so a new one will be created
|
245
|
+
def reset_connection!
|
246
|
+
@connection = nil
|
247
|
+
end
|
248
|
+
|
249
|
+
# returns the array of documents that are waiting to be posted to solr
|
250
|
+
def pending_documents
|
251
|
+
@pending_documents ||= []
|
252
|
+
end
|
253
|
+
|
254
|
+
private
|
255
|
+
|
256
|
+
def connection_block
|
257
|
+
@connection_block ||= lambda do |faraday|
|
258
|
+
faraday.adapter Faraday.default_adapter
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
def log_query_success(url, response, from_cache, query_time)
|
263
|
+
if logger
|
264
|
+
l = []
|
265
|
+
l << "#{query_time}ms"
|
266
|
+
l << (from_cache ? "CACHE" : "SOLR")
|
267
|
+
l << url
|
268
|
+
logger.info l.join(' ')
|
269
|
+
end
|
270
|
+
end
|
271
|
+
|
272
|
+
def log_query_error(url)
|
273
|
+
logger.error "ERROR #{url}" if logger
|
274
|
+
end
|
275
|
+
|
276
|
+
# returns the update xml buffer
|
277
|
+
def prepare_update_xml(options = {})
|
278
|
+
r = ["<add#{options.to_xml_attribute_string}>\n"]
|
279
|
+
# copy and clear pending docs
|
280
|
+
working_docs, @pending_documents = @pending_documents, nil
|
281
|
+
working_docs.each { |doc| r << doc.xml }
|
282
|
+
r << "\n</add>\n"
|
283
|
+
r.join # not sure, but I think Array#join is faster than String#<< for large buffers
|
284
|
+
end
|
285
|
+
|
286
|
+
# helper for posting data to solr
|
287
|
+
def post(buffer)
|
288
|
+
connection.post("#{configuration.path}/update", buffer, {'Content-type' => 'text/xml;charset=utf-8'})
|
289
|
+
end
|
290
|
+
|
291
|
+
def success?(response_body)
|
292
|
+
response_body.include?('<result status="0"></result>') ||
|
293
|
+
response_body.include?('<lst name="responseHeader"><int name="status">0</int>')
|
294
|
+
end
|
295
|
+
|
296
|
+
def log_error(response_body)
|
297
|
+
return unless logger
|
298
|
+
logger.error(response_body)
|
299
|
+
end
|
300
|
+
end
|
301
|
+
end
|