delsolr 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/{README.txt → README.rdoc} +0 -0
- data/Rakefile +9 -0
- data/lib/delsolr.rb +7 -278
- data/lib/delsolr/client.rb +301 -0
- data/lib/delsolr/{configuration.rb → client/configuration.rb} +0 -0
- data/lib/delsolr/{query_builder.rb → client/query_builder.rb} +1 -1
- data/lib/delsolr/{response.rb → client/response.rb} +7 -7
- data/lib/delsolr/document.rb +12 -22
- data/lib/delsolr/extensions.rb +15 -1
- metadata +65 -27
- data/test/test_client.rb +0 -268
- data/test/test_helper.rb +0 -2
- data/test/test_query_builder.rb +0 -179
- data/test/test_response.rb +0 -145
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4de05b98f6f01c63e8aac3747b3db1f5ec80095b
|
4
|
+
data.tar.gz: 0884ee4981d08641891f106121171a3593f034ab
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 714d83d32d936eaf17f8204dc67028c4451d704e037be49f325e8b2e8e74c68ce2ea72c5d08e3e92f62c940956d80c01d6a613be1d69231df86ea8f6141fe0b4
|
7
|
+
data.tar.gz: 29460a4feb7892d5d55445c2e8846fe64c14f820e909ea8c4378bcf10b713f716e859d2216091fa457e53bff2518e8ac7eae618be1bfaf80f266216cd10889ad
|
data/{README.txt → README.rdoc}
RENAMED
File without changes
|
data/Rakefile
ADDED
data/lib/delsolr.rb
CHANGED
@@ -1,289 +1,18 @@
|
|
1
1
|
#
|
2
2
|
# DelSolr
|
3
|
-
#
|
3
|
+
#
|
4
4
|
# ben@avvo.com 9.1.2008
|
5
5
|
#
|
6
6
|
# see README.txt
|
7
7
|
#
|
8
8
|
|
9
|
-
|
10
|
-
require '
|
11
|
-
|
9
|
+
require 'faraday'
|
10
|
+
require 'json'
|
12
11
|
require 'digest/md5'
|
13
12
|
|
14
|
-
require
|
15
|
-
require File.join(File.dirname(__FILE__), 'delsolr', 'configuration')
|
16
|
-
require File.join(File.dirname(__FILE__), 'delsolr', 'query_builder')
|
17
|
-
require File.join(File.dirname(__FILE__), 'delsolr', 'document')
|
18
|
-
require File.join(File.dirname(__FILE__), 'delsolr', 'extensions')
|
19
|
-
|
13
|
+
require 'delsolr/extensions'
|
20
14
|
|
21
15
|
module DelSolr
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
attr_reader :configuration, :logger
|
26
|
-
|
27
|
-
#
|
28
|
-
# [<b><tt>:server</tt></b>]
|
29
|
-
# the server you want to connect to
|
30
|
-
#
|
31
|
-
# [<b><tt>:port</tt></b>]
|
32
|
-
# the port you want to connect to
|
33
|
-
#
|
34
|
-
# [<b><tt>:cache</tt></b>]
|
35
|
-
# (optional) a cache instance (any object the supports get and set)
|
36
|
-
#
|
37
|
-
# [<b><tt>:shortcuts</tt></b>]
|
38
|
-
# (optional) a list of values in the doc fields to generate short cuts for (ie: [:scores, :id], you will be able to call <tt>rsp.scores</tt> and have it return an array of scores, likewise for <tt>ids</tt>.) Defaults to [:id, :unique_id, :score]
|
39
|
-
#
|
40
|
-
# [<b><tt>:path</tt></b>]
|
41
|
-
# (optional) the path of the solr install (defaults to "/solr")
|
42
|
-
#
|
43
|
-
# [<b><tt>:logger</tt></b>]
|
44
|
-
# (optional) Log4r logger object
|
45
|
-
def initialize(options = {})
|
46
|
-
@configuration = DelSolr::Client::Configuration.new(options[:server], options[:port], options[:timeout], options[:path])
|
47
|
-
@cache = options[:cache]
|
48
|
-
@logger = options[:logger]
|
49
|
-
@shortcuts = options[:shortcuts]
|
50
|
-
end
|
51
|
-
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# <tt>request_handler</tt> - type of query to perform (should match up w/ request handlers defined in solrconfig.xml)
|
55
|
-
#
|
56
|
-
#
|
57
|
-
# options
|
58
|
-
#
|
59
|
-
# [<b><tt>:query</tt></b>]
|
60
|
-
# (required) effectively the 'q' param in the solr URL. The treatment of <tt>:query</tt> depends on the type
|
61
|
-
# of request handler you are using. The supported values are Strings and Hashes. Any valid Lucene query string is acceptable
|
62
|
-
# (ie: :query => "brand:apple" and :query => "apply" are both valid). If given a Hash delsolr will build the appropriate
|
63
|
-
# query string given a hash of fieldnames => values. For instance, the following two queries are effectively the
|
64
|
-
# same. Both will end up passing "brand:apple" as the 'q' param to solr.
|
65
|
-
#
|
66
|
-
# c.query('standard', :query => {:brand => 'apple'})
|
67
|
-
# c.query('standard', :query => "brand:apple")
|
68
|
-
#
|
69
|
-
# [<b><tt>:filters</tt></b>]
|
70
|
-
# (optional)array, string, or hash of additional filters to apply. Filters end up in the 'fq' param in the solr query URL.
|
71
|
-
# The value can be a String, Array of Strings, or Hash. The following are all equivelent.
|
72
|
-
#
|
73
|
-
# c.query('standard', :query => 'abc', :filters => {:instock => true})
|
74
|
-
# c.query('standard', :query => 'abc', :filters => "instock:true")
|
75
|
-
# c.query('standard', :query => 'abc', :filters => ["instock:true"])
|
76
|
-
#
|
77
|
-
# as are the following
|
78
|
-
#
|
79
|
-
# c.query('standard', :query => 'abc', :filters => {:instock => true, :onsale => true})
|
80
|
-
# c.query('standard', :query => 'abc', :filters => ["instock:true", "onsale:true"])
|
81
|
-
#
|
82
|
-
#
|
83
|
-
# [<b><tt>:facets</tt></b>]
|
84
|
-
# (optional) array of hashes for all the facet params (ie: {:field => 'instock_b', :limit => 15, :mincount => 5})
|
85
|
-
#
|
86
|
-
# <em>Faceting by field...</em>
|
87
|
-
#
|
88
|
-
# c.query('standard', :query => 'abc', :facets => [{:field => 'brand', :limit => 15, :mincount => 5}])
|
89
|
-
#
|
90
|
-
# ...will request counts for the 'brand' field name that have a minimum of 5 documents, returning
|
91
|
-
# a max/limit of 15. The counts for this facet can be pulled from the response like so:
|
92
|
-
#
|
93
|
-
# rsp.facet_field_count('brand', 'Apple') => 17 # returns count as fixnum
|
94
|
-
#
|
95
|
-
# The list of values for this facet can be pulled from the response like so:
|
96
|
-
#
|
97
|
-
# rsp.facet_field_values('brand') => ['Apple', 'Microsoft', 'Dell'] # returns an array of strings
|
98
|
-
#
|
99
|
-
# <em>Faceting by query...</em>
|
100
|
-
#
|
101
|
-
# c.query('standard', :query => 'abc',
|
102
|
-
# :facets => [:query => {:city => 'seattle', :instock => true},
|
103
|
-
# :prefix => {:key => 'seattle_instock'}}])
|
104
|
-
#
|
105
|
-
# ...will request counts for the number of documents where "seattle" matches on the "city" field and "instock" is set to true.
|
106
|
-
# Faceting by query requires you to assign a name to the facet so the counts can easily be fetched from the response. Solr
|
107
|
-
# resolves facet querys to count by the actual facet query string, which can be cumbersome. The delsolr response object maintains
|
108
|
-
# a mapping of query name => query string for you so your application only needs to remember the query name.
|
109
|
-
#
|
110
|
-
# The count for this facet query can be pulled like so:
|
111
|
-
#
|
112
|
-
# rsp.facet_query_count_by_key('seattle_instock').
|
113
|
-
#
|
114
|
-
# [<b><tt>:sorts</tt></b>]
|
115
|
-
# (optional) array or string of sorts in Lucene syntax (<fieldname> <asc/desc>)
|
116
|
-
#
|
117
|
-
# c.query('standard', :query => 'abc', :sort => "product_name asc")
|
118
|
-
#
|
119
|
-
#
|
120
|
-
# [<b><tt>:limit</tt></b>]
|
121
|
-
# (optional) number to return (defaults to 10). (becomes the 'rows' param in the solr URL)
|
122
|
-
#
|
123
|
-
# c.query('standard', ;query => 'abc', :limit => 100)
|
124
|
-
#
|
125
|
-
# [<b><tt>:offset</tt></b>]
|
126
|
-
# (optional) offset (defaults to 0, becomes the 'start' param in the solr URL)
|
127
|
-
#
|
128
|
-
# c.query('standard', :query => 'abc', :offset => 40)
|
129
|
-
#
|
130
|
-
# [<b><tt>:enable_caching</tt></b>]
|
131
|
-
# (optional) switch to control whether or not to use the cache (for fetching or setting) for the current query. Only works if a cache store was passed to the constructor.
|
132
|
-
#
|
133
|
-
# c = DelSolr::Client.new(:server => 'solr1', :port => 8983, :cache => SomeCacheStore.new)
|
134
|
-
# c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true)
|
135
|
-
# c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true) # this one should hit the cache
|
136
|
-
#
|
137
|
-
# Cache keys are created from MD5's of the solr URL that is generated.
|
138
|
-
#
|
139
|
-
# [<b><tt>:boot</tt></b>] becomes the 'bq' param which is used for query time boosting
|
140
|
-
# [<b><tt>:fields</tt></b>] becomes the 'fl' param which decides which fields to return. Defaults to 'id,unique_id,score'
|
141
|
-
#
|
142
|
-
# NOTE: Any unrecognized options will be passed along as URL params in the solr request URL. This allows you to access solr features
|
143
|
-
# which are unsupported by DelSolr.
|
144
|
-
#
|
145
|
-
# Returns a DelSolr::Client::Response instance
|
146
|
-
def query(request_handler, opts = {})
|
147
|
-
|
148
|
-
raise "request_handler must be supplied" if request_handler.blank?
|
149
|
-
|
150
|
-
enable_caching = opts.delete(:enable_caching) && !@cache.nil?
|
151
|
-
ttl = opts.delete(:ttl) || 1.hours
|
152
|
-
|
153
|
-
query_builder = DelSolr::Client::QueryBuilder.new(request_handler, opts)
|
154
|
-
|
155
|
-
# it's important that the QueryBuilder returns strings in a deterministic fashion
|
156
|
-
# so that the cache keys will match for the same query.
|
157
|
-
cache_key = Digest::MD5.hexdigest(query_builder.request_string)
|
158
|
-
from_cache = false
|
159
|
-
|
160
|
-
# if we're caching, first try looking in the cache
|
161
|
-
if enable_caching
|
162
|
-
t1 = Time.now
|
163
|
-
body = @cache.get(cache_key) rescue body = nil
|
164
|
-
from_cache = true unless body.blank?
|
165
|
-
cache_time = (Time.now - t1).to_i * 1000 # retrieval time from the cache in ms
|
166
|
-
end
|
167
|
-
|
168
|
-
if body.blank? # cache miss (or wasn't enabled)
|
169
|
-
header, body = connection.post("#{configuration.path}/select", query_builder.request_string)
|
170
|
-
# We get UTF-8 from Solr back, make sure the string knows about it
|
171
|
-
# when running on Ruby >= 1.9
|
172
|
-
if body.respond_to?(:force_encoding)
|
173
|
-
body.force_encoding("UTF-8")
|
174
|
-
end
|
175
|
-
|
176
|
-
# add to the cache if caching
|
177
|
-
if enable_caching
|
178
|
-
begin
|
179
|
-
@cache.set(cache_key, body, ttl)
|
180
|
-
rescue
|
181
|
-
end
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
|
-
response = DelSolr::Client::Response.new(body, query_builder, :logger => logger, :from_cache => from_cache, :shortcuts => @shortcuts)
|
186
|
-
if logger
|
187
|
-
if response && response.success?
|
188
|
-
response_stat_string = "#{from_cache ? cache_time : response.qtime},#{response.total},"
|
189
|
-
end
|
190
|
-
logger.info "#{from_cache ? 'C' : 'S'},#{response_stat_string}http://#{configuration.full_path}/select?#{response.request_url}"
|
191
|
-
end
|
192
|
-
response
|
193
|
-
# If we error, just return nil and let the client decide what to do
|
194
|
-
rescue StandardError
|
195
|
-
logger.info "http://#{configuration.full_path}#{query_builder.request_string}" if logger && configuration && query_builder
|
196
|
-
return nil
|
197
|
-
end
|
198
|
-
|
199
|
-
# Adds a document to the buffer to be posted to solr (NOTE: does not perform the actual post)
|
200
|
-
#
|
201
|
-
# [<b><tt>docs</tt></b>]
|
202
|
-
# docs must be a DelSolr::Document or array of instances. See DelSolr::Document for how to setup a document
|
203
|
-
def update(docs)
|
204
|
-
self.pending_documents.push(*Array(docs))
|
205
|
-
true
|
206
|
-
end
|
207
|
-
|
208
|
-
# Exactly like <tt>update</tt>, but performs the post immediately. Use <tt>update</tt> if you wish to batch document updates.
|
209
|
-
def update!(docs)
|
210
|
-
update(docs) && post_update!
|
211
|
-
end
|
212
|
-
|
213
|
-
# Calls <tt>update!</tt> on the docs and then <tt>commit!</tt>
|
214
|
-
def update_and_commit!(docs)
|
215
|
-
update!(docs) && commit!
|
216
|
-
end
|
217
|
-
|
218
|
-
# posts the buffer created by <tt>update</tt> to solr
|
219
|
-
def post_update!
|
220
|
-
h,b = post(prepare_update_xml())
|
221
|
-
success?(b)
|
222
|
-
end
|
223
|
-
|
224
|
-
# deletes <tt>unique_id</tt> from the index
|
225
|
-
def delete(unique_id)
|
226
|
-
h,b = post("<delete><id>#{unique_id}</id></delete>")
|
227
|
-
success?(b)
|
228
|
-
end
|
229
|
-
|
230
|
-
# not implemented
|
231
|
-
def delete_by_query(query)
|
232
|
-
raise 'not implemented yet :('
|
233
|
-
end
|
234
|
-
|
235
|
-
# commits all pending adds/deletes
|
236
|
-
def commit!
|
237
|
-
h,b = post("<commit/>")
|
238
|
-
success?(b)
|
239
|
-
end
|
240
|
-
|
241
|
-
# posts the optimize directive to solr
|
242
|
-
def optimize!
|
243
|
-
h,b = post("<optimize/>")
|
244
|
-
success?(b)
|
245
|
-
end
|
246
|
-
|
247
|
-
# accessor to the connection instance
|
248
|
-
def connection
|
249
|
-
@connection ||= begin
|
250
|
-
c = Net::HTTP.new(configuration.server, configuration.port)
|
251
|
-
c.read_timeout = configuration.timeout
|
252
|
-
raise "Failed to connect to #{configuration.server}:#{configuration.port}" if c.nil?
|
253
|
-
c
|
254
|
-
end
|
255
|
-
end
|
256
|
-
|
257
|
-
# clears out the connection so a new one will be created
|
258
|
-
def reset_connection!
|
259
|
-
@connection = nil
|
260
|
-
end
|
261
|
-
|
262
|
-
# returns the array of documents that are waiting to be posted to solr
|
263
|
-
def pending_documents
|
264
|
-
@pending_documents ||= []
|
265
|
-
end
|
266
|
-
|
267
|
-
private
|
268
|
-
|
269
|
-
# returns the update xml buffer
|
270
|
-
def prepare_update_xml
|
271
|
-
r = ["<add>\n"]
|
272
|
-
# copy and clear pending docs
|
273
|
-
working_docs, @pending_documents = @pending_documents, nil
|
274
|
-
working_docs.each { |doc| r << doc.xml }
|
275
|
-
r << "\n</add>\n"
|
276
|
-
r.join # not sure, but I think Array#join is faster then String#<< for large buffers
|
277
|
-
end
|
278
|
-
|
279
|
-
# helper for posting data to solr
|
280
|
-
def post(buffer)
|
281
|
-
connection.post("#{configuration.path}/update", buffer, {'Content-type' => 'text/xml;charset=utf-8'})
|
282
|
-
end
|
283
|
-
|
284
|
-
def success?(response_body)
|
285
|
-
response_body == '<result status="0"></result>'
|
286
|
-
end
|
287
|
-
|
288
|
-
end
|
289
|
-
end
|
16
|
+
autoload :Client, 'delsolr/client'
|
17
|
+
autoload :Document, 'delsolr/document'
|
18
|
+
end
|
data/lib/delsolr/client.rb
ADDED
@@ -0,0 +1,301 @@
|
|
1
|
+
module DelSolr
|
2
|
+
|
3
|
+
class Client
|
4
|
+
autoload :Configuration, "delsolr/client/configuration"
|
5
|
+
autoload :QueryBuilder, "delsolr/client/query_builder"
|
6
|
+
autoload :Response, "delsolr/client/response"
|
7
|
+
|
8
|
+
attr_reader :configuration, :logger
|
9
|
+
|
10
|
+
#
|
11
|
+
# [<b><tt>:server</tt></b>]
|
12
|
+
# the server you want to connect to
|
13
|
+
#
|
14
|
+
# [<b><tt>:port</tt></b>]
|
15
|
+
# the port you want to connect to
|
16
|
+
#
|
17
|
+
# [<b><tt>:cache</tt></b>]
|
18
|
+
# (optional) a cache instance (any object that supports get and set)
|
19
|
+
#
|
20
|
+
# [<b><tt>:shortcuts</tt></b>]
|
21
|
+
# (optional) a list of values in the doc fields to generate short cuts for (ie: [:scores, :id], you will be able to call <tt>rsp.scores</tt> and have it return an array of scores, likewise for <tt>ids</tt>.) Defaults to [:id, :unique_id, :score]
|
22
|
+
#
|
23
|
+
# [<b><tt>:path</tt></b>]
|
24
|
+
# (optional) the path of the solr install (defaults to "/solr")
|
25
|
+
#
|
26
|
+
# [<b><tt>:logger</tt></b>]
|
27
|
+
# (optional) Log4r logger object
|
28
|
+
def initialize(options = {}, &connection_block)
|
29
|
+
@configuration = DelSolr::Client::Configuration.new(options[:server], options[:port], options[:timeout], options[:path])
|
30
|
+
@cache = options[:cache]
|
31
|
+
@logger = options[:logger]
|
32
|
+
@shortcuts = options[:shortcuts]
|
33
|
+
setup_connection(&connection_block) if connection_block
|
34
|
+
end
|
35
|
+
|
36
|
+
#
|
37
|
+
#
|
38
|
+
# <tt>request_handler</tt> - type of query to perform (should match up w/ request handlers defined in solrconfig.xml)
|
39
|
+
#
|
40
|
+
#
|
41
|
+
# options
|
42
|
+
#
|
43
|
+
# [<b><tt>:query</tt></b>]
|
44
|
+
# (required) effectively the 'q' param in the solr URL. The treatment of <tt>:query</tt> depends on the type
|
45
|
+
# of request handler you are using. The supported values are Strings and Hashes. Any valid Lucene query string is acceptable
|
46
|
+
# (ie: :query => "brand:apple" and :query => "apply" are both valid). If given a Hash delsolr will build the appropriate
|
47
|
+
# query string given a hash of fieldnames => values. For instance, the following two queries are effectively the
|
48
|
+
# same. Both will end up passing "brand:apple" as the 'q' param to solr.
|
49
|
+
#
|
50
|
+
# c.query('standard', :query => {:brand => 'apple'})
|
51
|
+
# c.query('standard', :query => "brand:apple")
|
52
|
+
#
|
53
|
+
# [<b><tt>:filters</tt></b>]
|
54
|
+
# (optional)array, string, or hash of additional filters to apply. Filters end up in the 'fq' param in the solr query URL.
|
55
|
+
# The value can be a String, Array of Strings, or Hash. The following are all equivalent.
|
56
|
+
#
|
57
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true})
|
58
|
+
# c.query('standard', :query => 'abc', :filters => "instock:true")
|
59
|
+
# c.query('standard', :query => 'abc', :filters => ["instock:true"])
|
60
|
+
#
|
61
|
+
# as are the following
|
62
|
+
#
|
63
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true, :onsale => true})
|
64
|
+
# c.query('standard', :query => 'abc', :filters => ["instock:true", "onsale:true"])
|
65
|
+
#
|
66
|
+
#
|
67
|
+
# [<b><tt>:facets</tt></b>]
|
68
|
+
# (optional) array of hashes for all the facet params (ie: {:field => 'instock_b', :limit => 15, :mincount => 5})
|
69
|
+
#
|
70
|
+
# <em>Faceting by field...</em>
|
71
|
+
#
|
72
|
+
# c.query('standard', :query => 'abc', :facets => [{:field => 'brand', :limit => 15, :mincount => 5}])
|
73
|
+
#
|
74
|
+
# ...will request counts for the 'brand' field name that have a minimum of 5 documents, returning
|
75
|
+
# a max/limit of 15. The counts for this facet can be pulled from the response like so:
|
76
|
+
#
|
77
|
+
# rsp.facet_field_count('brand', 'Apple') => 17 # returns count as fixnum
|
78
|
+
#
|
79
|
+
# The list of values for this facet can be pulled from the response like so:
|
80
|
+
#
|
81
|
+
# rsp.facet_field_values('brand') => ['Apple', 'Microsoft', 'Dell'] # returns an array of strings
|
82
|
+
#
|
83
|
+
# <em>Faceting by query...</em>
|
84
|
+
#
|
85
|
+
# c.query('standard', :query => 'abc',
|
86
|
+
# :facets => [{:query => {:city => 'seattle', :instock => true},
|
87
|
+
# :prefix => {:key => 'seattle_instock'}}])
|
88
|
+
#
|
89
|
+
# ...will request counts for the number of documents where "seattle" matches on the "city" field and "instock" is set to true.
|
90
|
+
# Faceting by query requires you to assign a name to the facet so the counts can easily be fetched from the response. Solr
|
91
|
+
# resolves facet query counts by the actual facet query string, which can be cumbersome. The delsolr response object maintains
|
92
|
+
# a mapping of query name => query string for you so your application only needs to remember the query name.
|
93
|
+
#
|
94
|
+
# The count for this facet query can be pulled like so:
|
95
|
+
#
|
96
|
+
# rsp.facet_query_count_by_key('seattle_instock').
|
97
|
+
#
|
98
|
+
# [<b><tt>:sorts</tt></b>]
|
99
|
+
# (optional) array or string of sorts in Lucene syntax (<fieldname> <asc/desc>)
|
100
|
+
#
|
101
|
+
# c.query('standard', :query => 'abc', :sort => "product_name asc")
|
102
|
+
#
|
103
|
+
#
|
104
|
+
# [<b><tt>:limit</tt></b>]
|
105
|
+
# (optional) number to return (defaults to 10). (becomes the 'rows' param in the solr URL)
|
106
|
+
#
|
107
|
+
# c.query('standard', :query => 'abc', :limit => 100)
|
108
|
+
#
|
109
|
+
# [<b><tt>:offset</tt></b>]
|
110
|
+
# (optional) offset (defaults to 0, becomes the 'start' param in the solr URL)
|
111
|
+
#
|
112
|
+
# c.query('standard', :query => 'abc', :offset => 40)
|
113
|
+
#
|
114
|
+
# [<b><tt>:enable_caching</tt></b>]
|
115
|
+
# (optional) switch to control whether or not to use the cache (for fetching or setting) for the current query. Only works if a cache store was passed to the constructor.
|
116
|
+
#
|
117
|
+
# c = DelSolr::Client.new(:server => 'solr1', :port => 8983, :cache => SomeCacheStore.new)
|
118
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true)
|
119
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true) # this one should hit the cache
|
120
|
+
#
|
121
|
+
# Cache keys are created from MD5's of the solr URL that is generated.
|
122
|
+
#
|
123
|
+
# [<b><tt>:boost</tt></b>] becomes the 'bq' param which is used for query time boosting
|
124
|
+
# [<b><tt>:fields</tt></b>] becomes the 'fl' param which decides which fields to return. Defaults to 'id,unique_id,score'
|
125
|
+
#
|
126
|
+
# NOTE: Any unrecognized options will be passed along as URL params in the solr request URL. This allows you to access solr features
|
127
|
+
# which are unsupported by DelSolr.
|
128
|
+
#
|
129
|
+
# Returns a DelSolr::Client::Response instance
|
130
|
+
def query(request_handler, opts = {})
|
131
|
+
|
132
|
+
raise "request_handler must be supplied" if request_handler.blank?
|
133
|
+
|
134
|
+
enable_caching = opts.delete(:enable_caching) && !@cache.nil?
|
135
|
+
ttl = opts.delete(:ttl) || 1.hours
|
136
|
+
|
137
|
+
query_builder = DelSolr::Client::QueryBuilder.new(request_handler, opts)
|
138
|
+
|
139
|
+
# it's important that the QueryBuilder returns strings in a deterministic fashion
|
140
|
+
# so that the cache keys will match for the same query.
|
141
|
+
cache_key = Digest::MD5.hexdigest(query_builder.request_string)
|
142
|
+
from_cache = false
|
143
|
+
|
144
|
+
# if we're caching, first try looking in the cache
|
145
|
+
if enable_caching
|
146
|
+
t1 = Time.now
|
147
|
+
body = @cache.get(cache_key) rescue body = nil
|
148
|
+
from_cache = true unless body.blank?
|
149
|
+
cache_time = (Time.now - t1).to_i * 1000 # retrieval time from the cache in ms
|
150
|
+
end
|
151
|
+
|
152
|
+
if body.blank? # cache miss (or wasn't enabled)
|
153
|
+
response = connection.post("#{configuration.path}/select", query_builder.request_string)
|
154
|
+
body = response.body
|
155
|
+
|
156
|
+
# We get UTF-8 from Solr back, make sure the string knows about it
|
157
|
+
# when running on Ruby >= 1.9
|
158
|
+
if body.respond_to?(:force_encoding)
|
159
|
+
body.force_encoding("UTF-8")
|
160
|
+
end
|
161
|
+
|
162
|
+
end
|
163
|
+
|
164
|
+
response = DelSolr::Client::Response.new(body, query_builder, :logger => logger, :from_cache => from_cache, :shortcuts => @shortcuts)
|
165
|
+
|
166
|
+
url = "http://#{configuration.full_path}/select?#{query_builder.request_string}"
|
167
|
+
if response && response.success?
|
168
|
+
log_query_success(url, response, from_cache, (from_cache ? cache_time : response.qtime))
|
169
|
+
else
|
170
|
+
# The response from solr will already be logged, but we should also
|
171
|
+
# log the full url to make debugging easier
|
172
|
+
log_query_error(url)
|
173
|
+
end
|
174
|
+
|
175
|
+
# Cache successful responses that don't come from the cache
|
176
|
+
if response && response.success? && enable_caching && !from_cache
|
177
|
+
# add to the cache if caching
|
178
|
+
@cache.set(cache_key, body, ttl)
|
179
|
+
end
|
180
|
+
|
181
|
+
response
|
182
|
+
end
|
183
|
+
|
184
|
+
# Adds a document to the buffer to be posted to solr (NOTE: does not perform the actual post)
|
185
|
+
#
|
186
|
+
# [<b><tt>docs</tt></b>]
|
187
|
+
# docs must be a DelSolr::Document or array of instances. See DelSolr::Document for how to set up a document
|
188
|
+
def update(docs)
|
189
|
+
self.pending_documents.push(*Array(docs))
|
190
|
+
true
|
191
|
+
end
|
192
|
+
|
193
|
+
# Exactly like <tt>update</tt>, but performs the post immediately. Use <tt>update</tt> if you wish to batch document updates.
|
194
|
+
def update!(docs, options = {})
|
195
|
+
update(docs) && post_update!(options)
|
196
|
+
end
|
197
|
+
|
198
|
+
# Calls <tt>update!</tt> on the docs and then <tt>commit!</tt>
|
199
|
+
def update_and_commit!(docs, options = {})
|
200
|
+
update!(docs, options) && commit!
|
201
|
+
end
|
202
|
+
|
203
|
+
# posts the buffer created by <tt>update</tt> to solr
|
204
|
+
def post_update!(options = {})
|
205
|
+
rsp = post(prepare_update_xml(options))
|
206
|
+
success?(rsp.body) or log_error(rsp.body)
|
207
|
+
end
|
208
|
+
|
209
|
+
# deletes <tt>unique_id</tt> from the index
|
210
|
+
def delete(unique_id)
|
211
|
+
rsp = post("<delete><id>#{unique_id}</id></delete>")
|
212
|
+
success?(rsp.body) or log_error(rsp.body)
|
213
|
+
end
|
214
|
+
|
215
|
+
# deletes documents matching <tt>query</tt> from the index
|
216
|
+
def delete_by_query(query)
|
217
|
+
rsp = post("<delete><query>#{query}</query></delete>")
|
218
|
+
success?(rsp.body) or log_error(rsp.body)
|
219
|
+
end
|
220
|
+
|
221
|
+
# commits all pending adds/deletes
|
222
|
+
def commit!
|
223
|
+
rsp = post("<commit/>")
|
224
|
+
success?(rsp.body) or log_error(rsp.body)
|
225
|
+
end
|
226
|
+
|
227
|
+
# posts the optimize directive to solr
|
228
|
+
def optimize!
|
229
|
+
rsp = post("<optimize/>")
|
230
|
+
success?(rsp.body) or log_error(rsp.body)
|
231
|
+
end
|
232
|
+
|
233
|
+
def setup_connection(&connection_block)
|
234
|
+
@connection_block = connection_block
|
235
|
+
end
|
236
|
+
|
237
|
+
# accessor to the connection instance
|
238
|
+
def connection
|
239
|
+
@connection ||= begin
|
240
|
+
Faraday.new(:url => "http://#{configuration.server}:#{configuration.port}", :timeout => configuration.timeout, &connection_block)
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
244
|
+
# clears out the connection so a new one will be created
|
245
|
+
def reset_connection!
|
246
|
+
@connection = nil
|
247
|
+
end
|
248
|
+
|
249
|
+
# returns the array of documents that are waiting to be posted to solr
|
250
|
+
def pending_documents
|
251
|
+
@pending_documents ||= []
|
252
|
+
end
|
253
|
+
|
254
|
+
private
|
255
|
+
|
256
|
+
def connection_block
|
257
|
+
@connection_block ||= lambda do |faraday|
|
258
|
+
faraday.adapter Faraday.default_adapter
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
def log_query_success(url, response, from_cache, query_time)
|
263
|
+
if logger
|
264
|
+
l = []
|
265
|
+
l << "#{query_time}ms"
|
266
|
+
l << (from_cache ? "CACHE" : "SOLR")
|
267
|
+
l << url
|
268
|
+
logger.info l.join(' ')
|
269
|
+
end
|
270
|
+
end
|
271
|
+
|
272
|
+
def log_query_error(url)
|
273
|
+
logger.error "ERROR #{url}" if logger
|
274
|
+
end
|
275
|
+
|
276
|
+
# returns the update xml buffer
|
277
|
+
def prepare_update_xml(options = {})
|
278
|
+
r = ["<add#{options.to_xml_attribute_string}>\n"]
|
279
|
+
# copy and clear pending docs
|
280
|
+
working_docs, @pending_documents = @pending_documents, nil
|
281
|
+
working_docs.each { |doc| r << doc.xml }
|
282
|
+
r << "\n</add>\n"
|
283
|
+
r.join # not sure, but I think Array#join is faster then String#<< for large buffers
|
284
|
+
end
|
285
|
+
|
286
|
+
# helper for posting data to solr
|
287
|
+
def post(buffer)
|
288
|
+
connection.post("#{configuration.path}/update", buffer, {'Content-type' => 'text/xml;charset=utf-8'})
|
289
|
+
end
|
290
|
+
|
291
|
+
def success?(response_body)
|
292
|
+
response_body.include?('<result status="0"></result>') ||
|
293
|
+
response_body.include?('<lst name="responseHeader"><int name="status">0</int>')
|
294
|
+
end
|
295
|
+
|
296
|
+
def log_error(response_body)
|
297
|
+
return unless logger
|
298
|
+
logger.error(response_body)
|
299
|
+
end
|
300
|
+
end
|
301
|
+
end
|