rsolr 0.12.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +7 -0
  2. data/.github/workflows/ruby.yml +29 -0
  3. data/.gitignore +13 -0
  4. data/.rspec +2 -0
  5. data/CHANGES.txt +63 -260
  6. data/Gemfile +13 -0
  7. data/README.rdoc +177 -63
  8. data/Rakefile +19 -0
  9. data/lib/rsolr/char.rb +6 -0
  10. data/lib/rsolr/client.rb +344 -86
  11. data/lib/rsolr/document.rb +66 -0
  12. data/lib/rsolr/error.rb +182 -0
  13. data/lib/rsolr/field.rb +87 -0
  14. data/lib/rsolr/generator.rb +5 -0
  15. data/lib/rsolr/json.rb +60 -0
  16. data/lib/rsolr/response.rb +95 -0
  17. data/lib/rsolr/uri.rb +25 -0
  18. data/lib/rsolr/version.rb +7 -0
  19. data/lib/rsolr/xml.rb +150 -0
  20. data/lib/rsolr.rb +47 -35
  21. data/rsolr.gemspec +44 -31
  22. data/spec/api/client_spec.rb +423 -0
  23. data/spec/api/document_spec.rb +48 -0
  24. data/spec/api/error_spec.rb +158 -0
  25. data/spec/api/json_spec.rb +248 -0
  26. data/spec/api/pagination_spec.rb +31 -0
  27. data/spec/api/rsolr_spec.rb +31 -0
  28. data/spec/api/uri_spec.rb +37 -0
  29. data/spec/api/xml_spec.rb +255 -0
  30. data/spec/fixtures/basic_configs/_rest_managed.json +1 -0
  31. data/spec/fixtures/basic_configs/currency.xml +67 -0
  32. data/spec/fixtures/basic_configs/lang/stopwords_en.txt +54 -0
  33. data/spec/fixtures/basic_configs/protwords.txt +21 -0
  34. data/spec/fixtures/basic_configs/schema.xml +530 -0
  35. data/spec/fixtures/basic_configs/solrconfig.xml +572 -0
  36. data/spec/fixtures/basic_configs/stopwords.txt +14 -0
  37. data/spec/fixtures/basic_configs/synonyms.txt +29 -0
  38. data/spec/integration/solr5_spec.rb +38 -0
  39. data/spec/lib/rsolr/client_spec.rb +19 -0
  40. data/spec/spec_helper.rb +94 -0
  41. metadata +228 -54
  42. data/lib/rsolr/connection/net_http.rb +0 -48
  43. data/lib/rsolr/connection/requestable.rb +0 -43
  44. data/lib/rsolr/connection/utils.rb +0 -73
  45. data/lib/rsolr/connection.rb +0 -9
  46. data/lib/rsolr/message/document.rb +0 -48
  47. data/lib/rsolr/message/field.rb +0 -20
  48. data/lib/rsolr/message/generator.rb +0 -89
  49. data/lib/rsolr/message.rb +0 -8
data/lib/rsolr/client.rb CHANGED
@@ -1,114 +1,372 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'faraday'
5
+ require 'uri'
6
+
1
7
  class RSolr::Client
2
-
3
- attr_reader :connection
4
-
5
- # "connection" is instance of:
6
- # RSolr::Adapter::HTTP
7
- # RSolr::Adapter::Direct (jRuby only)
8
- # or any other class that uses the connection "interface"
9
- def initialize(connection)
8
+ DEFAULT_URL = 'http://127.0.0.1:8983/solr/'
9
+
10
+ class << self
11
+ def default_wt
12
+ @default_wt ||= :json
13
+ end
14
+
15
+ def default_wt= value
16
+ @default_wt = value
17
+ end
18
+ end
19
+
20
+ attr_reader :uri, :proxy, :update_format, :options, :update_path
21
+
22
+ def initialize connection, options = {}
23
+ @proxy = @uri = nil
10
24
  @connection = connection
25
+ unless false === options[:url]
26
+ @uri = extract_url_from_options(options)
27
+ if options[:proxy]
28
+ proxy_url = options[:proxy].dup
29
+ proxy_url << "/" unless proxy_url.nil? or proxy_url[-1] == ?/
30
+ @proxy = ::URI.parse proxy_url if proxy_url
31
+ elsif options[:proxy] == false
32
+ @proxy = false # used to avoid setting the proxy from the environment.
33
+ end
34
+ end
35
+ @update_format = options.delete(:update_format) || RSolr::JSON::Generator
36
+ @update_path = options.fetch(:update_path, 'update')
37
+ @options = options
38
+
39
+ if options[:read_timeout]
40
+ warn "DEPRECATION: Rsolr.new/connect option `read_timeout` is deprecated and will be removed in Rsolr 3. `timeout` is currently a synonym, use that instead."
41
+ end
42
+ end
43
+
44
+ def extract_url_from_options(options)
45
+ url = options[:url] ? options[:url].dup : DEFAULT_URL
46
+ url << "/" unless url[-1] == ?/
47
+ uri = ::URI.parse(url)
48
+ # URI::HTTPS is a subclass of URI::HTTP, so this check accepts HTTP(S)
49
+ raise ArgumentError, "You must provide an HTTP(S) url." unless uri.kind_of?(URI::HTTP)
50
+ uri
51
+ end
52
+
53
+ # returns the request URI (path plus query string) of the base uri as a String, or nil when no base uri is set.
54
+ def base_request_uri
55
+ base_uri.request_uri if base_uri
56
+ end
57
+
58
+ # returns the base URI object (nil when the client was created with :url => false).
59
+ def base_uri
60
+ @uri
61
+ end
62
+
63
+ # Create the get, post, and head methods
64
+ %W(get post head).each do |meth|
65
+ class_eval <<-RUBY
66
+ def #{meth} path, opts = {}, &block
67
+ send_and_receive path, opts.merge(:method => :#{meth}), &block
68
+ end
69
+ RUBY
70
+ end
71
+
72
+ # A paginated request method.
73
+ # Converts the page and per_page
74
+ # arguments into "rows" and "start".
75
+ def paginate page, per_page, path, opts = nil
76
+ opts ||= {}
77
+ opts[:params] ||= {}
78
+ raise "'rows' or 'start' params should not be set when using +paginate+" if ["start", "rows"].include?(opts[:params].keys)
79
+ execute build_paginated_request(page, per_page, path, opts)
11
80
  end
12
-
13
- # Send a request to a request handler using the method name.
14
- # Also proxies to the #paginate method if the method starts with "paginate_"
15
- def method_missing(method_name, *args, &blk)
16
- request("/#{method_name}", *args, &blk)
17
- end
18
-
19
- # sends data to the update handler
20
- # data can be a string of xml, or an object that returns xml from its #to_xml method
21
- def update(data, params={})
22
- request '/update', params, data
23
- end
24
-
25
- # send request solr
26
- # params is hash with valid solr request params (:q, :fl, :qf etc..)
27
- # if params[:wt] is not set, the default is :ruby
28
- # if :wt is something other than :ruby, the raw response body is used
29
- # otherwise, a simple Hash is returned
30
- # NOTE: to get raw ruby, use :wt=>'ruby' <- a string, not a symbol like :ruby
31
- #
32
- #
33
- def request(path, params={}, *extra)
34
- response = @connection.request(path, map_params(params), *extra)
35
- adapt_response(response)
36
- end
37
-
38
- #
81
+
82
+ # POST update messages to the update path (opts[:path], or +update_path+ by default) with optional params.
83
+ #
84
+ # http://wiki.apache.org/solr/UpdateXmlMessages#add.2BAC8-update
85
+ #
86
+ # If not set, opts[:headers] will be set to a hash with the key
87
+ # 'Content-Type' set to the content type reported by the configured builder
88
+ #
89
+ # +opts+ can/should contain:
90
+ #
91
+ # :data - posted data
92
+ # :headers - http headers
93
+ # :params - solr query parameter hash
94
+ #
95
+ def update opts = {}
96
+ opts[:headers] ||= {}
97
+ opts[:headers]['Content-Type'] ||= builder.content_type
98
+ post opts.fetch(:path, update_path), opts
99
+ end
100
+
101
+ # +add+ builds an "add" message with the configured builder (JSON by default) and sends it to the +update+ method
102
+ #
103
+ # http://wiki.apache.org/solr/UpdateXmlMessages#add.2BAC8-update
104
+ #
39
105
  # single record:
40
- # solr.update(:id=>1, :name=>'one')
106
+ # solr.add(:id=>1, :name=>'one')
107
+ #
108
+ # add using an array
41
109
  #
42
- # update using an array
43
- # solr.update([{:id=>1, :name=>'one'}, {:id=>2, :name=>'two'}])
110
+ # solr.add(
111
+ # [{:id=>1, :name=>'one'}, {:id=>2, :name=>'two'}],
112
+ # :add_attributes => {:boost=>5.0, :commitWithin=>10}
113
+ # )
44
114
  #
45
- def add(doc, &block)
46
- update message.add(doc, &block)
115
+ def add doc, opts = {}
116
+ add_attributes = opts.delete :add_attributes
117
+ update opts.merge(:data => builder.add(doc, add_attributes))
47
118
  end
48
119
 
49
- # send </commit>
50
- def commit
51
- update message.commit
120
+ # send a "commit" message (built by the configured builder) with opts
121
+ #
122
+ # http://wiki.apache.org/solr/UpdateXmlMessages#A.22commit.22_and_.22optimize.22
123
+ #
124
+ def commit opts = {}
125
+ commit_attrs = opts.delete :commit_attributes
126
+ update opts.merge(:data => builder.commit( commit_attrs ))
52
127
  end
53
128
 
54
- # send </optimize>
55
- def optimize
56
- update message.optimize
129
+ # soft commit
130
+ #
131
+ # https://lucene.apache.org/solr/guide/updatehandlers-in-solrconfig.html#commit-and-softcommit
132
+ #
133
+ def soft_commit opts = {}
134
+ commit(opts.merge params: { softCommit: true })
135
+ end
136
+
137
+ # send an "optimize" message (built by the configured builder) with opts.
138
+ #
139
+ # http://wiki.apache.org/solr/UpdateXmlMessages#A.22commit.22_and_.22optimize.22
140
+ #
141
+ def optimize opts = {}
142
+ optimize_attributes = opts.delete :optimize_attributes
143
+ update opts.merge(:data => builder.optimize(optimize_attributes))
57
144
  end
58
145
 
59
146
  # send </rollback>
147
+ #
148
+ # http://wiki.apache.org/solr/UpdateXmlMessages#A.22rollback.22
149
+ #
60
150
  # NOTE: solr 1.4 only
61
- def rollback
62
- update message.rollback
151
+ def rollback opts = {}
152
+ update opts.merge(:data => builder.rollback)
63
153
  end
64
154
 
65
155
  # Delete one or many documents by id
66
156
  # solr.delete_by_id 10
67
157
  # solr.delete_by_id([12, 41, 199])
68
- def delete_by_id(id)
69
- update message.delete_by_id(id)
158
+ def delete_by_id id, opts = {}
159
+ update opts.merge(:data => builder.delete_by_id(id))
70
160
  end
71
161
 
72
- # delete one or many documents by query
162
+ # delete one or many documents by query.
163
+ #
164
+ # http://wiki.apache.org/solr/UpdateXmlMessages#A.22delete.22_by_ID_and_by_Query
165
+ #
73
166
  # solr.delete_by_query 'available:0'
74
167
  # solr.delete_by_query ['quantity:0', 'manu:"FQ"']
75
- def delete_by_query(query)
76
- update message.delete_by_query(query)
168
+ def delete_by_query query, opts = {}
169
+ update opts.merge(:data => builder.delete_by_query(query))
170
+ end
171
+
172
+ def builder
173
+ @builder ||= if update_format.is_a? Class
174
+ update_format.new
175
+ elsif update_format == :json
176
+ RSolr::JSON::Generator.new
177
+ elsif update_format == :xml
178
+ RSolr::Xml::Generator.new
179
+ else
180
+ update_format
181
+ end
182
+ end
183
+
184
+ # +send_and_receive+ is the main request method responsible for sending requests to the +connection+ object.
185
+ #
186
+ # "path" : A string value that usually represents a solr request handler
187
+ # "opts" : A hash, which can contain the following keys:
188
+ # :method : required - the http method (:get, :post or :head)
189
+ # :params : optional - the query string params in hash form
190
+ # :data : optional - post data -- if a hash is given, it's sent as "application/x-www-form-urlencoded; charset=UTF-8"
191
+ # :headers : optional - hash of request headers
192
+ # All other options are passed right along to the connection's +send_and_receive+ method (:get, :post, or :head)
193
+ #
194
+ # +send_and_receive+ returns either a string or hash on a successful ruby request.
195
+ # When :params[:wt] is :ruby or :json the body is evaluated/parsed (normally into a hash); otherwise the raw body string is returned.
196
+ #
197
+ # creates a request context hash,
198
+ # sends it to the connection's +execute+ method
199
+ # which returns a simple hash,
200
+ # then passes the request/response into +adapt_response+.
201
+ def send_and_receive path, opts
202
+ request_context = build_request path, opts
203
+ execute request_context
204
+ end
205
+
206
+ #
207
+ def execute request_context
208
+ raw_response = begin
209
+ response = connection.send(request_context[:method], request_context[:uri].to_s) do |req|
210
+ req.body = request_context[:data] if request_context[:method] == :post and request_context[:data]
211
+ req.headers.merge!(request_context[:headers]) if request_context[:headers]
212
+ end
213
+
214
+ { status: response.status.to_i, headers: response.headers, body: response.body.force_encoding('utf-8') }
215
+ rescue Faraday::TimeoutError => e
216
+ raise RSolr::Error::Timeout.new(request_context, e.response)
217
+ rescue Errno::ECONNREFUSED, defined?(Faraday::ConnectionFailed) ? Faraday::ConnectionFailed : Faraday::Error::ConnectionFailed
218
+ raise RSolr::Error::ConnectionRefused.new(request_context)
219
+ rescue Faraday::Error => e
220
+ raise RSolr::Error::Http.new(request_context, e.response)
221
+ end
222
+ adapt_response(request_context, raw_response) unless raw_response.nil?
223
+ end
224
+
225
+ # +build_request+ accepts a path and options hash,
226
+ # then prepares a normalized hash to return for sending
227
+ # to a solr connection driver.
228
+ # +build_request+ sets up the uri/query string
229
+ # and converts the +data+ arg to form-urlencoded,
230
+ # if the +data+ arg is a hash.
231
+ # returns a hash with the following keys:
232
+ # :method
233
+ # :params
234
+ # :headers
235
+ # :data
236
+ # :uri
237
+ # :path
238
+ # :query
239
+ def build_request path, opts
240
+ raise "path must be a string or symbol, not #{path.inspect}" unless [String,Symbol].include?(path.class)
241
+ path = path.to_s
242
+ opts[:proxy] = proxy unless proxy.nil?
243
+ opts[:method] ||= :get
244
+ raise "The :data option can only be used if :method => :post" if opts[:method] != :post and opts[:data]
245
+ opts[:params] = params_with_wt(opts[:params])
246
+ query = RSolr::Uri.params_to_solr(opts[:params]) unless opts[:params].empty?
247
+ opts[:query] = query
248
+ if opts[:data].is_a? Hash
249
+ opts[:data] = RSolr::Uri.params_to_solr opts[:data]
250
+ opts[:headers] ||= {}
251
+ opts[:headers]['Content-Type'] ||= 'application/x-www-form-urlencoded; charset=UTF-8'
252
+ end
253
+ opts[:path] = path
254
+ opts[:uri] = base_uri.merge(path.to_s + (query ? "?#{query}" : "")) if base_uri
255
+
256
+ opts
257
+ end
258
+
259
+ def params_with_wt(params)
260
+ return { wt: default_wt } if params.nil?
261
+ return params if params.key?(:wt) || params.key?('wt')
262
+ { wt: default_wt }.merge(params)
263
+ end
264
+
265
+ def build_paginated_request page, per_page, path, opts
266
+ per_page = per_page.to_s.to_i
267
+ page = page.to_s.to_i-1
268
+ page = page < 1 ? 0 : page
269
+ opts[:params]["start"] = page * per_page
270
+ opts[:params]["rows"] = per_page
271
+ build_request path, opts
272
+ end
273
+
274
+ # This method will evaluate the :body value
275
+ # if an evaluate_<wt>_response method exists for request[:params][:wt] (e.g. :ruby or :json)
276
+ # ... otherwise, the body is returned as is.
277
+ # The return object has methods attached, :request and :response.
278
+ # These methods give you access to the original
279
+ # request and response from the connection.
280
+ #
281
+ # +adapt_response+ will raise an InvalidRubyResponse
282
+ # if :wt == :ruby and the body
283
+ # couldn't be evaluated.
284
+ def adapt_response request, response
285
+ raise "The response does not have the correct keys => :body, :headers, :status" unless
286
+ %W(body headers status) == response.keys.map{|k|k.to_s}.sort
287
+
288
+ result = if respond_to? "evaluate_#{request[:params][:wt]}_response", true
289
+ send "evaluate_#{request[:params][:wt]}_response", request, response
290
+ else
291
+ response[:body]
292
+ end
293
+
294
+ if result.is_a?(Hash) || request[:method] == :head
295
+ result = RSolr::HashWithResponse.new(request, response, result)
296
+ end
297
+
298
+ result
77
299
  end
78
-
79
- # shortcut to RSolr::Message::Generator
80
- def message *opts
81
- @message ||= RSolr::Message::Generator.new
300
+
301
+ def connection
302
+ @connection ||= begin
303
+ conn_opts = { request: {} }
304
+ conn_opts[:url] = uri.to_s
305
+ conn_opts[:proxy] = proxy if proxy
306
+ conn_opts[:request][:open_timeout] = options[:open_timeout] if options[:open_timeout]
307
+
308
+ if options[:read_timeout] || options[:timeout]
309
+ # read_timeout was being passed to faraday as timeout since Rsolr 2.0,
310
+ # it's now deprecated, just use `timeout` directly.
311
+ conn_opts[:request][:timeout] = options[:timeout] || options[:read_timeout]
312
+ end
313
+
314
+ conn_opts[:request][:params_encoder] = Faraday::FlatParamsEncoder
315
+
316
+ Faraday.new(conn_opts) do |conn|
317
+ if uri.user && uri.password
318
+ case Faraday::VERSION
319
+ when /^0/
320
+ conn.basic_auth uri.user, uri.password
321
+ when /^1/
322
+ conn.request :basic_auth, uri.user, uri.password
323
+ else
324
+ conn.request :authorization, :basic_auth, uri.user, uri.password
325
+ end
326
+ end
327
+
328
+ conn.response :raise_error
329
+ conn.request :retry, max: options[:retry_after_limit], interval: 0.05,
330
+ interval_randomness: 0.5, backoff_factor: 2,
331
+ exceptions: ['Faraday::Error', 'Timeout::Error'] if options[:retry_503]
332
+ conn.adapter options[:adapter] || Faraday.default_adapter || :net_http
333
+ end
334
+ end
82
335
  end
83
-
336
+
84
337
  protected
85
-
86
- # sets default params etc.. - could be used as a mapping hook
87
- # type of request should be passed in here? -> map_params(:query, {})
88
- def map_params(params)
89
- params||={}
90
- {:wt=>:ruby}.merge(params)
91
- end
92
-
93
- # "connection_response" must be a hash with the following keys:
94
- # :params - a sub hash of standard solr params
95
- # : body - the raw response body from the solr server
96
- # This method will evaluate the :body value if the params[:wt] == :ruby
97
- # otherwise, the body is returned
98
- # The return object has a special method attached called #raw
99
- # This method gives you access to the original response from the connection,
100
- # so you can access things like the actual :url sent to solr,
101
- # the raw :body, original :params and original :data
102
- def adapt_response(connection_response)
103
- data = connection_response[:body]
104
- # if the wt is :ruby, evaluate the ruby string response
105
- if connection_response[:params][:wt] == :ruby
106
- data = Kernel.eval(data)
338
+
339
+ # converts the method name for the solr request handler path.
340
+ def method_missing name, *args
341
+ if name.to_s =~ /^paginated?_(.+)$/
342
+ paginate args[0], args[1], $1, *args[2..-1]
343
+ else
344
+ send_and_receive name, *args
107
345
  end
108
- # attach a method called #raw that returns the original connection response value
109
- def data.raw; @raw end
110
- data.send(:instance_variable_set, '@raw', connection_response)
111
- data
112
346
  end
113
-
114
- end
347
+
348
+ # evaluates the response[:body],
349
+ # attempts to bring the ruby string to life.
350
+ # If a SyntaxError is raised, then
351
+ # this method intercepts and raises a
352
+ # RSolr::Error::InvalidRubyResponse
353
+ # instead, giving full access to the
354
+ # request/response objects.
355
+ def evaluate_ruby_response request, response
356
+ Kernel.eval response[:body].to_s
357
+ rescue SyntaxError
358
+ raise RSolr::Error::InvalidRubyResponse.new request, response
359
+ end
360
+
361
+ def evaluate_json_response request, response
362
+ return if response[:body].nil? || response[:body].empty?
363
+
364
+ JSON.parse response[:body].to_s
365
+ rescue JSON::ParserError
366
+ raise RSolr::Error::InvalidJsonResponse.new request, response
367
+ end
368
+
369
+ def default_wt
370
+ self.options[:default_wt] || self.class.default_wt
371
+ end
372
+ end
@@ -0,0 +1,66 @@
1
+ module RSolr
2
+ class Document
3
+ CHILD_DOCUMENT_KEY = '_childDocuments_'.freeze
4
+ ATOMIC_MULTI_VALUE_OPERATIONS = %i[set add add-distinct remove]
5
+
6
+ # "attrs" is a hash for setting the "doc" xml attributes
7
+ # "fields" is an array of Field objects
8
+ attr_accessor :attrs, :fields
9
+
10
+ # "doc_hash" must be a Hash/Mash object
11
+ # If a value in the "doc_hash" is an array,
12
+ # a field object is created for each value...
13
+ def initialize(doc_hash = {})
14
+ @fields = []
15
+ doc_hash.each_pair do |field, values|
16
+ add_field(field, values)
17
+ end
18
+ @attrs={}
19
+ end
20
+
21
+ # returns an array of fields that match the "name" arg
22
+ def fields_by_name(name)
23
+ @fields.select{|f|f.name==name}
24
+ end
25
+
26
+ # returns the *first* field that matches the "name" arg
27
+ def field_by_name(name)
28
+ @fields.detect{|f|f.name==name}
29
+ end
30
+
31
+ #
32
+ # Add a field value to the document. Options map directly to
33
+ # XML attributes in the Solr <field> node.
34
+ # See http://wiki.apache.org/solr/UpdateXmlMessages#head-8315b8028923d028950ff750a57ee22cbf7977c6
35
+ #
36
+ # === Example:
37
+ #
38
+ # document.add_field('title', 'A Title', :boost => 2.0)
39
+ #
40
+ def add_field(name, values, options = {})
41
+ RSolr::Array.wrap(values).each do |v|
42
+ field_attrs = { name: name }
43
+ field_attrs[:type] = DocumentField if name.to_s == CHILD_DOCUMENT_KEY
44
+
45
+ @fields << RSolr::Field.instance(options.merge(field_attrs), v)
46
+ end
47
+ end
48
+
49
+ def as_json
50
+ @fields.group_by(&:name).each_with_object({}) do |(field, values), result|
51
+ v = values.map(&:as_json)
52
+ if v.length > 1 && v.first.is_a?(Hash)
53
+ if v.first.key?(:value)
54
+ v = v.first.merge(value: v.map { |single| single[:value] })
55
+ else
56
+ (v.first.keys & ATOMIC_MULTI_VALUE_OPERATIONS).each do |op|
57
+ v = [{ op => v.map { |single| single[op] } }]
58
+ end
59
+ end
60
+ end
61
+ v = v.first if v.length == 1 && field.to_s != CHILD_DOCUMENT_KEY
62
+ result[field] = v
63
+ end
64
+ end
65
+ end
66
+ end