sean-rets 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,391 @@
1
+ require 'httpclient'
2
+ require 'logger'
3
+ require_relative 'http_client'
4
+
5
+ module Rets
6
# Error raised by Client::ErrorChecker for HTTP 412 responses and for other
# non-OK statuses that are not 401.
class HttpError < StandardError
end
7
+
8
+ class Client
9
# Baseline options; clean_setup merges the caller-supplied options over these.
DEFAULT_OPTIONS = {}

# Named values for the :count search option (exclude = 0, include = 1, only = 2).
# find_every compares opts[:count] against COUNT.only.
COUNT = Struct.new(:exclude, :include, :only).new(0,1,2)

# login_url, options and logger are assigned in clean_setup.
attr_accessor :login_url, :options, :logger
# Readers for these two are hand-written below (they load lazily).
attr_writer :capabilities, :metadata
15
+
16
# Stores the caller's options hash and performs the full client setup.
#
# options - Hash of settings; the keys that are read are listed by clean_setup.
def initialize(options)
  @options = options
  clean_setup()
end
20
+
21
# (Re)builds all client state from the options hash: login URL, logger,
# progress reporter, cached metadata and the HTTP client stack. Called from
# initialize and again by find_with_retries before each retry.
#
# Option keys read here: :login_url, :logger, :stats_collector, :stats_prefix,
# :metadata, :http_proxy, :proxy_username, :proxy_password, :receive_timeout,
# :cookie_store, :http_timing_stats_collector, :http_timing_stats_prefix,
# :lock_around_http_requests, :locker, :lock_name, :lock_options.
def clean_setup
  self.options = DEFAULT_OPTIONS.merge(@options)
  self.login_url = options[:login_url]

  # Drop everything remembered from a previous session.
  @cached_metadata = nil
  @capabilities = nil
  @metadata = nil
  @tries = nil
  self.capabilities = nil

  self.logger = options[:logger] || FakeLogger.new
  @client_progress = ClientProgressReporter.new(logger, options[:stats_collector], options[:stats_prefix])
  @cached_metadata = options[:metadata]

  if options[:http_proxy]
    @http = HTTPClient.new(options.fetch(:http_proxy))
    # fetch on the password: a username without a password is a KeyError.
    @http.set_proxy_auth(options.fetch(:proxy_username), options.fetch(:proxy_password)) if options[:proxy_username]
  else
    @http = HTTPClient.new
  end

  @http.receive_timeout = options[:receive_timeout] if options[:receive_timeout]
  @http.set_cookie_store(options[:cookie_store]) if options[:cookie_store]

  # Base client first, then optional decorators layered on top of it.
  @http_client = Rets::HttpClient.new(@http, @options, @logger, @login_url)
  if options[:http_timing_stats_collector]
    @http_client = Rets::MeasuringHttpClient.new(
      @http_client,
      options.fetch(:http_timing_stats_collector),
      options.fetch(:http_timing_stats_prefix)
    )
  end
  if options[:lock_around_http_requests]
    @http_client = Rets::LockingHttpClient.new(
      @http_client,
      options.fetch(:locker),
      options.fetch(:lock_name),
      options.fetch(:lock_options)
    )
  end
end
58
+
59
# Logs in by issuing a GET with no parameters to the login URL and stores the
# capabilities parsed from the response body (see
# http://retsdoc.onconfluence.com/display/rets172/4.10+Capability+URL+List).
#
# Returns the capabilities hash.
# Raises UnknownResponse when the status is not 200 or no capabilities
# could be read.
def login
  response = http_get(login_url)
  status = response.status_code
  if status != 200
    raise UnknownResponse, "bad response to login, expected a 200, but got #{status}. Body was #{response.body}."
  end

  document = Nokogiri.parse(response.body)
  self.capabilities = extract_capabilities(document)
  @capabilities || raise(UnknownResponse, "Cannot read rets server capabilities.")
end
71
+
72
# Ends the session by requesting the server's "Logout" capability URL.
#
# Returns the HTTP response, or nil when an UnknownResponse mentioning
# "expected a 200, but got 401" was swallowed.
# Raises NoLogout when the server advertises no Logout capability; any other
# UnknownResponse is re-raised.
def logout
  raise NoLogout.new('No logout method found for rets client') unless capabilities["Logout"]

  http_get(capability_url("Logout"))
rescue UnknownResponse => e
  raise e unless e.message =~ /expected a 200, but got 401/
end
82
+
83
# Searches the RETS server for records.
#
# [quantity] <tt>:first</tt> returns the first record only (the request is
#            sent with <tt>:limit => 1</tt>); <tt>:all</tt> returns the full
#            result of the search.
#
# [opts] Hash describing the search. Keys documented for this method:
#
#        <tt>:search_type</tt>:: The resource to search for.
#        <tt>:class</tt>:: The class of the resource to search for.
#        <tt>:query</tt>:: The DMQL2 query string to execute.
#        <tt>:limit</tt>:: The number of records to request from the server.
#        <tt>:resolve</tt>:: When truthy, values are resolved through the
#                            metadata instead of returned raw.
#
# Remaining keys are converted to the RETS query format and sent with the
# request; for example <tt>:offset</tt> is sent as +Offset+.
#
# Raises ArgumentError for any other +quantity+.
def find(quantity, opts = {})
  if quantity == :first
    find_with_retries(opts.merge(:limit => 1)).first
  elsif quantity == :all
    find_with_retries(opts)
  else
    raise ArgumentError, "First argument must be :first or :all"
  end
end

alias search find
111
+
112
# Runs a search, retrying after AuthorizationFailure or InvalidRequest.
#
# opts - search options. <tt>:resolve</tt> is taken out and passed to
#        find_every separately; <tt>:max_retries</tt> (default 3) bounds the
#        number of retries. The caller's hash is left untouched.
#
# Before each retry the client is rebuilt with clean_setup. When the retry
# budget is used up the last error is re-raised.
def find_with_retries(opts = {})
  # Work on a copy: deleting :resolve from the caller's hash would make a
  # second call with the same hash silently skip resolution.
  opts = opts.dup
  resolve = opts.delete(:resolve)
  retries = 0

  begin
    find_every(opts, resolve)
  rescue AuthorizationFailure, InvalidRequest => e
    if retries < opts.fetch(:max_retries, 3)
      retries += 1
      @client_progress.find_with_retries_failed_a_retry(e, retries)
      clean_setup
      retry
    else
      @client_progress.find_with_retries_exceeded_retry_count(e)
      raise
    end
  end
end
129
+
130
# Posts one search request and interprets the response.
#
# opts    - search options; converted with fixup_keys and merged over the
#           fixed QueryType/Format parameters.
# resolve - when truthy, results are decorated using the metadata for
#           opts[:search_type] / opts[:class].
#
# Returns the count when opts[:count] equals COUNT.only, otherwise the parsed
# (and optionally decorated) results.
def find_every(opts, resolve)
  params = {"QueryType" => "DMQL2", "Format" => "COMPACT"}.merge(fixup_keys(opts))
  response = http_post(capability_url("Search"), params)

  return Parser::Compact.get_count(response.body) if opts[:count] == COUNT.only

  # Re-encode from binary so invalid or undefined bytes are replaced rather
  # than raising while parsing.
  utf8_body = response.body.encode("UTF-8", "binary", :invalid => :replace, :undef => :replace)
  results = Parser::Compact.parse_document(utf8_body)
  return results unless resolve

  rets_class = find_rets_class(opts[:search_type], opts[:class])
  decorate_results(results, rets_class)
end
146
+
147
# Looks up a class in the metadata tree: first the resource by name, then
# the class within that resource.
def find_rets_class(resource_name, rets_class_name)
  resource = metadata.tree[resource_name]
  resource.find_rets_class(rets_class_name)
end
150
+
151
# Applies decorate_result to every record and returns the array of results.
def decorate_results(results, rets_class)
  results.map { |record| decorate_result(record, rets_class) }
end
156
+
157
# Replaces each value of +result+ in place with its resolved form.
#
# result     - Hash of field name => raw value; modified and returned.
# rets_class - object whose find_table(key) returns the table used to
#              resolve that field, or nil when the field is unknown.
#
# Raises RuntimeError for the first field that has no table. The failure is
# reported to the progress reporter before raising; previously the report
# call sat after the raise and could never run.
def decorate_result(result, rets_class)
  result.each do |key, value|
    table = rets_class.find_table(key)
    if table
      result[key] = table.resolve(value.to_s)
    else
      @client_progress.could_not_resolve_find_metadata(key)
      raise "Value could not be interpreted. Key #{key} Value #{value}"
    end
  end
end
169
+
170
# Returns every object for the resource described by +opts+ by requesting
# the wildcard object id "*" (see objects).
def all_objects(opts = {})
  wildcard = "*"
  objects(wildcard, opts)
end
174
+
175
# Fetches the given objects and returns them as an array of parts.
#
# object_ids - a String passed through as-is, or an Array whose elements are
#              joined with commas.
# opts       - forwarded to fetch_object.
#
# Raises ArgumentError for any other kind of +object_ids+.
def objects(object_ids, opts = {})
  id_list =
    case object_ids
    when String then object_ids
    when Array then object_ids.join(",")
    else raise ArgumentError, "Expected instance of String or Array, but got #{object_ids.inspect}."
    end

  create_parts_from_response(fetch_object(id_list, opts))
end
185
+
186
# Turns a GetObject response into an array of parts.
#
# A multipart response is split with Parser::Multipart.parse using the
# boundary from its content type. Any other response is wrapped in a single
# Parser::Multipart::Part so callers always receive an array.
#
# Raises MalformedResponse when the response has no content type.
def create_parts_from_response(response)
  content_type = response.header["content-type"][0]

  if content_type.nil?
    raise MalformedResponse, "Unable to read content-type from response: #{response.inspect}"
  end

  if content_type.include?("multipart")
    boundary = content_type.scan(/boundary="?([^;"]*)?/).join

    parts = Parser::Multipart.parse(response.body, boundary)

    logger.debug "Rets::Client: Found #{parts.size} parts"

    parts
  else
    # Fake a multipart for interface compatibility.
    # Header values may arrive as plain Strings; indexing a String with [0]
    # would keep only its first character, so unwrap Arrays only.
    headers = {}
    response.headers.each do |name, value|
      headers[name] = value.is_a?(Array) ? value.first : value
    end

    [Parser::Multipart::Part.new(headers, response.body)]
  end
end
211
+
212
# Returns the body of a single GetObject response.
#
# object_id - "*", a colon delimited string of integers, or an array of
#             integers (array elements are joined with ":").
# opts      - forwarded to fetch_object, which reads :resource,
#             :object_type, :resource_id and optionally :location.
def object(object_id, opts = {})
  joined_ids = Array(object_id).join(':')
  fetch_object(joined_ids, opts).body
end
222
+
223
# Posts a GetObject request and returns the raw HTTP response.
#
# object_id - object id portion appended after "<resource_id>:" in the ID.
# opts      - :resource, :object_type and :resource_id are mandatory
#             (KeyError when missing); :location defaults to 0.
def fetch_object(object_id, opts = {})
  resource    = opts.fetch(:resource)
  object_type = opts.fetch(:object_type)
  resource_id = opts.fetch(:resource_id)
  location    = opts.fetch(:location, 0)

  params = {
    "Resource" => resource,
    "Type"     => object_type,
    "ID"       => "#{resource_id}:#{object_id}",
    "Location" => location
  }
  accept_images = { "Accept" => "image/jpeg, image/png;q=0.5, image/gif;q=0.1" }

  http_post(capability_url("GetObject"), params, accept_images)
end
237
+
238
# Returns a new hash whose keys are camel cased strings, per the RETS
# standard for queries (:search_type becomes "SearchType"). Values are
# carried over unchanged.
def fixup_keys(hash)
  hash.each_with_object({}) do |(key, value), fixed|
    rets_key = key.to_s.capitalize.gsub(/_(\w)/) { $1.upcase }
    fixed[rets_key] = value
  end
end
250
+
251
# Returns the metadata, loading it on first use.
#
# The cached metadata supplied through the options is reused when the
# :skip_metadata_uptodate_check option is set, or when it reports itself
# current for the server's MetadataTimestamp / MetadataVersion capabilities.
# Otherwise fresh metadata is retrieved and wrapped in Metadata::Root.
def metadata
  return @metadata if @metadata

  cache_usable = @cached_metadata && (
    @options[:skip_metadata_uptodate_check] ||
    @cached_metadata.current?(capabilities["MetadataTimestamp"], capabilities["MetadataVersion"])
  )

  if cache_usable
    @client_progress.use_cached_metadata
    self.metadata = @cached_metadata
  else
    @client_progress.bad_cached_metadata(@cached_metadata)
    self.metadata = Metadata::Root.new(logger, retrieve_metadata)
  end
end
263
+
264
# Downloads every metadata type listed in Metadata::METADATA_TYPES and
# returns a hash of type => raw response body.
def retrieve_metadata
  Metadata::METADATA_TYPES.each_with_object({}) do |type, raw_by_type|
    raw_by_type[type] = retrieve_metadata_type(type)
  end
end
271
+
272
# Requests one metadata type ("METADATA-<type>", ID "0") in COMPACT format
# from the GetMetadata capability and returns the response body.
def retrieve_metadata_type(type)
  url = capability_url("GetMetadata")
  params = {
    "Format" => "COMPACT",
    "Type"   => "METADATA-#{type}",
    "ID"     => "0"
  }
  http_post(url, params).body
end
280
+
281
# The capabilities as provided by the RETS server during login. Logs in
# first when none have been stored yet.
#
# Currently, only the path in the endpoint URLs is used[1]. Host,
# port, other details remaining constant with those provided to
# the constructor.
#
# [1] In fact, sometimes only a path is returned from the server.
def capabilities
  return @capabilities if @capabilities

  login
end
291
+
292
# Returns the absolute URL for the named capability.
#
# name - capability name; looked up as given, then downcased.
#
# An http(s) value is used as-is. Anything else is treated as a path and
# grafted onto the login URL, keeping its scheme, host and port.
#
# Raises UnknownCapability when the server did not advertise +name+, and
# MalformedResponse when the value cannot be turned into a URL.
def capability_url(name)
  val = capabilities[name] || capabilities[name.downcase]

  raise UnknownCapability.new(name) unless val

  begin
    if val.downcase.match(/^https?:\/\//)
      uri = URI.parse(val)
    else
      uri = URI.parse(login_url)
      uri.path = val
    end
  rescue URI::InvalidURIError, URI::InvalidComponentError
    # URI#path= reports a bad path with InvalidComponentError, which the
    # original rescue (InvalidURIError only) let escape unconverted.
    raise MalformedResponse, "Unable to parse capability URL: #{name} => #{val.inspect}"
  end
  uri.to_s
end
309
+
310
# Parses the key=value lines inside /RETS/RETS-RESPONSE into a hash.
#
# document - parsed login response; must respond to xpath.
#
# Keys are stored downcased and the returned hash answers lookups case
# insensitively (an unknown key yields nil). Blank lines and lines without
# an "=" are skipped instead of raising NoMethodError.
def extract_capabilities(document)
  raw_key_values = document.xpath("/RETS/RETS-RESPONSE").text.strip

  # Fall back to the downcased key when the exact key is absent.
  capabilities = Hash.new { |h, k| h.key?(k.downcase) ? h[k.downcase] : nil }

  raw_key_values.each_line do |line|
    key, value = line.split("=", 2)
    next if value.nil? # blank line, or no "=" present

    capabilities[key.strip.downcase] = value.strip
  end

  capabilities
end
323
+
324
# Delegates to the HTTP client's save_cookie_store, passing +force+ through.
def save_cookie_store(force=nil)
  client = @http_client
  client.save_cookie_store(force)
end
327
+
328
# Issues a GET through the configured HTTP client and returns its result.
def http_get(url, params=nil, extra_headers={})
  client = @http_client
  client.http_get(url, params, extra_headers)
end
331
+
332
# Issues a POST through the configured HTTP client and returns its result.
def http_post(url, params, extra_headers = {})
  client = @http_client
  client.http_post(url, params, extra_headers)
end
335
+
336
# Returns the current attempt number (starting at 1) and advances the
# counter, so successive calls yield 1, 2, 3, ...
def tries
  current = @tries || 1
  @tries = current + 1
  current
end
341
+
342
# Logger that discards everything; used when no :logger option is given.
class FakeLogger < Logger
  def initialize
    # File::NULL is the platform's null device ("/dev/null", "NUL", ...),
    # so this also works where "/dev/null" does not exist.
    super(File::NULL)
  end
end
347
+
348
# Inspects a response and raises when it represents a failure.
class ErrorChecker
  # response - anything with a body; status_code and ok? are consulted only
  #            when the object responds to them (multipart parts carry
  #            headers and a body but no status code).
  #
  # Returns nil when the response is acceptable.
  # Raises HttpError, InvalidRequest or AuthorizationFailure otherwise.
  def self.check(response)
    # Some RETS servers return HTTP 412 when the session cookie has expired,
    # yet the body passes the XML check, so this case is handled up front.
    if response.respond_to?(:status_code) && response.status_code == 412
      raise HttpError, "HTTP status: #{response.status_code}, body: #{response.body}"
    end

    # Some RETS servers report success in the XML body while sending a 4xx
    # HTTP status. When a RETS XML body is present the HTTP status is ignored.
    unless response.body.empty?
      begin
        xml = Nokogiri::XML.parse(response.body, nil, nil, Nokogiri::XML::ParseOptions::STRICT)

        rets_element = xml.xpath("/RETS")
        return if rets_element.empty?

        reply_text = (rets_element.attr("ReplyText") || rets_element.attr("replyText")).value
        reply_code = (rets_element.attr("ReplyCode") || rets_element.attr("replyCode")).value.to_i

        return if reply_code.zero?

        raise InvalidRequest.new(reply_code, reply_text)
      rescue Nokogiri::XML::SyntaxError
        # Not XML; fall through to the HTTP status check.
      end
    end

    return unless response.respond_to?(:ok?) && !response.ok?

    if response.status_code == 401
      raise AuthorizationFailure.new(response.status_code, response.body)
    end

    raise HttpError, "HTTP status: #{response.status_code}, body: #{response.body}"
  end
end
390
+ end
391
+ end
@@ -0,0 +1,44 @@
1
+ module Rets
2
# Stats collector stand-in that records nothing.
class NullStatsReporter
  # Runs the block without timing it and returns the block's value.
  def time(metric_name, &block)
    block.call
  end

  # Ignores the measurement.
  def gauge(metric_name, measurement)
    nil
  end

  # Ignores the count.
  def count(metric_name, count=1)
    nil
  end
end
13
+
14
# Reports client events to a logger and to a stats collector.
class ClientProgressReporter
  # logger       - receives the warn/info messages.
  # stats        - object responding to count; NullStatsReporter when nil.
  # stats_prefix - prepended to every metric name.
  def initialize(logger, stats, stats_prefix)
    @stats_prefix = stats_prefix
    @stats = stats || NullStatsReporter.new
    @logger = logger
  end

  # A search attempt failed and is about to be retried.
  def find_with_retries_failed_a_retry(exception, retries)
    bump("find_with_retries_failed_retry")
    @logger.warn("Rets::Client: Failed with message: #{exception.message}")
    @logger.info("Rets::Client: Retry #{retries}/3")
  end

  # A search ran out of retries.
  def find_with_retries_exceeded_retry_count(exception)
    bump("find_with_retries_exceeded_retry_count")
  end

  # No metadata table was found for +key+ while resolving a result.
  def could_not_resolve_find_metadata(key)
    bump("could_not_resolve_find_metadata")
    @logger.warn "Rets::Client: Can't resolve find metadata for #{key.inspect}"
  end

  # The cached metadata is being reused.
  def use_cached_metadata
    @logger.info "Rets::Client: Use cached metadata"
  end

  # The cached metadata is missing (nil) or not being used.
  def bad_cached_metadata(cached_metadata)
    message =
      if cached_metadata
        "Rets::Client: Cached metadata out of date"
      else
        "Rets::Client: Cached metadata unavailable"
      end
    @logger.info message
  end

  private

  # Increments the prefixed counter +metric+.
  def bump(metric)
    @stats.count("#{@stats_prefix}#{metric}")
  end
end
44
+ end
@@ -0,0 +1,91 @@
1
+ module Rets
2
# Thin wrapper around an HTTP client object that adds RETS headers,
# authentication, debug logging of traffic and response error checking.
class HttpClient
  attr_reader :http, :options, :logger, :login_url

  # http      - underlying HTTP client object.
  # options   - Hash; keys read by this class: :ca_certs, :username,
  #             :password, :agent, :version, :ua_password, :cookie_store.
  # logger    - receives debug output.
  # login_url - used to decide which cookies apply (see http_cookie).
  def initialize(http, options, logger, login_url)
    @http = http
    @options = options
    @logger = logger
    @login_url = login_url
    @options.fetch(:ca_certs, []).each { |c| @http.ssl_config.add_trust_ca(c) }
  end

  # Performs an authenticated GET, checks the response with
  # Client::ErrorChecker and returns it.
  def http_get(url, params=nil, extra_headers={})
    http.set_auth(url, options[:username], options[:password])
    headers = extra_headers.merge(rets_extra_headers)
    res = nil
    # Log under the verb actually used; this was previously labelled "POST".
    log_http_traffic("GET", url, params, headers) do
      res = http.get(url, params, headers)
    end
    Client::ErrorChecker.check(res)
    res
  end

  # Performs an authenticated POST, checks the response with
  # Client::ErrorChecker and returns it.
  def http_post(url, params, extra_headers = {})
    http.set_auth(url, options[:username], options[:password])
    headers = extra_headers.merge(rets_extra_headers)
    res = nil
    log_http_traffic("POST", url, params, headers) do
      res = http.post(url, params, headers)
    end
    Client::ErrorChecker.check(res)
    res
  end

  # Logs the outgoing request, runs the block (which must return the
  # response) and logs the response status and headers.
  def log_http_traffic(method, url, params, headers, &block)
    # optimization, we don't want to compute log params
    # if logging is off
    if logger.debug?
      logger.debug "Rets::Client >> #{method} #{url}"
      logger.debug "Rets::Client >> params = #{params.inspect}"
      logger.debug "Rets::Client >> headers = #{headers.inspect}"
    end

    res = block.call

    # optimization, we don't want to compute log params
    # if logging is off, especially when there is a loop just
    # for logging
    if logger.debug?
      logger.debug "Rets::Client << Status #{res.status_code}"
      res.headers.each { |k, v| logger.debug "Rets::Client << #{k}: #{v}" }
    end
  end

  # Persists cookies, but only when a :cookie_store option was configured.
  # A truthy +force+ saves through the cookie manager with all flags set.
  def save_cookie_store(force=nil)
    if options[:cookie_store]
      if force
        @http.cookie_manager.save_all_cookies(true, true, true)
      else
        @http.save_cookie_store
      end
    end
  end

  # Builds the User-Agent and RETS-Version headers, plus the
  # RETS-UA-Authorization digest when a :ua_password option is set.
  def rets_extra_headers
    user_agent = options[:agent] || "Client/1.0"
    rets_version = options[:version] || "RETS/1.7.2"

    headers = {
      "User-Agent" => user_agent,
      "RETS-Version" => rets_version
    }

    if options[:ua_password]
      up = Digest::MD5.hexdigest "#{user_agent}:#{options[:ua_password]}"
      session_id = http_cookie('RETS-Session-ID') || ''
      digest = Digest::MD5.hexdigest "#{up}::#{session_id}:#{rets_version}"
      headers.merge!("RETS-UA-Authorization" => "Digest #{digest}")
    end

    headers
  end

  # Returns the value of the first cookie whose name matches +name+ (case
  # insensitively) and which matches the login URL, or nil.
  def http_cookie(name)
    http.cookies.each do |c|
      return c.value if c.name.downcase == name.downcase && c.match?(URI.parse(login_url))
    end
    nil
  end
end
91
+ end