sean-rets 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,391 @@
1
+ require 'httpclient'
2
+ require 'logger'
3
+ require_relative 'http_client'
4
+
5
+ module Rets
6
# Raised when a response carries a failing HTTP status that is not
# covered by a more specific error.
class HttpError < StandardError
end
7
+
8
+ class Client
9
# Baseline options merged underneath whatever the caller hands to
# Client.new. Frozen so this shared constant cannot be mutated by
# accident; Hash#merge still returns a fresh, unfrozen hash.
DEFAULT_OPTIONS = {}.freeze

# Named values for the :count search option. find_every compares
# opts[:count] against COUNT.only to decide whether only the record
# count should be returned.
COUNT = Struct.new(:exclude, :include, :only).new(0, 1, 2).freeze
12
+
13
+ attr_accessor :login_url, :options, :logger
14
+ attr_writer :capabilities, :metadata
15
+
16
# Stores the caller's options and builds all client state from them.
#
# options - Hash of settings; clean_setup reads the individual keys
#           (:login_url, :logger, :http_proxy, ...).
def initialize(options)
  @options = options
  clean_setup
end
20
+
21
# (Re)builds every piece of client state from the stored options.
#
# Called from initialize and again from find_with_retries before a
# retry, so it resets the cached capabilities/metadata and creates a
# brand new HTTP client stack each time.
def clean_setup
  self.options = DEFAULT_OPTIONS.merge(@options)
  self.login_url = options[:login_url]

  # Forget everything learned from a previous session.
  @cached_metadata = nil
  @capabilities = nil
  @metadata = nil
  @tries = nil
  self.capabilities = nil

  self.logger = options[:logger] || FakeLogger.new
  @client_progress = ClientProgressReporter.new(logger, options[:stats_collector], options[:stats_prefix])
  @cached_metadata = options[:metadata]

  # Underlying HTTPClient, optionally routed through an authenticated proxy.
  proxy = options[:http_proxy]
  if proxy
    @http = HTTPClient.new(proxy)
    if options[:proxy_username]
      @http.set_proxy_auth(options.fetch(:proxy_username), options.fetch(:proxy_password))
    end
  else
    @http = HTTPClient.new
  end

  timeout = options[:receive_timeout]
  @http.receive_timeout = timeout if timeout

  cookie_store = options[:cookie_store]
  @http.set_cookie_store(cookie_store) if cookie_store

  # Wrap the RETS-aware client with the optional timing and locking
  # decorators, in that order.
  @http_client = Rets::HttpClient.new(@http, @options, @logger, @login_url)
  if options[:http_timing_stats_collector]
    @http_client = Rets::MeasuringHttpClient.new(
      @http_client,
      options.fetch(:http_timing_stats_collector),
      options.fetch(:http_timing_stats_prefix)
    )
  end
  if options[:lock_around_http_requests]
    @http_client = Rets::LockingHttpClient.new(
      @http_client,
      options.fetch(:locker),
      options.fetch(:lock_name),
      options.fetch(:lock_options)
    )
  end
end
58
+
59
# Logs in by issuing a GET against the login URL given to the
# constructor, then parses and stores the capability list from the body.
# See http://retsdoc.onconfluence.com/display/rets172/4.10+Capability+URL+List.
#
# Returns the capabilities hash.
# Raises UnknownResponse when the status is not 200 or no capabilities
# could be stored.
def login
  response = http_get(login_url)
  if response.status_code != 200
    raise UnknownResponse, "bad response to login, expected a 200, but got #{response.status_code}. Body was #{response.body}."
  end

  parsed_body = Nokogiri.parse(response.body)
  self.capabilities = extract_capabilities(parsed_body)
  raise UnknownResponse, "Cannot read rets server capabilities." unless @capabilities

  @capabilities
end
71
+
72
# Ends the session by requesting the server's "Logout" capability URL.
#
# Raises NoLogout when the server advertised no Logout capability.
# An UnknownResponse that reports a 401 is swallowed (the method then
# returns nil); any other UnknownResponse is re-raised.
def logout
  raise NoLogout.new('No logout method found for rets client') unless capabilities["Logout"]

  http_get(capability_url("Logout"))
rescue UnknownResponse => e
  raise e unless e.message.match(/expected a 200, but got 401/)
end
82
+
83
# Searches for records.
#
# [quantity] <tt>:first</tt> to get a single record (the search is sent
#            with <tt>:limit => 1</tt>), or <tt>:all</tt> to get every
#            matching record.
#
# [opts] Hash describing the search. Recognised keys:
#
#   <tt>:search_type</tt>:: Required. The resource to search for.
#   <tt>:class</tt>:: Required. The class of the resource to search for.
#   <tt>:query</tt>:: Required. The DMQL2 query string to execute.
#   <tt>:limit</tt>:: The number of records to request from the server.
#   <tt>:resolve</tt>:: When truthy, values are resolved through the
#                       metadata instead of returned as raw system values.
#
# All remaining keys are camel-cased and sent to the server as query
# parameters, e.g. <tt>:offset</tt> becomes +Offset+.
#
# Raises ArgumentError for any other quantity.
def find(quantity, opts = {})
  case quantity
  when :first
    find_with_retries(opts.merge(:limit => 1)).first
  when :all
    find_with_retries(opts)
  else
    raise ArgumentError, "First argument must be :first or :all"
  end
end

alias search find
111
+
112
# Runs a search, rebuilding the client and retrying when the server
# rejects the request.
#
# opts - Search options as accepted by find. <tt>:resolve</tt> is removed
#        before the query is built; <tt>:max_retries</tt> (default 3)
#        bounds the number of retries.
#
# Works on a copy of +opts+ so the caller's hash is not modified.
#
# Returns whatever find_every returns.
# Re-raises AuthorizationFailure / InvalidRequest once the retry budget
# is exhausted.
def find_with_retries(opts = {})
  opts = opts.dup
  resolve = opts.delete(:resolve)
  retries = 0
  begin
    find_every(opts, resolve)
  rescue AuthorizationFailure, InvalidRequest => e
    if retries < opts.fetch(:max_retries, 3)
      retries += 1
      @client_progress.find_with_retries_failed_a_retry(e, retries)
      # Start over with fresh HTTP state before trying again.
      clean_setup
      retry
    else
      @client_progress.find_with_retries_exceeded_retry_count(e)
      raise e
    end
  end
end
129
+
130
# Performs a single search request against the "Search" capability.
#
# opts    - Search options; converted with fixup_keys and merged over the
#           defaults QueryType=DMQL2 and Format=COMPACT.
# resolve - When truthy, every parsed record is decorated through the
#           metadata of opts[:search_type] / opts[:class].
#
# Returns the count reported by the server when opts[:count] equals
# COUNT.only, otherwise the parsed (and optionally decorated) records.
def find_every(opts, resolve)
  query = { "QueryType" => "DMQL2", "Format" => "COMPACT" }.merge(fixup_keys(opts))
  response = http_post(capability_url("Search"), query)

  return Parser::Compact.get_count(response.body) if opts[:count] == COUNT.only

  # Re-encode from raw bytes to UTF-8, replacing anything unrepresentable.
  utf8_body = response.body.encode("UTF-8", "binary", :invalid => :replace, :undef => :replace)
  records = Parser::Compact.parse_document(utf8_body)
  return records unless resolve

  decorate_results(records, find_rets_class(opts[:search_type], opts[:class]))
end
146
+
147
# Looks up the metadata entry for a class within a resource.
#
# resource_name   - key into metadata.tree.
# rets_class_name - name handed to that resource's find_rets_class.
def find_rets_class(resource_name, rets_class_name)
  resource = metadata.tree[resource_name]
  resource.find_rets_class(rets_class_name)
end
150
+
151
# Applies decorate_result to every record and returns the mapped array.
def decorate_results(results, rets_class)
  results.map { |record| decorate_result(record, rets_class) }
end
156
+
157
# Replaces the raw values of one record with their resolved values.
#
# result     - Hash of field name => raw value; modified in place.
# rets_class - object whose find_table(key) returns a table responding
#              to resolve, or a falsy value for unknown fields.
#
# Fields without a table keep their raw value and are only reported to
# the progress reporter. (The previous unconditional raise aborted the
# whole search and left that report unreachable.)
#
# Returns the same +result+ hash.
def decorate_result(result, rets_class)
  result.each do |key, value|
    table = rets_class.find_table(key)
    if table
      result[key] = table.resolve(value.to_s)
    else
      # Can't resolve: leave the value untouched.
      @client_progress.could_not_resolve_find_metadata(key)
    end
  end
end
169
+
170
# Fetches every object of the requested resource by asking for the
# wildcard id "*". +opts+ is passed straight through to objects.
def all_objects(opts = {})
  objects("*", opts)
end
174
+
175
# Fetches the given objects and returns them as an array of parts.
#
# object_ids - a String of ids, or an Array of ids (joined with ",").
# opts       - forwarded to fetch_object.
#
# Raises ArgumentError for any other kind of object_ids.
def objects(object_ids, opts = {})
  id_list =
    case object_ids
    when String then object_ids
    when Array then object_ids.join(",")
    else raise ArgumentError, "Expected instance of String or Array, but got #{object_ids.inspect}."
    end

  create_parts_from_response(fetch_object(id_list, opts))
end
185
+
186
# Turns a GetObject response into an array of multipart parts.
#
# A multipart response is split with Parser::Multipart.parse using the
# boundary from its content type. Any other response is wrapped in a
# single Parser::Multipart::Part so callers always get the same shape.
#
# Raises MalformedResponse when the response has no content type.
def create_parts_from_response(response)
  content_type = response.header["content-type"][0]

  if content_type.nil?
    raise MalformedResponse, "Unable to read content-type from response: #{response.inspect}"
  end

  if content_type.include?("multipart")
    boundary = content_type.scan(/boundary="?([^;"]*)?/).join

    parts = Parser::Multipart.parse(response.body, boundary)

    logger.debug "Rets::Client: Found #{parts.size} parts"

    parts
  else
    # Fake a multipart for interface compatibility.
    # response.headers may yield plain String values; indexing those with
    # [0] would keep just the first character, so only unwrap Arrays.
    headers = {}
    response.headers.each do |name, value|
      headers[name] = value.is_a?(Array) ? value.first : value
    end

    [Parser::Multipart::Part.new(headers, response.body)]
  end
end
211
+
212
# Fetches one object and returns the raw response body.
#
# object_id - "*", a colon delimited string of ids, or an array of ids
#             (joined with ":").
# opts      - forwarded to fetch_object, which reads :resource,
#             :object_type, :resource_id and optionally :location.
def object(object_id, opts = {})
  joined_ids = Array(object_id).join(':')
  fetch_object(joined_ids, opts).body
end
222
+
223
# Posts a request to the "GetObject" capability and returns the response.
#
# object_id - id portion appended after the resource id ("<resource_id>:<object_id>").
# opts      - requires :resource, :object_type and :resource_id (KeyError
#             when missing); :location defaults to 0.
def fetch_object(object_id, opts = {})
  resource    = opts.fetch(:resource)
  object_type = opts.fetch(:object_type)
  resource_id = opts.fetch(:resource_id)
  location    = opts.fetch(:location, 0)

  params = {
    "Resource" => resource,
    "Type"     => object_type,
    "ID"       => "#{resource_id}:#{object_id}",
    "Location" => location
  }
  accept = { "Accept" => "image/jpeg, image/png;q=0.5, image/gif;q=0.1" }

  http_post(capability_url("GetObject"), params, accept)
end
237
+
238
# Returns a new hash whose keys are the camel-cased string form of the
# given keys (e.g. :search_type => "SearchType"), as RETS queries expect.
# Values are carried over unchanged.
def fixup_keys(hash)
  hash.each_with_object({}) do |(key, value), converted|
    rets_key = key.to_s.capitalize.gsub(/_(\w)/) { $1.upcase }
    converted[rets_key] = value
  end
end
250
+
251
# Returns the metadata for this session, memoised in @metadata.
#
# The cached metadata supplied through the options is reused when it
# exists and either the :skip_metadata_uptodate_check option is set or
# it reports itself current for the server's MetadataTimestamp and
# MetadataVersion capabilities. Otherwise fresh metadata is retrieved
# from the server.
def metadata
  return @metadata if @metadata

  cache_usable = @cached_metadata &&
                 (@options[:skip_metadata_uptodate_check] ||
                  @cached_metadata.current?(capabilities["MetadataTimestamp"], capabilities["MetadataVersion"]))

  if cache_usable
    @client_progress.use_cached_metadata
    self.metadata = @cached_metadata
  else
    @client_progress.bad_cached_metadata(@cached_metadata)
    self.metadata = Metadata::Root.new(logger, retrieve_metadata)
  end
end
263
+
264
# Downloads every metadata type listed in Metadata::METADATA_TYPES.
#
# Returns a hash of type => raw response body.
def retrieve_metadata
  Metadata::METADATA_TYPES.each_with_object({}) do |type, raw_metadata|
    raw_metadata[type] = retrieve_metadata_type(type)
  end
end
271
+
272
# Requests one metadata type (sent as "METADATA-<type>", ID "0", COMPACT
# format) from the "GetMetadata" capability and returns the response body.
def retrieve_metadata_type(type)
  params = {
    "Format" => "COMPACT",
    "Type"   => "METADATA-#{type}",
    "ID"     => "0"
  }
  http_post(capability_url("GetMetadata"), params).body
end
280
+
281
# The capabilities provided by the RETS server during login. Logs in
# first when none have been stored yet.
#
# Currently, only the path in the endpoint URLs is used[1]. Host,
# port and other details stay as given to the constructor.
#
# [1] In fact, sometimes only a path is returned from the server.
def capabilities
  return @capabilities if @capabilities

  login
end
291
+
292
# Builds the full URL for a named capability.
#
# The capability is looked up by +name+ and then by its downcased form.
# A value starting with http:// or https:// is used as is; anything else
# is treated as a path that replaces the path of the login URL.
#
# Raises UnknownCapability when the server did not advertise +name+ and
# MalformedResponse when the URL cannot be parsed.
def capability_url(name)
  endpoint = capabilities[name] || capabilities[name.downcase]

  raise UnknownCapability.new(name) unless endpoint

  begin
    absolute = endpoint.downcase.match(/^https?:\/\//)
    resolved = URI.parse(absolute ? endpoint : login_url)
    resolved.path = endpoint unless absolute
  rescue URI::InvalidURIError
    raise MalformedResponse, "Unable to parse capability URL: #{name} => #{endpoint.inspect}"
  end

  resolved.to_s
end
309
+
310
# Parses the "Key=Value" lines inside /RETS/RETS-RESPONSE of the login
# document into a hash.
#
# Keys are stored downcased and stripped; values are stripped. The
# returned hash falls back to the downcased key on a miss, so lookups
# are effectively case-insensitive.
#
# Blank lines and lines without "=" are skipped; previously they raised
# NoMethodError on nil.
def extract_capabilities(document)
  raw_key_values = document.xpath("/RETS/RETS-RESPONSE").text.strip

  capabilities = Hash.new { |h, k| h.key?(k.downcase) ? h[k.downcase] : nil }

  raw_key_values.each_line do |line|
    key, value = line.split("=", 2)
    next if value.nil? # not a key/value line

    capabilities[key.strip.downcase] = value.strip
  end

  capabilities
end
323
+
324
# Asks the wrapped HTTP client to persist its cookies; +force+ is passed
# through unchanged.
def save_cookie_store(force=nil)
  @http_client.save_cookie_store(force)
end
327
+
328
# Delegates a GET request to the wrapped HTTP client and returns its
# response.
def http_get(url, params=nil, extra_headers={})
  @http_client.http_get(url, params, extra_headers)
end
331
+
332
# Delegates a POST request to the wrapped HTTP client and returns its
# response.
def http_post(url, params, extra_headers = {})
  @http_client.http_post(url, params, extra_headers)
end
335
+
336
# Counter that starts at 1: returns the current value and then
# increments it for the next call.
def tries
  current = (@tries ||= 1)
  @tries = current + 1
  current
end
341
+
342
# Logger used when the caller supplies none: everything is written to
# the platform's null device and thereby discarded.
class FakeLogger < Logger
  def initialize
    # File::NULL instead of a literal "/dev/null" so this also works on
    # platforms where that path does not exist.
    super(File::NULL)
  end
end
347
+
348
# Inspects a response and raises the matching error when it indicates a
# failure.
class ErrorChecker
  # response - anything with a body; status_code and ok? are only used
  #            when the object responds to them (the original comment
  #            notes multipart.rb passes objects without status_code).
  #
  # Returns nil when nothing is wrong.
  # Raises HttpError, InvalidRequest or AuthorizationFailure.
  def self.check(response)
    # Some RETS servers return HTTP code 412 when the session cookie has
    # expired, yet the body passes the XML check below, so handle it first.
    if response.respond_to?(:status_code) && response.status_code == 412
      raise HttpError, "HTTP status: #{response.status_code}, body: #{response.body}"
    end

    # Some RETS servers return a success code in the XML body but a 4xx
    # HTTP status. When a RETS XML body is present the HTTP status is
    # ignored.
    unless response.body.empty?
      begin
        xml = Nokogiri::XML.parse(response.body, nil, nil, Nokogiri::XML::ParseOptions::STRICT)

        rets_element = xml.xpath("/RETS")
        return if rets_element.empty?

        # Both attribute spellings are accepted.
        reply_text = (rets_element.attr("ReplyText") || rets_element.attr("replyText")).value
        reply_code = (rets_element.attr("ReplyCode") || rets_element.attr("replyCode")).value.to_i

        raise InvalidRequest.new(reply_code, reply_text) if reply_code.nonzero?

        return
      rescue Nokogiri::XML::SyntaxError
        # Not XML; fall through to the HTTP status check.
      end
    end

    return unless response.respond_to?(:ok?) && !response.ok?

    if response.status_code == 401
      raise AuthorizationFailure.new(response.status_code, response.body)
    else
      raise HttpError, "HTTP status: #{response.status_code}, body: #{response.body}"
    end
  end
end
390
+ end
391
+ end
@@ -0,0 +1,44 @@
1
+ module Rets
2
# Stats collector that records nothing; ClientProgressReporter falls
# back to it when no collector is supplied.
class NullStatsReporter
  # Runs the block without timing it and returns the block's result.
  def time(metric_name, &block)
    block.call
  end

  # Ignores the measurement.
  def gauge(metric_name, measurement)
    nil
  end

  # Ignores the count.
  def count(metric_name, count=1)
    nil
  end
end
13
+
14
# Reports notable client events to a logger and a stats collector.
class ClientProgressReporter
  # logger       - receives the warn/info messages.
  # stats        - collector responding to count; a NullStatsReporter is
  #                used when nil.
  # stats_prefix - prepended to every metric name.
  def initialize(logger, stats, stats_prefix)
    @logger = logger
    @stats = stats || NullStatsReporter.new
    @stats_prefix = stats_prefix
  end

  # Counts and logs one failed attempt of find_with_retries.
  def find_with_retries_failed_a_retry(exception, retries)
    bump("find_with_retries_failed_retry")
    @logger.warn("Rets::Client: Failed with message: #{exception.message}")
    @logger.info("Rets::Client: Retry #{retries}/3")
  end

  # Counts a search that ran out of retries.
  def find_with_retries_exceeded_retry_count(exception)
    bump("find_with_retries_exceeded_retry_count")
  end

  # Counts and logs a field whose value could not be resolved.
  def could_not_resolve_find_metadata(key)
    bump("could_not_resolve_find_metadata")
    @logger.warn "Rets::Client: Can't resolve find metadata for #{key.inspect}"
  end

  # Logs that the cached metadata is being reused.
  def use_cached_metadata
    @logger.info "Rets::Client: Use cached metadata"
  end

  # Logs why the cached metadata is not used: out of date when one was
  # given, unavailable otherwise.
  def bad_cached_metadata(cached_metadata)
    message =
      if cached_metadata
        "Rets::Client: Cached metadata out of date"
      else
        "Rets::Client: Cached metadata unavailable"
      end
    @logger.info message
  end

  private

  # Increments the prefixed metric on the stats collector.
  def bump(metric)
    @stats.count("#{@stats_prefix}#{metric}")
  end
end
44
+ end
@@ -0,0 +1,91 @@
1
+ module Rets
2
+ class HttpClient
3
+ attr_reader :http, :options, :logger, :login_url
4
+
5
# http      - underlying HTTP client; must expose ssl_config.
# options   - Hash of settings; each entry of :ca_certs (default none)
#             is registered as a trusted CA on the HTTP client.
# logger    - logger used for request/response tracing.
# login_url - RETS login URL.
def initialize(http, options, logger, login_url)
  @http = http
  @options = options
  @logger = logger
  @login_url = login_url

  ca_certs = @options.fetch(:ca_certs, [])
  ca_certs.each { |cert| @http.ssl_config.add_trust_ca(cert) }
end
12
+
13
# Performs an authenticated GET request.
#
# url           - request URL; credentials from the options are
#                 registered for it before the request is sent.
# params        - query parameters (may be nil).
# extra_headers - merged with the standard RETS headers, which win on
#                 conflicts.
#
# Returns the response after it has passed Client::ErrorChecker.check,
# which raises for failed responses.
def http_get(url, params=nil, extra_headers={})
  http.set_auth(url, options[:username], options[:password])
  headers = extra_headers.merge(rets_extra_headers)
  res = nil
  # Traced as "GET"; it used to be mislabelled as "POST" in the debug log.
  log_http_traffic("GET", url, params, headers) do
    res = http.get(url, params, headers)
  end
  Client::ErrorChecker.check(res)
  res
end
23
+
24
# Performs an authenticated POST request.
#
# url           - request URL; credentials from the options are
#                 registered for it before the request is sent.
# params        - form parameters.
# extra_headers - merged with the standard RETS headers, which win on
#                 conflicts.
#
# Returns the response after it has passed Client::ErrorChecker.check.
def http_post(url, params, extra_headers = {})
  http.set_auth(url, options[:username], options[:password])
  request_headers = extra_headers.merge(rets_extra_headers)

  response = nil
  log_http_traffic("POST", url, params, request_headers) do
    response = http.post(url, params, request_headers)
  end

  Client::ErrorChecker.check(response)
  response
end
34
+
35
# Runs the given block (which performs the request) and, when debug
# logging is enabled, traces the request before and the response status
# and headers after it.
#
# Callers capture the response inside the block; this method's own
# return value is not the response.
def log_http_traffic(method, url, params, headers, &block)
  # Only build the log strings when they will actually be written.
  if logger.debug?
    logger.debug "Rets::Client >> #{method} #{url}"
    logger.debug "Rets::Client >> params = #{params.inspect}"
    logger.debug "Rets::Client >> headers = #{headers.inspect}"
  end

  response = block.call

  # Same optimisation, and it also avoids looping over the headers
  # purely for logging.
  return unless logger.debug?

  logger.debug "Rets::Client << Status #{response.status_code}"
  response.headers.each { |name, value| logger.debug "Rets::Client << #{name}: #{value}" }
end
54
+
55
# Persists cookies, but only when a :cookie_store option was configured
# (otherwise returns nil without doing anything).
#
# force - when truthy, saves through the cookie manager's
#         save_all_cookies(true, true, true); otherwise uses the HTTP
#         client's regular save_cookie_store.
def save_cookie_store(force=nil)
  return unless options[:cookie_store]
  return @http.cookie_manager.save_all_cookies(true, true, true) if force

  @http.save_cookie_store
end
64
+
65
# Builds the RETS headers sent with every request.
#
# Always contains User-Agent (option :agent, default "Client/1.0") and
# RETS-Version (option :version, default "RETS/1.7.2"). When a
# :ua_password option is set, a RETS-UA-Authorization digest computed
# from the user agent, that password, the RETS-Session-ID cookie (empty
# when absent) and the RETS version is added.
def rets_extra_headers
  user_agent = options[:agent] || "Client/1.0"
  rets_version = options[:version] || "RETS/1.7.2"

  headers = { "User-Agent" => user_agent, "RETS-Version" => rets_version }
  return headers unless options[:ua_password]

  ua_digest = Digest::MD5.hexdigest("#{user_agent}:#{options[:ua_password]}")
  session_id = http_cookie('RETS-Session-ID') || ''
  authorization = Digest::MD5.hexdigest("#{ua_digest}::#{session_id}:#{rets_version}")
  headers["RETS-UA-Authorization"] = "Digest #{authorization}"
  headers
end
83
+
84
# Returns the value of the first cookie whose name equals +name+
# (compared case-insensitively) and which matches the login URL, or nil
# when there is none.
def http_cookie(name)
  cookie = http.cookies.find do |candidate|
    candidate.name.downcase == name.downcase && candidate.match?(URI.parse(login_url))
  end
  cookie && cookie.value
end
90
+ end
91
+ end