rets4r 0.8.5 → 1.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. data/.document +5 -0
  2. data/{test/client/data/1.5/metadata.xml → .gemtest} +0 -0
  3. data/CHANGELOG +611 -66
  4. data/CONTRIBUTORS +6 -2
  5. data/Gemfile +1 -0
  6. data/LICENSE +22 -0
  7. data/MANIFEST +63 -0
  8. data/NEWS +203 -0
  9. data/{README → README.rdoc} +11 -4
  10. data/RUBYS +7 -7
  11. data/Rakefile +48 -0
  12. data/TODO +5 -1
  13. data/examples/client_get_object.rb +31 -42
  14. data/examples/client_login.rb +20 -18
  15. data/examples/client_mapper.rb +17 -0
  16. data/examples/client_metadata.rb +28 -28
  17. data/examples/client_parser.rb +9 -0
  18. data/examples/client_search.rb +25 -27
  19. data/examples/settings.yml +114 -0
  20. data/lib/rets4r.rb +14 -1
  21. data/lib/rets4r/auth.rb +70 -66
  22. data/lib/rets4r/client.rb +470 -650
  23. data/lib/rets4r/client/data.rb +13 -13
  24. data/lib/rets4r/client/dataobject.rb +27 -19
  25. data/lib/rets4r/client/exceptions.rb +116 -0
  26. data/lib/rets4r/client/links.rb +32 -0
  27. data/lib/rets4r/client/metadata.rb +12 -12
  28. data/lib/rets4r/client/parsers/compact.rb +42 -0
  29. data/lib/rets4r/client/parsers/compact_nokogiri.rb +91 -0
  30. data/lib/rets4r/client/parsers/metadata.rb +92 -0
  31. data/lib/rets4r/client/parsers/response_parser.rb +100 -0
  32. data/lib/rets4r/client/requester.rb +143 -0
  33. data/lib/rets4r/client/transaction.rb +30 -33
  34. data/lib/rets4r/core_ext/array/extract_options.rb +15 -0
  35. data/lib/rets4r/core_ext/class/attribute_accessors.rb +58 -0
  36. data/lib/rets4r/core_ext/hash/keys.rb +46 -0
  37. data/lib/rets4r/core_ext/hash/slice.rb +39 -0
  38. data/lib/rets4r/listing_mapper.rb +17 -0
  39. data/lib/rets4r/listing_service.rb +35 -0
  40. data/lib/rets4r/loader.rb +8 -0
  41. data/lib/tasks/annotations.rake +121 -0
  42. data/lib/tasks/coverage.rake +13 -0
  43. data/rets4r.gemspec +24 -0
  44. data/spec/rets4r_compact_data_parser_spec.rb +7 -0
  45. data/test/data/1.5/bad_compact.xml +7 -0
  46. data/test/data/1.5/count_only_compact.xml +3 -0
  47. data/test/{client/data → data}/1.5/error.xml +0 -0
  48. data/test/{client/data → data}/1.5/invalid_compact.xml +0 -0
  49. data/test/{client/data → data}/1.5/login.xml +0 -0
  50. data/test/data/1.5/metadata.xml +0 -0
  51. data/test/{client/data → data}/1.5/search_compact.xml +0 -0
  52. data/test/data/1.5/search_compact_big.xml +136 -0
  53. data/test/{client/data → data}/1.5/search_unescaped_compact.xml +0 -0
  54. data/test/data/listing_service.yml +36 -0
  55. data/test/test_auth.rb +68 -0
  56. data/test/test_client.rb +342 -0
  57. data/test/test_client_links.rb +39 -0
  58. data/test/test_compact_nokogiri.rb +64 -0
  59. data/test/test_helper.rb +12 -0
  60. data/test/test_listing_mapper.rb +112 -0
  61. data/test/test_loader.rb +24 -0
  62. data/test/test_parser.rb +96 -0
  63. data/test/test_quality.rb +57 -0
  64. metadata +168 -53
  65. data/GPL +0 -340
  66. data/examples/metadata.xml +0 -42
  67. data/lib/rets4r/client/metadataindex.rb +0 -82
  68. data/lib/rets4r/client/parser.rb +0 -141
  69. data/lib/rets4r/client/parser/rexml.rb +0 -75
  70. data/lib/rets4r/client/parser/xmlparser.rb +0 -95
  71. data/test/client/parser/tc_rexml.rb +0 -17
  72. data/test/client/parser/tc_xmlparser.rb +0 -21
  73. data/test/client/tc_auth.rb +0 -68
  74. data/test/client/tc_client.rb +0 -320
  75. data/test/client/tc_metadataindex.rb +0 -36
  76. data/test/client/test_parser.rb +0 -128
  77. data/test/client/ts_all.rb +0 -8
  78. data/test/ts_all.rb +0 -1
  79. data/test/ts_client.rb +0 -1
@@ -7,661 +7,481 @@
7
7
  # either the dual license version in 2003 (see the file RUBYS), or any later
8
8
  # version.
9
9
  #
10
- # TODO
11
- # Case-insensitive header
10
+ # TODO: 1.0 Support (Adding this support should be fairly easy)
11
+ # TODO: 2.0 Support (Adding this support will be very difficult since it is a completely different methodology)
12
+ # TODO: Case-insensitive header
12
13
 
13
14
  require 'digest/md5'
14
15
  require 'net/http'
15
16
  require 'uri'
16
17
  require 'cgi'
17
- require 'rets4r/auth'
18
- require 'rets4r/client/dataobject'
19
- require 'thread'
18
+ require 'auth'
19
+ require 'client/dataobject'
20
+ require 'client/parsers/response_parser'
21
+ require 'client/parsers/compact'
22
+ require 'rets4r/client/links'
23
+ require 'rets4r/client/requester'
24
+ require 'rets4r/client/exceptions'
20
25
  require 'logger'
26
+ require 'webrick/httputils'
21
27
 
22
28
  module RETS4R
23
- class Client
24
- OUTPUT_RAW = 0 # Nothing done. Simply returns the XML.
25
- OUTPUT_DOM = 1 # Returns a DOM object (REXML) **** NO LONGER SUPPORTED! ****
26
- OUTPUT_RUBY = 2 # Returns a RETS::Data object
27
-
28
- METHOD_GET = 'GET'
29
- METHOD_POST = 'POST'
30
- METHOD_HEAD = 'HEAD'
31
-
32
- DEFAULT_OUTPUT = OUTPUT_RUBY
33
- DEFAULT_METHOD = METHOD_POST
34
- DEFAULT_RETRY = 2
35
- DEFAULT_USER_AGENT = 'RETS4R/0.8.5'
36
- DEFAULT_RETS_VERSION = '1.7'
37
- SUPPORTED_RETS_VERSIONS = ['1.5', '1.7']
38
- CAPABILITY_LIST = ['Action', 'ChangePassword', 'GetObject', 'Login', 'LoginComplete', 'Logout', 'Search', 'GetMetadata', 'Update']
39
- SUPPORTED_PARSERS = [] # This will be populated by parsers as they load
40
-
41
- # These are the response messages as defined in the RETS 1.5e2 and 1.7d6 specifications.
42
- # Provided for convenience and are used by the HTTPError class to provide more useful
43
- # messages.
44
- RETS_HTTP_MESSAGES = {
45
- '200' => 'Operation successful.',
46
- '400' => 'The request could not be understood by the server due to malformed syntax.',
47
- '401' => 'Either the header did not contain an acceptable Authorization or the username/password was invalid. The server response MUST include a WWW-Authenticate header field.',
48
- '402' => 'The requested transaction requires a payment which could not be authorized.',
49
- '403' => 'The server understood the request, but is refusing to fulfill it.',
50
- '404' => 'The server has not found anything matching the Request-URI.',
51
- '405' => 'The method specified in the Request-Line is not allowed for the resource identified by the Request-URI.',
52
- '406' => 'The resource identified by the request is only capable of generating response entities which have content characteristics not acceptable according to the accept headers sent in the request.',
53
- '408' => 'The client did not produce a request within the time that the server was prepared to wait.',
54
- '411' => 'The server refuses to accept the request without a defined Content-Length.',
55
- '412' => 'Transaction not permitted at this point in the session.',
56
- '413' => 'The server is refusing to process a request because the request entity is larger than the server is willing or able to process.',
57
- '414' => 'The server is refusing to service the request because the Request-URI is longer than the server is willing to interpret. This error usually only occurs for a GET method.',
58
- '500' => 'The server encountered an unexpected condition which prevented it from fulfilling the request.',
59
- '501' => 'The server does not support the functionality required to fulfill the request.',
60
- '503' => 'The server is currently unable to handle the request due to a temporary overloading or maintenance of the server.',
61
- '505' => 'The server does not support, or refuses to support, the HTTP protocol version that was used in the request message.',
62
- }
63
-
64
- attr_accessor :mimemap, :logger
65
-
66
- # We load our parsers here so that they can modify the client class appropriately. Because
67
- # the default parser will be the first parser to list itself in the DEFAULT_PARSER array,
68
- # we need to require them in the order of preference. Hence, XMLParser is loaded first because
69
- # it is preferred to REXML since it is much faster.
70
- require 'rets4r/client/parser/xmlparser'
71
- require 'rets4r/client/parser/rexml'
72
-
73
- # Set it as the first
74
- DEFAULT_PARSER = SUPPORTED_PARSERS[0]
75
-
76
- # Constructor
77
- #
78
- # Requires the URL to the RETS server and takes an optional output format. The output format
79
- # determines the type of data returned by the various RETS transaction methods.
80
- def initialize(url, output = DEFAULT_OUTPUT)
81
- raise Unsupported.new('DOM output is no longer supported.') if output == OUTPUT_DOM
82
-
83
- @urls = { 'Login' => URI.parse(url) }
84
- @nc = 0
85
- @headers = {
86
- 'User-Agent' => DEFAULT_USER_AGENT,
87
- 'Accept' => '*/*',
88
- 'RETS-Version' => "RETS/#{DEFAULT_RETS_VERSION}",
89
- 'RETS-Session-ID' => '0'
90
- }
91
- @request_method = DEFAULT_METHOD
92
- @parser_class = DEFAULT_PARSER
93
- @semaphore = Mutex.new
94
- @output = output
95
-
96
- self.mimemap = {
97
- 'image/jpeg' => 'jpg',
98
- 'image/gif' => 'gif'
99
- }
100
-
101
- if block_given?
102
- yield self
103
- end
104
- end
105
-
106
- # Assigns a block that will be called just before the request is sent.
107
- # This block must accept three parameters:
108
- # * self
109
- # * Net::HTTP instance
110
- # * Hash of headers
111
- #
112
- # The block's return value will be ignored. If you want to prevent the request
113
- # to go through, raise an exception.
114
- #
115
- # == Example
116
- #
117
- # client = RETS4R::Client.new(...)
118
- # # Make a new pre_request_block that calculates the RETS-UA-Authorization header.
119
- # client.set_pre_request_block do |rets, http, headers|
120
- # a1 = Digest::MD5.hexdigest([headers["User-Agent"], @password].join(":"))
121
- # if headers.has_key?("Cookie") then
122
- # cookie = headers["Cookie"].split(";").map(&:strip).select {|c| c =~ /rets-session-id/i}
123
- # cookie = cookie ? cookie.split("=").last : ""
124
- # else
125
- # cookie = ""
126
- # end
127
- #
128
- # parts = [a1, "", cookie, headers["RETS-Version"]]
129
- # headers["RETS-UA-Authorization"] = "Digest " + Digest::MD5.hexdigest(parts.join(":"))
130
- # end
131
- def set_pre_request_block(&block)
132
- @pre_request_block = block
133
- end
134
-
135
- # We only allow external read access to URLs because they are internally set based on the
136
- # results of various queries.
137
- def urls
138
- @urls
139
- end
140
-
141
- # Parses the provided XML returns it in the specified output format.
142
- # Requires an XML string and takes an optional output format to override the instance output
143
- # format variable. We current create a new parser each time, which seems a bit wasteful, but
144
- # it allows for the parser to be changed in the middle of a session as well as XML::Parser
145
- # requiring a new instance for each execution...that could be encapsulated within its parser
146
- # class,though, so we should benchmark and see if it will make a big difference with the
147
- # REXML parse, which I doubt.
148
- def parse(xml, output = false)
149
- if xml == ''
150
- trans = Transaction.new()
151
- trans.reply_code = -1
152
- trans.reply_text = 'No transaction body was returned!'
153
- end
154
-
155
- if output == OUTPUT_RAW || @output == OUTPUT_RAW
156
- xml
157
- else
158
- begin
159
- parser = @parser_class.new
160
- parser.logger = logger
161
- parser.output = output ? output : @output
162
-
163
- parser.parse(xml)
164
- rescue
165
- raise ParserException.new($!)
166
- end
167
- end
168
- end
169
-
170
- # Setup Methods (accessors and mutators)
171
- def set_output(output = DEFAULT_OUTPUT)
172
- @output = output
173
- end
174
-
175
- def get_output
176
- @output
177
- end
178
-
179
- def set_parser_class(klass, force = false)
180
- if force || SUPPORTED_PARSERS.include?(klass)
181
- @parser_class = klass
182
- else
183
- message = "The parser class '#{klass}' is not supported!"
184
- debug(message)
185
-
186
- raise Unsupported.new(message)
187
- end
188
- end
189
-
190
- def get_parser_class
191
- @parser_class
192
- end
193
-
194
- def set_header(name, value)
195
- if value.nil? then
196
- @headers.delete(name)
197
- else
198
- @headers[name] = value
199
- end
200
-
201
- debug("Set header '#{name}' to '#{value}'")
202
- end
203
-
204
- def get_header(name)
205
- @headers[name]
206
- end
207
-
208
- def set_user_agent(name)
209
- set_header('User-Agent', name)
210
- end
211
-
212
- def get_user_agent
213
- get_header('User-Agent')
214
- end
215
-
216
- def set_rets_version(version)
217
- if (SUPPORTED_RETS_VERSIONS.include? version)
218
- set_header('RETS-Version', "RETS/#{version}")
219
- else
220
- raise Unsupported.new("The client does not support RETS version '#{version}'.")
221
- end
222
- end
223
-
224
- def get_rets_version
225
- (get_header('RETS-Version') || "").gsub("RETS/", "")
226
- end
227
-
228
- def set_request_method(method)
229
- @request_method = method
230
- end
231
-
232
- def get_request_method
233
- @request_method
234
- end
235
-
236
- # Provide more Ruby-like attribute accessors instead of get/set methods
237
- alias_method :user_agent=, :set_user_agent
238
- alias_method :user_agent, :get_user_agent
239
- alias_method :request_method=, :set_request_method
240
- alias_method :request_method, :get_request_method
241
- alias_method :rets_version=, :set_rets_version
242
- alias_method :rets_version, :get_rets_version
243
- alias_method :parser_class=, :set_parser_class
244
- alias_method :parser_class, :get_parser_class
245
- alias_method :output=, :set_output
246
- alias_method :output, :get_output
247
-
248
- #### RETS Transaction Methods ####
249
- #
250
- # Most of these transaction methods mirror the RETS specification methods, so if you are
251
- # unsure what they mean, you should check the RETS specification. The latest version can be
252
- # found at http://www.rets.org
253
-
254
- # Attempts to log into the server using the provided username and password.
255
- #
256
- # If called with a block, the results of the login action are yielded,
257
- # and logout is called when the block returns. In that case, #login
258
- # returns the block's value. If called without a block, returns the
259
- # result.
260
- #
261
- # As specified in the RETS specification, the Action URL is called and
262
- # the results made available in the #secondary_results accessor of the
263
- # results object.
264
- def login(username, password) #:yields: login_results
265
- @username = username
266
- @password = password
267
-
268
- # We are required to set the Accept header to this by the RETS 1.5 specification.
269
- set_header('Accept', '*/*')
270
-
271
- response = request(@urls['Login'])
272
-
273
- # Parse response to get other URLS
274
- results = self.parse(response.body, OUTPUT_RUBY)
275
-
276
- if (results.success?)
277
- CAPABILITY_LIST.each do |capability|
278
- next unless results.response[capability]
279
- base = @urls['Login'].clone
280
- base.path = results.response[capability]
281
-
282
- @urls[capability] = base
283
- end
284
-
285
- debug("Capability URL List: #{@urls.inspect}")
286
- else
287
- raise LoginError.new(response.message + "(#{results.reply_code}: #{results.reply_text})")
288
- end
289
-
290
- if @output != OUTPUT_RUBY
291
- results = self.parse(response.body)
292
- end
293
-
294
- # Perform the mandatory get request on the action URL.
295
- results.secondary_response = perform_action_url
296
-
297
- # We only yield
298
- if block_given?
299
- begin
300
- yield results
301
- ensure
302
- self.logout
303
- end
304
- else
305
- results
306
- end
307
- end
308
-
309
- # Logs out of the RETS server.
310
- def logout()
311
- # If no logout URL is provided, then we assume that logout is not necessary (not to
312
- # mention impossible without a URL). We don't throw an exception, though, but we might
313
- # want to if this becomes an issue in the future.
314
-
315
- request(@urls['Logout']) if @urls['Logout']
316
- end
317
-
318
- # Requests Metadata from the server. An optional type and id can be specified to request
319
- # subsets of the Metadata. Please see the RETS specification for more details on this.
320
- # The format variable tells the server which format to return the Metadata in. Unless you
321
- # need the raw metadata in a specified format, you really shouldn't specify the format.
322
- #
323
- # If called with a block, yields the results and returns the value of the block, or
324
- # returns the metadata directly.
325
- def get_metadata(type = 'METADATA-SYSTEM', id = '*', format = 'COMPACT')
326
- header = {
327
- 'Accept' => 'text/xml,text/plain;q=0.5'
328
- }
329
-
330
- data = {
331
- 'Type' => type,
332
- 'ID' => id,
333
- 'Format' => format
334
- }
335
-
336
- response = request(@urls['GetMetadata'], data, header)
337
-
338
- result = self.parse(response.body)
339
-
340
- if block_given?
341
- yield result
342
- else
343
- result
344
- end
345
- end
346
-
347
- # Performs a GetObject transaction on the server. For details on the arguments, please see
348
- # the RETS specification on GetObject requests.
349
- #
350
- # This method either returns an Array of DataObject instances, or yields each DataObject
351
- # as it is created. If a block is given, the number of objects yielded is returned.
352
- def get_object(resource, type, id, location = 1) #:yields: data_object
353
- header = {
354
- 'Accept' => mimemap.keys.join(',')
355
- }
356
-
357
- data = {
358
- 'Resource' => resource,
359
- 'Type' => type,
360
- 'ID' => id,
361
- 'Location' => location.to_s
362
- }
363
-
364
- response = request(@urls['GetObject'], data, header)
365
- results = block_given? ? 0 : []
366
-
367
- if response['content-type'].include?('multipart/parallel')
368
- content_type = process_content_type(response['content-type'])
369
-
370
- parts = response.body.split("\r\n--#{content_type['boundary']}")
371
- parts.shift # Get rid of the initial boundary
372
-
373
- parts.each do |part|
374
- (raw_header, raw_data) = part.split("\r\n\r\n")
375
-
376
- next unless raw_data
377
-
378
- data_header = process_header(raw_header)
379
- data_object = DataObject.new(data_header, raw_data)
380
-
381
- if block_given?
382
- yield data_object
383
- results += 1
384
- else
385
- results << data_object
386
- end
387
- end
388
- else
389
- info = {
390
- 'content-type' => response['content-type'], # Compatibility shim. Deprecated.
391
- 'Content-Type' => response['content-type'],
392
- 'Object-ID' => response['Object-ID'],
393
- 'Content-ID' => response['Content-ID']
394
- }
395
-
396
- if response['Transfer-Encoding'].to_s.downcase == "chunked" || response['Content-Length'].to_i > 100 then
397
- data_object = DataObject.new(info, response.body)
398
- if block_given?
399
- yield data_object
400
- results += 1
401
- else
402
- results << data_object
403
- end
404
- end
405
- end
406
-
407
- results
408
- end
409
-
410
- # Peforms a RETS search transaction. Again, please see the RETS specification for details
411
- # on what these parameters mean. The options parameter takes a hash of options that will
412
- # added to the search statement.
413
- def search(search_type, klass, query, options = false)
414
- header = {}
415
-
416
- # Required Data
417
- data = {
418
- 'SearchType' => search_type,
419
- 'Class' => klass,
420
- 'Query' => query,
421
- 'QueryType' => 'DMQL2',
422
- 'Format' => 'COMPACT',
423
- 'Count' => '0'
424
- }
425
-
426
- # Options
427
- #--
428
- # We might want to switch this to merge!, but I've kept it like this for now because it
429
- # explicitly casts each value as a string prior to performing the search, so we find out now
430
- # if can't force a value into the string context. I suppose it doesn't really matter when
431
- # that happens, though...
432
- #++
433
- options.each { |k,v| data[k] = v.to_s } if options
434
-
435
- response = request(@urls['Search'], data, header)
436
-
437
- results = self.parse(response.body)
438
-
439
- if block_given?
440
- yield results
441
- else
442
- return results
443
- end
444
- end
445
-
446
- private
447
-
448
- def process_content_type(text)
449
- content = {}
450
-
451
- field_start = text.index(';')
452
-
453
- content['content-type'] = text[0 ... field_start].strip
454
- fields = text[field_start..-1]
455
-
456
- parts = text.split(';')
457
-
458
- parts.each do |part|
459
- (name, value) = part.split('=')
460
-
461
- content[name.strip] = value ? value.strip : value
462
- end
463
-
464
- content
465
- end
466
-
467
- # Processes the HTTP header
468
- #--
469
- # Could we switch over to using CGI for this?
470
- #++
471
- def process_header(raw)
472
- header = {}
473
-
474
- raw.each do |line|
475
- (name, value) = line.split(':')
476
-
477
- header[name.strip] = value.strip if name && value
478
- end
479
-
480
- header
481
- end
482
-
483
- # Given a hash, it returns a URL encoded query string.
484
- def create_query_string(hash)
485
- parts = hash.map {|key,value| "#{CGI.escape(key)}=#{CGI.escape(value)}"}
486
- return parts.join('&')
487
- end
488
-
489
- # This is the primary transaction method, which the other public methods make use of.
490
- # Given a url for the transaction (endpoint) it makes a request to the RETS server.
491
- #
492
- #--
493
- # This needs to be better documented, but for now please see the public transaction methods
494
- # for how to make use of this method.
495
- #++
496
- def request(url, data = {}, header = {}, method = @request_method, retry_auth = DEFAULT_RETRY)
497
- headers, response = nil
498
- begin
499
- @semaphore.lock
500
-
501
- http = Net::HTTP.new(url.host, url.port)
502
-
503
- if logger && logger.debug?
504
- http.set_debug_output HTTPDebugLogger.new(logger)
505
- end
506
-
507
- http.start do |http|
508
- begin
509
- uri = url.path
510
-
511
- if ! data.empty? && method == METHOD_GET
512
- uri += "?#{create_query_string(data)}"
513
- end
514
-
515
- headers = @headers
516
- headers.merge(header) unless header.empty?
517
-
518
- @pre_request_block.call(self, http, headers) if @pre_request_block
519
-
520
- debug("Request headers: #{headers.inspect}")
521
-
522
- @semaphore.unlock
523
-
524
- post_data = data.map {|k,v| "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}" }.join('&') if method == METHOD_POST
525
- response = method == METHOD_POST ? http.post(uri, post_data, headers) :
526
- http.get(uri, headers)
527
-
528
- debug("Response headers: #{response.to_hash.inspect}")
529
-
530
- @semaphore.lock
531
-
532
- if response.code == '401'
533
- # Authentication is required
534
- raise AuthRequired
535
- elsif response.code.to_i >= 300
536
- # We have a non-successful response that we cannot handle
537
- @semaphore.unlock if @semaphore.locked?
538
- raise HTTPError.new(response)
539
- else
540
- cookies = []
541
- if set_cookies = response.get_fields('set-cookie') then
542
- set_cookies.each do |cookie|
543
- cookies << cookie.split(";").first
544
- end
545
- end
546
- set_header('Cookie', cookies.join("; ")) unless cookies.empty?
547
- set_header('RETS-Session-ID', response['RETS-Session-ID']) if response['RETS-Session-ID']
548
- end
549
- rescue AuthRequired
550
- @nc += 1
551
-
552
- if retry_auth > 0
553
- retry_auth -= 1
554
- set_header('Authorization', Auth.authenticate(response, @username, @password, url.path, method, @headers['RETS-Request-ID'], get_user_agent, @nc))
555
- retry
556
- else
557
- @semaphore.unlock if @semaphore.locked?
558
- raise LoginError.new(response.message)
559
- end
560
- end
561
-
562
- debug(response.body)
563
- end
564
-
565
- @semaphore.unlock if @semaphore.locked?
566
-
567
- return response
568
-
569
- #rescue
570
- #data = {"request" => headers, "body" => response.body}
571
- #data["response"] = response.respond_to?(:headers) ? response.headers : response
572
- #data = data.respond_to?(:to_yaml) ? data.to_yaml : data.inspect
573
- #raise RETSException, "#{$!.message}\nRequest/Response Details:\n#{data}"
574
- end
575
- end
576
-
577
- # If an action URL is present in the URL capability list, it calls that action URL and returns the
578
- # raw result. Throws a generic RETSException if it is unable to follow the URL.
579
- def perform_action_url
580
- begin
581
- if @urls.has_key?('Action')
582
- return request(@urls['Action'], {}, {}, METHOD_GET)
583
- end
584
- rescue
585
- raise RETSException.new("Unable to follow action URL: '#{$!}'.")
586
- end
587
- end
588
-
589
- # Shorthand for sending debug messages to the logger if a logger is provided
590
- def debug(message)
591
- logger.debug(message) if logger
592
- end
593
-
594
- # Provides a proxy class to allow for net/http to log its debug to the logger.
595
- class HTTPDebugLogger
596
- def initialize(logger)
597
- @logger = logger
598
- end
599
-
600
- def <<(data)
601
- @logger.debug(data)
602
- end
603
- end
604
-
605
- #### Exceptions ####
606
-
607
- # This exception should be thrown when a generic client error is encountered.
608
- class ClientException < Exception
609
- end
610
-
611
- # This exception should be thrown when there is an error with the parser, which is
612
- # considered a subcomponent of the RETS client. It also includes the XML data that
613
- # that was being processed at the time of the exception.
614
- class ParserException < ClientException
615
- attr_accessor :file
616
- end
617
-
618
- # The client does not currently support a specified action.
619
- class Unsupported < ClientException
620
- end
621
-
622
- # The HTTP response returned by the server indicates that there was an error processing
623
- # the request and the client cannot continue on its own without intervention.
624
- class HTTPError < ClientException
625
- attr_accessor :http_response
626
-
627
- # Takes a HTTPResponse object
628
- def initialize(http_response)
629
- self.http_response = http_response
630
- end
631
-
632
- # Shorthand for calling HTTPResponse#code
633
- def code
634
- http_response.code
635
- end
636
-
637
- # Shorthand for calling HTTPResponse#message
638
- def message
639
- http_response.message
640
- end
641
-
642
- # Returns the RETS specification message for the HTTP response code
643
- def rets_message
644
- Client::RETS_HTTP_MESSAGES[code]
645
- end
646
-
647
- def to_s
648
- "#{code} #{message}: #{rets_message}"
649
- end
650
- end
651
-
652
- # A general RETS level exception was encountered. This would include HTTP and RETS
653
- # specification level errors as well as informative mishaps such as authentication being
654
- # required for access.
655
- class RETSException < Exception
656
- end
657
-
658
- # There was a problem with logging into the RETS server.
659
- class LoginError < RETSException
660
- end
661
-
662
- # For internal client use only, it is thrown when the a RETS request is made but a password
663
- # is prompted for.
664
- class AuthRequired < RETSException
665
- end
666
- end
667
- end
29
+ class Client
30
+ COMPACT_FORMAT = 'COMPACT'
31
+
32
+ METHOD_GET = 'GET'
33
+ METHOD_POST = 'POST'
34
+ METHOD_HEAD = 'HEAD'
35
+
36
+ DEFAULT_METHOD = METHOD_GET
37
+ DEFAULT_RETRY = 2
38
+ SUPPORTED_RETS_VERSIONS = ['1.5', '1.7']
39
+ CAPABILITY_LIST = [
40
+ 'Action',
41
+ 'ChangePassword',
42
+ 'GetObject',
43
+ 'Login',
44
+ 'LoginComplete',
45
+ 'Logout',
46
+ 'Search',
47
+ 'GetMetadata',
48
+ 'Update'
49
+ ]
50
+
51
+ # These are the response messages as defined in the RETS 1.5e2 and 1.7d6 specifications.
52
+ # Provided for convenience and are used by the HTTPError class to provide more useful
53
+ # messages.
54
+ RETS_HTTP_MESSAGES = {
55
+ '200' => 'Operation successful.',
56
+ '400' => 'The request could not be understood by the server due to malformed syntax.',
57
+ '401' => 'Either the header did not contain an acceptable Authorization or the ' +
58
+ 'username/password was invalid. The server response MUST include a ' +
59
+ 'WWW-Authenticate header field.',
60
+ '402' => 'The requested transaction requires a payment which could not be authorized.',
61
+ '403' => 'The server understood the request, but is refusing to fulfill it.',
62
+ '404' => 'The server has not found anything matching the Request-URI.',
63
+ '405' => 'The method specified in the Request-Line is not allowed for the resource ' +
64
+ 'identified by the Request-URI.',
65
+ '406' => 'The resource identified by the request is only capable of generating response ' +
66
+ 'entities which have content characteristics not acceptable according to the accept ' +
67
+ 'headers sent in the request.',
68
+ '408' => 'The client did not produce a request within the time that the server was prepared to wait.',
69
+ '411' => 'The server refuses to accept the request without a defined Content-Length.',
70
+ '412' => 'Transaction not permitted at this point in the session.',
71
+ '413' => 'The server is refusing to process a request because the request entity is larger than ' +
72
+ 'the server is willing or able to process.',
73
+ '414' => 'The server is refusing to service the request because the Request-URI is longer than ' +
74
+ 'the server is willing to interpret. This error usually only occurs for a GET method.',
75
+ '500' => 'The server encountered an unexpected condition which prevented it from fulfilling ' +
76
+ 'the request.',
77
+ '501' => 'The server does not support the functionality required to fulfill the request.',
78
+ '503' => 'The server is currently unable to handle the request due to a temporary overloading ' +
79
+ 'or maintenance of the server.',
80
+ '505' => 'The server does not support, or refuses to support, the HTTP protocol version that ' +
81
+ 'was used in the request message.',
82
+ }
83
+
84
+ attr_accessor :mimemap
85
+ attr_reader :format, :urls
86
+
87
+ # Constructor
88
+ #
89
+ # Requires the URL to the RETS server and takes an optional output format. The output format
90
+ # determines the type of data returned by the various RETS transaction methods.
91
+ def initialize(url, format = COMPACT_FORMAT)
92
+ @request_struct = RETS4R::Client::Requester.new
93
+ @format = format
94
+ @urls = RETS4R::Client::Links.from_login_url(url)
95
+
96
+ @request_method = DEFAULT_METHOD
97
+
98
+ @response_parser = RETS4R::Client::ResponseParser.new
99
+
100
+ self.mimemap = {
101
+ 'image/jpeg' => 'jpg',
102
+ 'image/gif' => 'gif'
103
+ }
104
+
105
+ if block_given?
106
+ yield self
107
+ end
108
+ end
109
+
110
+ # Assigns a block that will be called just before the request is sent.
111
+ # This block must accept three parameters:
112
+ # * self
113
+ # * Net::HTTP instance
114
+ # * Hash of headers
115
+ #
116
+ # The block's return value will be ignored. If you want to prevent the request
117
+ # to go through, raise an exception.
118
+ #
119
+ # == Example
120
+ #
121
+ # client = RETS4R::Client.new(...)
122
+ # # Make a new pre_request_block that calculates the RETS-UA-Authorization header.
123
+ # client.set_pre_request_block do |rets, http, headers|
124
+ # a1 = Digest::MD5.hexdigest([headers["User-Agent"], @password].join(":"))
125
+ # if headers.has_key?("Cookie") then
126
+ # cookie = headers["Cookie"].split(";").map(&:strip).select {|c| c =~ /rets-session-id/i}
127
+ # cookie = cookie ? cookie.split("=").last : ""
128
+ # else
129
+ # cookie = ""
130
+ # end
131
+ #
132
+ # parts = [a1, "", cookie, headers["RETS-Version"]]
133
+ # headers["RETS-UA-Authorization"] = "Digest " + Digest::MD5.hexdigest(parts.join(":"))
134
+ # end
135
+ def set_pre_request_block(&block)
136
+ @request_struct.pre_request_block = block
137
+ end
138
+
139
+ # So very much delegated to the request struct
140
+ def set_header(name, value)
141
+ @request_struct.set_header(name, value)
142
+ end
143
+
144
+ def get_header(name)
145
+ @request_struct.headers[name]
146
+ end
147
+
148
+ def user_agent=(name)
149
+ @request_struct.set_header('User-Agent', name)
150
+ end
151
+
152
+ def user_agent
153
+ @request_struct.user_agent
154
+ end
155
+
156
# Assigns +version+ as the RETS version on the request struct.
def rets_version=(version)
  @request_struct.rets_version = version
end
159
+
160
# Returns the RETS version currently held by the request struct.
def rets_version
  @request_struct.rets_version
end
163
+
164
# Sets the HTTP method used for requests. The value is stored on the client
# (it is the default +method+ argument of #request) and mirrored onto the
# request struct.
def request_method=(method)
  @request_method = method
  @request_struct.method = method
end
168
+
169
# Returns the HTTP method currently configured on the client.
def request_method
  @request_method
end
172
+
173
# Installs +logger+ on the client and hands the same object to the request
# struct.
def logger=(logger)
  @logger = logger
  @request_struct.logger = logger
end
177
+
178
# Returns the logger assigned to the client, or nil when none was set.
def logger
  @logger
end
181
+
182
+ #### RETS Transaction Methods ####
183
+ #
184
+ # Most of these transaction methods mirror the RETS specification methods, so if you are
185
+ # unsure what they mean, you should check the RETS specification. The latest version can be
186
+ # found at http://www.rets.org
187
+
188
# Attempts to log into the server using the provided username and password.
#
# On success, every capability in CAPABILITY_LIST that the login response
# advertises is recorded in the URL list. Absolute URLs are stored as-is;
# relative ones are resolved against the login URL using standard URI
# reference resolution, so values that carry a query string or lack a
# leading slash are handled, and the login URL's own query string is not
# carried over.
#
# If called with a block, the results of the login action are yielded and
# logout is called when the block returns (even if it raises). In that
# case, #login returns the block's value. Without a block, the results
# object is returned.
#
# After a successful login the Action URL is called and its result is made
# available through the #secondary_response accessor of the results object.
#
# Raises LoginError when the parsed response does not report success.
def login(username, password) #:yields: login_results
  @request_struct.username = username
  @request_struct.password = password

  # We are required to set the Accept header to this by the RETS 1.5 specification.
  set_header('Accept', '*/*')

  response = request(@urls.login)

  # The login response body lists the URLs of the other transactions.
  results = @response_parser.parse_key_value(response.body)

  unless results.success?
    raise LoginError.new(response.message + "(#{results.reply_code}: #{results.reply_text})")
  end

  CAPABILITY_LIST.each do |capability|
    location = results.response[capability]
    next unless location

    uri = URI.parse(location)

    # URI#merge resolves a relative reference against the login URL. Assigning
    # the raw value to #path would raise for anything that is not a bare
    # absolute path (e.g. "/search?x=1" or "search").
    @urls[capability] = uri.absolute? ? uri : @urls.login.merge(uri)
  end

  logger.debug("Capability URL List: #{@urls.inspect}") if logger

  # Perform the mandatory get request on the action URL.
  results.secondary_response = perform_action_url

  return results unless block_given?

  begin
    yield results
  ensure
    logout
  end
end
244
+
245
# Logs out of the RETS server.
#
# When the server supplied no logout URL we assume that logging out is
# unnecessary (and it would be impossible anyway), so nothing is requested
# and nil is returned. No exception is raised in that case, although that
# may be revisited if it ever becomes a problem.
def logout()
  return unless @urls.logout

  request(@urls.logout)
end
253
+
254
# Requests metadata from the server. An optional +type+ and +id+ narrow the
# request to a subset of the metadata; see the RETS specification for the
# accepted values. The raw document is downloaded with #download_metadata
# and parsed according to the client's configured format.
#
# If called with a block, yields the parsed result and returns the value of
# the block; otherwise the parsed result itself is returned.
def get_metadata(type = 'METADATA-SYSTEM', id = '*')
  raw_xml = download_metadata(type, id)
  parsed  = @response_parser.parse_metadata(raw_xml, @format)

  return parsed unless block_given?

  yield parsed
end
272
+
273
# Fetches the raw metadata document for +type+ and +id+ from the metadata
# URL and returns the response body unparsed. The client's configured
# format is sent as the 'Format' parameter.
def download_metadata(type, id)
  params  = { 'Type' => type, 'ID' => id, 'Format' => @format }
  headers = { 'Accept' => 'text/xml,text/plain;q=0.5' }

  response = request(@urls.metadata, params, headers)
  response.body
end
286
+
287
# Performs a GetObject transaction on the server. For details on the
# arguments, please see the RETS specification on GetObject requests.
# +location+ is sent as the 'Location' parameter ('1' when truthy, else '0').
#
# Returns an Array of DataObject instances, or, when a block is given,
# yields each DataObject as it is created and returns the number yielded.
#
# Three kinds of response are handled:
# * text/xml -- handed to the response parser, whose result is returned
#   directly (the block is not called).
# * multipart/parallel -- split on the MIME boundary, one DataObject per part.
# * anything else -- treated as a single object.
#
# TODO: how much of this could we move over to WEBrick::HTTPRequest#parse?
def get_object(resource, type, id, location = false) #:yields: data_object
  header = { 'Accept' => mimemap.keys.join(',') }

  data = {
    'Resource' => resource,
    'Type'     => type,
    'ID'       => id,
    'Location' => location ? '1' : '0'
  }

  response     = request(@urls.objects, data, header)
  results      = block_given? ? 0 : []
  content_type = response['content-type']

  # Hands one object to the caller: yielded and counted, or collected.
  deliver = lambda do |data_object|
    if block_given?
      yield data_object
      results += 1
    else
      results << data_object
    end
  end

  if content_type && content_type.include?('text/xml')
    # This probably means that there was an error.
    # Response parser will likely raise an exception.
    return @response_parser.parse_object_response(response.body)
  elsif content_type && content_type.include?('multipart/parallel')
    boundary = process_content_type(content_type)['boundary']
    # Unwrap a boundary that was sent in single quotes.
    boundary = Regexp.last_match(1) if boundary =~ /\s*'([^']*)\s*/

    body = response.body
    # A multipart body may start directly with the first boundary line, with
    # no preamble and no leading CRLF. Normalize it so that the first element
    # of the split is always the (discardable) preamble and never a real part.
    body = "\r\n#{body}" if body.start_with?("--#{boundary}")

    parts = body.split("\r\n--#{boundary}")
    parts.shift # Get rid of the preamble before the initial boundary

    parts.each do |part|
      # Split only at the FIRST blank line: object data is binary and may
      # itself contain "\r\n\r\n", which must stay part of the data.
      raw_header, raw_data = part.split("\r\n\r\n", 2)

      # The closing delimiter leaves a fragment with no blank line; skip it.
      next unless raw_data

      deliver.call(DataObject.new(process_header(raw_header), raw_data))
    end
  else
    info = {
      'content-type' => content_type, # Compatibility shim. Deprecated.
      'Content-Type' => content_type,
      'Object-ID'    => response['Object-ID'],
      'Content-ID'   => response['Content-ID']
    }

    # NOTE(review): the 100-byte threshold presumably filters out tiny
    # non-object bodies -- confirm the intent before changing it.
    if response['Transfer-Encoding'].to_s.downcase == "chunked" || response['Content-Length'].to_i > 100
      deliver.call(DataObject.new(info, response.body))
    end
  end

  results
end
368
+
369
# Performs a RETS search transaction. Again, please see the RETS
# specification for details on what these parameters mean. +options+ takes a
# hash of extra parameters that are added to (and may override) the
# required search parameters.
#
# Without a block, returns the parsed results as an Array. With a block,
# each result is yielded in turn and the return value is whatever the
# parser's #each returns.
def search(search_type, klass, query, options = false)
  # Required Data
  params = {
    'SearchType' => search_type,
    'Class'      => klass,
    'Query'      => query,
    'QueryType'  => 'DMQL2',
    'Format'     => format,
    'Count'      => '0'
  }

  # Options
  #--
  # merge! would also work here, but copying one pair at a time casts every
  # value to a String before the request is made, so a value that cannot be
  # stringified fails right away.
  #++
  if options
    options.each do |key, value|
      params[key] = value.to_s
    end
  end

  response = request(@urls.search, params, {})

  # TODO: make parser configurable
  parser = RETS4R::Client::CompactNokogiriParser.new(response.body)

  return parser.to_a unless block_given?

  parser.each { |record| yield record }
end
405
+
406
# Runs a search request with the 'Count' parameter set to '2' and returns
# whatever the response parser extracts as the count from the response body.
# NOTE(review): per the RETS specification Count=2 presumably asks the
# server for the record count only -- confirm.
def count(search_type, klass, query)
  params = {
    'SearchType' => search_type,
    'Class'      => klass,
    'Query'      => query,
    'QueryType'  => 'DMQL2',
    'Format'     => format,
    'Count'      => '2'
  }

  response = request(@urls.search, params, {})
  @response_parser.parse_count(response.body)
end
420
+
421
+ private
422
+
423
# Breaks a Content-Type header value into a Hash.
#
# The media type is stored under 'content-type'; every following
# "name=value" parameter is stored under its (stripped) name. Double quotes
# are removed. A parameter without "=" maps to nil.
#
#   process_content_type('multipart/parallel; boundary="ab=cd"')
#   # => {"content-type"=>"multipart/parallel", "boundary"=>"ab=cd"}
#
# XXX: Quote handling is still naive: quotes are simply deleted, so a quoted
# value containing ';' is not supported.
def process_content_type(text)
  media_type, *params = text.to_s.split(';')

  content = { 'content-type' => media_type.to_s.strip }

  params.each do |param|
    # Limit the split to two pieces so that a value which itself contains
    # '=' (common in MIME boundaries) is kept whole.
    name, value = param.delete('"').split('=', 2)

    # Ignore empty segments such as the one produced by ";;".
    next if name.nil? || name.strip.empty?

    content[name.strip] = value && value.strip
  end

  content
end
442
+
443
# Parses the raw header text of one part into a Hash with a single value
# per header name.
#--
# WEBrick's parser returns an array of values for every header. We are only
# set up to handle one value per header, so the first one is kept.
#++
def process_header(raw)
  parsed = WEBrick::HTTPUtils.parse_header(raw.strip)

  parsed.each_with_object({}) do |(name, values), flat|
    flat[name] = values.first
  end
end
452
+
453
# The primary transaction method, used by all of the public transaction
# methods. Given the URL of a transaction (endpoint), it has the request
# struct make the request to the RETS server and returns its result.
#
# +method+ defaults to the client's configured request method and
# +retry_auth+ to DEFAULT_RETRY.
#
#--
# This needs to be better documented; for now, see the public transaction
# methods for examples of how it is used.
#++
def request(url, data = {}, header = {}, method = @request_method, retry_auth = DEFAULT_RETRY)
  @request_struct.request(url, data, header, method, retry_auth)
end
463
+
464
# Follows the action URL when the capability list contains one and returns
# the raw result of that GET request; returns nil when there is no 'Action'
# entry. Any error raised along the way is re-raised as a generic
# RETSException.
def perform_action_url
  return unless @urls.has_key?('Action')

  request(@urls.action, {}, {}, METHOD_GET)
rescue => error
  raise RETSException.new("Unable to follow action URL: '#{error}'.")
end
475
+
476
# Proxy that lets net/http write its debug output to a logger: anything
# appended with << is forwarded to the wrapped logger at debug level.
class HTTPDebugLogger
  # +logger+ is the object that will receive #debug calls.
  def initialize(logger)
    @logger = logger
  end

  # Forwards +data+ to the logger and returns the logger's result.
  def <<(data)
    @logger.debug(data)
  end
end
486
+ end
487
+ end