rets4r 0.8.5 → 1.1.18

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. data/.document +5 -0
  2. data/{test/client/data/1.5/metadata.xml → .gemtest} +0 -0
  3. data/CHANGELOG +611 -66
  4. data/CONTRIBUTORS +6 -2
  5. data/Gemfile +1 -0
  6. data/LICENSE +22 -0
  7. data/MANIFEST +63 -0
  8. data/NEWS +203 -0
  9. data/{README → README.rdoc} +11 -4
  10. data/RUBYS +7 -7
  11. data/Rakefile +48 -0
  12. data/TODO +5 -1
  13. data/examples/client_get_object.rb +31 -42
  14. data/examples/client_login.rb +20 -18
  15. data/examples/client_mapper.rb +17 -0
  16. data/examples/client_metadata.rb +28 -28
  17. data/examples/client_parser.rb +9 -0
  18. data/examples/client_search.rb +25 -27
  19. data/examples/settings.yml +114 -0
  20. data/lib/rets4r.rb +14 -1
  21. data/lib/rets4r/auth.rb +70 -66
  22. data/lib/rets4r/client.rb +470 -650
  23. data/lib/rets4r/client/data.rb +13 -13
  24. data/lib/rets4r/client/dataobject.rb +27 -19
  25. data/lib/rets4r/client/exceptions.rb +116 -0
  26. data/lib/rets4r/client/links.rb +32 -0
  27. data/lib/rets4r/client/metadata.rb +12 -12
  28. data/lib/rets4r/client/parsers/compact.rb +42 -0
  29. data/lib/rets4r/client/parsers/compact_nokogiri.rb +91 -0
  30. data/lib/rets4r/client/parsers/metadata.rb +92 -0
  31. data/lib/rets4r/client/parsers/response_parser.rb +100 -0
  32. data/lib/rets4r/client/requester.rb +143 -0
  33. data/lib/rets4r/client/transaction.rb +30 -33
  34. data/lib/rets4r/core_ext/array/extract_options.rb +15 -0
  35. data/lib/rets4r/core_ext/class/attribute_accessors.rb +58 -0
  36. data/lib/rets4r/core_ext/hash/keys.rb +46 -0
  37. data/lib/rets4r/core_ext/hash/slice.rb +39 -0
  38. data/lib/rets4r/listing_mapper.rb +17 -0
  39. data/lib/rets4r/listing_service.rb +35 -0
  40. data/lib/rets4r/loader.rb +8 -0
  41. data/lib/tasks/annotations.rake +121 -0
  42. data/lib/tasks/coverage.rake +13 -0
  43. data/rets4r.gemspec +24 -0
  44. data/spec/rets4r_compact_data_parser_spec.rb +7 -0
  45. data/test/data/1.5/bad_compact.xml +7 -0
  46. data/test/data/1.5/count_only_compact.xml +3 -0
  47. data/test/{client/data → data}/1.5/error.xml +0 -0
  48. data/test/{client/data → data}/1.5/invalid_compact.xml +0 -0
  49. data/test/{client/data → data}/1.5/login.xml +0 -0
  50. data/test/data/1.5/metadata.xml +0 -0
  51. data/test/{client/data → data}/1.5/search_compact.xml +0 -0
  52. data/test/data/1.5/search_compact_big.xml +136 -0
  53. data/test/{client/data → data}/1.5/search_unescaped_compact.xml +0 -0
  54. data/test/data/listing_service.yml +36 -0
  55. data/test/test_auth.rb +68 -0
  56. data/test/test_client.rb +342 -0
  57. data/test/test_client_links.rb +39 -0
  58. data/test/test_compact_nokogiri.rb +64 -0
  59. data/test/test_helper.rb +12 -0
  60. data/test/test_listing_mapper.rb +112 -0
  61. data/test/test_loader.rb +24 -0
  62. data/test/test_parser.rb +96 -0
  63. data/test/test_quality.rb +57 -0
  64. metadata +168 -53
  65. data/GPL +0 -340
  66. data/examples/metadata.xml +0 -42
  67. data/lib/rets4r/client/metadataindex.rb +0 -82
  68. data/lib/rets4r/client/parser.rb +0 -141
  69. data/lib/rets4r/client/parser/rexml.rb +0 -75
  70. data/lib/rets4r/client/parser/xmlparser.rb +0 -95
  71. data/test/client/parser/tc_rexml.rb +0 -17
  72. data/test/client/parser/tc_xmlparser.rb +0 -21
  73. data/test/client/tc_auth.rb +0 -68
  74. data/test/client/tc_client.rb +0 -320
  75. data/test/client/tc_metadataindex.rb +0 -36
  76. data/test/client/test_parser.rb +0 -128
  77. data/test/client/ts_all.rb +0 -8
  78. data/test/ts_all.rb +0 -1
  79. data/test/ts_client.rb +0 -1
@@ -7,661 +7,481 @@
7
7
  # either the dual license version in 2003 (see the file RUBYS), or any later
8
8
  # version.
9
9
  #
10
- # TODO
11
- # Case-insensitive header
10
+ # TODO: 1.0 Support (Adding this support should be fairly easy)
11
+ # TODO: 2.0 Support (Adding this support will be very difficult since it is a completely different methodology)
12
+ # TODO: Case-insensitive header
12
13
 
13
14
  require 'digest/md5'
14
15
  require 'net/http'
15
16
  require 'uri'
16
17
  require 'cgi'
17
- require 'rets4r/auth'
18
- require 'rets4r/client/dataobject'
19
- require 'thread'
18
+ require 'auth'
19
+ require 'client/dataobject'
20
+ require 'client/parsers/response_parser'
21
+ require 'client/parsers/compact'
22
+ require 'rets4r/client/links'
23
+ require 'rets4r/client/requester'
24
+ require 'rets4r/client/exceptions'
20
25
  require 'logger'
26
+ require 'webrick/httputils'
21
27
 
22
28
  module RETS4R
23
- class Client
24
- OUTPUT_RAW = 0 # Nothing done. Simply returns the XML.
25
- OUTPUT_DOM = 1 # Returns a DOM object (REXML) **** NO LONGER SUPPORTED! ****
26
- OUTPUT_RUBY = 2 # Returns a RETS::Data object
27
-
28
- METHOD_GET = 'GET'
29
- METHOD_POST = 'POST'
30
- METHOD_HEAD = 'HEAD'
31
-
32
- DEFAULT_OUTPUT = OUTPUT_RUBY
33
- DEFAULT_METHOD = METHOD_POST
34
- DEFAULT_RETRY = 2
35
- DEFAULT_USER_AGENT = 'RETS4R/0.8.5'
36
- DEFAULT_RETS_VERSION = '1.7'
37
- SUPPORTED_RETS_VERSIONS = ['1.5', '1.7']
38
- CAPABILITY_LIST = ['Action', 'ChangePassword', 'GetObject', 'Login', 'LoginComplete', 'Logout', 'Search', 'GetMetadata', 'Update']
39
- SUPPORTED_PARSERS = [] # This will be populated by parsers as they load
40
-
41
- # These are the response messages as defined in the RETS 1.5e2 and 1.7d6 specifications.
42
- # Provided for convenience and are used by the HTTPError class to provide more useful
43
- # messages.
44
- RETS_HTTP_MESSAGES = {
45
- '200' => 'Operation successful.',
46
- '400' => 'The request could not be understood by the server due to malformed syntax.',
47
- '401' => 'Either the header did not contain an acceptable Authorization or the username/password was invalid. The server response MUST include a WWW-Authenticate header field.',
48
- '402' => 'The requested transaction requires a payment which could not be authorized.',
49
- '403' => 'The server understood the request, but is refusing to fulfill it.',
50
- '404' => 'The server has not found anything matching the Request-URI.',
51
- '405' => 'The method specified in the Request-Line is not allowed for the resource identified by the Request-URI.',
52
- '406' => 'The resource identified by the request is only capable of generating response entities which have content characteristics not acceptable according to the accept headers sent in the request.',
53
- '408' => 'The client did not produce a request within the time that the server was prepared to wait.',
54
- '411' => 'The server refuses to accept the request without a defined Content-Length.',
55
- '412' => 'Transaction not permitted at this point in the session.',
56
- '413' => 'The server is refusing to process a request because the request entity is larger than the server is willing or able to process.',
57
- '414' => 'The server is refusing to service the request because the Request-URI is longer than the server is willing to interpret. This error usually only occurs for a GET method.',
58
- '500' => 'The server encountered an unexpected condition which prevented it from fulfilling the request.',
59
- '501' => 'The server does not support the functionality required to fulfill the request.',
60
- '503' => 'The server is currently unable to handle the request due to a temporary overloading or maintenance of the server.',
61
- '505' => 'The server does not support, or refuses to support, the HTTP protocol version that was used in the request message.',
62
- }
63
-
64
- attr_accessor :mimemap, :logger
65
-
66
- # We load our parsers here so that they can modify the client class appropriately. Because
67
- # the default parser will be the first parser to list itself in the SUPPORTED_PARSERS array,
68
- # we need to require them in the order of preference. Hence, XMLParser is loaded first because
69
- # it is preferred to REXML since it is much faster.
70
- require 'rets4r/client/parser/xmlparser'
71
- require 'rets4r/client/parser/rexml'
72
-
73
- # Set it as the first
74
- DEFAULT_PARSER = SUPPORTED_PARSERS[0]
75
-
76
- # Constructor
77
- #
78
- # Requires the URL to the RETS server and takes an optional output format. The output format
79
- # determines the type of data returned by the various RETS transaction methods.
80
- def initialize(url, output = DEFAULT_OUTPUT)
81
- raise Unsupported.new('DOM output is no longer supported.') if output == OUTPUT_DOM
82
-
83
- @urls = { 'Login' => URI.parse(url) }
84
- @nc = 0
85
- @headers = {
86
- 'User-Agent' => DEFAULT_USER_AGENT,
87
- 'Accept' => '*/*',
88
- 'RETS-Version' => "RETS/#{DEFAULT_RETS_VERSION}",
89
- 'RETS-Session-ID' => '0'
90
- }
91
- @request_method = DEFAULT_METHOD
92
- @parser_class = DEFAULT_PARSER
93
- @semaphore = Mutex.new
94
- @output = output
95
-
96
- self.mimemap = {
97
- 'image/jpeg' => 'jpg',
98
- 'image/gif' => 'gif'
99
- }
100
-
101
- if block_given?
102
- yield self
103
- end
104
- end
105
-
106
- # Assigns a block that will be called just before the request is sent.
107
- # This block must accept three parameters:
108
- # * self
109
- # * Net::HTTP instance
110
- # * Hash of headers
111
- #
112
- # The block's return value will be ignored. If you want to prevent the request
113
- # to go through, raise an exception.
114
- #
115
- # == Example
116
- #
117
- # client = RETS4R::Client.new(...)
118
- # # Make a new pre_request_block that calculates the RETS-UA-Authorization header.
119
- # client.set_pre_request_block do |rets, http, headers|
120
- # a1 = Digest::MD5.hexdigest([headers["User-Agent"], @password].join(":"))
121
- # if headers.has_key?("Cookie") then
122
- # cookie = headers["Cookie"].split(";").map(&:strip).select {|c| c =~ /rets-session-id/i}
123
- # cookie = cookie ? cookie.split("=").last : ""
124
- # else
125
- # cookie = ""
126
- # end
127
- #
128
- # parts = [a1, "", cookie, headers["RETS-Version"]]
129
- # headers["RETS-UA-Authorization"] = "Digest " + Digest::MD5.hexdigest(parts.join(":"))
130
- # end
131
- def set_pre_request_block(&block)
132
- @pre_request_block = block
133
- end
134
-
135
- # We only allow external read access to URLs because they are internally set based on the
136
- # results of various queries.
137
- def urls
138
- @urls
139
- end
140
-
141
- # Parses the provided XML and returns it in the specified output format.
142
- # Requires an XML string and takes an optional output format to override the instance output
143
- # format variable. We currently create a new parser each time, which seems a bit wasteful, but
144
- # it allows for the parser to be changed in the middle of a session as well as XML::Parser
145
- # requiring a new instance for each execution...that could be encapsulated within its parser
146
- # class, though, so we should benchmark and see if it will make a big difference with the
147
- # REXML parse, which I doubt.
148
- def parse(xml, output = false)
149
- if xml == ''
150
- trans = Transaction.new()
151
- trans.reply_code = -1
152
- trans.reply_text = 'No transaction body was returned!'
153
- end
154
-
155
- if output == OUTPUT_RAW || @output == OUTPUT_RAW
156
- xml
157
- else
158
- begin
159
- parser = @parser_class.new
160
- parser.logger = logger
161
- parser.output = output ? output : @output
162
-
163
- parser.parse(xml)
164
- rescue
165
- raise ParserException.new($!)
166
- end
167
- end
168
- end
169
-
170
- # Setup Methods (accessors and mutators)
171
- def set_output(output = DEFAULT_OUTPUT)
172
- @output = output
173
- end
174
-
175
- def get_output
176
- @output
177
- end
178
-
179
- def set_parser_class(klass, force = false)
180
- if force || SUPPORTED_PARSERS.include?(klass)
181
- @parser_class = klass
182
- else
183
- message = "The parser class '#{klass}' is not supported!"
184
- debug(message)
185
-
186
- raise Unsupported.new(message)
187
- end
188
- end
189
-
190
- def get_parser_class
191
- @parser_class
192
- end
193
-
194
- def set_header(name, value)
195
- if value.nil? then
196
- @headers.delete(name)
197
- else
198
- @headers[name] = value
199
- end
200
-
201
- debug("Set header '#{name}' to '#{value}'")
202
- end
203
-
204
- def get_header(name)
205
- @headers[name]
206
- end
207
-
208
- def set_user_agent(name)
209
- set_header('User-Agent', name)
210
- end
211
-
212
- def get_user_agent
213
- get_header('User-Agent')
214
- end
215
-
216
- def set_rets_version(version)
217
- if (SUPPORTED_RETS_VERSIONS.include? version)
218
- set_header('RETS-Version', "RETS/#{version}")
219
- else
220
- raise Unsupported.new("The client does not support RETS version '#{version}'.")
221
- end
222
- end
223
-
224
- def get_rets_version
225
- (get_header('RETS-Version') || "").gsub("RETS/", "")
226
- end
227
-
228
- def set_request_method(method)
229
- @request_method = method
230
- end
231
-
232
- def get_request_method
233
- @request_method
234
- end
235
-
236
- # Provide more Ruby-like attribute accessors instead of get/set methods
237
- alias_method :user_agent=, :set_user_agent
238
- alias_method :user_agent, :get_user_agent
239
- alias_method :request_method=, :set_request_method
240
- alias_method :request_method, :get_request_method
241
- alias_method :rets_version=, :set_rets_version
242
- alias_method :rets_version, :get_rets_version
243
- alias_method :parser_class=, :set_parser_class
244
- alias_method :parser_class, :get_parser_class
245
- alias_method :output=, :set_output
246
- alias_method :output, :get_output
247
-
248
- #### RETS Transaction Methods ####
249
- #
250
- # Most of these transaction methods mirror the RETS specification methods, so if you are
251
- # unsure what they mean, you should check the RETS specification. The latest version can be
252
- # found at http://www.rets.org
253
-
254
- # Attempts to log into the server using the provided username and password.
255
- #
256
- # If called with a block, the results of the login action are yielded,
257
- # and logout is called when the block returns. In that case, #login
258
- # returns the block's value. If called without a block, returns the
259
- # result.
260
- #
261
- # As specified in the RETS specification, the Action URL is called and
262
- # the results made available in the #secondary_response accessor of the
263
- # results object.
264
- def login(username, password) #:yields: login_results
265
- @username = username
266
- @password = password
267
-
268
- # We are required to set the Accept header to this by the RETS 1.5 specification.
269
- set_header('Accept', '*/*')
270
-
271
- response = request(@urls['Login'])
272
-
273
- # Parse response to get other URLS
274
- results = self.parse(response.body, OUTPUT_RUBY)
275
-
276
- if (results.success?)
277
- CAPABILITY_LIST.each do |capability|
278
- next unless results.response[capability]
279
- base = @urls['Login'].clone
280
- base.path = results.response[capability]
281
-
282
- @urls[capability] = base
283
- end
284
-
285
- debug("Capability URL List: #{@urls.inspect}")
286
- else
287
- raise LoginError.new(response.message + "(#{results.reply_code}: #{results.reply_text})")
288
- end
289
-
290
- if @output != OUTPUT_RUBY
291
- results = self.parse(response.body)
292
- end
293
-
294
- # Perform the mandatory get request on the action URL.
295
- results.secondary_response = perform_action_url
296
-
297
- # We only yield
298
- if block_given?
299
- begin
300
- yield results
301
- ensure
302
- self.logout
303
- end
304
- else
305
- results
306
- end
307
- end
308
-
309
- # Logs out of the RETS server.
310
- def logout()
311
- # If no logout URL is provided, then we assume that logout is not necessary (not to
312
- # mention impossible without a URL). We don't throw an exception, though, but we might
313
- # want to if this becomes an issue in the future.
314
-
315
- request(@urls['Logout']) if @urls['Logout']
316
- end
317
-
318
- # Requests Metadata from the server. An optional type and id can be specified to request
319
- # subsets of the Metadata. Please see the RETS specification for more details on this.
320
- # The format variable tells the server which format to return the Metadata in. Unless you
321
- # need the raw metadata in a specified format, you really shouldn't specify the format.
322
- #
323
- # If called with a block, yields the results and returns the value of the block, or
324
- # returns the metadata directly.
325
- def get_metadata(type = 'METADATA-SYSTEM', id = '*', format = 'COMPACT')
326
- header = {
327
- 'Accept' => 'text/xml,text/plain;q=0.5'
328
- }
329
-
330
- data = {
331
- 'Type' => type,
332
- 'ID' => id,
333
- 'Format' => format
334
- }
335
-
336
- response = request(@urls['GetMetadata'], data, header)
337
-
338
- result = self.parse(response.body)
339
-
340
- if block_given?
341
- yield result
342
- else
343
- result
344
- end
345
- end
346
-
347
- # Performs a GetObject transaction on the server. For details on the arguments, please see
348
- # the RETS specification on GetObject requests.
349
- #
350
- # This method either returns an Array of DataObject instances, or yields each DataObject
351
- # as it is created. If a block is given, the number of objects yielded is returned.
352
- def get_object(resource, type, id, location = 1) #:yields: data_object
353
- header = {
354
- 'Accept' => mimemap.keys.join(',')
355
- }
356
-
357
- data = {
358
- 'Resource' => resource,
359
- 'Type' => type,
360
- 'ID' => id,
361
- 'Location' => location.to_s
362
- }
363
-
364
- response = request(@urls['GetObject'], data, header)
365
- results = block_given? ? 0 : []
366
-
367
- if response['content-type'].include?('multipart/parallel')
368
- content_type = process_content_type(response['content-type'])
369
-
370
- parts = response.body.split("\r\n--#{content_type['boundary']}")
371
- parts.shift # Get rid of the initial boundary
372
-
373
- parts.each do |part|
374
- (raw_header, raw_data) = part.split("\r\n\r\n")
375
-
376
- next unless raw_data
377
-
378
- data_header = process_header(raw_header)
379
- data_object = DataObject.new(data_header, raw_data)
380
-
381
- if block_given?
382
- yield data_object
383
- results += 1
384
- else
385
- results << data_object
386
- end
387
- end
388
- else
389
- info = {
390
- 'content-type' => response['content-type'], # Compatibility shim. Deprecated.
391
- 'Content-Type' => response['content-type'],
392
- 'Object-ID' => response['Object-ID'],
393
- 'Content-ID' => response['Content-ID']
394
- }
395
-
396
- if response['Transfer-Encoding'].to_s.downcase == "chunked" || response['Content-Length'].to_i > 100 then
397
- data_object = DataObject.new(info, response.body)
398
- if block_given?
399
- yield data_object
400
- results += 1
401
- else
402
- results << data_object
403
- end
404
- end
405
- end
406
-
407
- results
408
- end
409
-
410
- # Performs a RETS search transaction. Again, please see the RETS specification for details
411
- # on what these parameters mean. The options parameter takes a hash of options that will
412
- # be added to the search statement.
413
- def search(search_type, klass, query, options = false)
414
- header = {}
415
-
416
- # Required Data
417
- data = {
418
- 'SearchType' => search_type,
419
- 'Class' => klass,
420
- 'Query' => query,
421
- 'QueryType' => 'DMQL2',
422
- 'Format' => 'COMPACT',
423
- 'Count' => '0'
424
- }
425
-
426
- # Options
427
- #--
428
- # We might want to switch this to merge!, but I've kept it like this for now because it
429
- # explicitly casts each value as a string prior to performing the search, so we find out now
430
- # if we can't force a value into the string context. I suppose it doesn't really matter when
431
- # that happens, though...
432
- #++
433
- options.each { |k,v| data[k] = v.to_s } if options
434
-
435
- response = request(@urls['Search'], data, header)
436
-
437
- results = self.parse(response.body)
438
-
439
- if block_given?
440
- yield results
441
- else
442
- return results
443
- end
444
- end
445
-
446
- private
447
-
448
- def process_content_type(text)
449
- content = {}
450
-
451
- field_start = text.index(';')
452
-
453
- content['content-type'] = text[0 ... field_start].strip
454
- fields = text[field_start..-1]
455
-
456
- parts = text.split(';')
457
-
458
- parts.each do |part|
459
- (name, value) = part.split('=')
460
-
461
- content[name.strip] = value ? value.strip : value
462
- end
463
-
464
- content
465
- end
466
-
467
- # Processes the HTTP header
468
- #--
469
- # Could we switch over to using CGI for this?
470
- #++
471
- def process_header(raw)
472
- header = {}
473
-
474
- raw.each do |line|
475
- (name, value) = line.split(':')
476
-
477
- header[name.strip] = value.strip if name && value
478
- end
479
-
480
- header
481
- end
482
-
483
- # Given a hash, it returns a URL encoded query string.
484
- def create_query_string(hash)
485
- parts = hash.map {|key,value| "#{CGI.escape(key)}=#{CGI.escape(value)}"}
486
- return parts.join('&')
487
- end
488
-
489
- # This is the primary transaction method, which the other public methods make use of.
490
- # Given a url for the transaction (endpoint) it makes a request to the RETS server.
491
- #
492
- #--
493
- # This needs to be better documented, but for now please see the public transaction methods
494
- # for how to make use of this method.
495
- #++
496
- def request(url, data = {}, header = {}, method = @request_method, retry_auth = DEFAULT_RETRY)
497
- headers, response = nil
498
- begin
499
- @semaphore.lock
500
-
501
- http = Net::HTTP.new(url.host, url.port)
502
-
503
- if logger && logger.debug?
504
- http.set_debug_output HTTPDebugLogger.new(logger)
505
- end
506
-
507
- http.start do |http|
508
- begin
509
- uri = url.path
510
-
511
- if ! data.empty? && method == METHOD_GET
512
- uri += "?#{create_query_string(data)}"
513
- end
514
-
515
- headers = @headers
516
- headers.merge(header) unless header.empty?
517
-
518
- @pre_request_block.call(self, http, headers) if @pre_request_block
519
-
520
- debug("Request headers: #{headers.inspect}")
521
-
522
- @semaphore.unlock
523
-
524
- post_data = data.map {|k,v| "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}" }.join('&') if method == METHOD_POST
525
- response = method == METHOD_POST ? http.post(uri, post_data, headers) :
526
- http.get(uri, headers)
527
-
528
- debug("Response headers: #{response.to_hash.inspect}")
529
-
530
- @semaphore.lock
531
-
532
- if response.code == '401'
533
- # Authentication is required
534
- raise AuthRequired
535
- elsif response.code.to_i >= 300
536
- # We have a non-successful response that we cannot handle
537
- @semaphore.unlock if @semaphore.locked?
538
- raise HTTPError.new(response)
539
- else
540
- cookies = []
541
- if set_cookies = response.get_fields('set-cookie') then
542
- set_cookies.each do |cookie|
543
- cookies << cookie.split(";").first
544
- end
545
- end
546
- set_header('Cookie', cookies.join("; ")) unless cookies.empty?
547
- set_header('RETS-Session-ID', response['RETS-Session-ID']) if response['RETS-Session-ID']
548
- end
549
- rescue AuthRequired
550
- @nc += 1
551
-
552
- if retry_auth > 0
553
- retry_auth -= 1
554
- set_header('Authorization', Auth.authenticate(response, @username, @password, url.path, method, @headers['RETS-Request-ID'], get_user_agent, @nc))
555
- retry
556
- else
557
- @semaphore.unlock if @semaphore.locked?
558
- raise LoginError.new(response.message)
559
- end
560
- end
561
-
562
- debug(response.body)
563
- end
564
-
565
- @semaphore.unlock if @semaphore.locked?
566
-
567
- return response
568
-
569
- #rescue
570
- #data = {"request" => headers, "body" => response.body}
571
- #data["response"] = response.respond_to?(:headers) ? response.headers : response
572
- #data = data.respond_to?(:to_yaml) ? data.to_yaml : data.inspect
573
- #raise RETSException, "#{$!.message}\nRequest/Response Details:\n#{data}"
574
- end
575
- end
576
-
577
- # If an action URL is present in the URL capability list, it calls that action URL and returns the
578
- # raw result. Throws a generic RETSException if it is unable to follow the URL.
579
- def perform_action_url
580
- begin
581
- if @urls.has_key?('Action')
582
- return request(@urls['Action'], {}, {}, METHOD_GET)
583
- end
584
- rescue
585
- raise RETSException.new("Unable to follow action URL: '#{$!}'.")
586
- end
587
- end
588
-
589
- # Shorthand for sending debug messages to the logger if a logger is provided
590
- def debug(message)
591
- logger.debug(message) if logger
592
- end
593
-
594
- # Provides a proxy class to allow for net/http to log its debug to the logger.
595
- class HTTPDebugLogger
596
- def initialize(logger)
597
- @logger = logger
598
- end
599
-
600
- def <<(data)
601
- @logger.debug(data)
602
- end
603
- end
604
-
605
- #### Exceptions ####
606
-
607
- # This exception should be thrown when a generic client error is encountered.
608
- class ClientException < Exception
609
- end
610
-
611
- # This exception should be thrown when there is an error with the parser, which is
612
- # considered a subcomponent of the RETS client. It also includes the XML data that
613
- # was being processed at the time of the exception.
614
- class ParserException < ClientException
615
- attr_accessor :file
616
- end
617
-
618
- # The client does not currently support a specified action.
619
- class Unsupported < ClientException
620
- end
621
-
622
- # The HTTP response returned by the server indicates that there was an error processing
623
- # the request and the client cannot continue on its own without intervention.
624
- class HTTPError < ClientException
625
- attr_accessor :http_response
626
-
627
- # Takes a HTTPResponse object
628
- def initialize(http_response)
629
- self.http_response = http_response
630
- end
631
-
632
- # Shorthand for calling HTTPResponse#code
633
- def code
634
- http_response.code
635
- end
636
-
637
- # Shorthand for calling HTTPResponse#message
638
- def message
639
- http_response.message
640
- end
641
-
642
- # Returns the RETS specification message for the HTTP response code
643
- def rets_message
644
- Client::RETS_HTTP_MESSAGES[code]
645
- end
646
-
647
- def to_s
648
- "#{code} #{message}: #{rets_message}"
649
- end
650
- end
651
-
652
- # A general RETS level exception was encountered. This would include HTTP and RETS
653
- # specification level errors as well as informative mishaps such as authentication being
654
- # required for access.
655
- class RETSException < Exception
656
- end
657
-
658
- # There was a problem with logging into the RETS server.
659
- class LoginError < RETSException
660
- end
661
-
662
- # For internal client use only, it is thrown when a RETS request is made but a password
663
- # is prompted for.
664
- class AuthRequired < RETSException
665
- end
666
- end
667
- end
29
+ class Client
30
+ COMPACT_FORMAT = 'COMPACT'
31
+
32
+ METHOD_GET = 'GET'
33
+ METHOD_POST = 'POST'
34
+ METHOD_HEAD = 'HEAD'
35
+
36
+ DEFAULT_METHOD = METHOD_GET
37
+ DEFAULT_RETRY = 2
38
+ SUPPORTED_RETS_VERSIONS = ['1.5', '1.7']
39
+ CAPABILITY_LIST = [
40
+ 'Action',
41
+ 'ChangePassword',
42
+ 'GetObject',
43
+ 'Login',
44
+ 'LoginComplete',
45
+ 'Logout',
46
+ 'Search',
47
+ 'GetMetadata',
48
+ 'Update'
49
+ ]
50
+
51
+ # These are the response messages as defined in the RETS 1.5e2 and 1.7d6 specifications.
52
+ # Provided for convenience and are used by the HTTPError class to provide more useful
53
+ # messages.
54
+ RETS_HTTP_MESSAGES = {
55
+ '200' => 'Operation successful.',
56
+ '400' => 'The request could not be understood by the server due to malformed syntax.',
57
+ '401' => 'Either the header did not contain an acceptable Authorization or the ' +
58
+ 'username/password was invalid. The server response MUST include a ' +
59
+ 'WWW-Authenticate header field.',
60
+ '402' => 'The requested transaction requires a payment which could not be authorized.',
61
+ '403' => 'The server understood the request, but is refusing to fulfill it.',
62
+ '404' => 'The server has not found anything matching the Request-URI.',
63
+ '405' => 'The method specified in the Request-Line is not allowed for the resource ' +
64
+ 'identified by the Request-URI.',
65
+ '406' => 'The resource identified by the request is only capable of generating response ' +
66
+ 'entities which have content characteristics not acceptable according to the accept ' +
67
+ 'headers sent in the request.',
68
+ '408' => 'The client did not produce a request within the time that the server was prepared to wait.',
69
+ '411' => 'The server refuses to accept the request without a defined Content-Length.',
70
+ '412' => 'Transaction not permitted at this point in the session.',
71
+ '413' => 'The server is refusing to process a request because the request entity is larger than ' +
72
+ 'the server is willing or able to process.',
73
+ '414' => 'The server is refusing to service the request because the Request-URI is longer than ' +
74
+ 'the server is willing to interpret. This error usually only occurs for a GET method.',
75
+ '500' => 'The server encountered an unexpected condition which prevented it from fulfilling ' +
76
+ 'the request.',
77
+ '501' => 'The server does not support the functionality required to fulfill the request.',
78
+ '503' => 'The server is currently unable to handle the request due to a temporary overloading ' +
79
+ 'or maintenance of the server.',
80
+ '505' => 'The server does not support, or refuses to support, the HTTP protocol version that ' +
81
+ 'was used in the request message.',
82
+ }
83
+
84
+ attr_accessor :mimemap
85
+ attr_reader :format, :urls
86
+
87
+ # Constructor
88
+ #
89
+ # Requires the URL to the RETS server and takes an optional output format. The output format
90
+ # determines the type of data returned by the various RETS transaction methods.
91
+ def initialize(url, format = COMPACT_FORMAT)
92
+ @request_struct = RETS4R::Client::Requester.new
93
+ @format = format
94
+ @urls = RETS4R::Client::Links.from_login_url(url)
95
+
96
+ @request_method = DEFAULT_METHOD
97
+
98
+ @response_parser = RETS4R::Client::ResponseParser.new
99
+
100
+ self.mimemap = {
101
+ 'image/jpeg' => 'jpg',
102
+ 'image/gif' => 'gif'
103
+ }
104
+
105
+ if block_given?
106
+ yield self
107
+ end
108
+ end
109
+
110
+ # Assigns a block that will be called just before the request is sent.
111
+ # This block must accept three parameters:
112
+ # * self
113
+ # * Net::HTTP instance
114
+ # * Hash of headers
115
+ #
116
+ # The block's return value will be ignored. If you want to prevent the request
117
+ # to go through, raise an exception.
118
+ #
119
+ # == Example
120
+ #
121
+ # client = RETS4R::Client.new(...)
122
+ # # Make a new pre_request_block that calculates the RETS-UA-Authorization header.
123
+ # client.set_pre_request_block do |rets, http, headers|
124
+ # a1 = Digest::MD5.hexdigest([headers["User-Agent"], @password].join(":"))
125
+ # if headers.has_key?("Cookie") then
126
+ # cookie = headers["Cookie"].split(";").map(&:strip).select {|c| c =~ /rets-session-id/i}
127
+ # cookie = cookie ? cookie.split("=").last : ""
128
+ # else
129
+ # cookie = ""
130
+ # end
131
+ #
132
+ # parts = [a1, "", cookie, headers["RETS-Version"]]
133
+ # headers["RETS-UA-Authorization"] = "Digest " + Digest::MD5.hexdigest(parts.join(":"))
134
+ # end
135
+ def set_pre_request_block(&block)
136
+ @request_struct.pre_request_block = block
137
+ end
138
+
139
+ # So very much delegated to the request struct
140
+ def set_header(name, value)
141
+ @request_struct.set_header(name, value)
142
+ end
143
+
144
+ def get_header(name)
145
+ @request_struct.headers[name]
146
+ end
147
+
148
+ def user_agent=(name)
149
+ @request_struct.set_header('User-Agent', name)
150
+ end
151
+
152
+ def user_agent
153
+ @request_struct.user_agent
154
+ end
155
+
156
+ def rets_version=(version)
157
+ @request_struct.rets_version = version
158
+ end
159
+
160
+ def rets_version
161
+ @request_struct.rets_version
162
+ end
163
+
164
# Records +method+ locally (read back by #request_method) and on the
# request struct.
def request_method=(method)
  @request_struct.method = (@request_method = method)
end
168
+
169
# Returns the request method last stored in @request_method.
def request_method
  current = @request_method
  current
end
172
+
173
# Stores +logger+ locally (read back by #logger) and on the request struct.
def logger=(logger)
  @request_struct.logger = (@logger = logger)
end
177
+
178
# Returns the logger last stored in @logger (nil when none was set).
def logger
  current = @logger
  current
end
181
+
182
+ #### RETS Transaction Methods ####
183
+ #
184
+ # Most of these transaction methods mirror the RETS specification methods, so if you are
185
+ # unsure what they mean, you should check the RETS specification. The latest version can be
186
+ # found at http://www.rets.org
187
+
188
+ # Attempts to log into the server using the provided username and password.
189
+ #
190
+ # If called with a block, the results of the login action are yielded,
191
+ # and logout is called when the block returns. In that case, #login
192
+ # returns the block's value. If called without a block, returns the
193
+ # result.
194
+ #
195
+ # As specified in the RETS specification, the Action URL is called and
196
+ # the results made available in the #secondary_results accessor of the
197
+ # results object.
198
# Logs in with +username+ and +password+ and records every capability URL
# found in the parsed login response in @urls.
#
# Relative capability URLs are resolved against the login URL with URI#merge,
# so a value that carries a query string (e.g. "/rets/search?Format=COMPACT")
# or is not rooted at "/" is resolved as a URL reference instead of being
# assigned verbatim as a path. The login URL's own query string is not
# carried over to the capability URL.
#
# With a block, yields the parsed results, calls #logout when the block
# finishes (also when it raises) and returns the block's value. Without a
# block, returns the parsed results.
#
# Raises LoginError when the parsed response does not report success.
def login(username, password) #:yields: login_results
  @request_struct.username = username
  @request_struct.password = password

  # We are required to set the Accept header to this by the RETS 1.5 specification.
  set_header('Accept', '*/*')

  response = request(@urls.login)

  # Parse response to get other URLS
  results = @response_parser.parse_key_value(response.body)

  unless results.success?
    raise LoginError.new(response.message + "(#{results.reply_code}: #{results.reply_text})")
  end

  CAPABILITY_LIST.each do |capability|
    location = results.response[capability]
    next unless location

    uri = URI.parse(location)
    # merge returns a new URI, so the stored login URL is never mutated.
    @urls[capability] = uri.absolute? ? uri : @urls.login.merge(uri)
  end

  logger.debug("Capability URL List: #{@urls.inspect}") if logger

  # Perform the mandatory get request on the action URL.
  results.secondary_response = perform_action_url

  return results unless block_given?

  begin
    yield results
  ensure
    logout
  end
end
244
+
245
+ # Logs out of the RETS server.
246
def logout
  # Without a logout URL there is nothing to call, so this is a silent
  # no-op rather than an error.
  logout_url = @urls.logout
  return unless logout_url

  request(logout_url)
end
253
+
254
+ # Requests Metadata from the server. An optional type and id can be specified to request
255
+ # subsets of the Metadata. Please see the RETS specification for more details on this.
256
+ # The format variable tells the server which format to return the Metadata in. Unless you
257
+ # need the raw metadata in a specified format, you really shouldn't specify the format.
258
+ #
259
+ # If called with a block, yields the results and returns the value of the block, or
260
+ # returns the metadata directly.
261
def get_metadata(type = 'METADATA-SYSTEM', id = '*')
  # Download the raw document, then parse it using the client's @format.
  metadata = @response_parser.parse_metadata(download_metadata(type, id), @format)

  return metadata unless block_given?

  yield metadata
end
272
+
273
# Requests the metadata of the given +type+ and +id+ from the metadata URL,
# in the client's @format, and returns the response body.
def download_metadata(type, id)
  params = { 'Type' => type, 'ID' => id, 'Format' => @format }
  accept = { 'Accept' => 'text/xml,text/plain;q=0.5' }

  response = request(@urls.metadata, params, accept)
  response.body
end
286
+
287
+ # Performs a GetObject transaction on the server. For details on the arguments, please see
288
+ # the RETS specification on GetObject requests.
289
+ #
290
+ # This method either returns an Array of DataObject instances, or yields each DataObject
291
+ # as it is created. If a block is given, the number of objects yielded is returned.
292
+ #
293
+ # TODO: how much of this could we move over to WEBrick::HTTPRequest#parse?
294
# Performs a GetObject request and turns the response into DataObject
# instances.
#
# Returns an Array of DataObject instances, or, when a block is given, yields
# each DataObject as it is created and returns the number yielded. A
# 'text/xml' response is handed to the response parser and its result is
# returned directly.
#
# Multipart handling notes:
# * each part is split into header and payload at the FIRST blank line only,
#   so a payload that itself contains CRLFCRLF is kept intact;
# * a body that opens directly with "--boundary" (no leading CRLF) keeps its
#   first part.
def get_object(resource, type, id, location = false) #:yields: data_object
  header = { 'Accept' => mimemap.keys.join(',') }

  data = {
    'Resource' => resource,
    'Type'     => type,
    'ID'       => id,
    'Location' => location ? '1' : '0'
  }

  response = request(@urls.objects, data, header)
  results = block_given? ? 0 : []
  content_type = response['content-type']

  # Hands one DataObject to the caller's block (counting it) or collects it.
  deliver = lambda do |data_object|
    if block_given?
      yield data_object
      results += 1
    else
      results << data_object
    end
  end

  if content_type && content_type.include?('text/xml')
    # This probably means that there was an error.
    # Response parser will likely raise an exception.
    return @response_parser.parse_object_response(response.body)
  elsif content_type && content_type.include?('multipart/parallel')
    boundary = process_content_type(content_type)['boundary']
    boundary = $1 if boundary =~ /\s*'([^']*)\s*/

    body = response.body
    # Normalize a body that starts with the boundary itself so the split
    # below yields an empty leading element instead of the first part.
    body = "\r\n" + body if body.start_with?("--#{boundary}")

    parts = body.split("\r\n--#{boundary}")
    parts.shift # Get rid of whatever precedes the initial boundary

    parts.each do |part|
      # Limit of 2: only the first blank line separates header from payload.
      raw_header, raw_data = part.split("\r\n\r\n", 2)
      next unless raw_data

      deliver.call(DataObject.new(process_header(raw_header), raw_data))
    end
  else
    info = {
      'content-type' => content_type, # Compatibility shim. Deprecated.
      'Content-Type' => content_type,
      'Object-ID'    => response['Object-ID'],
      'Content-ID'   => response['Content-ID']
    }

    if response['Transfer-Encoding'].to_s.downcase == "chunked" || response['Content-Length'].to_i > 100
      deliver.call(DataObject.new(info, response.body))
    end
  end

  results
end
368
+
369
+ # Performs a RETS search transaction. Again, please see the RETS specification for details
370
+ # on what these parameters mean. The options parameter takes a hash of options that will
371
+ # be added to the search statement.
372
def search(search_type, klass, query, options = false)
  # Required parameters of the search request.
  params = {
    'SearchType' => search_type,
    'Class'      => klass,
    'Query'      => query,
    'QueryType'  => 'DMQL2',
    'Format'     => format,
    'Count'      => '0'
  }

  # Each option value is cast with to_s up front, so a value that cannot be
  # stringified fails here rather than while the request is being built.
  (options || {}).each { |name, value| params[name] = value.to_s }

  response = request(@urls.search, params, {})

  # TODO: make parser configurable
  parser = RETS4R::Client::CompactNokogiriParser.new(response.body)

  return parser.to_a unless block_given?

  parser.each { |record| yield record }
end
405
+
406
# Sends a search request with 'Count' set to '2' and returns whatever the
# response parser's parse_count makes of the response body.
def count(search_type, klass, query)
  params = {
    'SearchType' => search_type,
    'Class'      => klass,
    'Query'      => query,
    'QueryType'  => 'DMQL2',
    'Format'     => format,
    'Count'      => '2'
  }

  body = request(@urls.search, params, {}).body
  @response_parser.parse_count(body)
end
420
+
421
+ private
422
+
423
+ # XXX: This is crap. It does not properly handle quotes.
424
# Splits a Content-Type header value into a Hash.
#
# The media type (the text before the first ';') is stored under
# 'content-type'; each following "name=value" parameter is stored under its
# stripped name. A parameter without '=' maps to nil. Double quotes are
# removed before splitting, so quoted values lose their quotes (values that
# contain ';' inside quotes are still not handled).
#
# Only the first '=' separates name from value, so values that contain '='
# are preserved. Empty segments (";;" or a trailing ';') are skipped, and a
# header without any ';' yields just the 'content-type' entry.
def process_content_type(text)
  media_type, *params = text.split(';')
  content = { 'content-type' => media_type.to_s.strip }

  params.each do |param|
    name, value = param.delete('"').split('=', 2)
    next if name.nil? || name.strip.empty?

    content[name.strip] = value && value.strip
  end

  content
end
442
+
443
+ # Processes the HTTP header
444
+ #--
445
+ #++
446
def process_header(raw)
  # parse_header gives an array of values per field; only the first value of
  # each field is kept because callers handle a single value per header.
  parsed = WEBrick::HTTPUtils.parse_header(raw.strip)

  single_valued = {}
  parsed.each { |field, values| single_valued[field] = values.first }
  single_valued
end
452
+
453
+ # This is the primary transaction method, which the other public methods make use of.
454
+ # Given a url for the transaction (endpoint) it makes a request to the RETS server.
455
+ #
456
+ #--
457
+ # This needs to be better documented, but for now please see the public transaction methods
458
+ # for how to make use of this method.
459
+ #++
460
def request(url, data = {}, header = {}, method = @request_method, retry_auth = DEFAULT_RETRY)
  # All of the work is done by the request struct; this only fills in the
  # defaults for method and retry count.
  struct = @request_struct
  struct.request(url, data, header, method, retry_auth)
end
463
+
464
+ # If an action URL is present in the URL capability list, it calls that action URL and returns the
465
+ # raw result. Throws a generic RETSException if it is unable to follow the URL.
466
def perform_action_url
  # No action URL in the capability list: nothing to do.
  return nil unless @urls.has_key?('Action')

  request(@urls.action, {}, {}, METHOD_GET)
rescue
  # Any failure above is reported as a RETSException.
  raise RETSException.new("Unable to follow action URL: '#{$!}'.")
end
475
+
476
+ # Provides a proxy class to allow for net/http to log its debug to the logger.
477
class HTTPDebugLogger
  # +logger+ must respond to #debug.
  def initialize(logger)
    @logger = logger
  end

  # Forwards +data+ to the wrapped logger at debug level and returns
  # whatever the logger's #debug returns.
  def <<(data)
    sink = @logger
    sink.debug(data)
  end
end
486
+ end
487
+ end