jschairb-rets4r 1.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/.document +5 -0
  2. data/CHANGELOG +566 -0
  3. data/CONTRIBUTORS +7 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +29 -0
  6. data/MANIFEST +62 -0
  7. data/NEWS +186 -0
  8. data/README.rdoc +43 -0
  9. data/RUBYS +56 -0
  10. data/Rakefile +50 -0
  11. data/TODO +35 -0
  12. data/examples/client_get_object.rb +49 -0
  13. data/examples/client_login.rb +39 -0
  14. data/examples/client_mapper.rb +17 -0
  15. data/examples/client_metadata.rb +42 -0
  16. data/examples/client_parser.rb +9 -0
  17. data/examples/client_search.rb +49 -0
  18. data/examples/settings.yml +114 -0
  19. data/lib/rets4r.rb +14 -0
  20. data/lib/rets4r/auth.rb +73 -0
  21. data/lib/rets4r/client.rb +487 -0
  22. data/lib/rets4r/client/data.rb +14 -0
  23. data/lib/rets4r/client/dataobject.rb +28 -0
  24. data/lib/rets4r/client/exceptions.rb +116 -0
  25. data/lib/rets4r/client/links.rb +32 -0
  26. data/lib/rets4r/client/metadata.rb +15 -0
  27. data/lib/rets4r/client/parsers/compact.rb +42 -0
  28. data/lib/rets4r/client/parsers/compact_nokogiri.rb +91 -0
  29. data/lib/rets4r/client/parsers/metadata.rb +92 -0
  30. data/lib/rets4r/client/parsers/response_parser.rb +100 -0
  31. data/lib/rets4r/client/requester.rb +143 -0
  32. data/lib/rets4r/client/transaction.rb +31 -0
  33. data/lib/rets4r/core_ext/array/extract_options.rb +15 -0
  34. data/lib/rets4r/core_ext/class/attribute_accessors.rb +58 -0
  35. data/lib/rets4r/core_ext/hash/keys.rb +46 -0
  36. data/lib/rets4r/core_ext/hash/slice.rb +39 -0
  37. data/lib/rets4r/listing_mapper.rb +17 -0
  38. data/lib/rets4r/listing_service.rb +35 -0
  39. data/lib/rets4r/loader.rb +8 -0
  40. data/lib/tasks/annotations.rake +121 -0
  41. data/lib/tasks/coverage.rake +13 -0
  42. data/rets4r.gemspec +24 -0
  43. data/spec/rets4r_compact_data_parser_spec.rb +7 -0
  44. data/test/data/1.5/bad_compact.xml +7 -0
  45. data/test/data/1.5/count_only_compact.xml +3 -0
  46. data/test/data/1.5/error.xml +1 -0
  47. data/test/data/1.5/invalid_compact.xml +4 -0
  48. data/test/data/1.5/login.xml +16 -0
  49. data/test/data/1.5/metadata.xml +0 -0
  50. data/test/data/1.5/search_compact.xml +8 -0
  51. data/test/data/1.5/search_compact_big.xml +136 -0
  52. data/test/data/1.5/search_unescaped_compact.xml +8 -0
  53. data/test/data/listing_service.yml +36 -0
  54. data/test/test_auth.rb +68 -0
  55. data/test/test_client.rb +342 -0
  56. data/test/test_client_links.rb +39 -0
  57. data/test/test_compact_nokogiri.rb +64 -0
  58. data/test/test_helper.rb +12 -0
  59. data/test/test_listing_mapper.rb +112 -0
  60. data/test/test_loader.rb +24 -0
  61. data/test/test_parser.rb +96 -0
  62. data/test/test_quality.rb +57 -0
  63. metadata +211 -0
@@ -0,0 +1,487 @@
1
+ # RETS4R Client
2
+ #
3
+ # Copyright (c) 2006 Scott Patterson <scott.patterson@digitalaun.com>
4
+ #
5
+ # This program is copyrighted free software by Scott Patterson. You can
6
+ # redistribute it and/or modify it under the same terms of Ruby's license;
7
+ # either the dual license version in 2003 (see the file RUBYS), or any later
8
+ # version.
9
+ #
10
+ # TODO: 1.0 Support (Adding this support should be fairly easy)
11
+ # TODO: 2.0 Support (Adding this support will be very difficult since it is a completely different methodology)
12
+ # TODO: Case-insensitive header
13
+
14
+ require 'digest/md5'
15
+ require 'net/http'
16
+ require 'uri'
17
+ require 'cgi'
18
+ require 'auth'
19
+ require 'client/dataobject'
20
+ require 'client/parsers/response_parser'
21
+ require 'client/parsers/compact'
22
+ require 'rets4r/client/links'
23
+ require 'rets4r/client/requester'
24
+ require 'rets4r/client/exceptions'
25
+ require 'logger'
26
+ require 'webrick/httputils'
27
+
28
+ module RETS4R
29
+ class Client
30
# Default output format handed to the constructor.
COMPACT_FORMAT = 'COMPACT'

# HTTP verbs that can be assigned as the request method.
METHOD_GET  = 'GET'
METHOD_POST = 'POST'
METHOD_HEAD = 'HEAD'

# Verb used until #request_method= is called.
DEFAULT_METHOD = METHOD_GET
# Default value of the retry_auth argument of #request.
DEFAULT_RETRY = 2
SUPPORTED_RETS_VERSIONS = %w[1.5 1.7 1.7.2]
# Keys looked up in the login response to discover the transaction URLs.
CAPABILITY_LIST = %w[
  Action
  ChangePassword
  GetObject
  Login
  LoginComplete
  Logout
  Search
  GetMetadata
  Update
]

# These are the response messages as defined in the RETS 1.5e2 and 1.7d6 specifications.
# Provided for convenience and are used by the HTTPError class to provide more useful
# messages.
RETS_HTTP_MESSAGES = {
  '200' => 'Operation successful.',
  '400' => 'The request could not be understood by the server due to malformed syntax.',
  '401' => 'Either the header did not contain an acceptable Authorization or the ' \
           'username/password was invalid. The server response MUST include a ' \
           'WWW-Authenticate header field.',
  '402' => 'The requested transaction requires a payment which could not be authorized.',
  '403' => 'The server understood the request, but is refusing to fulfill it.',
  '404' => 'The server has not found anything matching the Request-URI.',
  '405' => 'The method specified in the Request-Line is not allowed for the resource ' \
           'identified by the Request-URI.',
  '406' => 'The resource identified by the request is only capable of generating response ' \
           'entities which have content characteristics not acceptable according to the accept ' \
           'headers sent in the request.',
  '408' => 'The client did not produce a request within the time that the server was prepared to wait.',
  '411' => 'The server refuses to accept the request without a defined Content-Length.',
  '412' => 'Transaction not permitted at this point in the session.',
  '413' => 'The server is refusing to process a request because the request entity is larger than ' \
           'the server is willing or able to process.',
  '414' => 'The server is refusing to service the request because the Request-URI is longer than ' \
           'the server is willing to interpret. This error usually only occurs for a GET method.',
  '500' => 'The server encountered an unexpected condition which prevented it from fulfilling ' \
           'the request.',
  '501' => 'The server does not support the functionality required to fulfill the request.',
  '503' => 'The server is currently unable to handle the request due to a temporary overloading ' \
           'or maintenance of the server.',
  '505' => 'The server does not support, or refuses to support, the HTTP protocol version that ' \
           'was used in the request message.',
}
83
+
84
+ attr_accessor :mimemap
85
+ attr_reader :format, :urls
86
+
87
+ # Constructor
88
+ #
89
+ # Requires the URL to the RETS server and takes an optional output format. The output format
90
+ # determines the type of data returned by the various RETS transaction methods.
91
# Builds a client for the RETS server whose login URL is +url+.
#
# +format+ is stored and later sent as the 'Format' parameter of metadata
# and search requests. The request method starts out as DEFAULT_METHOD and
# the MIME map is seeded with JPEG and GIF entries. When a block is given,
# the freshly configured client is yielded to it.
def initialize(url, format = COMPACT_FORMAT)
  @format         = format
  @request_method = DEFAULT_METHOD

  @request_struct  = RETS4R::Client::Requester.new
  @urls            = RETS4R::Client::Links.from_login_url(url)
  @response_parser = RETS4R::Client::ResponseParser.new

  self.mimemap = { 'image/jpeg' => 'jpg', 'image/gif' => 'gif' }

  yield self if block_given?
end
109
+
110
+ # Assigns a block that will be called just before the request is sent.
111
+ # This block must accept three parameters:
112
+ # * self
113
+ # * Net::HTTP instance
114
+ # * Hash of headers
115
+ #
116
+ # The block's return value will be ignored. If you want to prevent the request
117
+ # to go through, raise an exception.
118
+ #
119
+ # == Example
120
+ #
121
+ # client = RETS4R::Client.new(...)
122
+ # # Make a new pre_request_block that calculates the RETS-UA-Authorization header.
123
+ # client.set_pre_request_block do |rets, http, headers|
124
+ # a1 = Digest::MD5.hexdigest([headers["User-Agent"], @password].join(":"))
125
+ # if headers.has_key?("Cookie") then
126
+ # cookie = headers["Cookie"].split(";").map(&:strip).select {|c| c =~ /rets-session-id/i}
127
+ # cookie = cookie ? cookie.split("=").last : ""
128
+ # else
129
+ # cookie = ""
130
+ # end
131
+ #
132
+ # parts = [a1, "", cookie, headers["RETS-Version"]]
133
+ # headers["RETS-UA-Authorization"] = "Digest " + Digest::MD5.hexdigest(parts.join(":"))
134
+ # end
135
# Stores +block+ on the request struct as its pre-request hook.
def set_pre_request_block(&block)
  @request_struct.pre_request_block = block
end

# The accessors below mostly forward straight to the request struct.

# Sets the header +name+ to +value+ on the request struct.
def set_header(name, value)
  @request_struct.set_header(name, value)
end

# Reads the header +name+ back from the request struct's header hash.
def get_header(name)
  @request_struct.headers[name]
end

# Sets the 'User-Agent' header.
def user_agent=(name)
  set_header('User-Agent', name)
end

# User agent as reported by the request struct.
def user_agent
  @request_struct.user_agent
end

# Assigns the RETS version on the request struct.
def rets_version=(version)
  @request_struct.rets_version = version
end

# RETS version as reported by the request struct.
def rets_version
  @request_struct.rets_version
end

# Remembers the HTTP method locally and mirrors it onto the request struct.
def request_method=(method)
  @request_method = method
  @request_struct.method = method
end

# HTTP method used by default for transactions.
def request_method
  @request_method
end

# Remembers the logger locally and mirrors it onto the request struct.
def logger=(logger)
  @logger = logger
  @request_struct.logger = logger
end

# Logger assigned through #logger=, or nil when none was set.
def logger
  @logger
end
181
+
182
+ #### RETS Transaction Methods ####
183
+ #
184
+ # Most of these transaction methods mirror the RETS specification methods, so if you are
185
+ # unsure what they mean, you should check the RETS specification. The latest version can be
186
+ # found at http://www.rets.org
187
+
188
+ # Attempts to log into the server using the provided username and password.
189
+ #
190
+ # If called with a block, the results of the login action are yielded,
191
+ # and logout is called when the block returns. In that case, #login
192
+ # returns the block's value. If called without a block, returns the
193
+ # result.
194
+ #
195
+ # As specified in the RETS specification, the Action URL is called and
196
+ # the results made available in the #secondary_results accessor of the
197
+ # results object.
198
# Logs into the server with +username+ and +password+.
#
# The login response is parsed for the capability URLs named in
# CAPABILITY_LIST. Each one is resolved against the login URL with
# URI#merge, so absolute URLs are taken as-is and relative references
# (including ones that carry a query string or lack a leading slash, which
# URI#path= would reject) are resolved correctly.
#
# After a successful login the Action URL is requested, and its result is
# stored on the results object through #secondary_response=.
#
# With a block: yields the results, always calls #logout afterwards, and
# returns the block's value. Without a block: returns the results.
#
# Raises LoginError when the parsed response does not report success.
def login(username, password) #:yields: login_results
  @request_struct.username = username
  @request_struct.password = password

  # We are required to set the Accept header to this by the RETS 1.5 specification.
  set_header('Accept', '*/*')

  response = request(@urls.login)

  # Parse response to get other URLS
  results = @response_parser.parse_key_value(response.body)

  unless results.success?
    raise LoginError.new(response.message + "(#{results.reply_code}: #{results.reply_text})")
  end

  CAPABILITY_LIST.each do |capability|
    location = results.response[capability]
    next unless location

    # merge returns an absolute URI unchanged and resolves anything else
    # relative to the login URL.
    @urls[capability] = @urls.login.merge(URI.parse(location))
  end

  logger.debug("Capability URL List: #{@urls.inspect}") if logger

  # Perform the mandatory get request on the action URL.
  results.secondary_response = perform_action_url

  return results unless block_given?

  begin
    yield results
  ensure
    self.logout
  end
end
244
+
245
+ # Logs out of the RETS server.
246
# Logs out of the RETS server.
#
# When no logout URL is known nothing is requested and nil is returned;
# no exception is raised in that case. Otherwise returns whatever the
# logout request returns.
def logout
  return unless @urls.logout

  request(@urls.logout)
end
253
+
254
+ # Requests Metadata from the server. An optional type and id can be specified to request
255
+ # subsets of the Metadata. Please see the RETS specification for more details on this.
256
+ # The format variable tells the server which format to return the Metadata in. Unless you
257
+ # need the raw metadata in a specified format, you really shouldn't specify the format.
258
+ #
259
+ # If called with a block, yields the results and returns the value of the block, or
260
+ # returns the metadata directly.
261
# Downloads the metadata identified by +type+ and +id+ and parses it with
# the response parser, using the client's format.
#
# With a block: yields the parsed metadata and returns the block's value.
# Without a block: returns the parsed metadata.
def get_metadata(type = 'METADATA-SYSTEM', id = '*')
  metadata = @response_parser.parse_metadata(download_metadata(type, id), @format)
  return metadata unless block_given?

  yield metadata
end
272
+
273
# Requests metadata of the given +type+ and +id+ from the metadata URL,
# in the client's format, and returns the raw response body.
def download_metadata(type, id)
  accept = { 'Accept' => 'text/xml,text/plain;q=0.5' }
  params = { 'Type' => type, 'ID' => id, 'Format' => @format }

  response = request(@urls.metadata, params, accept)
  response.body
end
286
+
287
+ # Performs a GetObject transaction on the server. For details on the arguments, please see
288
+ # the RETS specification on GetObject requests.
289
+ #
290
+ # This method either returns an Array of DataObject instances, or yields each DataObject
291
+ # as it is created. If a block is given, the number of objects yielded is returned.
292
+ #
293
+ # TODO: how much of this could we move over to WEBrick::HTTPRequest#parse?
294
# Performs a GetObject transaction. For the meaning of +resource+, +type+,
# +id+ and +location+ see the RETS specification on GetObject requests.
#
# Returns an Array of DataObject instances, or, when a block is given,
# yields each DataObject as it is built and returns how many were yielded.
#
# Response handling depends on the Content-Type header:
# * text/xml            - handed to the response parser (which is expected
#                         to raise on an error reply); its result is returned.
# * multipart/parallel  - the body is split on the boundary and every part
#                         that has both a header and a body becomes a DataObject.
# * anything else       - the whole body becomes a single DataObject, but
#                         only if the response is chunked or longer than
#                         100 bytes.
#
# TODO: how much of this could we move over to WEBrick::HTTPRequest#parse?
def get_object(resource, type, id, location = false) #:yields: data_object
  header = { 'Accept' => mimemap.keys.join(',') }

  data = {
    'Resource' => resource,
    'Type'     => type,
    'ID'       => id,
    'Location' => location ? '1' : '0'
  }

  response = request(@urls.objects, data, header)
  results = block_given? ? 0 : []

  # Either hands the object to the caller's block (counting it) or collects it.
  emit = lambda do |data_object|
    if block_given?
      yield data_object
      results += 1
    else
      results << data_object
    end
  end

  if response['content-type'] && response['content-type'].include?('text/xml')
    # This probably means that there was an error.
    # Response parser will likely raise an exception.
    return @response_parser.parse_object_response(response.body)
  elsif response['content-type'] && response['content-type'].include?('multipart/parallel')
    content_type = process_content_type(response['content-type'])

    # TODO: log this
    # puts "SPLIT ON #{content_type['boundary']}"
    boundary = content_type['boundary']
    # Unwrap a boundary that was sent in single quotes.
    if boundary =~ /\s*'([^']*)\s*/
      boundary = $1
    end
    parts = response.body.split("\r\n--#{boundary}")

    parts.shift # Get rid of the initial boundary

    # TODO: log this
    # puts "GOT PARTS #{parts.length}"

    parts.each do |part|
      # Split on the FIRST blank line only: the payload is binary and may
      # itself contain CRLFCRLF, which must stay part of the data.
      (raw_header, raw_data) = part.split("\r\n\r\n", 2)

      # TODO: log this
      # puts raw_data.nil?
      next unless raw_data

      emit.call(DataObject.new(process_header(raw_header), raw_data))
    end
  else
    info = {
      'content-type' => response['content-type'], # Compatibility shim. Deprecated.
      'Content-Type' => response['content-type'],
      'Object-ID'    => response['Object-ID'],
      'Content-ID'   => response['Content-ID']
    }

    # NOTE(review): the 100-byte threshold presumably filters out empty or
    # placeholder bodies - confirm the intent before changing it.
    if response['Transfer-Encoding'].to_s.downcase == "chunked" || response['Content-Length'].to_i > 100
      emit.call(DataObject.new(info, response.body))
    end
  end

  results
end
368
+
369
+ # Performs a RETS search transaction. Again, please see the RETS specification for details
370
+ # on what these parameters mean. The options parameter takes a hash of options that will
371
+ # be added to the search statement.
372
# Runs a RETS search and parses the response body with the compact
# Nokogiri parser.
#
# +options+, when given, is a hash whose values are converted to strings
# and merged over the required parameters (so it can override them).
#
# With a block: yields every parsed result and returns whatever the
# parser's #each returns. Without a block: returns the results as an Array.
def search(search_type, klass, query, options = false)
  # Required Data
  params = {
    'SearchType' => search_type,
    'Class'      => klass,
    'Query'      => query,
    'QueryType'  => 'DMQL2',
    'Format'     => format,
    'Count'      => '0'
  }

  # Each value is cast with to_s here, before the request is made, so a
  # value that cannot be forced into a string fails early.
  options.each { |key, value| params[key] = value.to_s } if options

  response = request(@urls.search, params, {})

  # TODO: make parser configurable
  parser = RETS4R::Client::CompactNokogiriParser.new(response.body)

  return parser.to_a unless block_given?

  parser.each { |record| yield record }
end
405
+
406
# Issues a search request with 'Count' set to '2' and returns what the
# response parser's parse_count makes of the response body.
def count(search_type, klass, query)
  params = {
    'SearchType' => search_type,
    'Class'      => klass,
    'Query'      => query,
    'QueryType'  => 'DMQL2',
    'Format'     => format,
    'Count'      => '2'
  }

  @response_parser.parse_count(request(@urls.search, params, {}).body)
end
420
+
421
+ private
422
+
423
+ # XXX: This is crap. It does not properly handle quotes.
424
# Splits a Content-Type header value into a Hash.
#
# The media type is stored under 'content-type'; every "name=value"
# parameter after it is stored under its stripped name. A parameter
# without "=" maps to nil. All double quotes are removed.
#
# Only the first "=" separates name from value, so values that contain
# "=" survive intact. Blank segments (e.g. from ";;") are ignored, and a
# header with no parameters at all yields just the 'content-type' entry.
#
# XXX: quoting is still handled naively - a quoted value containing ";"
# will be split.
def process_content_type(text)
  media_type, *params = text.split(';')
  content = { 'content-type' => media_type.to_s.strip }

  params.each do |param|
    name, value = param.delete('"').split('=', 2)
    next if name.nil? || name.strip.empty?

    content[name.strip] = value ? value.strip : value
  end

  content
end
442
+
443
+ # Processes the HTTP header
444
+ #--
445
+ #++
446
# Parses a raw header block with WEBrick::HTTPUtils.parse_header and
# flattens the result.
#
# The WEBrick helper yields an array of values per header name; only one
# value per header is supported here, so the first is kept.
def process_header(raw)
  parsed = WEBrick::HTTPUtils.parse_header(raw.strip)

  flattened = {}
  parsed.each { |name, values| flattened[name] = values.first }
  flattened
end
452
+
453
+ # This is the primary transaction method, which the other public methods make use of.
454
+ # Given a url for the transaction (endpoint) it makes a request to the RETS server.
455
+ #
456
+ #--
457
+ # This needs to be better documented, but for now please see the public transaction methods
458
+ # for how to make use of this method.
459
+ #++
460
# Primary transaction method used by the public transaction methods.
#
# Forwards the endpoint +url+, the +data+ and +header+ hashes, the HTTP
# +method+ (defaulting to the client's current request method) and the
# +retry_auth+ count to the request struct, and returns its result.
def request(url, data = {}, header = {}, method = @request_method, retry_auth = DEFAULT_RETRY)
  transaction_args = [url, data, header, method, retry_auth]
  @request_struct.request(*transaction_args)
end
463
+
464
+ # If an action URL is present in the URL capability list, it calls that action URL and returns the
465
+ # raw result. Throws a generic RETSException if it is unable to follow the URL.
466
# Requests the Action URL with GET when the URL list has an 'Action'
# entry and returns the raw result; returns nil when there is none.
#
# Any StandardError raised along the way is re-raised as a RETSException
# whose message embeds the original error.
def perform_action_url
  return nil unless @urls.has_key?('Action')

  request(@urls.action, {}, {}, METHOD_GET)
rescue => error
  raise RETSException.new("Unable to follow action URL: '#{error}'.")
end
475
+
476
+ # Provides a proxy class to allow for net/http to log its debug to the logger.
477
# Proxy that lets net/http write its debug output to a logger: anything
# appended with << is passed to the wrapped logger's #debug.
class HTTPDebugLogger
  # +logger+ must respond to #debug.
  def initialize(logger)
    @logger = logger
  end

  # Forwards +data+ to the logger at debug level and returns the logger's
  # return value.
  def <<(data)
    @logger.debug(data)
  end
end
486
+ end
487
+ end