sru 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,6 @@
1
1
  require 'uri'
2
2
  require 'cgi'
3
3
  require 'net/http'
4
- require 'rexml/document'
5
4
 
6
5
  module SRU
7
6
 
@@ -30,9 +29,24 @@ module SRU
30
29
  # explain request to determine the version to be used in
31
30
  # subsequent requests.
32
31
 
33
# base is the SRU endpoint URL. options may carry :parser, either
# 'rexml' (the default) or 'libxml'. Raises SRU::Exception when the
# parser is unknown or its library cannot be loaded.
def initialize(base, options = {})
  @server = URI.parse base
  @parser = options.fetch(:parser, 'rexml')

  # load only the XML library that was asked for
  case @parser
  when 'libxml'
    begin
      require 'rubygems'
      require 'xml/libxml'
    rescue LoadError
      # LoadError is a ScriptError, so a bare rescue would let it escape
      # instead of turning it into an SRU::Exception.
      raise SRU::Exception, "unable to load parser: #{@parser}", caller
    end
  when 'rexml'
    require 'rexml/document'
    require 'rexml/xpath'
  else
    raise SRU::Exception, "unknown parser: #{@parser}", caller
  end

  # stash this away for future requests
  @version = self.explain.version
end
@@ -46,7 +60,7 @@ module SRU
46
60
 
47
61
  def explain
48
62
  doc = get_doc(:operation => 'explain')
49
- return ExplainResponse.new(doc)
63
+ return ExplainResponse.new(doc, @parser)
50
64
  end
51
65
 
52
66
 
@@ -62,8 +76,9 @@ module SRU
62
76
  options[:query] = query
63
77
  options[:operation] = 'searchRetrieve'
64
78
  options[:maximumRecords] = 10 unless options.has_key? :maximumRecords
79
+ options[:recordSchema] = 'dc' unless options.has_key? :recordSchema
65
80
  doc = get_doc(options)
66
- return SearchResponse.new(doc)
81
+ return SearchResponse.new(doc, @parser)
67
82
  end
68
83
 
69
84
 
@@ -78,7 +93,7 @@ module SRU
78
93
  options[:operation] = 'scan'
79
94
  options[:maximumTerms] = 5 unless options.has_key? :maximumTerms
80
95
  doc = get_doc(options)
81
- return ScanResponse.new(doc)
96
+ return ScanResponse.new(doc, @parser)
82
97
  end
83
98
 
84
99
  private
@@ -88,7 +103,8 @@ module SRU
88
103
 
89
104
  def get_doc(hash)
90
105
  # all requests get a version
91
- hash[:version] = @version
106
+ hash[:version] = @version
107
+
92
108
 
93
109
  # don't want to monkey with the original
94
110
  uri = @server.clone
@@ -99,12 +115,22 @@ module SRU
99
115
  "#{entry[0]}=#{CGI.escape(entry[1].to_s)}"
100
116
  }
101
117
  uri.query = parts.join('&')
102
-
103
118
  # fetch the xml and build/return a document object from it
104
119
  begin
105
120
  xml = Net::HTTP.get(uri)
106
- return REXML::Document.new(xml)
107
- rescue
121
+ # load appropriate parser
122
+ case @parser
123
+ when 'libxml'
124
+ xmlObj = LibXML::XML::Parser.new()
125
+ # not sure why but the explain namespace does bad things to
126
+ # libxml
127
+ #xml = xml.gsub(' xmlns="http://explain.z3950.org/dtd/2.0/"', '')
128
+ xmlObj.string = xml
129
+ return xmlObj.parse
130
+ when 'rexml'
131
+ return REXML::Document.new(xml)
132
+ end
133
+ rescue
108
134
  raise SRU::Exception, "exception during SRU operation", caller
109
135
  end
110
136
  end
@@ -2,37 +2,36 @@ require 'sru/response'
2
2
 
3
3
  module SRU
4
4
  class ExplainResponse < Response
5
-
6
5
  def to_s
7
6
  return "host=#{host} port=#{port} database=#{database} version=#{version}"
8
7
  end
9
8
 
10
9
  def host
11
- return xpath('.//serverInfo/host')
10
+ return xpath(@doc,'.//ns0:serverInfo/ns0:host', @namespaces )
12
11
  end
13
12
 
14
13
  def port
15
- port = xpath('.//serverInfo/port')
14
+ port = xpath(@doc, './/ns0:serverInfo/ns0:port', @namespaces)
16
15
  return nil if not port
17
16
  return Integer(port)
18
17
  end
19
18
 
20
19
  def database
21
- return xpath('.//serverInfo/database')
20
+ return xpath(@doc, './/ns0:serverInfo/ns0:database', @namespaces)
22
21
  end
23
22
 
24
23
  def number_of_records
25
- return xpath('.//configInfo/numberOfRecords')
24
+ return xpath(@doc, './/ns0:configInfo/ns0:numberOfRecords', @namespaces)
26
25
  end
27
26
 
28
27
  def version
29
- version = xpath('.//zs:explainResponse/zs:version')
28
+ version = xpath(@doc, './/zs:version', @namespaces)
30
29
  return version if version
31
30
 
32
31
  # also look here
33
- info = xpath_first('.//serverInfo')
34
- return info.attributes['version'] if info
35
-
32
+ info = xpath(@doc, './/ns0:serverInfo', @namespaces)
33
+ #return info.attributes['version'] if info
34
+ return get_attribute(info, "version") if info
36
35
  return nil
37
36
  end
38
37
  end
@@ -1,37 +1,24 @@
1
- require 'rexml/xpath'
2
-
3
1
  module SRU
4
2
 
5
3
  # base class for all SRU responses
6
4
  class Response
7
- attr_reader :doc
8
-
9
- # namespaces for use in xpath queries
10
- @@namespaces = {'zs' => 'http://www.loc.gov/zing/srw/'}
5
+ require 'sru/xpath'
6
+ include SRU::XPath
11
7
 
12
- def initialize(doc)
13
- @doc = doc
14
- end
15
8
 
16
- protected
17
-
18
- # get all nodes that match an xpath
19
- def xpath_all(path)
20
- return REXML::XPath.match(@doc, path, @@namespaces)
21
- end
9
+ attr_reader :doc
10
+ attr_reader :parser
11
+ attr_reader :namespaces
22
12
 
23
- # get the first node that matches an xpath
24
- def xpath_first(path)
25
- elements = xpath_all(path)
26
- return elements[0] if elements != nil
27
- return nil
28
- end
13
+ def initialize(doc, parser = 'rexml')
14
+ @doc = doc
15
+ @parser = parser
16
+ # namespaces for use in xpath queries
17
+ # this is technically more correct and is required for
18
+ # libxml to be able to parse the explain block.
19
+ @namespaces = {'zs' => 'http://www.loc.gov/zing/srw/',
20
+ 'ns0' => 'http://explain.z3950.org/dtd/2.0/'}
29
21
 
30
- # get the text inside the first node that matches the xpath
31
- def xpath(path)
32
- e = xpath_first(path)
33
- return e.text if e != nil
34
- return nil
35
22
  end
36
23
  end
37
24
 
@@ -7,7 +7,7 @@ module SRU
7
7
  include Enumerable
8
8
 
9
9
  def each
10
- for term_node in xpath_all('.//zs:term')
10
+ for term_node in xpath_all(@doc, './/zs:term', @namespaces)
11
11
  yield Term.new(term_node)
12
12
  end
13
13
  end
@@ -15,16 +15,17 @@ module SRU
15
15
  include Enumerable
16
16
 
17
17
  def number_of_records
18
- return Integer(xpath('.//zs:numberOfRecords'))
18
+ return Integer(xpath(@doc, './/zs:numberOfRecords', @namespaces))
19
19
  end
20
20
 
21
21
  # Returns the contents of each recordData element in a
22
22
  # SRU searchRetrieve response.
23
23
 
24
24
  def each
25
- for record_data in xpath_all('.//zs:recordData')
26
- if record_data.elements.size > 0
27
- yield record_data.elements[1]
25
+ obj = xpath_all(@doc, './/zs:recordData', @namespaces)
26
+ for record_data in obj
27
+ if obj.size > 0
28
+ yield record_data
28
29
  end
29
30
  end
30
31
  end
@@ -7,11 +7,11 @@ module SRU
7
7
 
8
8
  def initialize(element)
9
9
  super element
10
- @value = xpath('value')
11
- @number_of_records = xpath('numberOfRecords')
12
- @display_term = xpath('displayTerm')
13
- @where_in_list = xpath('whereInList')
14
- @extra_term_data = xpath_first('extraTermData')
10
+ @value = xpath(@doc, 'value')
11
+ @number_of_records = xpath(@doc, 'numberOfRecords')
12
+ @display_term = xpath(@doc, 'displayTerm')
13
+ @where_in_list = xpath(@doc, 'whereInList')
14
+ @extra_term_data = xpath_first(@doc, 'extraTermData')
15
15
  end
16
16
  end
17
17
  end
@@ -0,0 +1,82 @@
1
module SRU
  # XPath helpers that work on either REXML or LibXML objects. The
  # library is chosen from the class name of the object being queried, so
  # this file does not require either one itself.
  module XPath

    # get all matching nodes
    #
    # Returns an Array, empty when nothing matches or when pdoc is not a
    # recognised REXML/LibXML object. namespace is passed through to the
    # library unless it is the empty string.
    def xpath_all(pdoc, path, namespace = '')
      case parser_type(pdoc)
      when 'libxml'
        # run the query once and reuse the result
        found = namespace != '' ? pdoc.find(path, namespace) : pdoc.find(path)
        return found.to_a if found
      when 'rexml'
        return REXML::XPath.match(pdoc, path, namespace) if namespace != ''
        return REXML::XPath.match(pdoc, path)
      end
      []
    end

    # get first matching node, or nil when nothing matches
    def xpath_first(pdoc, path, namespace = '')
      xpath_all(pdoc, path, namespace).first
    end

    # get text for first matching node, or nil when nothing matches
    def xpath(pdoc, path, namespace = '')
      el = xpath_first(pdoc, path, namespace)
      return unless el

      case parser_type(pdoc)
      when 'libxml' then el.content
      when 'rexml'  then el.text
      end
    end

    # figure out an attribute
    #
    # Returns the attribute's value, or nil when the attribute is missing
    # or node is not a REXML::Element / LibXML::XML::Node.
    def get_attribute(node, attr_name)
      case node.class.to_s
      when 'REXML::Element'
        # attributes[] gives the value as a String, like the libxml branch
        node.attributes[attr_name]
      when 'LibXML::XML::Node'
        # There has been a method shift between 0.5 and 0.7
        if node.respond_to?(:property)
          node.property(attr_name)
        else
          node.attributes[attr_name]
        end
      end
    end

    private

    # figure out what sort of object we should do xpath on; nil when the
    # object belongs to neither library
    def parser_type(x)
      case x.class.to_s
      when 'LibXML::XML::Document', 'LibXML::XML::Node', 'LibXML::XML::Node::Set'
        'libxml'
      when 'REXML::Element', 'REXML::Document'
        'rexml'
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
# Client tests run with the libxml parser. They talk to live servers,
# so they need network access.
class TcLibxmlClientTests < Test::Unit::TestCase

  def test_explain
    explain = voyager_client.explain
    assert_equal(SRU::ExplainResponse, explain.class)
    assert_equal('1.1', explain.version)
    assert_equal('localhost', explain.host)
    assert_equal(7090, explain.port)
    assert_equal('voyager', explain.database)
    assert_equal('host=localhost port=7090 database=voyager version=1.1',
                 explain.to_s)
  end

  def test_search_retrieve
    client = voyager_client
    results = client.search_retrieve('twain', :maximumRecords => 5)
    assert_equal(5, results.entries.size)
    assert(results.number_of_records > 2000)
    assert_equal(LibXML::XML::Node, results.entries[0].class)
    assert_equal('recordData', results.entries[0].name)

    # hopefully there isn't a document that matches this :)
    results = client.search_retrieve('fidkidkdiejfl')
    assert_equal(0, results.entries.size)
  end

  def test_default_maximum_records
    results = voyager_client.search_retrieve('twain')
    assert_equal(10, results.entries.size)
  end

  # need to find a target that supports scan so we can exercise it
  #def test_scan
  #  # this scan response appears to be canned might need to change
  #  client = SRU::Client.new 'http://tweed.lib.ed.ac.uk:8080/elf/search/copac'
  #  scan = client.scan('foobar')
  #  assert scan.entries.size > 0
  #  assert_equal SRU::Term, scan.entries[0].class
  #  assert_equal 'low', scan.entries[0].value
  #  assert_equal '1', scan.entries[0].number_of_records
  #end

  def test_xml_exception
    assert_raise(SRU::Exception) { SRU::Client.new('http://www.google.com') }
  end

  def test_http_exception
    assert_raise(SRU::Exception) { SRU::Client.new('http://example.com') }
  end

  private

  # a fresh libxml-backed client for the Voyager endpoint used above
  def voyager_client
    SRU::Client.new('http://z3950.loc.gov:7090/voyager', :parser => 'libxml')
  end

end
54
+
55
+
@@ -1,24 +1,24 @@
1
- class ClientTests < Test::Unit::TestCase
1
+ class TCRexmlClientTests < Test::Unit::TestCase
2
2
 
3
3
  def test_explain
4
- client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager'
4
+ client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager',:parser=>'rexml'
5
5
  explain = client.explain
6
6
  assert_equal SRU::ExplainResponse, explain.class
7
7
  assert_equal '1.1', explain.version
8
- assert_equal 'z3950.loc.gov', explain.host
8
+ assert_equal 'localhost', explain.host
9
9
  assert_equal 7090, explain.port
10
10
  assert_equal 'voyager', explain.database
11
- assert_equal 'host=z3950.loc.gov port=7090 database=voyager version=1.1',
11
+ assert_equal 'host=localhost port=7090 database=voyager version=1.1',
12
12
  explain.to_s
13
13
  end
14
14
 
15
15
  def test_search_retrieve
16
- client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager'
16
+ client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'rexml'
17
17
  results = client.search_retrieve 'twain', :maximumRecords => 5
18
18
  assert_equal 5, results.entries.size
19
19
  assert results.number_of_records > 2000
20
20
  assert_equal REXML::Element, results.entries[0].class
21
- assert_equal 'record', results.entries[0].name
21
+ assert_equal 'recordData', results.entries[0].name
22
22
 
23
23
  # hopefully there isn't a document that matches this :)
24
24
  results = client.search_retrieve 'fidkidkdiejfl'
@@ -26,7 +26,7 @@ class ClientTests < Test::Unit::TestCase
26
26
  end
27
27
 
28
28
  def test_default_maximum_records
29
- client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager'
29
+ client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'rexml'
30
30
  results = client.search_retrieve 'twain'
31
31
  assert_equal 10, results.entries.size
32
32
  end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.2
2
+ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: sru
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.4
7
- date: 2007-12-13 00:00:00 -05:00
6
+ version: 0.0.5
7
+ date: 2008-09-12 00:00:00 -07:00
8
8
  summary: a Ruby library for Search and Retrieve by URL
9
9
  require_paths:
10
10
  - lib
@@ -30,15 +30,17 @@ authors:
30
30
  - Ed Summers
31
31
  files:
32
32
  - lib/sru
33
- - lib/sru.rb
34
- - lib/sru/client.rb
35
- - lib/sru/exception.rb
33
+ - lib/sru/xpath.rb
36
34
  - lib/sru/explain.rb
35
+ - lib/sru/exception.rb
36
+ - lib/sru/search_retrieve.rb
37
37
  - lib/sru/response.rb
38
38
  - lib/sru/scan.rb
39
- - lib/sru/search_retrieve.rb
40
39
  - lib/sru/term.rb
41
- - test/client_test.rb
40
+ - lib/sru/client.rb
41
+ - lib/sru.rb
42
+ - test/tc_rexml_client_test.rb
43
+ - test/tc_libxml_client_test.rb
42
44
  test_files: []
43
45
 
44
46
  rdoc_options: []