sru 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,6 @@
1
1
  require 'uri'
2
2
  require 'cgi'
3
3
  require 'net/http'
4
- require 'rexml/document'
5
4
 
6
5
  module SRU
7
6
 
@@ -30,9 +29,24 @@ module SRU
30
29
  # explain request to determine the version to be used in
31
30
  # subsequent requests.
32
31
 
33
- def initialize(base)
32
# Creates a client for the SRU server at +base+.
#
# base    - String URL of the SRU endpoint; parsed with URI.parse.
# options - Hash of options:
#           :parser - 'rexml' (default) or 'libxml'; selects the XML
#                     library used to parse server responses. A Symbol
#                     is accepted and converted to a String.
#
# The chosen parser library is loaded lazily here so that libxml is only
# required when it was actually requested.
#
# Raises SRU::Exception if the parser name is not recognised or the
# requested parser library cannot be loaded.
def initialize(base,options={})
  @server = URI.parse base
  @parser = options.fetch(:parser, 'rexml').to_s

  case @parser
  when 'libxml'
    begin
      require 'rubygems'
      require 'xml/libxml'
    rescue LoadError, StandardError
      # LoadError is a ScriptError, not a StandardError, so a bare
      # rescue would let a missing libxml gem escape unwrapped.
      raise SRU::Exception, "unable to load parser: #{@parser}", caller
    end
  when 'rexml'
    require 'rexml/document'
    require 'rexml/xpath'
  else
    raise SRU::Exception, "unknown parser: #{@parser}", caller
  end

  # stash this away for future requests
  @version = explain.version
end
@@ -46,7 +60,7 @@ module SRU
46
60
 
47
61
  def explain
48
62
  doc = get_doc(:operation => 'explain')
49
- return ExplainResponse.new(doc)
63
+ return ExplainResponse.new(doc, @parser)
50
64
  end
51
65
 
52
66
 
@@ -62,8 +76,9 @@ module SRU
62
76
  options[:query] = query
63
77
  options[:operation] = 'searchRetrieve'
64
78
  options[:maximumRecords] = 10 unless options.has_key? :maximumRecords
79
+ options[:recordSchema] = 'dc' unless options.has_key? :recordSchema
65
80
  doc = get_doc(options)
66
- return SearchResponse.new(doc)
81
+ return SearchResponse.new(doc, @parser)
67
82
  end
68
83
 
69
84
 
@@ -78,7 +93,7 @@ module SRU
78
93
  options[:operation] = 'scan'
79
94
  options[:maximumTerms] = 5 unless options.has_key? :maximumTerms
80
95
  doc = get_doc(options)
81
- return ScanResponse.new(doc)
96
+ return ScanResponse.new(doc, @parser)
82
97
  end
83
98
 
84
99
  private
@@ -88,7 +103,8 @@ module SRU
88
103
 
89
104
  def get_doc(hash)
90
105
  # all requests get a version
91
- hash[:version] = @version
106
+ hash[:version] = @version
107
+
92
108
 
93
109
  # don't want to monkey with the original
94
110
  uri = @server.clone
@@ -99,12 +115,22 @@ module SRU
99
115
  "#{entry[0]}=#{CGI.escape(entry[1].to_s)}"
100
116
  }
101
117
  uri.query = parts.join('&')
102
-
103
118
  # fetch the xml and build/return a document object from it
104
119
  begin
105
120
  xml = Net::HTTP.get(uri)
106
- return REXML::Document.new(xml)
107
- rescue
121
+ # load appropriate parser
122
+ case @parser
123
+ when 'libxml'
124
+ xmlObj = LibXML::XML::Parser.new()
125
+ # not sure why but the explain namespace does bad things to
126
+ # libxml
127
+ #xml = xml.gsub(' xmlns="http://explain.z3950.org/dtd/2.0/"', '')
128
+ xmlObj.string = xml
129
+ return xmlObj.parse
130
+ when 'rexml'
131
+ return REXML::Document.new(xml)
132
+ end
133
+ rescue
108
134
  raise SRU::Exception, "exception during SRU operation", caller
109
135
  end
110
136
  end
@@ -2,37 +2,36 @@ require 'sru/response'
2
2
 
3
3
  module SRU
4
4
  class ExplainResponse < Response
5
-
6
5
  def to_s
7
6
  return "host=#{host} port=#{port} database=#{database} version=#{version}"
8
7
  end
9
8
 
10
9
  def host
11
- return xpath('.//serverInfo/host')
10
+ return xpath(@doc,'.//ns0:serverInfo/ns0:host', @namespaces )
12
11
  end
13
12
 
14
13
  def port
15
- port = xpath('.//serverInfo/port')
14
+ port = xpath(@doc, './/ns0:serverInfo/ns0:port', @namespaces)
16
15
  return nil if not port
17
16
  return Integer(port)
18
17
  end
19
18
 
20
19
  def database
21
- return xpath('.//serverInfo/database')
20
+ return xpath(@doc, './/ns0:serverInfo/ns0:database', @namespaces)
22
21
  end
23
22
 
24
23
  def number_of_records
25
- return xpath('.//configInfo/numberOfRecords')
24
+ return xpath(@doc, './/ns0:configInfo/ns0:numberOfRecords', @namespaces)
26
25
  end
27
26
 
28
27
  def version
29
- version = xpath('.//zs:explainResponse/zs:version')
28
+ version = xpath(@doc, './/zs:version', @namespaces)
30
29
  return version if version
31
30
 
32
31
  # also look here
33
- info = xpath_first('.//serverInfo')
34
- return info.attributes['version'] if info
35
-
32
+ info = xpath(@doc, './/ns0:serverInfo', @namespaces)
33
+ #return info.attributes['version'] if info
34
+ return get_attribute(info, "version") if info
36
35
  return nil
37
36
  end
38
37
  end
@@ -1,37 +1,24 @@
1
- require 'rexml/xpath'
2
-
3
1
  module SRU
4
2
 
5
3
  # base class for all SRU responses
6
4
  class Response
7
- attr_reader :doc
8
-
9
- # namespaces for use in xpath queries
10
- @@namespaces = {'zs' => 'http://www.loc.gov/zing/srw/'}
5
+ require 'sru/xpath'
6
+ include SRU::XPath
11
7
 
12
- def initialize(doc)
13
- @doc = doc
14
- end
15
8
 
16
- protected
17
-
18
- # get all nodes that match an xpath
19
- def xpath_all(path)
20
- return REXML::XPath.match(@doc, path, @@namespaces)
21
- end
9
+ attr_reader :doc
10
+ attr_reader :parser
11
+ attr_reader :namespaces
22
12
 
23
- # get the first node that matches an xpath
24
- def xpath_first(path)
25
- elements = xpath_all(path)
26
- return elements[0] if elements != nil
27
- return nil
28
- end
13
+ def initialize(doc, parser = 'rexml')
14
+ @doc = doc
15
+ @parser = parser
16
+ # namespaces for use in xpath queries
17
+ # this is technically more correct and is required for
18
+ # libxml to be able to parse the explain block.
19
+ @namespaces = {'zs' => 'http://www.loc.gov/zing/srw/',
20
+ 'ns0' => 'http://explain.z3950.org/dtd/2.0/'}
29
21
 
30
- # get the text inside the first node that matches the xpath
31
- def xpath(path)
32
- e = xpath_first(path)
33
- return e.text if e != nil
34
- return nil
35
22
  end
36
23
  end
37
24
 
@@ -7,7 +7,7 @@ module SRU
7
7
  include Enumerable
8
8
 
9
9
  def each
10
- for term_node in xpath_all('.//zs:term')
10
+ for term_node in xpath_all(@doc, './/zs:term', @namespaces)
11
11
  yield Term.new(term_node)
12
12
  end
13
13
  end
@@ -15,16 +15,17 @@ module SRU
15
15
  include Enumerable
16
16
 
17
17
  def number_of_records
18
- return Integer(xpath('.//zs:numberOfRecords'))
18
+ return Integer(xpath(@doc, './/zs:numberOfRecords', @namespaces))
19
19
  end
20
20
 
21
21
  # Returns the contents of each recordData element in a
22
22
  # SRU searchRetrieve response.
23
23
 
24
24
  def each
25
- for record_data in xpath_all('.//zs:recordData')
26
- if record_data.elements.size > 0
27
- yield record_data.elements[1]
25
+ obj = xpath_all(@doc, './/zs:recordData', @namespaces)
26
+ for record_data in obj
27
+ if obj.size > 0
28
+ yield record_data
28
29
  end
29
30
  end
30
31
  end
@@ -7,11 +7,11 @@ module SRU
7
7
 
8
8
  def initialize(element)
9
9
  super element
10
- @value = xpath('value')
11
- @number_of_records = xpath('numberOfRecords')
12
- @display_term = xpath('displayTerm')
13
- @where_in_list = xpath('whereInList')
14
- @extra_term_data = xpath_first('extraTermData')
10
+ @value = xpath(@doc, 'value')
11
+ @number_of_records = xpath(@doc, 'numberOfRecords')
12
+ @display_term = xpath(@doc, 'displayTerm')
13
+ @where_in_list = xpath(@doc, 'whereInList')
14
+ @extra_term_data = xpath_first(@doc, 'extraTermData')
15
15
  end
16
16
  end
17
17
  end
@@ -0,0 +1,82 @@
1
module SRU
  # Parser-agnostic XPath helpers. Every method inspects the class name of
  # the object it is given to decide whether to use the REXML or the libxml
  # API, so neither library has to be loaded for this module to be defined.
  module XPath

    # Get all matching nodes.
    #
    # pdoc      - REXML or libxml document/node to search.
    # path      - XPath expression String.
    # namespace - prefix => URI mapping for the query; '' (default), nil
    #             or an empty collection means "no namespaces".
    #
    # Returns an Array of nodes; empty when nothing matches or when +pdoc+
    # is not a recognised document/node type.
    def xpath_all(pdoc, path, namespace = '')
      use_ns = !(namespace.nil? ||
                 (namespace.respond_to?(:empty?) && namespace.empty?))

      case parser_type(pdoc)
      when 'libxml'
        # run the query once and reuse the result
        found = use_ns ? pdoc.find(path, namespace) : pdoc.find(path)
        return found.to_a if found
      when 'rexml'
        return REXML::XPath.match(pdoc, path, namespace) if use_ns
        return REXML::XPath.match(pdoc, path)
      end
      []
    end

    # Get the first matching node, or nil when nothing matches.
    def xpath_first(pdoc, path, namespace = '')
      xpath_all(pdoc, path, namespace).first
    end

    # Get the text of the first matching node, or nil when nothing matches.
    def xpath(pdoc, path, namespace = '')
      el = xpath_first(pdoc, path, namespace)
      return unless el

      case parser_type(pdoc)
      when 'libxml' then el.content
      when 'rexml'  then el.text
      end
    end

    # Read the value of attribute +attr_name+ from +node+.
    #
    # Returns the attribute value, or nil when the attribute is missing or
    # +node+ is not a REXML element / libxml node.
    def get_attribute(node, attr_name)
      case node.class.to_s
      when 'REXML::Element'
        # Attributes#[] gives the String value (or nil)
        return node.attributes[attr_name]
      when 'LibXML::XML::Node'
        # There has been a method shift between 0.5 and 0.7: use
        # #property when this libxml build still provides it.
        return node.property(attr_name) if node.respond_to?(:property)
        return node.attributes[attr_name]
      end
      nil
    end

    private

    # Figure out what sort of object we should do xpath on.
    # Returns 'libxml', 'rexml', or nil for anything unrecognised.
    def parser_type(x)
      case x.class.to_s
      when 'LibXML::XML::Document', 'LibXML::XML::Node', 'LibXML::XML::Node::Set'
        'libxml'
      when 'REXML::Element', 'REXML::Document'
        'rexml'
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
# Client tests run with the libxml parser. The client fetches its XML over
# HTTP, so these tests talk to the servers named in the URLs below.
class TcLibxmlClientTests < Test::Unit::TestCase

  def test_explain
    explain = voyager_client.explain
    assert_equal SRU::ExplainResponse, explain.class
    assert_equal '1.1', explain.version
    assert_equal 'localhost', explain.host
    assert_equal 7090, explain.port
    assert_equal 'voyager', explain.database
    assert_equal 'host=localhost port=7090 database=voyager version=1.1',
      explain.to_s
  end

  def test_search_retrieve
    client = voyager_client
    results = client.search_retrieve 'twain', :maximumRecords => 5
    first_entry = results.entries[0]
    assert_equal 5, results.entries.size
    assert results.number_of_records > 2000
    assert_equal LibXML::XML::Node, first_entry.class
    assert_equal 'recordData', first_entry.name

    # hopefully there isn't a document that matches this :)
    results = client.search_retrieve 'fidkidkdiejfl'
    assert_equal 0, results.entries.size
  end

  def test_default_maximum_records
    results = voyager_client.search_retrieve 'twain'
    assert_equal 10, results.entries.size
  end

  # need to find a target that supports scan so we can exercise it
  #def test_scan
  #  # this scan response appears to be canned might need to change
  #  client = SRU::Client.new 'http://tweed.lib.ed.ac.uk:8080/elf/search/copac'
  #  scan = client.scan('foobar')
  #  assert scan.entries.size > 0
  #  assert_equal SRU::Term, scan.entries[0].class
  #  assert_equal 'low', scan.entries[0].value
  #  assert_equal '1', scan.entries[0].number_of_records
  #end

  def test_xml_exception
    assert_raise(SRU::Exception) { SRU::Client.new 'http://www.google.com' }
  end

  def test_http_exception
    assert_raise(SRU::Exception) { SRU::Client.new 'http://example.com' }
  end

  private

  # Fresh libxml-backed client for the Library of Congress test target.
  def voyager_client
    SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'libxml'
  end

end
54
+
55
+
@@ -1,24 +1,24 @@
1
- class ClientTests < Test::Unit::TestCase
1
+ class TCRexmlClientTests < Test::Unit::TestCase
2
2
 
3
3
  def test_explain
4
- client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager'
4
+ client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager',:parser=>'rexml'
5
5
  explain = client.explain
6
6
  assert_equal SRU::ExplainResponse, explain.class
7
7
  assert_equal '1.1', explain.version
8
- assert_equal 'z3950.loc.gov', explain.host
8
+ assert_equal 'localhost', explain.host
9
9
  assert_equal 7090, explain.port
10
10
  assert_equal 'voyager', explain.database
11
- assert_equal 'host=z3950.loc.gov port=7090 database=voyager version=1.1',
11
+ assert_equal 'host=localhost port=7090 database=voyager version=1.1',
12
12
  explain.to_s
13
13
  end
14
14
 
15
15
  def test_search_retrieve
16
- client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager'
16
+ client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'rexml'
17
17
  results = client.search_retrieve 'twain', :maximumRecords => 5
18
18
  assert_equal 5, results.entries.size
19
19
  assert results.number_of_records > 2000
20
20
  assert_equal REXML::Element, results.entries[0].class
21
- assert_equal 'record', results.entries[0].name
21
+ assert_equal 'recordData', results.entries[0].name
22
22
 
23
23
  # hopefully there isn't a document that matches this :)
24
24
  results = client.search_retrieve 'fidkidkdiejfl'
@@ -26,7 +26,7 @@ class ClientTests < Test::Unit::TestCase
26
26
  end
27
27
 
28
28
  def test_default_maximum_records
29
- client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager'
29
+ client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'rexml'
30
30
  results = client.search_retrieve 'twain'
31
31
  assert_equal 10, results.entries.size
32
32
  end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.2
2
+ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: sru
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.4
7
- date: 2007-12-13 00:00:00 -05:00
6
+ version: 0.0.5
7
+ date: 2008-09-12 00:00:00 -07:00
8
8
  summary: a Ruby library for Search and Retrieve by URL
9
9
  require_paths:
10
10
  - lib
@@ -30,15 +30,17 @@ authors:
30
30
  - Ed Summers
31
31
  files:
32
32
  - lib/sru
33
- - lib/sru.rb
34
- - lib/sru/client.rb
35
- - lib/sru/exception.rb
33
+ - lib/sru/xpath.rb
36
34
  - lib/sru/explain.rb
35
+ - lib/sru/exception.rb
36
+ - lib/sru/search_retrieve.rb
37
37
  - lib/sru/response.rb
38
38
  - lib/sru/scan.rb
39
- - lib/sru/search_retrieve.rb
40
39
  - lib/sru/term.rb
41
- - test/client_test.rb
40
+ - lib/sru/client.rb
41
+ - lib/sru.rb
42
+ - test/tc_rexml_client_test.rb
43
+ - test/tc_libxml_client_test.rb
42
44
  test_files: []
43
45
 
44
46
  rdoc_options: []