sru 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/sru/client.rb +36 -10
- data/lib/sru/explain.rb +8 -9
- data/lib/sru/response.rb +13 -26
- data/lib/sru/scan.rb +1 -1
- data/lib/sru/search_retrieve.rb +5 -4
- data/lib/sru/term.rb +5 -5
- data/lib/sru/xpath.rb +82 -0
- data/test/tc_libxml_client_test.rb +55 -0
- data/test/{client_test.rb → tc_rexml_client_test.rb} +7 -7
- metadata +10 -8
data/lib/sru/client.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'uri'
|
2
2
|
require 'cgi'
|
3
3
|
require 'net/http'
|
4
|
-
require 'rexml/document'
|
5
4
|
|
6
5
|
module SRU
|
7
6
|
|
@@ -30,9 +29,24 @@ module SRU
|
|
30
29
|
# explain request to determine the version to be used in
|
31
30
|
# subsequent requests.
|
32
31
|
|
33
|
-
def initialize(base)
|
32
|
+
# Creates a client for the SRU server at +base+ and loads the XML
# parser that will be used to read its responses.
#
# base    - String URL of the SRU endpoint; parsed with URI.parse.
# options - Hash of options:
#           :parser - 'rexml' (the default) or 'libxml'.
#
# Raises SRU::Exception when the parser name is not recognised or the
# libxml bindings cannot be loaded.
def initialize(base, options={})
  @server = URI.parse base
  @parser = options.fetch(:parser, 'rexml')

  case @parser
  when 'libxml'
    begin
      require 'rubygems'
      require 'xml/libxml'
    rescue LoadError, StandardError
      # LoadError descends from ScriptError, not StandardError, so a bare
      # rescue would let a missing libxml gem escape as a raw LoadError
      # instead of the SRU::Exception callers expect.
      raise SRU::Exception, "unknown parser: #{@parser}", caller
    end
  when 'rexml'
    require 'rexml/document'
    require 'rexml/xpath'
  else
    raise SRU::Exception, "unknown parser: #{@parser}", caller
  end

  # stash this away for future requests
  @version = self.explain.version
end
|
@@ -46,7 +60,7 @@ module SRU
|
|
46
60
|
|
47
61
|
# Sends an explain request to the server and wraps the parsed document
# in an ExplainResponse bound to this client's parser.
def explain
  explain_doc = get_doc(:operation => 'explain')
  ExplainResponse.new(explain_doc, @parser)
end
|
51
65
|
|
52
66
|
|
@@ -62,8 +76,9 @@ module SRU
|
|
62
76
|
options[:query] = query
|
63
77
|
options[:operation] = 'searchRetrieve'
|
64
78
|
options[:maximumRecords] = 10 unless options.has_key? :maximumRecords
|
79
|
+
options[:recordSchema] = 'dc' unless options.has_key? :recordSchema
|
65
80
|
doc = get_doc(options)
|
66
|
-
return SearchResponse.new(doc)
|
81
|
+
return SearchResponse.new(doc, @parser)
|
67
82
|
end
|
68
83
|
|
69
84
|
|
@@ -78,7 +93,7 @@ module SRU
|
|
78
93
|
options[:operation] = 'scan'
|
79
94
|
options[:maximumTerms] = 5 unless options.has_key? :maximumTerms
|
80
95
|
doc = get_doc(options)
|
81
|
-
return ScanResponse.new(doc)
|
96
|
+
return ScanResponse.new(doc, @parser)
|
82
97
|
end
|
83
98
|
|
84
99
|
private
|
@@ -88,7 +103,8 @@ module SRU
|
|
88
103
|
|
89
104
|
def get_doc(hash)
|
90
105
|
# all requests get a version
|
91
|
-
hash[:version] = @version
|
106
|
+
hash[:version] = @version
|
107
|
+
|
92
108
|
|
93
109
|
# don't want to monkey with the original
|
94
110
|
uri = @server.clone
|
@@ -99,12 +115,22 @@ module SRU
|
|
99
115
|
"#{entry[0]}=#{CGI.escape(entry[1].to_s)}"
|
100
116
|
}
|
101
117
|
uri.query = parts.join('&')
|
102
|
-
|
103
118
|
# fetch the xml and build/return a document object from it
|
104
119
|
begin
|
105
120
|
xml = Net::HTTP.get(uri)
|
106
|
-
|
107
|
-
|
121
|
+
# load appropriate parser
|
122
|
+
case @parser
|
123
|
+
when 'libxml'
|
124
|
+
xmlObj = LibXML::XML::Parser.new()
|
125
|
+
# not sure why but the explain namespace does bad things to
|
126
|
+
# libxml
|
127
|
+
#xml = xml.gsub(' xmlns="http://explain.z3950.org/dtd/2.0/"', '')
|
128
|
+
xmlObj.string = xml
|
129
|
+
return xmlObj.parse
|
130
|
+
when 'rexml'
|
131
|
+
return REXML::Document.new(xml)
|
132
|
+
end
|
133
|
+
rescue
|
108
134
|
raise SRU::Exception, "exception during SRU operation", caller
|
109
135
|
end
|
110
136
|
end
|
data/lib/sru/explain.rb
CHANGED
@@ -2,37 +2,36 @@ require 'sru/response'
|
|
2
2
|
|
3
3
|
module SRU
|
4
4
|
# Response to an SRU explain request. Every reader queries the parsed
# explain document (@doc) through the XPath helpers available on
# Response, using the 'zs' and 'ns0' prefixes from @namespaces.
class ExplainResponse < Response
  # One-line summary of the server described by the explain record.
  def to_s
    return "host=#{host} port=#{port} database=#{database} version=#{version}"
  end

  # Text of serverInfo/host, or nil when the element is absent.
  def host
    return xpath(@doc, './/ns0:serverInfo/ns0:host', @namespaces)
  end

  # serverInfo/port converted with Integer(), or nil when the element is
  # absent. Integer() raises ArgumentError on non-numeric text.
  def port
    port = xpath(@doc, './/ns0:serverInfo/ns0:port', @namespaces)
    return nil if not port
    return Integer(port)
  end

  # Text of serverInfo/database, or nil when the element is absent.
  def database
    return xpath(@doc, './/ns0:serverInfo/ns0:database', @namespaces)
  end

  # Text of configInfo/numberOfRecords (not converted to a number), or
  # nil when the element is absent.
  def number_of_records
    return xpath(@doc, './/ns0:configInfo/ns0:numberOfRecords', @namespaces)
  end

  # SRU version advertised by the server: the text of zs:version when
  # present, otherwise the version attribute of serverInfo, otherwise nil.
  def version
    version = xpath(@doc, './/zs:version', @namespaces)
    return version if version

    # also look here. The attribute lookup needs the serverInfo node
    # itself, so use xpath_first; xpath would hand back only its text.
    info = xpath_first(@doc, './/ns0:serverInfo', @namespaces)
    return get_attribute(info, "version") if info
    return nil
  end
end
|
data/lib/sru/response.rb
CHANGED
@@ -1,37 +1,24 @@
|
|
1
|
-
require 'rexml/xpath'
|
2
|
-
|
3
1
|
module SRU
|
4
2
|
|
5
3
|
# base class for all SRU responses
|
6
4
|
# Common parent of the SRU response classes: keeps the parsed document,
# the name of the parser that produced it, and the namespace prefixes
# used in XPath queries, and mixes in the SRU::XPath helpers.
class Response
  require 'sru/xpath'
  include SRU::XPath

  attr_reader :doc, :parser, :namespaces

  # doc    - parsed XML document for the response.
  # parser - name of the parser that built doc ('rexml' by default).
  def initialize(doc, parser = 'rexml')
    @doc, @parser = doc, parser

    # Prefix => URI map for xpath queries. Declaring the explain
    # namespace is technically more correct and is required for libxml
    # to be able to parse the explain block.
    @namespaces = {
      'zs'  => 'http://www.loc.gov/zing/srw/',
      'ns0' => 'http://explain.z3950.org/dtd/2.0/'
    }
  end
end
|
37
24
|
|
data/lib/sru/scan.rb
CHANGED
data/lib/sru/search_retrieve.rb
CHANGED
@@ -15,16 +15,17 @@ module SRU
|
|
15
15
|
include Enumerable
|
16
16
|
|
17
17
|
# Value of zs:numberOfRecords in the response, converted with Integer().
def number_of_records
  count_text = xpath(@doc, './/zs:numberOfRecords', @namespaces)
  Integer(count_text)
end
|
20
20
|
|
21
21
|
# Returns the contents of each recordData element in a
|
22
22
|
# SRU searchRetrieve response.
|
23
23
|
|
24
24
|
# Yields each zs:recordData node found in the response, in the order
# xpath_all returns them. Without a block an Enumerator over the same
# nodes is returned instead of failing on the first yield.
def each
  return enum_for(:each) unless block_given?

  # An empty result simply yields nothing, so no size check is needed.
  xpath_all(@doc, './/zs:recordData', @namespaces).each do |record_data|
    yield record_data
  end
end
|
data/lib/sru/term.rb
CHANGED
@@ -7,11 +7,11 @@ module SRU
|
|
7
7
|
|
8
8
|
# Builds a term from a single element: hands the element to the parent
# initializer, then caches the text of its child elements. Only
# extraTermData is kept as a node rather than as text.
def initialize(element)
  super(element)
  @value, @number_of_records, @display_term, @where_in_list =
    ['value', 'numberOfRecords', 'displayTerm', 'whereInList'].map do |child|
      xpath(@doc, child)
    end
  @extra_term_data = xpath_first(@doc, 'extraTermData')
end
|
16
16
|
end
|
17
17
|
end
|
data/lib/sru/xpath.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
module SRU
  # XPath helpers that work with either REXML or libxml documents. The
  # backend is chosen from the class name of the object being queried,
  # so neither library has to be loaded for this module to be defined.
  module XPath

    # get all matching nodes
    #
    # pdoc      - document or node to search.
    # path      - XPath expression.
    # namespace - prefix => URI map for the expression; '' or nil
    #             means no namespaces.
    #
    # Returns an Array of nodes; empty when pdoc is not a recognised
    # REXML/libxml object.
    def xpath_all(pdoc, path, namespace = '')
      use_ns = !(namespace.nil? || namespace == '')

      case parser_type(pdoc)
      when 'libxml'
        # run the query once and reuse the result
        found = use_ns ? pdoc.find(path, namespace) : pdoc.find(path)
        return found.to_a if found
      when 'rexml'
        return REXML::XPath.match(pdoc, path, namespace) if use_ns
        return REXML::XPath.match(pdoc, path)
      end
      return []
    end

    # get first matching node, or nil when nothing matches
    def xpath_first(pdoc, path, namespace = '')
      elements = xpath_all(pdoc, path, namespace)
      return elements.first if elements
      return nil
    end

    # get text for first matching node, or nil when nothing matches
    def xpath(pdoc, path, namespace = '')
      el = xpath_first(pdoc, path, namespace)
      return unless el

      case parser_type(pdoc)
      when 'libxml'
        return el.content
      when 'rexml'
        return el.text
      end
      return nil
    end

    # figure out an attribute
    #
    # node      - REXML::Element or LibXML::XML::Node.
    # attr_name - String attribute name.
    #
    # Returns the attribute value, or nil for any other kind of node
    # (including nil).
    def get_attribute(node, attr_name)
      case node.class.to_s
      when 'REXML::Element'
        # REXML elements are REXML::Element (there is no REXML::XML).
        # attributes[] gives the value as a String, matching what the
        # libxml branch returns.
        return node.attributes[attr_name]
      when 'LibXML::XML::Node'
        # There has been a method shift between libxml-ruby 0.5 and 0.7:
        # use property when the node has it, attributes[] otherwise.
        return node.property(attr_name) if node.respond_to?(:property)
        return node.attributes[attr_name]
      end
      return nil
    end

    private

    # figure out what sort of object we should do xpath on.
    # Returns 'libxml', 'rexml', or nil for anything else.
    def parser_type(x)
      case x.class.to_s
      when 'LibXML::XML::Document', 'LibXML::XML::Node', 'LibXML::XML::Node::Set'
        return 'libxml'
      when 'REXML::Element', 'REXML::Document'
        return 'rexml'
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Exercises SRU::Client with the libxml parser. These tests talk to
# live servers over HTTP, so they need network access and depend on the
# remote data staying as the assertions expect.
class TcLibxmlClientTests < Test::Unit::TestCase

  def test_explain
    client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'libxml'
    explain = client.explain
    assert_equal SRU::ExplainResponse, explain.class
    assert_equal '1.1', explain.version
    assert_equal 'localhost', explain.host
    assert_equal 7090, explain.port
    assert_equal 'voyager', explain.database
    assert_equal 'host=localhost port=7090 database=voyager version=1.1',
      explain.to_s
  end

  def test_search_retrieve
    client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'libxml'
    results = client.search_retrieve 'twain', :maximumRecords => 5
    assert_equal 5, results.entries.size
    assert results.number_of_records > 2000
    assert_equal LibXML::XML::Node, results.entries[0].class
    assert_equal 'recordData', results.entries[0].name

    # hopefully there isn't a document that matches this :)
    results = client.search_retrieve 'fidkidkdiejfl'
    assert_equal 0, results.entries.size
  end

  def test_default_maximum_records
    client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'libxml'
    results = client.search_retrieve 'twain'
    assert_equal 10, results.entries.size
  end

  # need to find a target that supports scan so we can exercise it
  #def test_scan
  #  # this scan response appears to be canned might need to change
  #  client = SRU::Client.new 'http://tweed.lib.ed.ac.uk:8080/elf/search/copac'
  #  scan = client.scan('foobar')
  #  assert scan.entries.size > 0
  #  assert_equal SRU::Term, scan.entries[0].class
  #  assert_equal 'low', scan.entries[0].value
  #  assert_equal '1', scan.entries[0].number_of_records
  #end

  # The error-path tests select libxml explicitly; without :parser the
  # client falls back to rexml and this suite would not cover libxml's
  # failure handling at all.
  def test_xml_exception
    assert_raise(SRU::Exception) {SRU::Client.new 'http://www.google.com', :parser=>'libxml'}
  end

  def test_http_exception
    assert_raise(SRU::Exception) {SRU::Client.new 'http://example.com', :parser=>'libxml'}
  end

end
|
54
|
+
|
55
|
+
|
@@ -1,24 +1,24 @@
|
|
1
|
-
class ClientTests < Test::Unit::TestCase
|
1
|
+
class TCRexmlClientTests < Test::Unit::TestCase
|
2
2
|
|
3
3
|
def test_explain
|
4
|
-
client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager'
|
4
|
+
client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager',:parser=>'rexml'
|
5
5
|
explain = client.explain
|
6
6
|
assert_equal SRU::ExplainResponse, explain.class
|
7
7
|
assert_equal '1.1', explain.version
|
8
|
-
assert_equal '
|
8
|
+
assert_equal 'localhost', explain.host
|
9
9
|
assert_equal 7090, explain.port
|
10
10
|
assert_equal 'voyager', explain.database
|
11
|
-
assert_equal 'host=
|
11
|
+
assert_equal 'host=localhost port=7090 database=voyager version=1.1',
|
12
12
|
explain.to_s
|
13
13
|
end
|
14
14
|
|
15
15
|
def test_search_retrieve
|
16
|
-
client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager'
|
16
|
+
client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'rexml'
|
17
17
|
results = client.search_retrieve 'twain', :maximumRecords => 5
|
18
18
|
assert_equal 5, results.entries.size
|
19
19
|
assert results.number_of_records > 2000
|
20
20
|
assert_equal REXML::Element, results.entries[0].class
|
21
|
-
assert_equal '
|
21
|
+
assert_equal 'recordData', results.entries[0].name
|
22
22
|
|
23
23
|
# hopefully there isn't a document that matches this :)
|
24
24
|
results = client.search_retrieve 'fidkidkdiejfl'
|
@@ -26,7 +26,7 @@ class ClientTests < Test::Unit::TestCase
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def test_default_maximum_records
|
29
|
-
client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager'
|
29
|
+
client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'rexml'
|
30
30
|
results = client.search_retrieve 'twain'
|
31
31
|
assert_equal 10, results.entries.size
|
32
32
|
end
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.
|
2
|
+
rubygems_version: 0.9.4
|
3
3
|
specification_version: 1
|
4
4
|
name: sru
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.4
|
7
|
-
date:
|
6
|
+
version: 0.0.5
|
7
|
+
date: 2008-09-12 00:00:00 -07:00
|
8
8
|
summary: a Ruby library for Search and Retrieve by URL
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -30,15 +30,17 @@ authors:
|
|
30
30
|
- Ed Summers
|
31
31
|
files:
|
32
32
|
- lib/sru
|
33
|
-
- lib/sru.rb
|
34
|
-
- lib/sru/client.rb
|
35
|
-
- lib/sru/exception.rb
|
33
|
+
- lib/sru/xpath.rb
|
36
34
|
- lib/sru/explain.rb
|
35
|
+
- lib/sru/exception.rb
|
36
|
+
- lib/sru/search_retrieve.rb
|
37
37
|
- lib/sru/response.rb
|
38
38
|
- lib/sru/scan.rb
|
39
|
-
- lib/sru/search_retrieve.rb
|
40
39
|
- lib/sru/term.rb
|
41
|
-
-
|
40
|
+
- lib/sru/client.rb
|
41
|
+
- lib/sru.rb
|
42
|
+
- test/tc_rexml_client_test.rb
|
43
|
+
- test/tc_libxml_client_test.rb
|
42
44
|
test_files: []
|
43
45
|
|
44
46
|
rdoc_options: []
|