oai 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ require 'oai/xpath'
2
+ require 'oai/response'
3
+ require 'oai/exception'
4
+ require 'oai/header'
5
+ require 'oai/record'
6
+ require 'oai/set'
7
+ require 'oai/metadata_format'
8
+ require 'oai/client'
9
+ require 'oai/identify'
10
+ require 'oai/list_identifiers'
11
+ require 'oai/list_metadata_formats'
12
+ require 'oai/get_record'
13
+ require 'oai/list_records'
14
+ require 'oai/list_sets'
@@ -0,0 +1,163 @@
1
+ require 'uri'
2
+ require 'net/http'
3
+ require 'rexml/document'
4
+ require 'cgi'
5
+
6
# Date and DateTime are referenced below when :from / :until are serialised.
require 'date'

module OAI

  # An OAI::Client provides a client api for issuing OAI-PMH verbs against
  # an OAI-PMH server. The 6 OAI-PMH verbs translate directly to methods you
  # can call on an OAI::Client object. Verb arguments are passed as a hash:
  #
  #   client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
  #   client.list_identifiers :metadata_prefix => 'oai_dc'
  #
  # It is worth noting that the api uses methods and parameter names with
  # underscores in them rather than studly caps. So above list_identifiers
  # and metadata_prefix are used instead of the listIdentifiers and
  # metadataPrefix used in the OAI-PMH specification.
  #
  # Also, the from and until arguments which specify dates should be passed
  # in as Date, DateTime or Time objects depending on the granularity
  # supported by the server. DateTime and Time values are converted to UTC
  # before being sent.
  #
  # The options hash handed to a verb method is never modified.
  #
  # For detailed information on the arguments that can be used please consult
  # the OAI-PMH docs at:
  #
  #   http://www.openarchives.org/OAI/openarchivesprotocol.html

  class Client

    # The constructor which must be passed a valid base url for an oai
    # service:
    #
    #   client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'

    def initialize(base_url)
      @base = URI.parse base_url
    end

    # Equivalent to an Identify request. You'll get back an
    # OAI::IdentifyResponse object built from the REXML::Document of the
    # response.

    def identify
      IdentifyResponse.new(do_request(:verb => 'Identify'))
    end

    # Equivalent to a ListMetadataFormats request. A
    # ListMetadataFormatsResponse object is returned to you. The only
    # accepted option is :identifier.

    def list_metadata_formats(opts={})
      opts = opts.merge(:verb => 'ListMetadataFormats')
      verify_verb_arguments opts, [:verb, :identifier]
      ListMetadataFormatsResponse.new(do_request(opts))
    end

    # Equivalent to a ListIdentifiers request. Pass in :from, :until arguments
    # as Date or DateTime objects as appropriate depending on the granularity
    # supported by the server. :metadata_prefix defaults to 'oai_dc' unless a
    # :resumption_token is given.

    def list_identifiers(opts={})
      opts = opts.merge(:verb => 'ListIdentifiers')
      add_default_metadata_prefix opts
      verify_verb_arguments opts, [:verb, :from, :until, :metadata_prefix, :set, :resumption_token]
      ListIdentifiersResponse.new(do_request(opts))
    end

    # Equivalent to a GetRecord request. You must supply an identifier
    # argument. You should get back an OAI::GetRecordResponse object
    # which you can extract an OAI::Record object from.

    def get_record(opts={})
      opts = opts.merge(:verb => 'GetRecord')
      add_default_metadata_prefix opts
      verify_verb_arguments opts, [:verb, :identifier, :metadata_prefix]
      GetRecordResponse.new(do_request(opts))
    end

    # Equivalent to the ListRecords request. A ListRecordsResponse
    # will be returned which you can use to iterate through records
    #
    #   for record in client.list_records
    #     puts record.metadata
    #   end

    def list_records(opts={})
      opts = opts.merge(:verb => 'ListRecords')
      add_default_metadata_prefix opts
      verify_verb_arguments opts, [:verb, :from, :until, :set,
        :resumption_token, :metadata_prefix]
      ListRecordsResponse.new(do_request(opts))
    end

    # Equivalent to the ListSets request. A ListSetsResponse object
    # will be returned which you can use for iterating through the
    # OAI::Set objects. The only accepted option is :resumption_token.
    #
    #   for set in client.list_sets
    #     puts set
    #   end

    def list_sets(opts={})
      opts = opts.merge(:verb => 'ListSets')
      # the whitelist must use the underscore form: verify_verb_arguments
      # rejects any key containing an upper case letter
      verify_verb_arguments opts, [:verb, :resumption_token]
      ListSetsResponse.new(do_request(opts))
    end

    private

    # Issues a GET against the base url with the given arguments as the query
    # string and returns the body parsed into a REXML::Document. Any
    # StandardError raised while fetching or parsing is re-raised as an
    # OAI::Exception carrying the underlying message.
    def do_request(hash)
      uri = @base.clone
      uri.query = build_query(hash)

      begin
        xml = Net::HTTP.get(uri)
        REXML::Document.new(xml)
      rescue => e
        raise OAI::Exception, "error during oai operation: #{e.message}", caller
      end
    end

    # turn the argument hash into a query string with studly-cased keys
    def build_query(hash)
      hash.map { |key, value| "#{studly(key.to_s)}=#{encode_value(value)}" }.join('&')
    end

    # dates get stringified using ISO8601, everything else is url encoded
    def encode_value(value)
      case value
      # DateTime is a subclass of Date so it has to be tested first; the
      # trailing Z claims UTC, so shift to offset 0 before formatting
      when DateTime then value.new_offset(0).strftime('%Y-%m-%dT%H:%M:%SZ')
      when Time     then value.getutc.strftime('%Y-%m-%dT%H:%M:%SZ')
      when Date     then value.strftime('%Y-%m-%d')
      else CGI.escape(value.to_s)
      end
    end

    # convert foo_bar to fooBar thus allowing our ruby code to use
    # the typical underscore idiom
    def studly(s)
      s.gsub(/_(\w)/) { $1.upcase }
    end

    # add a metadata prefix unless it's there or we are working with
    # a resumption token, and having one added could cause problems
    def add_default_metadata_prefix(opts)
      unless opts.key?(:metadata_prefix) || opts.key?(:resumption_token)
        opts[:metadata_prefix] = 'oai_dc'
      end
    end

    # Raises OAI::Exception when an option name contains an upper case
    # letter or is not listed in valid_opts.
    def verify_verb_arguments(opts, valid_opts)
      opts.each_key do |opt|
        # compare the string form so symbol and string keys behave alike
        if opt.to_s =~ /[A-Z]/
          raise OAI::Exception, "#{opt} should use underscores"
        elsif !valid_opts.include?(opt)
          raise OAI::Exception, "invalid option #{opt} in #{opts[:verb]}"
        end
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
module OAI
  # Error type raised by this library; a plain RuntimeError subclass with no
  # behaviour of its own.
  class Exception < RuntimeError; end
end
@@ -0,0 +1,11 @@
1
module OAI
  # Response wrapper for a GetRecord request. The first <record> element
  # found in the document is exposed as an OAI::Record through #record.
  class GetRecordResponse < Response
    include OAI::XPath
    attr_accessor :record

    # doc is the parsed response document; it is handed to the superclass
    # constructor before the record is extracted.
    def initialize(doc)
      super(doc)
      record_element = xpath_first(doc, './/record')
      @record = OAI::Record.new(record_element)
    end
  end
end
@@ -0,0 +1,12 @@
1
module OAI
  # Holds the text of the identifier, datestamp and setSpec elements found
  # beneath a header element. Only the first match of each is kept.
  class Header
    include OAI::XPath
    attr_accessor :identifier, :datestamp, :set_spec

    # element is the node the three lookups are run against
    def initialize(element)
      @identifier, @datestamp, @set_spec =
        ['.//identifier', './/datestamp', './/setSpec'].map do |path|
          xpath(element, path)
        end
    end
  end
end
@@ -0,0 +1,28 @@
1
module OAI
  # Response wrapper for an Identify request. Each attribute holds the text
  # of the first matching child of the Identify element, or nil when the
  # element is absent.
  class IdentifyResponse < Response
    include OAI::XPath
    attr_accessor :repository_name, :base_url, :protocol, :admin_email,
      :earliest_datestamp, :deleted_record, :granularity, :compression

    # doc is the parsed response document; it is handed to the superclass
    # constructor before the individual fields are extracted.
    def initialize(doc)
      super doc
      @repository_name = xpath(doc, './/Identify/repositoryName')
      @base_url = xpath(doc, './/Identify/baseURL')
      @protocol = xpath(doc, './/Identify/protocol')
      @admin_email = xpath(doc, './/Identify/adminEmail')
      @earliest_datestamp = xpath(doc, './/Identify/earliestDatestamp')
      @deleted_record = xpath(doc, './/Identify/deletedRecord')
      @granularity = xpath(doc, './/Identify/granularity')
      # was '..//Identify/compression': the leading '..' steps to the parent
      # of the node being searched instead of searching beneath it
      @compression = xpath(doc, './/Identify/compression')
    end

    # "repository name [base url]"
    def to_s
      "#{@repository_name} [#{@base_url}]"
    end

    # returns REXML::Element nodes for each description section
    def descriptions
      xpath_all(doc, './/Identify/description')
    end
  end
end
@@ -0,0 +1,12 @@
1
module OAI
  # Response wrapper for a ListIdentifiers request. Enumerable over the
  # headers contained in this response document.
  class ListIdentifiersResponse < Response
    include Enumerable
    include OAI::XPath

    # yields an OAI::Header for every ListIdentifiers/header element
    def each
      xpath_all(@doc, './/ListIdentifiers/header').each do |node|
        yield OAI::Header.new(node)
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module OAI
  # Response wrapper for a ListMetadataFormats request. Enumerable over the
  # metadata formats contained in this response document.
  class ListMetadataFormatsResponse < Response
    include Enumerable
    include OAI::XPath

    # yields a MetadataFormat for every metadataFormat element
    def each
      xpath_all(@doc, './/metadataFormat').each do |node|
        yield MetadataFormat.new(node)
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module OAI

  # allows for iteration across the records of a ListRecords response
  #
  #   for record in client.list_records :metadata_prefix => 'oai_dc'
  #     puts record.metadata
  #   end
  #
  # iteration only covers the records held in this response document, so
  # you'll need to handle resumption tokens yourself

  class ListRecordsResponse < Response
    include OAI::XPath
    include Enumerable

    # yields an OAI::Record for every ListRecords/record element
    def each
      xpath_all(@doc, './/ListRecords/record').each do |node|
        yield OAI::Record.new(node)
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module OAI

  # allows for iteration of the sets returned by a ListSets request
  #
  #   for set in client.list_sets
  #     puts set
  #   end

  class ListSetsResponse < Response
    include OAI::XPath
    include Enumerable

    # yields an OAI::Set for every set element in the response document
    def each
      xpath_all(@doc, './/set').each do |node|
        yield OAI::Set.new(node)
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module OAI
  # Holds the text of the metadataPrefix, schema and metadataNamespace
  # elements found beneath a metadata format element.
  class MetadataFormat
    include OAI::XPath
    attr_accessor :prefix, :schema, :namespace

    # element is the node the three lookups are run against
    def initialize(element)
      @prefix, @schema, @namespace =
        ['.//metadataPrefix', './/schema', './/metadataNamespace'].map do |path|
          xpath(element, path)
        end
    end
  end
end
@@ -0,0 +1,11 @@
1
module OAI
  # A single record: its header wrapped in an OAI::Header, plus the raw
  # metadata node as returned by the xpath lookup.
  class Record
    include OAI::XPath
    attr_accessor :header, :metadata

    # element is the node the header and metadata lookups are run against
    def initialize(element)
      header_node = xpath_first(element, './/header')
      @header = OAI::Header.new(header_node)
      @metadata = xpath_first(element, './/metadata')
    end
  end
end
@@ -0,0 +1,20 @@
1
module OAI
  # Base class for the verb responses. Keeps the response document, exposes
  # the text of its resumptionToken element (nil when there is none) and
  # refuses to be constructed from a document that contains an error element.
  class Response
    include OAI::XPath
    attr_reader :doc, :resumption_token

    # Raises OAI::Exception, with the error text and its code attribute in
    # the message, when the document contains an error element.
    def initialize(doc)
      @doc = doc
      @resumption_token = xpath(doc, './/resumptionToken')

      failure = xpath_first(doc, './/error')
      return unless failure

      raise OAI::Exception.new("#{failure.text} [#{failure.attributes['code']}]")
    end

  end
end
@@ -0,0 +1,20 @@
1
module OAI

  # bundles up the information about one set retrieved during a ListSets
  # request: the setName and setSpec text, and the setDescription node

  class Set
    include OAI::XPath
    attr_accessor :name, :spec, :description

    # element is the node the lookups are run against
    def initialize(element)
      @name, @spec = ['.//setName', './/setSpec'].map { |path| xpath(element, path) }
      @description = xpath_first(element, './/setDescription')
    end

    # "name [spec]"
    def to_s
      "#{@name} [#{@spec}]"
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'rexml/xpath'
2
+
3
module OAI
  # Mixin with three small lookup helpers built on REXML::XPath.match.
  module XPath
    # everything REXML::XPath.match returns for path evaluated against doc
    def xpath_all(doc, path)
      REXML::XPath.match(doc, path)
    end

    # the first entry of xpath_all, or nil
    def xpath_first(doc, path)
      matches = xpath_all(doc, path)
      matches.nil? ? nil : matches[0]
    end

    # the text of the first match, or nil when nothing matched
    def xpath(doc, path)
      node = xpath_first(doc, path)
      node.nil? ? nil : node.text
    end
  end
end
data/test.rb ADDED
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $LOAD_PATH.unshift 'lib'
4
+
5
+ require 'test/unit'
6
+ require 'oai'
7
+ require 'test/tc_list_identifiers'
8
+ require 'test/tc_list_metadata_formats'
9
+ require 'test/tc_identify'
10
+ require 'test/tc_get_record'
11
+ require 'test/tc_list_records'
12
+ require 'test/tc_list_sets'
@@ -0,0 +1,27 @@
1
# Exercises OAI::Client#get_record against the live PubMed Central endpoint.
class GetRecordTest < Test::Unit::TestCase
  def test_get_one
    client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
    response = client.get_record :identifier => 'oai:pubmedcentral.gov:13901'
    assert_kind_of OAI::GetRecordResponse, response
    assert_kind_of OAI::Record, response.record
    assert_kind_of REXML::Element, response.record.metadata
    assert_kind_of OAI::Header, response.record.header

    # minimal check that the header is working
    assert_equal 'oai:pubmedcentral.gov:13901',
      response.record.header.identifier

    # minimal check that the metadata is working; this was a bare `assert`,
    # which treats its second argument as the failure message and so could
    # never fail
    assert_equal 'en', response.record.metadata.elements['.//dc:language'].text
  end

  def test_missing_identifier
    client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
    begin
      client.get_record :metadata_prefix => 'oai_dc'
      flunk 'invalid get_record did not throw OAI::Exception'
    rescue OAI::Exception => e
      assert_match(/The request includes illegal arguments/, e.to_s)
    end
  end
end
@@ -0,0 +1,8 @@
1
# Exercises OAI::Client#identify against the live PubMed Central endpoint.
class IdentifyTest < Test::Unit::TestCase
  def test_ok
    identity = OAI::Client.new('http://www.pubmedcentral.gov/oai/oai.cgi').identify
    assert_kind_of(OAI::IdentifyResponse, identity)

    expected = 'PubMed Central (PMC3 - NLM DTD) [http://www.pubmedcentral.nih.gov:80/oai/oai.cgi]'
    assert_equal(expected, identity.to_s)
  end
end
@@ -0,0 +1,52 @@
1
+ require 'date'
2
+
3
# Exercises OAI::Client#list_identifiers against live OAI-PMH endpoints.
class ListIdentifiersTest < Test::Unit::TestCase

  def test_list_with_resumption_token
    client = OAI::Client.new('http://www.pubmedcentral.gov/oai/oai.cgi')

    # first page of identifier headers
    first_page = client.list_identifiers(:metadata_prefix => 'oai_dc')
    assert_kind_of(OAI::ListIdentifiersResponse, first_page)
    assert_kind_of(OAI::Response, first_page)
    assert(first_page.entries.size > 0)

    # the header should be put together reasonably
    header = first_page.entries[0]
    assert_kind_of(OAI::Header, header)
    assert(header.identifier)
    assert(header.datestamp)
    assert(header.set_spec)

    # follow the resumption token; the next page must start somewhere else
    first_identifier = first_page.entries[0].identifier
    token = first_page.resumption_token
    assert_not_nil(token)
    next_page = client.list_identifiers(:resumption_token => token)
    assert(next_page.entries.size > 0)
    assert_not_equal(next_page.entries[0].identifier, first_identifier)
  end

  def test_list_with_date_range
    client = OAI::Client.new('http://alcme.oclc.org/xtcat/servlet/OAIHandler')
    response = client.list_identifiers(:from => Date.new(2001,1,1),
                                       :until => Date.new(2006,1,1))
    assert(response.entries.size > 0)
  end

  def test_list_with_datetime_range
    # xtcat should support higher granularity
    client = OAI::Client.new('http://alcme.oclc.org/xtcat/servlet/OAIHandler')
    response = client.list_identifiers(:from => DateTime.new(2001,1,1),
                                       :until => DateTime.now)
    assert(response.entries.size > 0)
  end

  def test_invalid_argument
    client = OAI::Client.new('http://arXiv.org/oai2')
    assert_raise(OAI::Exception) { client.list_identifiers(:foo => 'bar') }
  end

end
@@ -0,0 +1,15 @@
1
# Exercises OAI::Client#list_metadata_formats against the live PubMed
# Central endpoint.
class ListMetadataFormatsTest < Test::Unit::TestCase
  def test_list
    formats = OAI::Client.new('http://www.pubmedcentral.gov/oai/oai.cgi').list_metadata_formats
    assert_kind_of(OAI::ListMetadataFormatsResponse, formats)
    assert(formats.entries.size > 0)

    # the first format listed is expected to be Dublin Core
    dublin_core = formats.entries[0]
    assert_kind_of(OAI::MetadataFormat, dublin_core)
    assert_equal('oai_dc', dublin_core.prefix)
    assert_equal('http://www.openarchives.org/OAI/2.0/oai_dc.xsd', dublin_core.schema)
    assert_equal('http://www.openarchives.org/OAI/2.0/oai_dc/', dublin_core.namespace)
  end
end
15
+
@@ -0,0 +1,9 @@
1
# Exercises OAI::Client#list_records against the live PubMed Central
# endpoint.
class GetRecordsTest < Test::Unit::TestCase
  def test_get_records
    records = OAI::Client.new('http://www.pubmedcentral.gov/oai/oai.cgi').list_records
    assert_kind_of(OAI::ListRecordsResponse, records)
    assert(records.entries.size > 0)
    assert_kind_of(OAI::Record, records.entries[0])
  end
end
@@ -0,0 +1,17 @@
1
# Exercises OAI::Client#list_sets against the live PubMed Central endpoint.
class ListSetsTest < Test::Unit::TestCase

  def test_list
    sets = OAI::Client.new('http://www.pubmedcentral.gov/oai/oai.cgi').list_sets
    assert_kind_of(OAI::ListSetsResponse, sets)
    assert(sets.entries.size > 0)
    assert_kind_of(OAI::Set, sets.entries[0])

    # test iterator
    sets.each do |set|
      assert_kind_of(OAI::Set, set)
    end
  end

end
17
+
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.11
3
+ specification_version: 1
4
+ name: oai
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.0.1
7
+ date: 2006-04-20 00:00:00 -05:00
8
+ summary: A ruby library for working with the Open Archive Initiative Protocol for Metadata Harvesting (OAI-PMH)
9
+ require_paths:
10
+ - lib
11
+ email: ehs@pobox.com
12
+ homepage: http://www.textualize.com/ruby-marc
13
+ rubyforge_project:
14
+ description:
15
+ autorequire: oai
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ -
22
+ - ">"
23
+ - !ruby/object:Gem::Version
24
+ version: 0.0.0
25
+ version:
26
+ platform: ruby
27
+ signing_key:
28
+ cert_chain:
29
+ authors:
30
+ - Ed Summers
31
+ files:
32
+ - lib/oai
33
+ - lib/oai.rb
34
+ - lib/oai/client.rb
35
+ - lib/oai/exception.rb
36
+ - lib/oai/get_record.rb
37
+ - lib/oai/header.rb
38
+ - lib/oai/identify.rb
39
+ - lib/oai/list_identifiers.rb
40
+ - lib/oai/list_metadata_formats.rb
41
+ - lib/oai/list_records.rb
42
+ - lib/oai/list_sets.rb
43
+ - lib/oai/metadata_format.rb
44
+ - lib/oai/record.rb
45
+ - lib/oai/response.rb
46
+ - lib/oai/set.rb
47
+ - lib/oai/xpath.rb
48
+ - test/tc_get_record.rb
49
+ - test/tc_identify.rb
50
+ - test/tc_list_identifiers.rb
51
+ - test/tc_list_metadata_formats.rb
52
+ - test/tc_list_records.rb
53
+ - test/tc_list_sets.rb
54
+ test_files:
55
+ - test.rb
56
+ rdoc_options: []
57
+ extra_rdoc_files: []
58
+ executables: []
59
+ extensions: []
60
+ requirements: []
61
+ dependencies: []