oai 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,14 @@
1
+ require 'oai/xpath'
2
+ require 'oai/response'
3
+ require 'oai/exception'
4
+ require 'oai/header'
5
+ require 'oai/record'
6
+ require 'oai/set'
7
+ require 'oai/metadata_format'
8
+ require 'oai/client'
9
+ require 'oai/identify'
10
+ require 'oai/list_identifiers'
11
+ require 'oai/list_metadata_formats'
12
+ require 'oai/get_record'
13
+ require 'oai/list_records'
14
+ require 'oai/list_sets'
@@ -0,0 +1,163 @@
1
require 'uri'
require 'net/http'
require 'rexml/document'
require 'cgi'
require 'date'
5
+
6
module OAI

  # An OAI::Client provides a client api for issuing OAI-PMH verbs against
  # an OAI-PMH server. The 6 OAI-PMH verbs translate directly to methods you
  # can call on an OAI::Client object. Verb arguments are passed as a hash:
  #
  #   client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
  #   client.list_identifiers :metadata_prefix => 'oai_dc'
  #
  # It is worth noting that the api uses methods and parameter names with
  # underscores in them rather than studly caps. So above list_identifiers
  # and metadata_prefix are used instead of the listIdentifiers and
  # metadataPrefix used in the OAI-PMH specification.
  #
  # Also, the from and until arguments which specify dates should be passed
  # in as Date or DateTime objects depending on the granularity supported
  # by the server.
  #
  # The options hash handed to a verb method is never modified; each method
  # works on its own copy.
  #
  # For detailed information on the arguments that can be used please consult
  # the OAI-PMH docs at:
  #
  #   http://www.openarchives.org/OAI/openarchivesprotocol.html

  class Client

    # The constructor which must be passed a valid base url for an oai
    # service:
    #
    #   client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'

    def initialize(base_url)
      @base = URI.parse base_url
    end

    # Equivalent to an Identify request. You'll get back an
    # OAI::IdentifyResponse object which is essentially just a wrapper
    # around a REXML::Document for the response.

    def identify
      IdentifyResponse.new(do_request(:verb => 'Identify'))
    end

    # Equivalent to a ListMetadataFormats request. A
    # ListMetadataFormatsResponse object is returned to you. The only
    # accepted option is :identifier.

    def list_metadata_formats(opts = {})
      opts = opts.merge(:verb => 'ListMetadataFormats')
      verify_verb_arguments opts, [:verb, :identifier]
      ListMetadataFormatsResponse.new(do_request(opts))
    end

    # Equivalent to a ListIdentifiers request. Pass in :from, :until arguments
    # as Date or DateTime objects as appropriate depending on the granularity
    # supported by the server. :metadata_prefix defaults to 'oai_dc' unless a
    # :resumption_token is given.

    def list_identifiers(opts = {})
      opts = opts.merge(:verb => 'ListIdentifiers')
      add_default_metadata_prefix opts
      verify_verb_arguments opts, [:verb, :from, :until, :metadata_prefix,
                                   :set, :resumption_token]
      ListIdentifiersResponse.new(do_request(opts))
    end

    # Equivalent to a GetRecord request. You must supply an identifier
    # argument. You should get back an OAI::GetRecordResponse object
    # which you can extract an OAI::Record object from.

    def get_record(opts = {})
      opts = opts.merge(:verb => 'GetRecord')
      add_default_metadata_prefix opts
      verify_verb_arguments opts, [:verb, :identifier, :metadata_prefix]
      GetRecordResponse.new(do_request(opts))
    end

    # Equivalent to the ListRecords request. A ListRecordsResponse
    # will be returned which you can use to iterate through records
    #
    #   for record in client.list_records
    #     puts record.metadata
    #   end

    def list_records(opts = {})
      opts = opts.merge(:verb => 'ListRecords')
      add_default_metadata_prefix opts
      verify_verb_arguments opts, [:verb, :from, :until, :set,
                                   :resumption_token, :metadata_prefix]
      ListRecordsResponse.new(do_request(opts))
    end

    # Equivalent to the ListSets request. A ListSetsResponse object
    # will be returned which you can use for iterating through the
    # OAI::Set objects. The only accepted option is :resumption_token.
    #
    #   for set in client.list_sets
    #     puts set
    #   end

    def list_sets(opts = {})
      opts = opts.merge(:verb => 'ListSets')
      verify_verb_arguments opts, [:verb, :resumption_token]
      ListSetsResponse.new(do_request(opts))
    end

    private

    # Issues a GET against the base url with +hash+ turned into the query
    # string and returns the reply parsed as a REXML::Document. Any failure
    # while fetching or parsing is re-raised as an OAI::Exception that
    # carries the original error message.
    def do_request(hash)
      uri = @base.clone
      uri.query = build_query(hash)

      begin
        xml = Net::HTTP.get(uri)
        REXML::Document.new(xml)
      rescue StandardError => e
        raise OAI::Exception, "error during oai operation: #{e.message}", caller
      end
    end

    # Turns an options hash into a query string, converting each key from
    # the ruby underscore form to the OAI-PMH studly form.
    def build_query(hash)
      pairs = hash.map do |key, value|
        "#{studly(key.to_s)}=#{format_value(value)}"
      end
      pairs.join('&')
    end

    # Dates get stringified using ISO8601, everything else is url encoded.
    # DateTime must be tested before Date because DateTime is a subclass
    # of Date.
    def format_value(value)
      case value
      # the trailing Z claims UTC, so shift to offset 0 before formatting
      when DateTime then value.new_offset(0).strftime('%Y-%m-%dT%H:%M:%SZ')
      when Date then value.strftime('%Y-%m-%d')
      else CGI.escape(value.to_s)
      end
    end

    # convert foo_bar to fooBar thus allowing our ruby code to use
    # the typical underscore idiom
    def studly(s)
      s.gsub(/_(\w)/) { Regexp.last_match(1).upcase }
    end

    # add a metadata prefix unless it's there or we are working with
    # a resumption token, and having one added could cause problems
    def add_default_metadata_prefix(opts)
      unless opts.has_key?(:metadata_prefix) || opts.has_key?(:resumption_token)
        opts[:metadata_prefix] = 'oai_dc'
      end
    end

    # Raises OAI::Exception when an option name contains an upper case
    # letter (underscore names are expected) or is not listed in
    # +valid_opts+.
    def verify_verb_arguments(opts, valid_opts)
      opts.each_key do |opt|
        if opt.to_s =~ /[A-Z]/
          raise OAI::Exception, "#{opt} should use underscores"
        elsif !valid_opts.include?(opt)
          raise OAI::Exception, "invalid option #{opt} in #{opts[:verb]}"
        end
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ module OAI
2
+ class Exception < RuntimeError # error type raised by this library; inside module OAI a bare `Exception` resolves to this class, not ::Exception
3
+ end
4
+ end
@@ -0,0 +1,11 @@
1
+ module OAI
2
+ class GetRecordResponse < Response # wrapper for the reply to a GetRecord request
3
+ include OAI::XPath
4
+ attr_accessor :record # OAI::Record built from the first record element found in the reply
5
+
6
+ def initialize(doc)
7
+ super doc # Response#initialize stores doc and raises OAI::Exception if the reply holds an error element
8
+ @record = OAI::Record.new(xpath_first(doc, './/record'))
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,12 @@
1
+ module OAI
2
+ class Header # holds the identifier, datestamp and setSpec text read from a header element
3
+ include OAI::XPath
4
+ attr_accessor :identifier, :datestamp, :set_spec
5
+
6
+ def initialize(element) # element: the node the relative XPath lookups below are run against
7
+ @identifier = xpath(element, './/identifier')
8
+ @datestamp = xpath(element, './/datestamp') # stored as the raw text, not parsed into a Date
9
+ @set_spec = xpath(element, './/setSpec') # xpath returns only the first match, so further setSpec elements are ignored
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,28 @@
1
module OAI
  # Wrapper for the reply to an Identify request. The text of each child of
  # the Identify element is exposed through an accessor; a value is nil when
  # the element is missing from the reply.
  class IdentifyResponse < Response
    include OAI::XPath
    attr_accessor :repository_name, :base_url, :protocol, :admin_email,
      :earliest_datestamp, :deleted_record, :granularity, :compression

    # doc is the parsed reply. Response#initialize stores it and raises
    # OAI::Exception if the reply contains an error element.
    def initialize(doc)
      super doc
      @repository_name = xpath(doc, './/Identify/repositoryName')
      @base_url = xpath(doc, './/Identify/baseURL')
      # the OAI-PMH element is named protocolVersion, not protocol
      @protocol = xpath(doc, './/Identify/protocolVersion')
      @admin_email = xpath(doc, './/Identify/adminEmail')
      @earliest_datestamp = xpath(doc, './/Identify/earliestDatestamp')
      @deleted_record = xpath(doc, './/Identify/deletedRecord')
      @granularity = xpath(doc, './/Identify/granularity')
      @compression = xpath(doc, './/Identify/compression')
    end

    # "repositoryName [baseURL]"
    def to_s
      "#{@repository_name} [#{@base_url}]"
    end

    # returns REXML::Element nodes for each description section
    def descriptions
      xpath_all(doc, './/Identify/description')
    end
  end
end
@@ -0,0 +1,12 @@
1
module OAI
  # Wrapper for the reply to a ListIdentifiers request. Enumerable over the
  # OAI::Header objects built from the header elements in the reply.
  class ListIdentifiersResponse < Response
    include Enumerable
    include OAI::XPath

    # Yields an OAI::Header for every ListIdentifiers/header element.
    # Returns an Enumerator when no block is given.
    def each
      return to_enum(:each) unless block_given?

      xpath_all(@doc, './/ListIdentifiers/header').each do |header_element|
        yield OAI::Header.new(header_element)
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module OAI
  # Wrapper for the reply to a ListMetadataFormats request. Enumerable over
  # the OAI::MetadataFormat objects built from the metadataFormat elements.
  class ListMetadataFormatsResponse < Response
    include Enumerable
    include OAI::XPath

    # Yields a MetadataFormat for every metadataFormat element in the reply.
    # Returns an Enumerator when no block is given.
    def each
      return to_enum(:each) unless block_given?

      xpath_all(@doc, './/metadataFormat').each do |format|
        yield MetadataFormat.new(format)
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module OAI

  # allows for iteration across a list of records
  #
  #   for record in client.list_records :metadata_prefix => 'oai_dc'
  #     puts record.metadata
  #   end
  #
  # you'll need to handle resumption tokens

  class ListRecordsResponse < Response
    include OAI::XPath
    include Enumerable

    # Yields an OAI::Record for every ListRecords/record element in the
    # reply. Returns an Enumerator when no block is given.
    def each
      return to_enum(:each) unless block_given?

      xpath_all(@doc, './/ListRecords/record').each do |record_element|
        yield OAI::Record.new(record_element)
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module OAI

  # allows for iteration of the sets found in an oai-pmh server
  #
  #   for set in client.list_sets
  #     puts set
  #   end

  class ListSetsResponse < Response
    include OAI::XPath
    include Enumerable

    # Yields an OAI::Set for every set element in the reply.
    # Returns an Enumerator when no block is given.
    def each
      return to_enum(:each) unless block_given?

      xpath_all(@doc, './/set').each do |set_element|
        yield OAI::Set.new(set_element)
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ module OAI
2
+ class MetadataFormat # holds the prefix, schema and namespace text read from a metadataFormat element
3
+ include OAI::XPath
4
+ attr_accessor :prefix, :schema, :namespace
5
+
6
+ def initialize(element) # element: the node the relative XPath lookups below are run against
7
+ @prefix = xpath(element, './/metadataPrefix') # each value is the first match's text, or nil when nothing matches
8
+ @schema = xpath(element, './/schema')
9
+ @namespace = xpath(element, './/metadataNamespace')
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,11 @@
1
+ module OAI
2
+ class Record # pairs the parsed header of a record element with its metadata node
3
+ include OAI::XPath
4
+ attr_accessor :header, :metadata
5
+
6
+ def initialize(element) # element: the node searched for header and metadata children
7
+ @header = OAI::Header.new xpath_first(element, './/header')
8
+ @metadata = xpath_first(element, './/metadata') # the node itself rather than its text; nil when no metadata element is found
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,20 @@
1
+ module OAI
2
+ class Response # base class of the verb-specific response wrappers
3
+ include OAI::XPath
4
+ attr_reader :doc, :resumption_token
5
+
6
+ def initialize(doc) # doc: the parsed reply; raises OAI::Exception when it contains an error element
7
+ @doc = doc
8
+ @resumption_token = xpath(doc, './/resumptionToken') # text of the first resumptionToken element, nil when there is none
9
+
10
+ # throw an exception if there was an error
11
+ error = xpath_first(doc, './/error')
12
+ if error
13
+ message = error.text
14
+ code = error.attributes['code']
15
+ raise OAI::Exception.new("#{message} [#{code}]") # message format: "<error text> [<code attribute>]"
16
+ end
17
+ end
18
+
19
+ end
20
+ end
@@ -0,0 +1,20 @@
1
+ module OAI
2
+
3
+ # bundles up information about a set retrieved during a
4
+ # ListSets request
5
+
6
+ class Set
7
+ include OAI::XPath
8
+ attr_accessor :name, :spec, :description
9
+
10
+ def initialize(element) # element: the node the relative XPath lookups below are run against
11
+ @name = xpath(element, './/setName')
12
+ @spec = xpath(element, './/setSpec')
13
+ @description = xpath_first(element, './/setDescription') # kept as a node (not text); nil when no setDescription is found
14
+ end
15
+
16
+ def to_s # "setName [setSpec]"
17
+ "#{@name} [#{@spec}]"
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,21 @@
1
+ require 'rexml/xpath'
2
+
3
module OAI
  # Small XPath helpers mixed into the response and record classes. All of
  # them treat a nil document/element as "nothing matches".
  module XPath
    # Returns an Array with every node under +doc+ that matches +path+.
    # The Array is empty when nothing matches or +doc+ is nil.
    def xpath_all(doc, path)
      return [] if doc.nil?

      REXML::XPath.match(doc, path)
    end

    # Returns the first node under +doc+ that matches +path+, or nil.
    def xpath_first(doc, path)
      xpath_all(doc, path).first
    end

    # Returns the text of the first node under +doc+ that matches +path+,
    # or nil when nothing matches.
    def xpath(doc, path)
      node = xpath_first(doc, path)
      node && node.text
    end
  end
end
data/test.rb ADDED
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $LOAD_PATH.unshift 'lib'
4
+
5
+ require 'test/unit'
6
+ require 'oai'
7
+ require 'test/tc_list_identifiers'
8
+ require 'test/tc_list_metadata_formats'
9
+ require 'test/tc_identify'
10
+ require 'test/tc_get_record'
11
+ require 'test/tc_list_records'
12
+ require 'test/tc_list_sets'
@@ -0,0 +1,27 @@
1
# Integration tests for OAI::Client#get_record. They talk to the PubMed
# Central OAI endpoint named below, so they need network access.
class GetRecordTest < Test::Unit::TestCase
  def test_get_one
    client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
    response = client.get_record :identifier => 'oai:pubmedcentral.gov:13901'
    assert_kind_of OAI::GetRecordResponse, response
    assert_kind_of OAI::Record, response.record
    assert_kind_of REXML::Element, response.record.metadata
    assert_kind_of OAI::Header, response.record.header

    # minimal check that the header is working
    assert_equal 'oai:pubmedcentral.gov:13901',
      response.record.header.identifier

    # minimal check that the metadata is working; this has to be
    # assert_equal, a plain assert would treat the second argument as the
    # failure message and pass unconditionally
    assert_equal 'en', response.record.metadata.elements['.//dc:language'].text
  end

  # A get_record call without :identifier is passed on to the server, whose
  # error reply is expected to surface as an OAI::Exception.
  def test_missing_identifier
    client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
    begin
      client.get_record :metadata_prefix => 'oai_dc'
      flunk 'invalid get_record did not throw OAI::Exception'
    rescue OAI::Exception => e
      assert_match(/The request includes illegal arguments/, e.to_s)
    end
  end
end
@@ -0,0 +1,8 @@
1
+ class IdentifyTest < Test::Unit::TestCase # integration test: issues a real Identify request to the endpoint below
2
+ def test_ok
3
+ client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
4
+ response = client.identify
5
+ assert_kind_of OAI::IdentifyResponse, response
6
+ assert_equal 'PubMed Central (PMC3 - NLM DTD) [http://www.pubmedcentral.nih.gov:80/oai/oai.cgi]', response.to_s # to_s renders "repositoryName [baseURL]"
7
+ end
8
+ end
@@ -0,0 +1,52 @@
1
+ require 'date'
2
+
3
+ class ListIdentifiersTest < Test::Unit::TestCase # integration tests for Client#list_identifiers; all but the last issue real HTTP requests
4
+
5
+ def test_list_with_resumption_token
6
+ client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
7
+
8
+ # get a list of identifier headers
9
+ response = client.list_identifiers :metadata_prefix => 'oai_dc'
10
+ assert_kind_of OAI::ListIdentifiersResponse, response
11
+ assert_kind_of OAI::Response, response
12
+ assert response.entries.size > 0
13
+
14
+ # make sure header is put together reasonably
15
+ header = response.entries[0]
16
+ assert_kind_of OAI::Header, header
17
+ assert header.identifier
18
+ assert header.datestamp
19
+ assert header.set_spec
20
+
21
+ # exercise a resumption token and make sure first identifier is different
22
+ first_identifier = response.entries[0].identifier
23
+ token = response.resumption_token
24
+ assert_not_nil token # the test presumes the first reply is partial and carries a token
25
+ response = client.list_identifiers :resumption_token => token
26
+ assert response.entries.size > 0
27
+ assert_not_equal response.entries[0].identifier, first_identifier
28
+ end
29
+
30
+ def test_list_with_date_range
31
+ client = OAI::Client.new 'http://alcme.oclc.org/xtcat/servlet/OAIHandler'
32
+ from_date = Date.new(2001,1,1) # Date values are sent as YYYY-MM-DD by the client
33
+ until_date = Date.new(2006,1,1)
34
+ response = client.list_identifiers :from => from_date, :until => until_date
35
+ assert response.entries.size > 0
36
+ end
37
+
38
+ def test_list_with_datetime_range
39
+ # xtcat should support higher granularity
40
+ client = OAI::Client.new 'http://alcme.oclc.org/xtcat/servlet/OAIHandler'
41
+ from_date = DateTime.new(2001,1,1) # DateTime values are sent with a time part and a trailing Z
42
+ until_date = DateTime.now
43
+ response = client.list_identifiers :from => from_date, :until => until_date
44
+ assert response.entries.size > 0
45
+ end
46
+
47
+ def test_invalid_argument
48
+ client = OAI::Client.new 'http://arXiv.org/oai2'
49
+ assert_raise(OAI::Exception) {client.list_identifiers :foo => 'bar'} # rejected by the client's argument check before any request is sent
50
+ end
51
+
52
+ end
@@ -0,0 +1,15 @@
1
+ class ListMetadataFormatsTest < Test::Unit::TestCase # integration test: issues a real ListMetadataFormats request
2
+ def test_list
3
+ client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
4
+ response = client.list_metadata_formats
5
+ assert_kind_of OAI::ListMetadataFormatsResponse, response
6
+ assert response.entries.size > 0
7
+
8
+ format = response.entries[0] # the assertions below expect oai_dc to be the first format listed
9
+ assert_kind_of OAI::MetadataFormat, format
10
+ assert_equal 'oai_dc', format.prefix
11
+ assert_equal 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', format.schema
12
+ assert_equal 'http://www.openarchives.org/OAI/2.0/oai_dc/', format.namespace
13
+ end
14
+ end
15
+
@@ -0,0 +1,9 @@
1
+ class GetRecordsTest < Test::Unit::TestCase # integration test for Client#list_records (note the class is named GetRecordsTest)
2
+ def test_get_records
3
+ client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
4
+ response = client.list_records # no options: the client fills in :metadata_prefix => 'oai_dc'
5
+ assert_kind_of OAI::ListRecordsResponse, response
6
+ assert response.entries.size > 0
7
+ assert_kind_of OAI::Record, response.entries[0]
8
+ end
9
+ end
@@ -0,0 +1,17 @@
1
+ class ListSetsTest < Test::Unit::TestCase # integration test: issues a real ListSets request
2
+
3
+ def test_list
4
+ client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
5
+ response = client.list_sets
6
+ assert_kind_of OAI::ListSetsResponse, response
7
+ assert response.entries.size > 0 # entries comes from Enumerable, driven by ListSetsResponse#each
8
+ assert_kind_of OAI::Set, response.entries[0]
9
+
10
+ # test iterator
11
+ for set in response
12
+ assert_kind_of OAI::Set, set
13
+ end
14
+ end
15
+
16
+ end
17
+
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.11
3
+ specification_version: 1
4
+ name: oai
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.0.1
7
+ date: 2006-04-20 00:00:00 -05:00
8
+ summary: A ruby library for working with the Open Archive Initiative Protocol for Metadata Harvesting (OAI-PMH)
9
+ require_paths:
10
+ - lib
11
+ email: ehs@pobox.com
12
+ homepage: http://www.textualize.com/ruby-marc
13
+ rubyforge_project:
14
+ description:
15
+ autorequire: oai
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ -
22
+ - ">"
23
+ - !ruby/object:Gem::Version
24
+ version: 0.0.0
25
+ version:
26
+ platform: ruby
27
+ signing_key:
28
+ cert_chain:
29
+ authors:
30
+ - Ed Summers
31
+ files:
32
+ - lib/oai
33
+ - lib/oai.rb
34
+ - lib/oai/client.rb
35
+ - lib/oai/exception.rb
36
+ - lib/oai/get_record.rb
37
+ - lib/oai/header.rb
38
+ - lib/oai/identify.rb
39
+ - lib/oai/list_identifiers.rb
40
+ - lib/oai/list_metadata_formats.rb
41
+ - lib/oai/list_records.rb
42
+ - lib/oai/list_sets.rb
43
+ - lib/oai/metadata_format.rb
44
+ - lib/oai/record.rb
45
+ - lib/oai/response.rb
46
+ - lib/oai/set.rb
47
+ - lib/oai/xpath.rb
48
+ - test/tc_get_record.rb
49
+ - test/tc_identify.rb
50
+ - test/tc_list_identifiers.rb
51
+ - test/tc_list_metadata_formats.rb
52
+ - test/tc_list_records.rb
53
+ - test/tc_list_sets.rb
54
+ test_files:
55
+ - test.rb
56
+ rdoc_options: []
57
+ extra_rdoc_files: []
58
+ executables: []
59
+ extensions: []
60
+ requirements: []
61
+ dependencies: []