harvestdor 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3036c2f23661946012d909179b67effa433c2dd9
4
+ data.tar.gz: 149e27d97f4c9d48ca46772d550537aca8a55362
5
+ SHA512:
6
+ metadata.gz: 4406cda66fcb2a726564ba912c60364433b9209ac03218fc2fbf70200fe679f8c964fa416abc0f163eecdabc4db51ad7b57de148f38cf4773b84a4de3fde854e
7
+ data.tar.gz: b18c647163a3970ae02cb3ee79ba0df04f4d5920ccbc5740a83b19b2958297ce2b1e0c761128293ae7bbb51633e419a6689b839f33f34f602a5292b2a078a4db
data/.gitignore ADDED
@@ -0,0 +1,25 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ .travis
7
+ .rvmrc
8
+ Gemfile.lock
9
+ InstalledFiles
10
+ _yardoc
11
+ coverage
12
+ doc/
13
+ lib/bundler/man
14
+ pkg
15
+ rdoc
16
+ spec/reports
17
+ spec/test_logs
18
+ test/tmp
19
+ test/version_tmp
20
+ tmp
21
+ logs
22
+ .DS_Store
23
+ *.tmproj
24
+ tmtags
25
+ .idea/*
data/.travis.yml ADDED
@@ -0,0 +1,14 @@
1
+ language: ruby
2
+ script: rake rspec
3
+ rvm:
4
+ - ruby-head
5
+ - 2.1.1
6
+ - 2.0.0
7
+ - 1.9.3
8
+ - jruby-19mode # JRuby in 1.9 mode
9
+ # - jruby-head
10
+ notifications:
11
+ email:
12
+ - ndushay@stanford.edu
13
+ - laneymcg@stanford.edu
14
+ - bess@stanford.edu
data/.yardopts ADDED
@@ -0,0 +1,3 @@
1
+ --title 'Harvestdor Gem Documentation'
2
+ lib/**/*.rb -
3
+ README.rdoc LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # See harvestdor.gemspec for this gem's dependencies
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,5 @@
1
+ Copyright (c) 20XX-2012. The Board of Trustees of the Leland Stanford Junior University. All rights reserved.
2
+
3
+ Redistribution and use of this distribution in source and binary forms, with or without modification, are permitted provided that: The above copyright notice and this permission notice appear in all copies and supporting documentation; The name, identifiers, and trademarks of The Board of Trustees of the Leland Stanford Junior University are not used in advertising or publicity without the express prior written permission of The Board of Trustees of the Leland Stanford Junior University; Recipients acknowledge that this distribution is made available as a research courtesy, "as is", potentially with defects, without any obligation on the part of The Board of Trustees of the Leland Stanford Junior University to provide support, services, or repair;
4
+
5
+ THE BOARD OF TRUSTEES OF THE LELAND STANFORD JUNIOR UNIVERSITY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, WITH REGARD TO THIS SOFTWARE, INCLUDING WITHOUT LIMITATION ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, AND IN NO EVENT SHALL THE BOARD OF TRUSTEES OF THE LELAND STANFORD JUNIOR UNIVERSITY BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, TORT (INCLUDING NEGLIGENCE) OR STRICT LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,162 @@
1
+ = Harvestdor
2
+ {<img src="https://travis-ci.org/sul-dlss/harvestdor.svg?branch=master" alt="Build Status" />}[https://travis-ci.org/sul-dlss/harvestdor] {<img src="https://gemnasium.com/sul-dlss/harvestdor.svg" alt="Dependency Status" />}[https://gemnasium.com/sul-dlss/harvestdor]
3
+
4
+ A Gem to harvest metadata from DOR.
5
+
6
+ == Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'harvestdor'
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install harvestdor
19
+
20
+ == Usage
21
+
22
+ === Configuration
23
+
24
+ ==== Possible configuration options (with default values unless otherwise indicated)
25
+
26
+ client = Harvestdor::Client.new({ # Example with all possible options
27
+ :log_dir => File.join(File.dirname(__FILE__), "..", "logs"),
28
+ :log_name => 'harvestdor.log',
29
+ :purl => 'http://purl.stanford.edu',
30
+ :http_options => { 'ssl' => {
31
+ 'verify' => false
32
+ },
33
+ 'request' => {
34
+ 'timeout' => 60, # open/read timeout (seconds)
35
+ 'open_timeout' => 60 # connection open timeout (seconds)
36
+ }
37
+ },
38
+ :oai_repository_url => 'https://dor-oaiprovider-prod.stanford.edu/oai', # The OAI repository to connect to
39
+ :oai_client_debug => false,
40
+ :default_metadata_prefix => 'mods',
41
+ :default_from_date => '2012-12-01', # default value is nil
42
+ :default_until_date => '2014-12-01', # default value is nil
43
+ :default_set => 'is_governed_by_hy787xj5878', # default value is nil
44
+ })
45
+
46
+ ==== Option 1: use a yaml file
47
+
48
+ for contents of yml -- see spec/config/oai.yml
49
+
50
+ client = Harvestdor::Client.new({:config_yml_path => path_to_my_yml})
51
+ client.druids_via_oai do |druid|
52
+ # do stuff with the druid, e.g.
53
+ # cm = client.content_metadata(druid)
54
+ # mods = client.mods(druid)
55
+ # create solr doc from mods and cm
56
+ # write solr doc to your app's index
57
+ end
58
+
59
+ ==== Option 2: pass in non-default configurations as a hash
60
+
61
+ client = Harvestdor::Client.new({:oai_repository_url => 'http://my_oai.org', :default_from_date => '2012-12-01'})
62
+ client.druids_via_oai do |druid|
63
+ # do stuff with the druid, e.g.
64
+ # cm = client.content_metadata(druid)
65
+ # mods = client.mods(druid)
66
+ # create solr doc from mods and cm
67
+ # write solr doc to your app's index
68
+ end
69
+
70
+ ==== Option 3: set the attributes explicitly in your code
71
+
72
+ client = Harvestdor::Client.new
73
+ client.config.oai_repository_url = 'http://my_oai.org'
74
+ client.oai_records do |rec|
75
+ # do stuff with the OAI rec, e.g.
76
+ # manipulate metadata into solr doc
77
+ # manipulate stuff from record.about into solr doc
78
+ # write solr doc to your app's index
79
+ end
80
+
81
+ ==== Option 4: pass the OAI parameters as arguments to the Harvestdor::Client harvest methods (oai_records, oai_headers, druids_via_oai)
82
+
83
+ client = Harvestdor::Client.new({:oai_repository_url => 'http://my_oai.org'})
84
+ client.oai_headers(:metadata_prefix => 'foo', :from => '2012-11-27', :set => 'is_governed_by_hy787xj5878') do |header|
85
+ # do stuff with the OAI header
86
+ end
87
+
88
+ === OAI Harvesting
89
+
90
+ Harvestdor::Client.druids_via_oai gets enumerated druids for the records in your specified set / date range (druids are not preceded by 'druid:')
91
+
92
+ Harvestdor::Client.oai_records gets enumerated OAI record objects in your specified set / date range, with the metadata format you indicated
93
+
94
+ You can also get these as arrays:
95
+
96
+ druid_array = client.druids_via_oai(:metadata_prefix => 'foo', :from => '(last_harvested_date)', :set => 'asdfasdf')
97
+
98
+ === XML from PURL pages
99
+
100
+ You can get, for example, the contentMetadata for a druid:
101
+
102
+ it "#content_metadata retrieves contentMetadata as a Nokogiri::XML::Document" do
103
+ cm = Harvestdor.content_metadata('bb375wb8869', 'http://purl-test.stanford.edu')
104
+ cm.should be_kind_of(Nokogiri::XML::Document)
105
+ cm.root.name.should == 'contentMetadata'
106
+ cm.root.attributes['objectId'].text.should == @druid
107
+ end
108
+
109
+ Or the MODS metadata:
110
+
111
+ it "#mods returns a Nokogiri::XML::Document from the purl mods" do
112
+ x = Harvestdor.mods('bb375wb8869', 'http://purl-test.stanford.edu')
113
+ x.should be_kind_of(Nokogiri::XML::Document)
114
+ x.root.name.should == 'mods'
115
+ x.root.namespace.href.should == Harvestdor::MODS_NAMESPACE
116
+ end
117
+
118
+ Similarly for
119
+ # mods
120
+ # public_xml (all of it)
121
+ # content_metadata
122
+ # identity_metadata
123
+ # rights_metadata
124
+ # rdf
125
+ # dc
126
+
127
+ You can also do this from a Harvestdor::Client object, and it will use the purl from the Client.config:
128
+
129
+ client = Harvestdor::Client.new({:purl => 'http://thisone.org'})
130
+ client.identity_metadata('bb375wb8869')
131
+
132
+
133
+ === TODO: Last Harvested Datestamp (Incremental Harvests)
134
+
135
+ Harvestdor::Client.last_datestamp
136
+
137
+ persist this information with your app for incremental harvests
138
+
139
+
140
+ == Contributing
141
+
142
+ # Fork it
143
+ # Create your feature branch (`git checkout -b my-new-feature`)
144
+ # Write code and tests.
145
+ # Commit your changes (`git commit -am 'Added some feature'`)
146
+ # Push to the branch (`git push origin my-new-feature`)
147
+ # Create new Pull Request
148
+
149
+ == Releases
150
+
151
+ * <b>0.0.13</b> Updated to work with Faraday 0.9, releases via rubygems instead of sul-gems
152
+ * <b>0.0.11</b> better error handling, and better testing for errors
153
+ * <b>0.0.10</b> tweak specs to test that unnec fetching isn't done.
154
+ * <b>0.0.9</b> allows public xml to be passed as Nokogiri::XML::Document to content_metadata, etc. to avoid unnec fetching
155
+ * <b>0.0.8</b> avoid undefined method 'size' from scrub_oai_args when using a non-nil default date param
156
+ * <b>0.0.7</b> add oai client timeout overrides, update README
157
+ * <b>0.0.6</b> refactoring oai_harvest for greater simplicity and passing errors through, add oai_record (get_record OAI request)
158
+ * <b>0.0.5</b> don't send empty string arguments to OAI server so you can get actual results
159
+ * <b>0.0.4</b> add integration spec and get it working with actual OAI server
160
+ * <b>0.0.3</b> add method to get mods from purl
161
+ * <b>0.0.2</b> tidy up README
162
+ * <b>0.0.1</b> initial commit
data/Rakefile ADDED
@@ -0,0 +1,50 @@
1
# Rakefile for the harvestdor gem: gem packaging tasks, spec tasks and a
# YARD documentation task.
require 'bundler/gem_tasks'

require 'rake'
require 'bundler'

require 'rspec/core/rake_task'

# Activate the bundle (runtime + development groups) before defining tasks.
# If Bundler cannot do so, report why and exit with Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end

task :default => :ci

desc "run continuous integration suite (tests, coverage, docs)"
task :ci => [:rspec, :doc]

task :spec => :rspec

desc "run specs EXCEPT integration specs"
RSpec::Core::RakeTask.new(:spec_fast) do |spec|
  spec.rspec_opts = ["-c", "-f progress", "--tty", "-t ~integration", "-r ./spec/spec_helper.rb"]
end

RSpec::Core::RakeTask.new(:rspec) do |spec|
  spec.rspec_opts = ["-c", "-f progress", "--tty", "-r ./spec/spec_helper.rb"]
end

# Use yard to build docs
begin
  # YARD is required inside this guarded section (not at the top of the file)
  # so that a missing gem raises LoadError here and the fallback :doc task
  # below is actually reachable.
  require 'yard'
  require 'yard/rake/yardoc_task'

  project_root = File.expand_path(File.dirname(__FILE__))
  doc_dest_dir = File.join(project_root, 'doc')

  YARD::Rake::YardocTask.new(:doc) do |yt|
    yt.files = Dir.glob(File.join(project_root, 'lib', '**', '*.rb')) +
               [ File.join(project_root, 'README.rdoc') ]
    yt.options = ['--output-dir', doc_dest_dir, '--readme', 'README.rdoc', '--title', 'Harvestdor Gem Documentation']
  end
rescue LoadError
  # YARD is not installed: keep a :doc task so `rake doc` explains the problem.
  desc "Generate YARD Documentation"
  task :doc do
    abort "Please install the YARD gem to generate rdoc."
  end
end
+
@@ -0,0 +1,41 @@
1
+ # -*- encoding: utf-8 -*-
2
# Make this gem's lib directory loadable so Harvestdor::VERSION can be read below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'harvestdor/version'

Gem::Specification.new do |spec|
  spec.name        = 'harvestdor'
  spec.version     = Harvestdor::VERSION
  spec.authors     = ['Naomi Dushay']
  spec.email       = ['ndushay@stanford.edu']
  spec.summary     = %q{Harvest DOR object metadata}
  spec.description = %q{Harvest DOR object metadata via a relationship (e.g. hydra:isGovernedBy rdf:resource="info:fedora/druid:hy787xj5878") and dates}
  spec.homepage    = 'https://consul.stanford.edu/display/chimera/Chimera+project'

  # Package every file tracked by git; executables are the basenames of the
  # files under bin/, test files are everything under spec/.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^spec/})
  spec.require_paths = ['lib']

  # Runtime dependencies
  spec.add_dependency 'oai', '~> 0.3.0'
  spec.add_dependency 'faraday', '>= 0.9.0'
  spec.add_dependency 'confstruct'
  spec.add_dependency 'nokogiri'

  # Development dependencies (build, docs, then tests).
  # Bundler installs these when you run 'bundle install' in a checkout of this
  # gem's source; they are not added when another project depends on this gem.
  %w[rake rdoc yard rspec simplecov simplecov-rcov].each do |dev_gem|
    spec.add_development_dependency dev_gem
  end
end
@@ -0,0 +1,12 @@
1
module Harvestdor
  # Error classes for the harvestdor gem. Each one is a direct subclass of
  # StandardError and is named after the piece of data that could not be found.
  module Errors
    class MissingPurlPage < StandardError; end
    class MissingMods < StandardError; end
    class MissingPublicXml < StandardError; end
    class MissingContentMetadata < StandardError; end
    class MissingIdentityMetadata < StandardError; end
    class MissingRightsMetadata < StandardError; end
    class MissingRDF < StandardError; end
    class MissingDC < StandardError; end
  end
end
@@ -0,0 +1,115 @@
1
+ require 'oai'
2
+
3
+ module Harvestdor
4
+
5
+ # Reopens Harvestdor::Client to add methods that perform an OAI harvest and iterate over results
6
+ class Client
7
+
8
# Harvest OAI records with a :list_records request.
# With a block, each record is yielded as it is harvested; without a block,
# all records are collected and returned as an Array.
# @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
# @return [Array<OAI::Record>] when no block is given
def oai_records(oai_args = {})
  if block_given?
    request_args = scrub_oai_args(oai_args)
    harvest(:list_records, request_args) { |record| yield record }
  else
    # re-enter this method through an enumerator to gather every yielded record
    to_enum(:oai_records, oai_args).to_a
  end
end
18
+
19
# Harvest OAI headers with a :list_identifiers request.
# With a block, each header is yielded as it is harvested; without a block,
# all headers are collected and returned as an Array.
# @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
# @return [Array<OAI::Header>] when no block is given
def oai_headers(oai_args = {})
  unless block_given?
    # re-enter this method through an enumerator to gather every yielded header
    return to_enum(:oai_headers, oai_args).to_a
  end

  harvest(:list_identifiers, scrub_oai_args(oai_args)) { |header| yield header }
end
29
+
30
# Harvest OAI headers with a :list_identifiers request and convert each one
# with Harvestdor.druid.
# With a block, each converted value is yielded; without a block, they are
# collected and returned as an Array.
# @param [Hash] oai_args optional OAI params (:metadata_prefix, :from, :until, :set) to be used in lieu of config default values
# @return [Array<String>] when no block is given
def druids_via_oai(oai_args = {})
  if block_given?
    harvest(:list_identifiers, scrub_oai_args(oai_args)) do |header|
      druid = Harvestdor.druid(header)
      yield druid
    end
  else
    # re-enter this method through an enumerator to gather every yielded druid
    to_enum(:druids_via_oai, oai_args).to_a
  end
end
40
+
41
# Fetch one OAI record with a get_record request.
# @param [String] druid appended to "oai:searchworks.stanford.edu/druid:" to form the OAI identifier
# @param [String] md_prefix the OAI metadata prefix for the request; when nil/false is
#   passed explicitly, config.default_metadata_prefix is used instead
# @return [OAI::Record] the record of the get_record response
def oai_record(druid, md_prefix = 'mods')
  chosen_prefix = md_prefix || config.default_metadata_prefix
  request = {
    :identifier      => "oai:searchworks.stanford.edu/druid:#{druid}",
    :metadata_prefix => chosen_prefix
  }
  response = oai_client.get_record(request)
  response.record
end
49
+
50
+ protected #---------------------------------------------------------------------
51
+
52
# Build the argument Hash for an OAI harvest request.
# For each of :metadata_prefix, :from, :until and :set, the value from
# oai_args is used when that key is present (even if its value is nil);
# otherwise the matching config default is read. Entries whose value is nil
# or the empty string are then left out, so they are not sent to the server.
# The oai_args Hash itself is not modified.
# @param [Hash] oai_args Hash of OAI params (metadata_prefix, from, until, set) to be used in lieu of config default values
# @return [Hash] OAI params (metadata_prefix, from, until, set) cleaned up for making harvest request
def scrub_oai_args(oai_args = {})
  candidates = {
    :metadata_prefix => oai_args.key?(:metadata_prefix) ? oai_args[:metadata_prefix] : config.default_metadata_prefix,
    :from            => oai_args.key?(:from) ? oai_args[:from] : config.default_from_date,
    :until           => oai_args.key?(:until) ? oai_args[:until] : config.default_until_date,
    :set             => oai_args.key?(:set) ? oai_args[:set] : config.default_set
  }
  # reject builds a new Hash instead of deleting entries while iterating
  candidates.reject { |_name, value| value.nil? || value == '' }
end
65
+
66
# Send an OAI request and pass every entry of every response page to the block.
# After each page the resumption token is read; while it is non-empty the same
# verb is re-sent with that token, so all chunks are visited. The loop stops on
# a nil response, a page with no entries, or a nil/empty token.
#
# Timeouts are rescued as Faraday::TimeoutError: that constant exists in
# Faraday 0.9 and later, whereas the Faraday::Error::TimeoutError alias was
# dropped in Faraday 1.0. Both rescued error kinds are logged and re-raised.
# @param [Symbol] verb :list_identifiers or :list_records
# @param [Hash] oai_args OAI params (metadata_prefix, from, until, set) used for request
# @yield each entry (header or record) of each response page
# @return [nil]
# @raise [Faraday::TimeoutError] when the request times out
# @raise [OAI::Exception] when the OAI client reports an error
# TODO: This could be moved into ruby-oai?
def harvest(verb, oai_args, &block)
  response = oai_client.send(verb, oai_args)
  while response && response.entries.size > 0
    response.entries.each(&block)

    token = response.resumption_token
    break if token.nil? || token.empty?

    response = oai_client.send(verb, :resumption_token => token)
  end
rescue Faraday::TimeoutError => e
  logger.error "No response from OAI Provider"
  logger.error e
  raise
rescue OAI::Exception => e
  # possibly unnecessary after ruby-oai 0.0.14
  logger.error "Received unexpected OAI::Exception"
  logger.error e
  raise
end
94
+
95
+ end # class Client
96
+
97
+ end # module Harvestdor
98
+
99
module OAI
  class Client
    # Monkey patch of OAI::Client#get so that timeouts are set on each request.
    # Performs the GET through @http_client and returns the response body.
    # The timeout values (in seconds) come from Harvestdor::Client.default_config,
    # not from the configuration of any particular Harvestdor::Client instance.
    # NOTE(review): this reads http_options.timeout / http_options.open_timeout
    # directly; confirm those keys exist at that level of the default config.
    # @param uri the url to request
    # @return the body of the HTTP response
    def get(uri)
      # previously: response = @http_client.get uri
      reply = @http_client.get do |request|
        request.url uri
        # FIXME: hard-coded default settings in harvestdor are used here
        http_defaults = Harvestdor::Client.default_config.http_options
        request.options[:timeout] = http_defaults.timeout # open/read timeout
        request.options[:open_timeout] = http_defaults.open_timeout # connection open timeout
      end
      reply.body
    end
  end
end
@@ -0,0 +1,200 @@
1
require 'nokogiri'
require 'open-uri'
2
+
3
+ module Harvestdor
4
+ # Module-level methods (and Harvestdor::Client wrappers) that retrieve pieces of the purl public xml
5
+
6
+ RDF_NAMESPACE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
7
+ OAI_DC_NAMESPACE = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
8
+ MODS_NAMESPACE = 'http://www.loc.gov/mods/v3'
9
+
10
# the MODS metadata for this fedora object, from the purl server
# Fetches "#{purl_url}/#{druid}.mods" and parses it as UTF-8 XML.
# The url is opened with URI#open (open-uri) rather than Kernel#open:
# Kernel#open no longer opens URLs as of Ruby 3.0, and it would run a
# command if the string started with "|". The block form closes the
# stream once parsing is done.
# @param [String] druid e.g. ab123cd4567
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
# @return [Nokogiri::XML::Document] the MODS for the fedora object
# @raise [Harvestdor::Errors::MissingMods] when the server answers with an HTTP error
def self.mods(druid, purl_url = Harvestdor::PURL_DEFAULT)
  URI.parse("#{purl_url}/#{druid}.mods").open do |io|
    Nokogiri::XML(io, nil, 'UTF-8')
  end
rescue OpenURI::HTTPError
  raise Harvestdor::Errors::MissingMods.new(druid)
end
21
+
22
# the public xml for this fedora object, from the purl page
# If a Nokogiri::XML::Document is passed instead of a druid it is returned
# unchanged and nothing is fetched. Otherwise "#{purl_url}/#{druid}.xml" is
# fetched and parsed.
# The url is opened with URI#open (open-uri) rather than Kernel#open:
# Kernel#open no longer opens URLs as of Ruby 3.0, and it would run a
# command if the string started with "|". The block form closes the
# stream once parsing is done.
# @param [String] druid e.g. ab123cd4567
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
# @return [Nokogiri::XML::Document] the public xml for the fedora object
# @raise [Harvestdor::Errors::MissingPublicXml] when the parsed document has no children
# @raise [Harvestdor::Errors::MissingPurlPage] when the server answers with an HTTP error
def self.public_xml(druid, purl_url = Harvestdor::PURL_DEFAULT)
  return druid if druid.instance_of?(Nokogiri::XML::Document)

  ng_doc = URI.parse("#{purl_url}/#{druid}.xml").open { |io| Nokogiri::XML(io) }
  raise Harvestdor::Errors::MissingPublicXml.new(druid) if !ng_doc || ng_doc.children.empty?
  ng_doc
rescue OpenURI::HTTPError
  raise Harvestdor::Errors::MissingPurlPage.new(druid)
end
36
+
37
# the contentMetadata for this fedora object, from the purl xml
# The public xml is obtained through pub_xml, then the
# /publicObject/contentMetadata node is re-parsed as a document of its own.
# Any StandardError raised while extracting the node is reported as
# MissingContentMetadata; errors from obtaining the public xml are not.
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
#  a Nokogiri::XML::Document containing the public_xml for an object
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
# @return [Nokogiri::XML::Document] the contentMetadata for the fedora object
# @raise [Harvestdor::Errors::MissingContentMetadata] when the node is absent or cannot be extracted
def self.content_metadata(object, purl_url = Harvestdor::PURL_DEFAULT)
  public_doc = pub_xml(object, purl_url)
  begin
    # serialize and re-parse so the node keeps its namespaces, etc.
    fragment = public_doc.root.xpath('/publicObject/contentMetadata').to_xml
    cm_doc = Nokogiri::XML(fragment)
    unless cm_doc && !cm_doc.children.empty?
      raise Harvestdor::Errors::MissingContentMetadata.new(object.inspect)
    end
    cm_doc
  rescue
    raise Harvestdor::Errors::MissingContentMetadata.new(object.inspect)
  end
end
53
+
54
# the identityMetadata for this fedora object, from the purl xml
# The public xml is obtained through pub_xml, then the
# /publicObject/identityMetadata node is re-parsed as a document of its own.
# Any StandardError raised while extracting the node is reported as
# MissingIdentityMetadata; errors from obtaining the public xml are not.
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
#  a Nokogiri::XML::Document containing the public_xml for an object
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
# @return [Nokogiri::XML::Document] the identityMetadata for the fedora object
# @raise [Harvestdor::Errors::MissingIdentityMetadata] when the node is absent or cannot be extracted
def self.identity_metadata(object, purl_url = Harvestdor::PURL_DEFAULT)
  public_doc = pub_xml(object, purl_url)
  begin
    # serialize and re-parse so the node keeps its namespaces, etc.
    nodes = public_doc.root.xpath('/publicObject/identityMetadata')
    id_doc = Nokogiri::XML(nodes.to_xml)
    if id_doc && !id_doc.children.empty?
      id_doc
    else
      raise Harvestdor::Errors::MissingIdentityMetadata.new(object.inspect)
    end
  rescue
    raise Harvestdor::Errors::MissingIdentityMetadata.new(object.inspect)
  end
end
70
+
71
# the rightsMetadata for this fedora object, from the purl xml
# The public xml is obtained through pub_xml, then the
# /publicObject/rightsMetadata node is re-parsed as a document of its own.
# Any StandardError raised while extracting the node is reported as
# MissingRightsMetadata; errors from obtaining the public xml are not.
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
#  a Nokogiri::XML::Document containing the public_xml for an object
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
# @return [Nokogiri::XML::Document] the rightsMetadata for the fedora object
# @raise [Harvestdor::Errors::MissingRightsMetadata] when the node is absent or cannot be extracted
def self.rights_metadata(object, purl_url = Harvestdor::PURL_DEFAULT)
  public_doc = pub_xml(object, purl_url)
  begin
    # serialize and re-parse so the node keeps its namespaces, etc.
    rights_xml = public_doc.root.xpath('/publicObject/rightsMetadata').to_xml
    rights_doc = Nokogiri::XML(rights_xml)
    nothing_found = !rights_doc || rights_doc.children.empty?
    raise Harvestdor::Errors::MissingRightsMetadata.new(object.inspect) if nothing_found
    rights_doc
  rescue
    raise Harvestdor::Errors::MissingRightsMetadata.new(object.inspect)
  end
end
87
+
88
# the RDF for this fedora object, from the purl xml
# The public xml is obtained through pub_xml, then the /publicObject/rdf:RDF
# node (rdf bound to Harvestdor::RDF_NAMESPACE) is re-parsed as a document of
# its own. Any StandardError raised while extracting the node is reported as
# MissingRDF; errors from obtaining the public xml are not.
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
#  a Nokogiri::XML::Document containing the public_xml for an object
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
# @return [Nokogiri::XML::Document] the RDF for the fedora object
# @raise [Harvestdor::Errors::MissingRDF] when the node is absent or cannot be extracted
def self.rdf(object, purl_url = Harvestdor::PURL_DEFAULT)
  public_doc = pub_xml(object, purl_url)
  begin
    # serialize and re-parse so the node keeps its namespaces, etc.
    rdf_nodes = public_doc.root.xpath('/publicObject/rdf:RDF', {'rdf' => Harvestdor::RDF_NAMESPACE})
    rdf_doc = Nokogiri::XML(rdf_nodes.to_xml)
    unless rdf_doc && !rdf_doc.children.empty?
      raise Harvestdor::Errors::MissingRDF.new(object.inspect)
    end
    rdf_doc
  rescue
    raise Harvestdor::Errors::MissingRDF.new(object.inspect)
  end
end
104
+
105
# the Dublin Core for this fedora object, from the purl xml
# The public xml is obtained through pub_xml, then the /publicObject/dc:dc
# node (dc bound to Harvestdor::OAI_DC_NAMESPACE) is re-parsed as a document
# of its own. Any StandardError raised while extracting the node is reported
# as MissingDC; errors from obtaining the public xml are not.
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
#  a Nokogiri::XML::Document containing the public_xml for an object
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
# @return [Nokogiri::XML::Document] the dc for the fedora object
# @raise [Harvestdor::Errors::MissingDC] when the node is absent or cannot be extracted
def self.dc(object, purl_url = Harvestdor::PURL_DEFAULT)
  public_doc = pub_xml(object, purl_url)
  begin
    # serialize and re-parse so the node keeps its namespaces, etc.
    dc_xml = public_doc.root.xpath('/publicObject/dc:dc', {'dc' => Harvestdor::OAI_DC_NAMESPACE}).to_xml
    dc_doc = Nokogiri::XML(dc_xml)
    if dc_doc && !dc_doc.children.empty?
      dc_doc
    else
      raise Harvestdor::Errors::MissingDC.new(object.inspect)
    end
  rescue
    raise Harvestdor::Errors::MissingDC.new(object.inspect)
  end
end
121
+
122
+
123
class Client

  # Each method below delegates to the Harvestdor module method of the same
  # name, supplying config.purl as the purl server url.

  # the MODS metadata for this fedora object, from the purl server
  # @param [String] druid e.g. ab123cd4567, in the purl url
  # @return [Nokogiri::XML::Document] the MODS metadata for the fedora object
  def mods(druid)
    purl_base = config.purl
    Harvestdor.mods(druid, purl_base)
  end

  # the public xml for this fedora object, from the purl server
  # @param [String] druid e.g. ab123cd4567, in the purl url
  # @return [Nokogiri::XML::Document] the public xml for the fedora object
  def public_xml(druid)
    purl_base = config.purl
    Harvestdor.public_xml(druid, purl_base)
  end

  # the contentMetadata for this fedora object, from the purl xml
  # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
  #  a Nokogiri::XML::Document containing the public_xml for an object
  # @return [Nokogiri::XML::Document] the contentMetadata for the fedora object
  def content_metadata(object)
    purl_base = config.purl
    Harvestdor.content_metadata(object, purl_base)
  end

  # the identityMetadata for this fedora object, from the purl xml
  # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
  #  a Nokogiri::XML::Document containing the public_xml for an object
  # @return [Nokogiri::XML::Document] the identityMetadata for the fedora object
  def identity_metadata(object)
    purl_base = config.purl
    Harvestdor.identity_metadata(object, purl_base)
  end

  # the rightsMetadata for this fedora object, from the purl xml
  # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
  #  a Nokogiri::XML::Document containing the public_xml for an object
  # @return [Nokogiri::XML::Document] the rightsMetadata for the fedora object
  def rights_metadata(object)
    purl_base = config.purl
    Harvestdor.rights_metadata(object, purl_base)
  end

  # the RDF for this fedora object, from the purl xml
  # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
  #  a Nokogiri::XML::Document containing the public_xml for an object
  # @return [Nokogiri::XML::Document] the RDF for the fedora object
  def rdf(object)
    purl_base = config.purl
    Harvestdor.rdf(object, purl_base)
  end

  # the Dublin Core for this fedora object, from the purl xml
  # @param [Object] object a String containing a druid (e.g. ab123cd4567), or
  #  a Nokogiri::XML::Document containing the public_xml for an object
  # @return [Nokogiri::XML::Document] the dc for the fedora object
  def dc(object)
    purl_base = config.purl
    Harvestdor.dc(object, purl_base)
  end

end # class Client
180
+
181
+ protected #--------------------------------------------
182
+
183
# Resolve the first argument of the metadata methods to a public xml document.
# A String is treated as a druid and looked up with Harvestdor.public_xml;
# a Nokogiri::XML::Document is returned as is. The checks use instance_of?,
# so subclasses of String or Nokogiri::XML::Document are rejected too.
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
#  a Nokogiri::XML::Document containing the public_xml for an object
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
# @return [Nokogiri::XML::Document] the public xml for a DOR object
# @raise [RuntimeError] when object is neither a String nor a Nokogiri::XML::Document
def self.pub_xml(object, purl_url = Harvestdor::PURL_DEFAULT)
  # it's a druid
  return Harvestdor.public_xml(object, purl_url) if object.instance_of?(String)
  return object if object.instance_of?(Nokogiri::XML::Document)

  raise "expected String or Nokogiri::XML::Document for first argument, got #{object.class}"
end
199
+
200
+ end # module Harvestdor
@@ -0,0 +1,3 @@
1
module Harvestdor
  # Version string of the harvestdor gem
  VERSION = '0.0.13'
end