harvestdor 0.0.14 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.hound.yml +2 -0
- data/.rubocop.yml +3 -0
- data/.rubocop_todo.yml +249 -0
- data/.travis.yml +3 -1
- data/Gemfile +0 -4
- data/README.rdoc +13 -58
- data/Rakefile +10 -6
- data/harvestdor.gemspec +9 -14
- data/lib/harvestdor/client.rb +110 -0
- data/lib/harvestdor/errors.rb +8 -8
- data/lib/harvestdor/purl_xml.rb +13 -72
- data/lib/harvestdor/version.rb +1 -1
- data/lib/harvestdor.rb +10 -106
- data/spec/config/example.yml +16 -0
- data/spec/harvestdor_client_spec.rb +7 -65
- data/spec/purl_xml_spec.rb +16 -16
- metadata +53 -29
- data/lib/harvestdor/oai_harvest.rb +0 -115
- data/spec/config/oai.yml +0 -37
- data/spec/harvestdor_spec.rb +0 -23
- data/spec/oai_harvest_spec.rb +0 -220
- data/spec/oai_integration_spec.rb +0 -139
data/lib/harvestdor/purl_xml.rb
CHANGED
@@ -2,7 +2,7 @@ require 'nokogiri'
|
|
2
2
|
|
3
3
|
module Harvestdor
|
4
4
|
# Mixin: code to retrieve Purl public xml pieces
|
5
|
-
|
5
|
+
|
6
6
|
RDF_NAMESPACE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
7
7
|
OAI_DC_NAMESPACE = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
|
8
8
|
MODS_NAMESPACE = 'http://www.loc.gov/mods/v3'
|
@@ -28,14 +28,14 @@ module Harvestdor
|
|
28
28
|
begin
|
29
29
|
ng_doc = Nokogiri::XML(open("#{purl_url}/#{druid}.xml"))
|
30
30
|
raise Harvestdor::Errors::MissingPublicXml.new(druid) if !ng_doc || ng_doc.children.empty?
|
31
|
-
ng_doc
|
31
|
+
ng_doc
|
32
32
|
rescue OpenURI::HTTPError
|
33
33
|
raise Harvestdor::Errors::MissingPurlPage.new(druid)
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
37
|
# the contentMetadata for this fedora object, from the purl xml
|
38
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
38
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
39
39
|
# a Nokogiri::XML::Document containing the public_xml for an object
|
40
40
|
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
|
41
41
|
# @return [Nokogiri::XML::Document] the contentMetadata for the fedora object
|
@@ -45,14 +45,14 @@ module Harvestdor
|
|
45
45
|
# preserve namespaces, etc for the node
|
46
46
|
ng_doc = Nokogiri::XML(pub_xml_ng_doc.root.xpath('/publicObject/contentMetadata').to_xml)
|
47
47
|
raise Harvestdor::Errors::MissingContentMetadata.new(object.inspect) if !ng_doc || ng_doc.children.empty?
|
48
|
-
ng_doc
|
48
|
+
ng_doc
|
49
49
|
rescue
|
50
50
|
raise Harvestdor::Errors::MissingContentMetadata.new(object.inspect)
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
54
|
# the identityMetadata for this fedora object, from the purl xml
|
55
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
55
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
56
56
|
# a Nokogiri::XML::Document containing the public_xml for an object
|
57
57
|
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
|
58
58
|
# @return [Nokogiri::XML::Document] the identityMetadata for the fedora object
|
@@ -62,14 +62,14 @@ module Harvestdor
|
|
62
62
|
# preserve namespaces, etc for the node
|
63
63
|
ng_doc = Nokogiri::XML(pub_xml_ng_doc.root.xpath('/publicObject/identityMetadata').to_xml)
|
64
64
|
raise Harvestdor::Errors::MissingIdentityMetadata.new(object.inspect) if !ng_doc || ng_doc.children.empty?
|
65
|
-
ng_doc
|
65
|
+
ng_doc
|
66
66
|
rescue
|
67
67
|
raise Harvestdor::Errors::MissingIdentityMetadata.new(object.inspect)
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
71
|
# the rightsMetadata for this fedora object, from the purl xml
|
72
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
72
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
73
73
|
# a Nokogiri::XML::Document containing the public_xml for an object
|
74
74
|
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
|
75
75
|
# @return [Nokogiri::XML::Document] the rightsMetadata for the fedora object
|
@@ -86,7 +86,7 @@ module Harvestdor
|
|
86
86
|
end
|
87
87
|
|
88
88
|
# the RDF for this fedora object, from the purl xml
|
89
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
89
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
90
90
|
# a Nokogiri::XML::Document containing the public_xml for an object
|
91
91
|
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
|
92
92
|
# @return [Nokogiri::XML::Document] the RDF for the fedora object
|
@@ -103,7 +103,7 @@ module Harvestdor
|
|
103
103
|
end
|
104
104
|
|
105
105
|
# the Dublin Core for this fedora object, from the purl xml
|
106
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
106
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
107
107
|
# a Nokogiri::XML::Document containing the public_xml for an object
|
108
108
|
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
|
109
109
|
# @return [Nokogiri::XML::Document] the dc for the fedora object
|
@@ -119,73 +119,14 @@ module Harvestdor
|
|
119
119
|
end
|
120
120
|
end
|
121
121
|
|
122
|
-
|
123
|
-
class Client
|
124
|
-
|
125
|
-
# the MODS metadata for this fedora object, from the purl server
|
126
|
-
# @param [String] druid e.g. ab123cd4567, in the purl url
|
127
|
-
# @return [Nokogiri::XML::Document] the MODS metadata for the fedora object
|
128
|
-
def mods druid
|
129
|
-
Harvestdor.mods(druid, config.purl)
|
130
|
-
end
|
131
|
-
|
132
|
-
# the public xml for this fedora object, from the purl xml
|
133
|
-
# @param [String] druid e.g. ab123cd4567, in the purl url
|
134
|
-
# @return [Nokogiri::XML::Document] the public xml for the fedora object
|
135
|
-
def public_xml druid
|
136
|
-
Harvestdor.public_xml(druid, config.purl)
|
137
|
-
end
|
138
|
-
|
139
|
-
# the contentMetadata for this fedora object, from the purl xml
|
140
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
141
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
142
|
-
# @return [Nokogiri::XML::Document] the contentMetadata for the fedora object
|
143
|
-
def content_metadata object
|
144
|
-
Harvestdor.content_metadata(object, config.purl)
|
145
|
-
end
|
146
|
-
|
147
|
-
# the identityMetadata for this fedora object, from the purl xml
|
148
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
149
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
150
|
-
# @return [Nokogiri::XML::Document] the identityMetadata for the fedora object
|
151
|
-
def identity_metadata object
|
152
|
-
Harvestdor.identity_metadata(object, config.purl)
|
153
|
-
end
|
154
|
-
|
155
|
-
# the rightsMetadata for this fedora object, from the purl xml
|
156
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
157
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
158
|
-
# @return [Nokogiri::XML::Document] the rightsMetadata for the fedora object
|
159
|
-
def rights_metadata object
|
160
|
-
Harvestdor.rights_metadata(object, config.purl)
|
161
|
-
end
|
162
|
-
|
163
|
-
# the RDF for this fedora object, from the purl xml
|
164
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
165
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
166
|
-
# @return [Nokogiri::XML::Document] the RDF for the fedora object
|
167
|
-
def rdf object
|
168
|
-
Harvestdor.rdf(object, config.purl)
|
169
|
-
end
|
170
|
-
|
171
|
-
# the Dublin Core for this fedora object, from the purl xml
|
172
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
173
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
174
|
-
# @return [Nokogiri::XML::Document] the dc for the fedora object
|
175
|
-
def dc object
|
176
|
-
Harvestdor.dc(object, config.purl)
|
177
|
-
end
|
178
|
-
|
179
|
-
end # class Client
|
180
|
-
|
181
122
|
protected #--------------------------------------------
|
182
|
-
|
183
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
123
|
+
|
124
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
184
125
|
# a Nokogiri::XML::Document containing the public_xml for an object
|
185
126
|
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
|
186
127
|
# @return [Nokogiri::XML::Document] the public xml for a DOR object
|
187
128
|
def self.pub_xml(object, purl_url = Harvestdor::PURL_DEFAULT)
|
188
|
-
case
|
129
|
+
case
|
189
130
|
when object.instance_of?(String)
|
190
131
|
# it's a druid
|
191
132
|
pub_xml_ng_doc = Harvestdor.public_xml(object, purl_url)
|
@@ -194,7 +135,7 @@ module Harvestdor
|
|
194
135
|
else
|
195
136
|
raise "expected String or Nokogiri::XML::Document for first argument, got #{object.class}"
|
196
137
|
end
|
197
|
-
pub_xml_ng_doc
|
138
|
+
pub_xml_ng_doc
|
198
139
|
end
|
199
140
|
|
200
141
|
end # module Harvestdor
|
data/lib/harvestdor/version.rb
CHANGED
data/lib/harvestdor.rb
CHANGED
@@ -1,121 +1,25 @@
|
|
1
1
|
require 'harvestdor/errors'
|
2
|
-
require 'harvestdor/oai_harvest'
|
3
2
|
require 'harvestdor/purl_xml'
|
4
3
|
require 'harvestdor/version'
|
4
|
+
require 'harvestdor/client'
|
5
5
|
# external gems
|
6
6
|
require 'confstruct'
|
7
|
-
require 'oai'
|
8
7
|
# stdlib
|
9
8
|
require 'logger'
|
10
9
|
require 'open-uri'
|
11
10
|
require 'yaml'
|
12
11
|
|
13
12
|
module Harvestdor
|
14
|
-
|
13
|
+
|
15
14
|
LOG_NAME_DEFAULT = "harvestdor.log"
|
16
15
|
LOG_DIR_DEFAULT = File.join(File.dirname(__FILE__), "..", "logs")
|
17
16
|
PURL_DEFAULT = 'http://purl.stanford.edu'
|
18
|
-
HTTP_OPTIONS_DEFAULT = { 'ssl' => {
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
OAI_CLIENT_DEBUG_DEFAULT = false
|
27
|
-
OAI_REPOSITORY_URL_DEFAULT = 'https://dor-oaiprovider-prod.stanford.edu/oai'
|
28
|
-
DEFAULT_METADATA_PREFIX = 'mods'
|
29
|
-
DEFAULT_FROM_DATE = nil
|
30
|
-
DEFAULT_UNTIL_DATE = nil
|
31
|
-
DEFAULT_SET = nil
|
32
|
-
|
33
|
-
class Client
|
34
|
-
|
35
|
-
# Set default values for the construction of Harvestdor::Client objects
|
36
|
-
def self.default_config
|
37
|
-
@class_config ||= Confstruct::Configuration.new({
|
38
|
-
:log_dir => LOG_DIR_DEFAULT,
|
39
|
-
:log_name => LOG_NAME_DEFAULT,
|
40
|
-
:purl => PURL_DEFAULT,
|
41
|
-
:http_options => HTTP_OPTIONS_DEFAULT,
|
42
|
-
:oai_repository_url => OAI_REPOSITORY_URL_DEFAULT,
|
43
|
-
:oai_client_debug => OAI_CLIENT_DEBUG_DEFAULT,
|
44
|
-
:default_metadata_prefix => DEFAULT_METADATA_PREFIX,
|
45
|
-
:default_from_date => DEFAULT_FROM_DATE,
|
46
|
-
:default_until_date => DEFAULT_UNTIL_DATE,
|
47
|
-
:default_set => DEFAULT_SET
|
48
|
-
})
|
49
|
-
end
|
50
|
-
|
51
|
-
# Initialize a new instance of Harvestdor::Client
|
52
|
-
# @param [Hash] options
|
53
|
-
# @example
|
54
|
-
# client = Harvestdor::Client.new({ # Example with all possible options
|
55
|
-
# :log_dir => File.join(File.dirname(__FILE__), "..", "logs"),
|
56
|
-
# :log_name => 'harvestdor.log',
|
57
|
-
# :purl => 'http://purl.stanford.edu',
|
58
|
-
# :http_options => { 'ssl' => {
|
59
|
-
# 'verify' => false
|
60
|
-
# },
|
61
|
-
# 'request' => {
|
62
|
-
# 'timeout' => 30, # open/read timeout (seconds)
|
63
|
-
# 'open_timeout' => 30 # connection open timeout (seconds)
|
64
|
-
# }
|
65
|
-
# },
|
66
|
-
# :oai_repository_url => 'https://dor-oaiprovider-prod.stanford.edu/oai', # The OAI repository to connect to
|
67
|
-
# :oai_client_debug => false,
|
68
|
-
# :default_metadata_prefix => 'mods',
|
69
|
-
# :default_from_date => '2012-12-01',
|
70
|
-
# :default_until_date => '2014-12-01',
|
71
|
-
# :default_set => nil,
|
72
|
-
# })
|
73
|
-
def initialize options = {}
|
74
|
-
config.configure(YAML.load_file(options[:config_yml_path])) if options[:config_yml_path]
|
75
|
-
config.configure options
|
76
|
-
yield(config) if block_given?
|
77
|
-
end
|
78
|
-
|
79
|
-
def config
|
80
|
-
@config ||= Confstruct::Configuration.new(self.class.default_config)
|
81
|
-
end
|
82
|
-
|
83
|
-
# @return [OAI::Client] an instantiated OAI::Client object, based on config options
|
84
|
-
def oai_client
|
85
|
-
@oai_client ||= OAI::Client.new config.oai_repository_url, :debug => config.oai_client_debug, :http => oai_http_client
|
86
|
-
end
|
87
|
-
|
88
|
-
def logger
|
89
|
-
@logger ||= self.class.logger(config.log_dir, config.log_name)
|
90
|
-
end
|
91
|
-
|
92
|
-
protected #---------------------------------------------------------------------
|
93
|
-
|
94
|
-
def oai_http_client
|
95
|
-
logger.info "Constructing OAI http client with faraday options #{config.http_options.to_hash.inspect}"
|
96
|
-
@oai_http_client ||= Faraday.new config.oai_repository_url, config.http_options.to_hash
|
97
|
-
end
|
98
|
-
|
99
|
-
# Global, memoized, lazy initialized instance of a logger
|
100
|
-
# @param [String] log_dir directory in which the log file is created (made if it does not exist)
|
101
|
-
# @param [String] log_name name of log file
|
102
|
-
def self.logger(log_dir, log_name)
|
103
|
-
Dir.mkdir(log_dir) unless File.directory?(log_dir)
|
104
|
-
@logger ||= Logger.new(File.join(log_dir, log_name), 'daily')
|
105
|
-
end
|
106
|
-
|
107
|
-
end # class Client
|
108
|
-
|
109
|
-
# @param [Object] arg OAI::Header object or OAI::Record object or String (oai identifier)
|
110
|
-
# @return [String] the druid part of an OAI identifier in an OAI header, e.g. bb134cc1324
|
111
|
-
def self.druid(arg)
|
112
|
-
oai_id = arg
|
113
|
-
if arg.is_a?(OAI::Header)
|
114
|
-
oai_id = arg.identifier
|
115
|
-
elsif arg.is_a?(OAI::Record)
|
116
|
-
oai_id = arg.header.identifier
|
117
|
-
end
|
118
|
-
oai_id.split('druid:').last
|
119
|
-
end
|
120
|
-
|
17
|
+
HTTP_OPTIONS_DEFAULT = { 'ssl' => {
|
18
|
+
'verify' => false
|
19
|
+
},
|
20
|
+
'request' => {
|
21
|
+
'timeout' => 60, # open/read timeout (seconds)
|
22
|
+
'open_timeout' => 60 # connection open timeout (seconds)
|
23
|
+
}
|
24
|
+
}
|
121
25
|
end # module Harvestdor
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# log_dir: directory for log file (default logs, relative to harvestdor gem path)
|
2
|
+
log_dir: spec/test_logs
|
3
|
+
|
4
|
+
# log_name: name of log file (default: harvestdor.log)
|
5
|
+
|
6
|
+
# purl: url for the DOR purl server (used to get ContentMetadata, etc.)
|
7
|
+
# purl: http://purl-test.stanford.edu
|
8
|
+
|
9
|
+
# Additional options to pass to Faraday http client (https://github.com/technoweenie/faraday)
|
10
|
+
# timeouts are in seconds; timeout -> open/read, open_timeout -> connection open
|
11
|
+
http_options:
|
12
|
+
ssl:
|
13
|
+
verify: false
|
14
|
+
request:
|
15
|
+
timeout: 121
|
16
|
+
open_timeout: 122
|
@@ -3,7 +3,7 @@ require "spec_helper"
|
|
3
3
|
describe Harvestdor::Client do
|
4
4
|
|
5
5
|
before(:all) do
|
6
|
-
@config_yml_path = File.join(File.dirname(__FILE__), "config", "
|
6
|
+
@config_yml_path = File.join(File.dirname(__FILE__), "config", "example.yml")
|
7
7
|
@client_via_yml_only = Harvestdor::Client.new({:config_yml_path => @config_yml_path})
|
8
8
|
require 'yaml'
|
9
9
|
@yaml = YAML.load_file(@config_yml_path)
|
@@ -11,68 +11,44 @@ describe Harvestdor::Client do
|
|
11
11
|
|
12
12
|
describe "initialization" do
|
13
13
|
before(:all) do
|
14
|
-
@
|
15
|
-
@repo_url = 'http://my_oai_repo.org/oai'
|
14
|
+
@some_args = Harvestdor::Client.new.config
|
16
15
|
end
|
16
|
+
|
17
17
|
context "attributes passed in hash argument" do
|
18
|
-
before(:all) do
|
19
|
-
@some_args = Harvestdor::Client.new({:default_from_date => @from_date, :oai_repository_url => @repo_url}).config
|
20
|
-
end
|
21
|
-
it "should set the attributes to the passed values" do
|
22
|
-
expect(@some_args.oai_repository_url).to eql(@repo_url)
|
23
|
-
expect(@some_args.default_from_date).to eql(@from_date)
|
24
|
-
end
|
25
18
|
it "should keep the defaults for attributes not in the hash argument" do
|
26
19
|
expect(@some_args.log_name).to eql(Harvestdor::LOG_NAME_DEFAULT)
|
27
20
|
expect(@some_args.log_dir).to eql(Harvestdor::LOG_DIR_DEFAULT)
|
28
21
|
expect(@some_args.purl).to eql(Harvestdor::PURL_DEFAULT)
|
29
22
|
expect(@some_args.http_options).to eql(Confstruct::Configuration.new(Harvestdor::HTTP_OPTIONS_DEFAULT))
|
30
|
-
expect(@some_args.oai_client_debug).to eql(Harvestdor::OAI_CLIENT_DEBUG_DEFAULT)
|
31
|
-
expect(@some_args.default_metadata_prefix).to eql(Harvestdor::DEFAULT_METADATA_PREFIX)
|
32
|
-
expect(@some_args.default_until_date).to eql(Harvestdor::DEFAULT_UNTIL_DATE)
|
33
|
-
expect(@some_args.default_set).to eql(Harvestdor::DEFAULT_SET)
|
34
23
|
end
|
35
24
|
end
|
36
|
-
|
25
|
+
|
37
26
|
context "config_yml_path in hash argument" do
|
38
27
|
before(:all) do
|
39
28
|
@config_via_yml_only = @client_via_yml_only.config
|
40
29
|
end
|
41
30
|
it "should set attributes in yml file over defaults" do
|
42
31
|
expect(@config_via_yml_only.log_dir).to eql(@yaml['log_dir'])
|
43
|
-
expect(@config_via_yml_only.oai_repository_url).to eql(@yaml['oai_repository_url'])
|
44
|
-
expect(@config_via_yml_only.default_from_date).to eql(@yaml['default_from_date'])
|
45
|
-
expect(@config_via_yml_only.default_metadata_prefix).to eql(@yaml['default_metadata_prefix'])
|
46
32
|
expect(@config_via_yml_only.http_options.request.timeout).to eql(@yaml['http_options']['request']['timeout'])
|
47
33
|
end
|
48
34
|
it "should keep the defaults for attributes not present in yml file nor a config yml file" do
|
49
35
|
expect(@config_via_yml_only.log_name).to eql(Harvestdor::LOG_NAME_DEFAULT)
|
50
36
|
expect(@config_via_yml_only.purl).to eql(Harvestdor::PURL_DEFAULT)
|
51
|
-
expect(@config_via_yml_only.default_until_date).to eql(Harvestdor::DEFAULT_UNTIL_DATE)
|
52
|
-
expect(@config_via_yml_only.default_set).to eql(Harvestdor::DEFAULT_SET)
|
53
37
|
end
|
54
38
|
context "and some hash arguments" do
|
55
39
|
before(:all) do
|
56
|
-
@config_via_yml_plus = Harvestdor::Client.new({:config_yml_path => @config_yml_path
|
57
|
-
:default_from_date => @from_date, :oai_repository_url => @repo_url}).config
|
58
|
-
end
|
59
|
-
it "should favor hash arg attribute values over yml file values" do
|
60
|
-
expect(@config_via_yml_plus.oai_repository_url).to eql(@repo_url)
|
61
|
-
expect(@config_via_yml_plus.default_from_date).to eql(@from_date)
|
40
|
+
@config_via_yml_plus = Harvestdor::Client.new({:config_yml_path => @config_yml_path}).config
|
62
41
|
end
|
63
42
|
it "should favor yml file values over defaults" do
|
64
43
|
expect(@config_via_yml_plus.log_dir).to eql(@yaml['log_dir'])
|
65
|
-
expect(@config_via_yml_plus.default_metadata_prefix).to eql(@yaml['default_metadata_prefix'])
|
66
44
|
expect(@config_via_yml_plus.http_options.timeout).to eql(@yaml['http_options']['timeout'])
|
67
45
|
end
|
68
46
|
it "should keep the defaults for attributes not present in yml file" do
|
69
47
|
expect(@config_via_yml_plus.log_name).to eql(Harvestdor::LOG_NAME_DEFAULT)
|
70
|
-
expect(@config_via_yml_plus.default_until_date).to eql(Harvestdor::DEFAULT_UNTIL_DATE)
|
71
|
-
expect(@config_via_yml_plus.default_set).to eql(Harvestdor::DEFAULT_SET)
|
72
48
|
end
|
73
49
|
end
|
74
50
|
end
|
75
|
-
|
51
|
+
|
76
52
|
context "without hash arguments" do
|
77
53
|
it "should keep the defaults for all attributes" do
|
78
54
|
no_args = Harvestdor::Client.new.config
|
@@ -80,16 +56,10 @@ describe Harvestdor::Client do
|
|
80
56
|
expect(no_args.log_dir).to eql(Harvestdor::LOG_DIR_DEFAULT)
|
81
57
|
expect(no_args.purl).to eql(Harvestdor::PURL_DEFAULT)
|
82
58
|
expect(no_args.http_options).to eql(Confstruct::Configuration.new(Harvestdor::HTTP_OPTIONS_DEFAULT))
|
83
|
-
expect(no_args.oai_client_debug).to eql(Harvestdor::OAI_CLIENT_DEBUG_DEFAULT)
|
84
|
-
expect(no_args.oai_repository_url).to eql(Harvestdor::OAI_REPOSITORY_URL_DEFAULT)
|
85
|
-
expect(no_args.default_metadata_prefix).to eql(Harvestdor::DEFAULT_METADATA_PREFIX)
|
86
|
-
expect(no_args.default_from_date).to eql(Harvestdor::DEFAULT_FROM_DATE)
|
87
|
-
expect(no_args.default_until_date).to eql(Harvestdor::DEFAULT_UNTIL_DATE)
|
88
|
-
expect(no_args.default_set).to eql(Harvestdor::DEFAULT_SET)
|
89
59
|
end
|
90
60
|
end
|
91
61
|
end # initialize client
|
92
|
-
|
62
|
+
|
93
63
|
it "should allow direct setting of configuration attributes" do
|
94
64
|
conf = Harvestdor::Client.new.config
|
95
65
|
expect(conf.log_dir).to eql(Harvestdor::LOG_DIR_DEFAULT)
|
@@ -103,33 +73,5 @@ describe Harvestdor::Client do
|
|
103
73
|
expect(File.exists?(File.join(@yaml['log_dir'], Harvestdor::LOG_NAME_DEFAULT))).to eql(true)
|
104
74
|
end
|
105
75
|
end
|
106
|
-
|
107
|
-
context "oai_client" do
|
108
|
-
before(:all) do
|
109
|
-
@client = Harvestdor::Client.new
|
110
|
-
@default_oai_client = Harvestdor::Client.new.oai_client
|
111
|
-
end
|
112
|
-
|
113
|
-
it "oai_client should return an OAI::Client object based on config data" do
|
114
|
-
expect(@default_oai_client).to be_an_instance_of(OAI::Client)
|
115
|
-
end
|
116
|
-
|
117
|
-
it "oai_client should have an http_client" do
|
118
|
-
expect(@default_oai_client.instance_variable_get(:@http_client)).to be_an_instance_of(Faraday::Connection)
|
119
|
-
end
|
120
76
|
|
121
|
-
context "oai_http_client (protected method)" do
|
122
|
-
before(:all) do
|
123
|
-
@http_client = @client.send(:oai_http_client)
|
124
|
-
end
|
125
|
-
it "should be a Faraday object" do
|
126
|
-
expect(@http_client).to be_an_instance_of(Faraday::Connection)
|
127
|
-
end
|
128
|
-
it "should have the oai_provider url from config" do
|
129
|
-
uri_obj = @http_client.url_prefix
|
130
|
-
expect(@client.config.oai_repository_url).to match(Regexp.new(uri_obj.host + uri_obj.path))
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end # context oai_client
|
134
|
-
|
135
77
|
end
|
data/spec/purl_xml_spec.rb
CHANGED
@@ -12,10 +12,10 @@ describe Harvestdor::Client do
|
|
12
12
|
@rdf_xml = "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'><rdf:Description rdf:about=\"info:fedora/druid:#{@druid}\">relationship!</rdf:Description></rdf:RDF>"
|
13
13
|
@dc_xml = "<oai_dc:dc xmlns:oai_dc='#{Harvestdor::OAI_DC_NAMESPACE}'><oai_dc:title>hoo ha</oai_dc:title</oai_dc:dc>"
|
14
14
|
@pub_xml = "<publicObject id='druid:#{@druid}'>#{@id_md_xml}#{@cntnt_md_xml}#{@rights_md_xml}#{@rdf_xml}#{@dc_xml}</publicObject>"
|
15
|
-
@ng_pub_xml = Nokogiri::XML(@pub_xml)
|
16
|
-
@fake_druid = 'oo000oo0000'
|
15
|
+
@ng_pub_xml = Nokogiri::XML(@pub_xml)
|
16
|
+
@fake_druid = 'oo000oo0000'
|
17
17
|
end
|
18
|
-
|
18
|
+
|
19
19
|
it "#mods returns a Nokogiri::XML::Document from the purl mods" do
|
20
20
|
VCR.use_cassette('purl_mods') do
|
21
21
|
x = Harvestdor.mods(@druid, @purl)
|
@@ -23,7 +23,7 @@ describe Harvestdor::Client do
|
|
23
23
|
expect(x.root.name).to eql('mods')
|
24
24
|
expect(x.root.namespace.href).to eql(Harvestdor::MODS_NAMESPACE)
|
25
25
|
end
|
26
|
-
end
|
26
|
+
end
|
27
27
|
|
28
28
|
context "#public_xml" do
|
29
29
|
it "#public_xml retrieves entire public xml as a Nokogiri::XML::Document when called with druid" do
|
@@ -44,7 +44,7 @@ describe Harvestdor::Client do
|
|
44
44
|
expect { Harvestdor.public_xml(@fake_druid, @purl) }.to raise_error(Harvestdor::Errors::MissingPublicXml)
|
45
45
|
end
|
46
46
|
end
|
47
|
-
|
47
|
+
|
48
48
|
context "#pub_xml" do
|
49
49
|
it "retrieves public_xml via fetch when first arg is a druid" do
|
50
50
|
expect(Harvestdor).to receive(:public_xml).with(@druid, @purl)
|
@@ -57,7 +57,7 @@ describe Harvestdor::Client do
|
|
57
57
|
expect { Harvestdor.pub_xml(Array.new)}.to raise_error(RuntimeError, "expected String or Nokogiri::XML::Document for first argument, got Array")
|
58
58
|
end
|
59
59
|
end
|
60
|
-
|
60
|
+
|
61
61
|
context "#content_metadata" do
|
62
62
|
it "returns a Nokogiri::XML::Document from the public xml fetched with druid" do
|
63
63
|
VCR.use_cassette('content_metadata') do
|
@@ -76,9 +76,9 @@ describe Harvestdor::Client do
|
|
76
76
|
it "raises MissingContentMetadata error if there is no contentMetadata in the public_xml for the druid" do
|
77
77
|
pub_xml = "<publicObject id='druid:#{@druid}'>#{@id_md_xml}#{@rights_md_xml}</publicObject>"
|
78
78
|
expect { Harvestdor.content_metadata(Nokogiri::XML(pub_xml)) }.to raise_error(Harvestdor::Errors::MissingContentMetadata)
|
79
|
-
end
|
79
|
+
end
|
80
80
|
end
|
81
|
-
|
81
|
+
|
82
82
|
context "#identity_metadata" do
|
83
83
|
it "returns a Nokogiri::XML::Document from the public xml fetched with druid" do
|
84
84
|
VCR.use_cassette('identity_metadata') do
|
@@ -98,9 +98,9 @@ describe Harvestdor::Client do
|
|
98
98
|
it "raises MissingIdentityMetadata error if there is no identityMetadata in the public_xml for the druid" do
|
99
99
|
pub_xml = "<publicObject id='druid:#{@druid}'>#{@cntnt_md_xml}#{@rights_md_xml}</publicObject>"
|
100
100
|
expect { Harvestdor.identity_metadata(Nokogiri::XML(pub_xml)) }.to raise_error(Harvestdor::Errors::MissingIdentityMetadata)
|
101
|
-
end
|
101
|
+
end
|
102
102
|
end
|
103
|
-
|
103
|
+
|
104
104
|
context "#rights_metadata" do
|
105
105
|
it "#rights_metadata returns a Nokogiri::XML::Document from the public xml fetched with druid" do
|
106
106
|
VCR.use_cassette('rights_metadata') do
|
@@ -118,9 +118,9 @@ describe Harvestdor::Client do
|
|
118
118
|
it "raises MissingRightsMetadata error if there is no identityMetadata in the public_xml for the druid" do
|
119
119
|
pub_xml = "<publicObject id='druid:#{@druid}'>#{@cntnt_md_xml}#{@id_md_xml}</publicObject>"
|
120
120
|
expect { Harvestdor.rights_metadata(Nokogiri::XML(pub_xml)) }.to raise_error(Harvestdor::Errors::MissingRightsMetadata)
|
121
|
-
end
|
121
|
+
end
|
122
122
|
end
|
123
|
-
|
123
|
+
|
124
124
|
context "#rdf" do
|
125
125
|
it "returns a Nokogiri::XML::Document from the public xml fetched with druid" do
|
126
126
|
VCR.use_cassette('rdf') do
|
@@ -140,9 +140,9 @@ describe Harvestdor::Client do
|
|
140
140
|
it "raises MissingRDF error if there is no RDF in the public_xml for the druid" do
|
141
141
|
pub_xml = "<publicObject id='druid:#{@druid}'>#{@cntnt_md_xml}#{@id_md_xml}</publicObject>"
|
142
142
|
expect { Harvestdor.rdf(Nokogiri::XML(pub_xml)) }.to raise_error(Harvestdor::Errors::MissingRDF)
|
143
|
-
end
|
143
|
+
end
|
144
144
|
end
|
145
|
-
|
145
|
+
|
146
146
|
context "#dc" do
|
147
147
|
it "returns a Nokogiri::XML::Document from the public xml fetched with druid" do
|
148
148
|
VCR.use_cassette('dc') do
|
@@ -162,9 +162,9 @@ describe Harvestdor::Client do
|
|
162
162
|
it "raises MissingDC error if there is no DC in the public_xml for the druid" do
|
163
163
|
pub_xml = "<publicObject id='druid:#{@druid}'>#{@cntnt_md_xml}#{@id_md_xml}</publicObject>"
|
164
164
|
expect { Harvestdor.dc(Nokogiri::XML(pub_xml)) }.to raise_error(Harvestdor::Errors::MissingDC)
|
165
|
-
end
|
165
|
+
end
|
166
166
|
end
|
167
|
-
|
167
|
+
|
168
168
|
context "Harvestdor:Client calls methods with config.purl" do
|
169
169
|
before(:all) do
|
170
170
|
@client = Harvestdor::Client.new({:purl_url => 'http://thisone.org'})
|