harvestdor 0.0.14 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.hound.yml +2 -0
- data/.rubocop.yml +3 -0
- data/.rubocop_todo.yml +249 -0
- data/.travis.yml +3 -1
- data/Gemfile +0 -4
- data/README.rdoc +13 -58
- data/Rakefile +10 -6
- data/harvestdor.gemspec +9 -14
- data/lib/harvestdor/client.rb +110 -0
- data/lib/harvestdor/errors.rb +8 -8
- data/lib/harvestdor/purl_xml.rb +13 -72
- data/lib/harvestdor/version.rb +1 -1
- data/lib/harvestdor.rb +10 -106
- data/spec/config/example.yml +16 -0
- data/spec/harvestdor_client_spec.rb +7 -65
- data/spec/purl_xml_spec.rb +16 -16
- metadata +53 -29
- data/lib/harvestdor/oai_harvest.rb +0 -115
- data/spec/config/oai.yml +0 -37
- data/spec/harvestdor_spec.rb +0 -23
- data/spec/oai_harvest_spec.rb +0 -220
- data/spec/oai_integration_spec.rb +0 -139
data/lib/harvestdor/purl_xml.rb
CHANGED
@@ -2,7 +2,7 @@ require 'nokogiri'
|
|
2
2
|
|
3
3
|
module Harvestdor
|
4
4
|
# Mixin: code to retrieve Purl public xml pieces
|
5
|
-
|
5
|
+
|
6
6
|
RDF_NAMESPACE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
7
7
|
OAI_DC_NAMESPACE = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
|
8
8
|
MODS_NAMESPACE = 'http://www.loc.gov/mods/v3'
|
@@ -28,14 +28,14 @@ module Harvestdor
|
|
28
28
|
begin
|
29
29
|
ng_doc = Nokogiri::XML(open("#{purl_url}/#{druid}.xml"))
|
30
30
|
raise Harvestdor::Errors::MissingPublicXml.new(druid) if !ng_doc || ng_doc.children.empty?
|
31
|
-
ng_doc
|
31
|
+
ng_doc
|
32
32
|
rescue OpenURI::HTTPError
|
33
33
|
raise Harvestdor::Errors::MissingPurlPage.new(druid)
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
37
|
# the contentMetadata for this fedora object, from the purl xml
|
38
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
38
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
39
39
|
# a Nokogiri::XML::Document containing the public_xml for an object
|
40
40
|
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
|
41
41
|
# @return [Nokogiri::XML::Document] the contentMetadata for the fedora object
|
@@ -45,14 +45,14 @@ module Harvestdor
|
|
45
45
|
# preserve namespaces, etc for the node
|
46
46
|
ng_doc = Nokogiri::XML(pub_xml_ng_doc.root.xpath('/publicObject/contentMetadata').to_xml)
|
47
47
|
raise Harvestdor::Errors::MissingContentMetadata.new(object.inspect) if !ng_doc || ng_doc.children.empty?
|
48
|
-
ng_doc
|
48
|
+
ng_doc
|
49
49
|
rescue
|
50
50
|
raise Harvestdor::Errors::MissingContentMetadata.new(object.inspect)
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
54
|
# the identityMetadata for this fedora object, from the purl xml
|
55
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
55
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
56
56
|
# a Nokogiri::XML::Document containing the public_xml for an object
|
57
57
|
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
|
58
58
|
# @return [Nokogiri::XML::Document] the identityMetadata for the fedora object
|
@@ -62,14 +62,14 @@ module Harvestdor
|
|
62
62
|
# preserve namespaces, etc for the node
|
63
63
|
ng_doc = Nokogiri::XML(pub_xml_ng_doc.root.xpath('/publicObject/identityMetadata').to_xml)
|
64
64
|
raise Harvestdor::Errors::MissingIdentityMetadata.new(object.inspect) if !ng_doc || ng_doc.children.empty?
|
65
|
-
ng_doc
|
65
|
+
ng_doc
|
66
66
|
rescue
|
67
67
|
raise Harvestdor::Errors::MissingIdentityMetadata.new(object.inspect)
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
71
|
# the rightsMetadata for this fedora object, from the purl xml
|
72
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
72
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
73
73
|
# a Nokogiri::XML::Document containing the public_xml for an object
|
74
74
|
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
|
75
75
|
# @return [Nokogiri::XML::Document] the rightsMetadata for the fedora object
|
@@ -86,7 +86,7 @@ module Harvestdor
|
|
86
86
|
end
|
87
87
|
|
88
88
|
# the RDF for this fedora object, from the purl xml
|
89
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
89
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
90
90
|
# a Nokogiri::XML::Document containing the public_xml for an object
|
91
91
|
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
|
92
92
|
# @return [Nokogiri::XML::Document] the RDF for the fedora object
|
@@ -103,7 +103,7 @@ module Harvestdor
|
|
103
103
|
end
|
104
104
|
|
105
105
|
# the Dublin Core for this fedora object, from the purl xml
|
106
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
106
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
107
107
|
# a Nokogiri::XML::Document containing the public_xml for an object
|
108
108
|
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
|
109
109
|
# @return [Nokogiri::XML::Document] the dc for the fedora object
|
@@ -119,73 +119,14 @@ module Harvestdor
|
|
119
119
|
end
|
120
120
|
end
|
121
121
|
|
122
|
-
|
123
|
-
class Client
|
124
|
-
|
125
|
-
# the public xml for this fedora object, from the purl server
|
126
|
-
# @param [String] druid e.g. ab123cd4567, in the purl url
|
127
|
-
# @return [Nokogiri::XML::Document] the MODS metadata for the fedora object
|
128
|
-
def mods druid
|
129
|
-
Harvestdor.mods(druid, config.purl)
|
130
|
-
end
|
131
|
-
|
132
|
-
# the public xml for this fedora object, from the purl xml
|
133
|
-
# @param [String] druid e.g. ab123cd4567, in the purl url
|
134
|
-
# @return [Nokogiri::XML::Document] the public xml for the fedora object
|
135
|
-
def public_xml druid
|
136
|
-
Harvestdor.public_xml(druid, config.purl)
|
137
|
-
end
|
138
|
-
|
139
|
-
# the contentMetadata for this fedora object, from the purl xml
|
140
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
141
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
142
|
-
# @return [Nokogiri::XML::Document] the contentMetadata for the fedora object
|
143
|
-
def content_metadata object
|
144
|
-
Harvestdor.content_metadata(object, config.purl)
|
145
|
-
end
|
146
|
-
|
147
|
-
# the identityMetadata for this fedora object, from the purl xml
|
148
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
149
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
150
|
-
# @return [Nokogiri::XML::Document] the identityMetadata for the fedora object
|
151
|
-
def identity_metadata object
|
152
|
-
Harvestdor.identity_metadata(object, config.purl)
|
153
|
-
end
|
154
|
-
|
155
|
-
# the rightsMetadata for this fedora object, from the purl xml
|
156
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
157
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
158
|
-
# @return [Nokogiri::XML::Document] the rightsMetadata for the fedora object
|
159
|
-
def rights_metadata object
|
160
|
-
Harvestdor.rights_metadata(object, config.purl)
|
161
|
-
end
|
162
|
-
|
163
|
-
# the RDF for this fedora object, from the purl xml
|
164
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
165
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
166
|
-
# @return [Nokogiri::XML::Document] the RDF for the fedora object
|
167
|
-
def rdf object
|
168
|
-
Harvestdor.rdf(object, config.purl)
|
169
|
-
end
|
170
|
-
|
171
|
-
# the Dublin Core for this fedora object, from the purl xml
|
172
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
173
|
-
# a Nokogiri::XML::Document containing the public_xml for an object
|
174
|
-
# @return [Nokogiri::XML::Document] the dc for the fedora object
|
175
|
-
def dc object
|
176
|
-
Harvestdor.dc(object, config.purl)
|
177
|
-
end
|
178
|
-
|
179
|
-
end # class Client
|
180
|
-
|
181
122
|
protected #--------------------------------------------
|
182
|
-
|
183
|
-
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
123
|
+
|
124
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
184
125
|
# a Nokogiri::XML::Document containing the public_xml for an object
|
185
126
|
# @param [String] purl_url url for the purl server. default is Harvestdor::PURL_DEFAULT
|
186
127
|
# @return [Nokogiri::XML::Document] the public xml for a DOR object
|
187
128
|
def self.pub_xml(object, purl_url = Harvestdor::PURL_DEFAULT)
|
188
|
-
case
|
129
|
+
case
|
189
130
|
when object.instance_of?(String)
|
190
131
|
# it's a druid
|
191
132
|
pub_xml_ng_doc = Harvestdor.public_xml(object, purl_url)
|
@@ -194,7 +135,7 @@ module Harvestdor
|
|
194
135
|
else
|
195
136
|
raise "expected String or Nokogiri::XML::Document for first argument, got #{object.class}"
|
196
137
|
end
|
197
|
-
pub_xml_ng_doc
|
138
|
+
pub_xml_ng_doc
|
198
139
|
end
|
199
140
|
|
200
141
|
end # module Harvestdor
|
data/lib/harvestdor/version.rb
CHANGED
data/lib/harvestdor.rb
CHANGED
@@ -1,121 +1,25 @@
|
|
1
1
|
require 'harvestdor/errors'
|
2
|
-
require 'harvestdor/oai_harvest'
|
3
2
|
require 'harvestdor/purl_xml'
|
4
3
|
require 'harvestdor/version'
|
4
|
+
require 'harvestdor/client'
|
5
5
|
# external gems
|
6
6
|
require 'confstruct'
|
7
|
-
require 'oai'
|
8
7
|
# stdlib
|
9
8
|
require 'logger'
|
10
9
|
require 'open-uri'
|
11
10
|
require 'yaml'
|
12
11
|
|
13
12
|
module Harvestdor
|
14
|
-
|
13
|
+
|
15
14
|
LOG_NAME_DEFAULT = "harvestdor.log"
|
16
15
|
LOG_DIR_DEFAULT = File.join(File.dirname(__FILE__), "..", "logs")
|
17
16
|
PURL_DEFAULT = 'http://purl.stanford.edu'
|
18
|
-
HTTP_OPTIONS_DEFAULT = { 'ssl' => {
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
OAI_CLIENT_DEBUG_DEFAULT = false
|
27
|
-
OAI_REPOSITORY_URL_DEFAULT = 'https://dor-oaiprovider-prod.stanford.edu/oai'
|
28
|
-
DEFAULT_METADATA_PREFIX = 'mods'
|
29
|
-
DEFAULT_FROM_DATE = nil
|
30
|
-
DEFAULT_UNTIL_DATE = nil
|
31
|
-
DEFAULT_SET = nil
|
32
|
-
|
33
|
-
class Client
|
34
|
-
|
35
|
-
# Set default values for the construction of Harvestdor::Client objects
|
36
|
-
def self.default_config
|
37
|
-
@class_config ||= Confstruct::Configuration.new({
|
38
|
-
:log_dir => LOG_DIR_DEFAULT,
|
39
|
-
:log_name => LOG_NAME_DEFAULT,
|
40
|
-
:purl => PURL_DEFAULT,
|
41
|
-
:http_options => HTTP_OPTIONS_DEFAULT,
|
42
|
-
:oai_repository_url => OAI_REPOSITORY_URL_DEFAULT,
|
43
|
-
:oai_client_debug => OAI_CLIENT_DEBUG_DEFAULT,
|
44
|
-
:default_metadata_prefix => DEFAULT_METADATA_PREFIX,
|
45
|
-
:default_from_date => DEFAULT_FROM_DATE,
|
46
|
-
:default_until_date => DEFAULT_UNTIL_DATE,
|
47
|
-
:default_set => DEFAULT_SET
|
48
|
-
})
|
49
|
-
end
|
50
|
-
|
51
|
-
# Initialize a new instance of Harvestdor::Client
|
52
|
-
# @param Hash options
|
53
|
-
# @example
|
54
|
-
# client = Harvestdor::Client.new({ # Example with all possible options
|
55
|
-
# :log_dir => File.join(File.dirname(__FILE__), "..", "logs"),
|
56
|
-
# :log_name => 'harvestdor.log',
|
57
|
-
# :purl => 'http://purl.stanford.edu',
|
58
|
-
# :http_options => { 'ssl' => {
|
59
|
-
# 'verify' => false
|
60
|
-
# },
|
61
|
-
# 'request' => {
|
62
|
-
# 'timeout' => 30, # open/read timeout (seconds)
|
63
|
-
# 'open_timeout' => 30 # connection open timeout (seconds)
|
64
|
-
# }
|
65
|
-
# },
|
66
|
-
# :oai_repository_url => 'https://dor-oaiprovider-prod.stanford.edu/oai', # The OAI repository to connect to
|
67
|
-
# :oai_client_debug => false,
|
68
|
-
# :default_metadata_prefix => 'mods',
|
69
|
-
# :default_from_date => '2012-12-01',
|
70
|
-
# :default_until_date => '2014-12-01',
|
71
|
-
# :default_set => nil,
|
72
|
-
# })
|
73
|
-
def initialize options = {}
|
74
|
-
config.configure(YAML.load_file(options[:config_yml_path])) if options[:config_yml_path]
|
75
|
-
config.configure options
|
76
|
-
yield(config) if block_given?
|
77
|
-
end
|
78
|
-
|
79
|
-
def config
|
80
|
-
@config ||= Confstruct::Configuration.new(self.class.default_config)
|
81
|
-
end
|
82
|
-
|
83
|
-
# @return OAI::Client an instantiated OAI::Client object, based on config options
|
84
|
-
def oai_client
|
85
|
-
@oai_client ||= OAI::Client.new config.oai_repository_url, :debug => config.oai_client_debug, :http => oai_http_client
|
86
|
-
end
|
87
|
-
|
88
|
-
def logger
|
89
|
-
@logger ||= self.class.logger(config.log_dir, config.log_name)
|
90
|
-
end
|
91
|
-
|
92
|
-
protected #---------------------------------------------------------------------
|
93
|
-
|
94
|
-
def oai_http_client
|
95
|
-
logger.info "Constructing OAI http client with faraday options #{config.http_options.to_hash.inspect}"
|
96
|
-
@oai_http_client ||= Faraday.new config.oai_repository_url, config.http_options.to_hash
|
97
|
-
end
|
98
|
-
|
99
|
-
# Global, memoized, lazy initialized instance of a logger
|
100
|
-
# @param [String] log_dir directory for to get log file
|
101
|
-
# @param [String] log_name name of log file
|
102
|
-
def self.logger(log_dir, log_name)
|
103
|
-
Dir.mkdir(log_dir) unless File.directory?(log_dir)
|
104
|
-
@logger ||= Logger.new(File.join(log_dir, log_name), 'daily')
|
105
|
-
end
|
106
|
-
|
107
|
-
end # class Client
|
108
|
-
|
109
|
-
# @param [Object] arg OAI::Header object or OAI::Record object or String (oai identifier)
|
110
|
-
# @return [String] the druid part of an OAI identifier in an OAI header, e.g. bb134cc1324
|
111
|
-
def self.druid(arg)
|
112
|
-
oai_id = arg
|
113
|
-
if arg.is_a?(OAI::Header)
|
114
|
-
oai_id = arg.identifier
|
115
|
-
elsif arg.is_a?(OAI::Record)
|
116
|
-
oai_id = arg.header.identifier
|
117
|
-
end
|
118
|
-
oai_id.split('druid:').last
|
119
|
-
end
|
120
|
-
|
17
|
+
HTTP_OPTIONS_DEFAULT = { 'ssl' => {
|
18
|
+
'verify' => false
|
19
|
+
},
|
20
|
+
'request' => {
|
21
|
+
'timeout' => 60, # open/read timeout (seconds)
|
22
|
+
'open_timeout' => 60 # connection open timeout (seconds)
|
23
|
+
}
|
24
|
+
}
|
121
25
|
end # module Harvestdor
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# log_dir: directory for log file (default logs, relative to harvestdor gem path)
|
2
|
+
log_dir: spec/test_logs
|
3
|
+
|
4
|
+
# log_name: name of log file (default: harvestdor.log)
|
5
|
+
|
6
|
+
# purl: url for the DOR purl server (used to get ContentMetadata, etc.)
|
7
|
+
# purl: http://purl-test.stanford.edu
|
8
|
+
|
9
|
+
# Additional options to pass to Faraday http client (https://github.com/technoweenie/faraday)
|
10
|
+
# timeouts are in seconds; timeout -> open/read, open_timeout -> connection open
|
11
|
+
http_options:
|
12
|
+
ssl:
|
13
|
+
verify: false
|
14
|
+
request:
|
15
|
+
timeout: 121
|
16
|
+
open_timeout: 122
|
@@ -3,7 +3,7 @@ require "spec_helper"
|
|
3
3
|
describe Harvestdor::Client do
|
4
4
|
|
5
5
|
before(:all) do
|
6
|
-
@config_yml_path = File.join(File.dirname(__FILE__), "config", "oai.yml")
|
6
|
+
@config_yml_path = File.join(File.dirname(__FILE__), "config", "example.yml")
|
7
7
|
@client_via_yml_only = Harvestdor::Client.new({:config_yml_path => @config_yml_path})
|
8
8
|
require 'yaml'
|
9
9
|
@yaml = YAML.load_file(@config_yml_path)
|
@@ -11,68 +11,44 @@ describe Harvestdor::Client do
|
|
11
11
|
|
12
12
|
describe "initialization" do
|
13
13
|
before(:all) do
|
14
|
-
@
|
15
|
-
@repo_url = 'http://my_oai_repo.org/oai'
|
14
|
+
@some_args = Harvestdor::Client.new.config
|
16
15
|
end
|
16
|
+
|
17
17
|
context "attributes passed in hash argument" do
|
18
|
-
before(:all) do
|
19
|
-
@some_args = Harvestdor::Client.new({:default_from_date => @from_date, :oai_repository_url => @repo_url}).config
|
20
|
-
end
|
21
|
-
it "should set the attributes to the passed values" do
|
22
|
-
expect(@some_args.oai_repository_url).to eql(@repo_url)
|
23
|
-
expect(@some_args.default_from_date).to eql(@from_date)
|
24
|
-
end
|
25
18
|
it "should keep the defaults for attributes not in the hash argument" do
|
26
19
|
expect(@some_args.log_name).to eql(Harvestdor::LOG_NAME_DEFAULT)
|
27
20
|
expect(@some_args.log_dir).to eql(Harvestdor::LOG_DIR_DEFAULT)
|
28
21
|
expect(@some_args.purl).to eql(Harvestdor::PURL_DEFAULT)
|
29
22
|
expect(@some_args.http_options).to eql(Confstruct::Configuration.new(Harvestdor::HTTP_OPTIONS_DEFAULT))
|
30
|
-
expect(@some_args.oai_client_debug).to eql(Harvestdor::OAI_CLIENT_DEBUG_DEFAULT)
|
31
|
-
expect(@some_args.default_metadata_prefix).to eql(Harvestdor::DEFAULT_METADATA_PREFIX)
|
32
|
-
expect(@some_args.default_until_date).to eql(Harvestdor::DEFAULT_UNTIL_DATE)
|
33
|
-
expect(@some_args.default_set).to eql(Harvestdor::DEFAULT_SET)
|
34
23
|
end
|
35
24
|
end
|
36
|
-
|
25
|
+
|
37
26
|
context "config_yml_path in hash argument" do
|
38
27
|
before(:all) do
|
39
28
|
@config_via_yml_only = @client_via_yml_only.config
|
40
29
|
end
|
41
30
|
it "should set attributes in yml file over defaults" do
|
42
31
|
expect(@config_via_yml_only.log_dir).to eql(@yaml['log_dir'])
|
43
|
-
expect(@config_via_yml_only.oai_repository_url).to eql(@yaml['oai_repository_url'])
|
44
|
-
expect(@config_via_yml_only.default_from_date).to eql(@yaml['default_from_date'])
|
45
|
-
expect(@config_via_yml_only.default_metadata_prefix).to eql(@yaml['default_metadata_prefix'])
|
46
32
|
expect(@config_via_yml_only.http_options.request.timeout).to eql(@yaml['http_options']['request']['timeout'])
|
47
33
|
end
|
48
34
|
it "should keep the defaults for attributes not present in yml file nor a config yml file" do
|
49
35
|
expect(@config_via_yml_only.log_name).to eql(Harvestdor::LOG_NAME_DEFAULT)
|
50
36
|
expect(@config_via_yml_only.purl).to eql(Harvestdor::PURL_DEFAULT)
|
51
|
-
expect(@config_via_yml_only.default_until_date).to eql(Harvestdor::DEFAULT_UNTIL_DATE)
|
52
|
-
expect(@config_via_yml_only.default_set).to eql(Harvestdor::DEFAULT_SET)
|
53
37
|
end
|
54
38
|
context "and some hash arguments" do
|
55
39
|
before(:all) do
|
56
|
-
@config_via_yml_plus = Harvestdor::Client.new({:config_yml_path => @config_yml_path,
|
57
|
-
:default_from_date => @from_date, :oai_repository_url => @repo_url}).config
|
58
|
-
end
|
59
|
-
it "should favor hash arg attribute values over yml file values" do
|
60
|
-
expect(@config_via_yml_plus.oai_repository_url).to eql(@repo_url)
|
61
|
-
expect(@config_via_yml_plus.default_from_date).to eql(@from_date)
|
40
|
+
@config_via_yml_plus = Harvestdor::Client.new({:config_yml_path => @config_yml_path}).config
|
62
41
|
end
|
63
42
|
it "should favor yml file values over defaults" do
|
64
43
|
expect(@config_via_yml_plus.log_dir).to eql(@yaml['log_dir'])
|
65
|
-
expect(@config_via_yml_plus.default_metadata_prefix).to eql(@yaml['default_metadata_prefix'])
|
66
44
|
expect(@config_via_yml_plus.http_options.timeout).to eql(@yaml['http_options']['timeout'])
|
67
45
|
end
|
68
46
|
it "should keep the defaults for attributes not present in yml file" do
|
69
47
|
expect(@config_via_yml_plus.log_name).to eql(Harvestdor::LOG_NAME_DEFAULT)
|
70
|
-
expect(@config_via_yml_plus.default_until_date).to eql(Harvestdor::DEFAULT_UNTIL_DATE)
|
71
|
-
expect(@config_via_yml_plus.default_set).to eql(Harvestdor::DEFAULT_SET)
|
72
48
|
end
|
73
49
|
end
|
74
50
|
end
|
75
|
-
|
51
|
+
|
76
52
|
context "without hash arguments" do
|
77
53
|
it "should keep the defaults for all attributes" do
|
78
54
|
no_args = Harvestdor::Client.new.config
|
@@ -80,16 +56,10 @@ describe Harvestdor::Client do
|
|
80
56
|
expect(no_args.log_dir).to eql(Harvestdor::LOG_DIR_DEFAULT)
|
81
57
|
expect(no_args.purl).to eql(Harvestdor::PURL_DEFAULT)
|
82
58
|
expect(no_args.http_options).to eql(Confstruct::Configuration.new(Harvestdor::HTTP_OPTIONS_DEFAULT))
|
83
|
-
expect(no_args.oai_client_debug).to eql(Harvestdor::OAI_CLIENT_DEBUG_DEFAULT)
|
84
|
-
expect(no_args.oai_repository_url).to eql(Harvestdor::OAI_REPOSITORY_URL_DEFAULT)
|
85
|
-
expect(no_args.default_metadata_prefix).to eql(Harvestdor::DEFAULT_METADATA_PREFIX)
|
86
|
-
expect(no_args.default_from_date).to eql(Harvestdor::DEFAULT_FROM_DATE)
|
87
|
-
expect(no_args.default_until_date).to eql(Harvestdor::DEFAULT_UNTIL_DATE)
|
88
|
-
expect(no_args.default_set).to eql(Harvestdor::DEFAULT_SET)
|
89
59
|
end
|
90
60
|
end
|
91
61
|
end # initialize client
|
92
|
-
|
62
|
+
|
93
63
|
it "should allow direct setting of configuration attributes" do
|
94
64
|
conf = Harvestdor::Client.new.config
|
95
65
|
expect(conf.log_dir).to eql(Harvestdor::LOG_DIR_DEFAULT)
|
@@ -103,33 +73,5 @@ describe Harvestdor::Client do
|
|
103
73
|
expect(File.exists?(File.join(@yaml['log_dir'], Harvestdor::LOG_NAME_DEFAULT))).to eql(true)
|
104
74
|
end
|
105
75
|
end
|
106
|
-
|
107
|
-
context "oai_client" do
|
108
|
-
before(:all) do
|
109
|
-
@client = Harvestdor::Client.new
|
110
|
-
@default_oai_client = Harvestdor::Client.new.oai_client
|
111
|
-
end
|
112
|
-
|
113
|
-
it "oai_client should return an OAI::Client object based on config data" do
|
114
|
-
expect(@default_oai_client).to be_an_instance_of(OAI::Client)
|
115
|
-
end
|
116
|
-
|
117
|
-
it "oai_client should have an http_client" do
|
118
|
-
expect(@default_oai_client.instance_variable_get(:@http_client)).to be_an_instance_of(Faraday::Connection)
|
119
|
-
end
|
120
76
|
|
121
|
-
context "oai_http_client (protected method)" do
|
122
|
-
before(:all) do
|
123
|
-
@http_client = @client.send(:oai_http_client)
|
124
|
-
end
|
125
|
-
it "should be a Faraday object" do
|
126
|
-
expect(@http_client).to be_an_instance_of(Faraday::Connection)
|
127
|
-
end
|
128
|
-
it "should have the oai_provider url from config" do
|
129
|
-
uri_obj = @http_client.url_prefix
|
130
|
-
expect(@client.config.oai_repository_url).to match(Regexp.new(uri_obj.host + uri_obj.path))
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end # context oai_client
|
134
|
-
|
135
77
|
end
|
data/spec/purl_xml_spec.rb
CHANGED
@@ -12,10 +12,10 @@ describe Harvestdor::Client do
|
|
12
12
|
@rdf_xml = "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'><rdf:Description rdf:about=\"info:fedora/druid:#{@druid}\">relationship!</rdf:Description></rdf:RDF>"
|
13
13
|
@dc_xml = "<oai_dc:dc xmlns:oai_dc='#{Harvestdor::OAI_DC_NAMESPACE}'><oai_dc:title>hoo ha</oai_dc:title</oai_dc:dc>"
|
14
14
|
@pub_xml = "<publicObject id='druid:#{@druid}'>#{@id_md_xml}#{@cntnt_md_xml}#{@rights_md_xml}#{@rdf_xml}#{@dc_xml}</publicObject>"
|
15
|
-
@ng_pub_xml = Nokogiri::XML(@pub_xml)
|
16
|
-
@fake_druid = 'oo000oo0000'
|
15
|
+
@ng_pub_xml = Nokogiri::XML(@pub_xml)
|
16
|
+
@fake_druid = 'oo000oo0000'
|
17
17
|
end
|
18
|
-
|
18
|
+
|
19
19
|
it "#mods returns a Nokogiri::XML::Document from the purl mods" do
|
20
20
|
VCR.use_cassette('purl_mods') do
|
21
21
|
x = Harvestdor.mods(@druid, @purl)
|
@@ -23,7 +23,7 @@ describe Harvestdor::Client do
|
|
23
23
|
expect(x.root.name).to eql('mods')
|
24
24
|
expect(x.root.namespace.href).to eql(Harvestdor::MODS_NAMESPACE)
|
25
25
|
end
|
26
|
-
end
|
26
|
+
end
|
27
27
|
|
28
28
|
context "#public_xml" do
|
29
29
|
it "#public_xml retrieves entire public xml as a Nokogiri::XML::Document when called with druid" do
|
@@ -44,7 +44,7 @@ describe Harvestdor::Client do
|
|
44
44
|
expect { Harvestdor.public_xml(@fake_druid, @purl) }.to raise_error(Harvestdor::Errors::MissingPublicXml)
|
45
45
|
end
|
46
46
|
end
|
47
|
-
|
47
|
+
|
48
48
|
context "#pub_xml" do
|
49
49
|
it "retrieves public_xml via fetch when first arg is a druid" do
|
50
50
|
expect(Harvestdor).to receive(:public_xml).with(@druid, @purl)
|
@@ -57,7 +57,7 @@ describe Harvestdor::Client do
|
|
57
57
|
expect { Harvestdor.pub_xml(Array.new)}.to raise_error(RuntimeError, "expected String or Nokogiri::XML::Document for first argument, got Array")
|
58
58
|
end
|
59
59
|
end
|
60
|
-
|
60
|
+
|
61
61
|
context "#content_metadata" do
|
62
62
|
it "returns a Nokogiri::XML::Document from the public xml fetched with druid" do
|
63
63
|
VCR.use_cassette('content_metadata') do
|
@@ -76,9 +76,9 @@ describe Harvestdor::Client do
|
|
76
76
|
it "raises MissingContentMetadata error if there is no contentMetadata in the public_xml for the druid" do
|
77
77
|
pub_xml = "<publicObject id='druid:#{@druid}'>#{@id_md_xml}#{@rights_md_xml}</publicObject>"
|
78
78
|
expect { Harvestdor.content_metadata(Nokogiri::XML(pub_xml)) }.to raise_error(Harvestdor::Errors::MissingContentMetadata)
|
79
|
-
end
|
79
|
+
end
|
80
80
|
end
|
81
|
-
|
81
|
+
|
82
82
|
context "#identity_metadata" do
|
83
83
|
it "returns a Nokogiri::XML::Document from the public xml fetched with druid" do
|
84
84
|
VCR.use_cassette('identity_metadata') do
|
@@ -98,9 +98,9 @@ describe Harvestdor::Client do
|
|
98
98
|
it "raises MissingIdentityMetadata error if there is no identityMetadata in the public_xml for the druid" do
|
99
99
|
pub_xml = "<publicObject id='druid:#{@druid}'>#{@cntnt_md_xml}#{@rights_md_xml}</publicObject>"
|
100
100
|
expect { Harvestdor.identity_metadata(Nokogiri::XML(pub_xml)) }.to raise_error(Harvestdor::Errors::MissingIdentityMetadata)
|
101
|
-
end
|
101
|
+
end
|
102
102
|
end
|
103
|
-
|
103
|
+
|
104
104
|
context "#rights_metadata" do
|
105
105
|
it "#rights_metadata returns a Nokogiri::XML::Document from the public xml fetched with druid" do
|
106
106
|
VCR.use_cassette('rights_metadata') do
|
@@ -118,9 +118,9 @@ describe Harvestdor::Client do
|
|
118
118
|
it "raises MissingRightsMetadata error if there is no identityMetadata in the public_xml for the druid" do
|
119
119
|
pub_xml = "<publicObject id='druid:#{@druid}'>#{@cntnt_md_xml}#{@id_md_xml}</publicObject>"
|
120
120
|
expect { Harvestdor.rights_metadata(Nokogiri::XML(pub_xml)) }.to raise_error(Harvestdor::Errors::MissingRightsMetadata)
|
121
|
-
end
|
121
|
+
end
|
122
122
|
end
|
123
|
-
|
123
|
+
|
124
124
|
context "#rdf" do
|
125
125
|
it "returns a Nokogiri::XML::Document from the public xml fetched with druid" do
|
126
126
|
VCR.use_cassette('rdf') do
|
@@ -140,9 +140,9 @@ describe Harvestdor::Client do
|
|
140
140
|
it "raises MissingRDF error if there is no RDF in the public_xml for the druid" do
|
141
141
|
pub_xml = "<publicObject id='druid:#{@druid}'>#{@cntnt_md_xml}#{@id_md_xml}</publicObject>"
|
142
142
|
expect { Harvestdor.rdf(Nokogiri::XML(pub_xml)) }.to raise_error(Harvestdor::Errors::MissingRDF)
|
143
|
-
end
|
143
|
+
end
|
144
144
|
end
|
145
|
-
|
145
|
+
|
146
146
|
context "#dc" do
|
147
147
|
it "returns a Nokogiri::XML::Document from the public xml fetched with druid" do
|
148
148
|
VCR.use_cassette('dc') do
|
@@ -162,9 +162,9 @@ describe Harvestdor::Client do
|
|
162
162
|
it "raises MissingDC error if there is no DC in the public_xml for the druid" do
|
163
163
|
pub_xml = "<publicObject id='druid:#{@druid}'>#{@cntnt_md_xml}#{@id_md_xml}</publicObject>"
|
164
164
|
expect { Harvestdor.dc(Nokogiri::XML(pub_xml)) }.to raise_error(Harvestdor::Errors::MissingDC)
|
165
|
-
end
|
165
|
+
end
|
166
166
|
end
|
167
|
-
|
167
|
+
|
168
168
|
context "Harvestdor:Client calls methods with config.purl" do
|
169
169
|
before(:all) do
|
170
170
|
@client = Harvestdor::Client.new({:purl_url => 'http://thisone.org'})
|