harvestdor 0.0.14 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.hound.yml +2 -0
- data/.rubocop.yml +3 -0
- data/.rubocop_todo.yml +249 -0
- data/.travis.yml +3 -1
- data/Gemfile +0 -4
- data/README.rdoc +13 -58
- data/Rakefile +10 -6
- data/harvestdor.gemspec +9 -14
- data/lib/harvestdor/client.rb +110 -0
- data/lib/harvestdor/errors.rb +8 -8
- data/lib/harvestdor/purl_xml.rb +13 -72
- data/lib/harvestdor/version.rb +1 -1
- data/lib/harvestdor.rb +10 -106
- data/spec/config/example.yml +16 -0
- data/spec/harvestdor_client_spec.rb +7 -65
- data/spec/purl_xml_spec.rb +16 -16
- metadata +53 -29
- data/lib/harvestdor/oai_harvest.rb +0 -115
- data/spec/config/oai.yml +0 -37
- data/spec/harvestdor_spec.rb +0 -23
- data/spec/oai_harvest_spec.rb +0 -220
- data/spec/oai_integration_spec.rb +0 -139
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 76b1b6271ac8bb3a5840399bb22d7c1657c22919
|
4
|
+
data.tar.gz: 2a0a25cca28ecb9393e6f28af14c5756a628b85a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2e07d52a6bf82b5041c215d98011d49c7d95a59f1559f67484e427168ee7223c356fa681311330ffe382f592d25bd11f9f2a174e1e9b4a68ffb9ac867dd149cf
|
7
|
+
data.tar.gz: 8e7e02c64b65918b501610b54e2ca37e4a908dad7c9e7b00483d3ad61cab92cbd7c7f3a959833113e1188ccc7a02add5ed7f553b28d7c0d111559ce65ddc2422
|
data/.hound.yml
ADDED
data/.rubocop.yml
ADDED
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,249 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2015-10-21 19:04:57 -0700 using RuboCop version 0.34.2.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
# Cop supports --auto-correct.
|
11
|
+
Lint/DeprecatedClassMethods:
|
12
|
+
Exclude:
|
13
|
+
- 'spec/harvestdor_client_spec.rb'
|
14
|
+
|
15
|
+
# Offense count: 85
|
16
|
+
# Configuration parameters: AllowURI, URISchemes.
|
17
|
+
Metrics/LineLength:
|
18
|
+
Max: 182
|
19
|
+
|
20
|
+
# Offense count: 6
|
21
|
+
RSpec/DescribedClass:
|
22
|
+
Exclude:
|
23
|
+
- 'spec/harvestdor_client_spec.rb'
|
24
|
+
- 'spec/purl_xml_spec.rb'
|
25
|
+
|
26
|
+
# Offense count: 8
|
27
|
+
# Configuration parameters: CustomTransform, IgnoredWords.
|
28
|
+
RSpec/ExampleWording:
|
29
|
+
Exclude:
|
30
|
+
- 'spec/harvestdor_client_spec.rb'
|
31
|
+
|
32
|
+
# Offense count: 2
|
33
|
+
# Configuration parameters: CustomTransform.
|
34
|
+
RSpec/FilePath:
|
35
|
+
Exclude:
|
36
|
+
- 'spec/harvestdor_client_spec.rb'
|
37
|
+
- 'spec/purl_xml_spec.rb'
|
38
|
+
|
39
|
+
# Offense count: 118
|
40
|
+
RSpec/InstanceVariable:
|
41
|
+
Exclude:
|
42
|
+
- 'spec/harvestdor_client_spec.rb'
|
43
|
+
- 'spec/purl_xml_spec.rb'
|
44
|
+
|
45
|
+
# Offense count: 1
|
46
|
+
# Cop supports --auto-correct.
|
47
|
+
# Configuration parameters: EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle, SupportedLastArgumentHashStyles.
|
48
|
+
Style/AlignHash:
|
49
|
+
Exclude:
|
50
|
+
- 'lib/harvestdor.rb'
|
51
|
+
|
52
|
+
# Offense count: 6
|
53
|
+
# Cop supports --auto-correct.
|
54
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
55
|
+
Style/BracesAroundHashParameters:
|
56
|
+
Exclude:
|
57
|
+
- 'lib/harvestdor/client.rb'
|
58
|
+
- 'lib/harvestdor/purl_xml.rb'
|
59
|
+
- 'spec/harvestdor_client_spec.rb'
|
60
|
+
- 'spec/purl_xml_spec.rb'
|
61
|
+
|
62
|
+
# Offense count: 2
|
63
|
+
# Cop supports --auto-correct.
|
64
|
+
# Configuration parameters: IndentWhenRelativeTo, SupportedStyles, IndentOneStep.
|
65
|
+
Style/CaseIndentation:
|
66
|
+
Enabled: false
|
67
|
+
|
68
|
+
# Offense count: 1
|
69
|
+
# Cop supports --auto-correct.
|
70
|
+
Style/CommentIndentation:
|
71
|
+
Exclude:
|
72
|
+
- 'harvestdor.gemspec'
|
73
|
+
|
74
|
+
# Offense count: 4
|
75
|
+
# Configuration parameters: Exclude.
|
76
|
+
Style/Documentation:
|
77
|
+
Exclude:
|
78
|
+
- 'lib/harvestdor.rb'
|
79
|
+
- 'lib/harvestdor/errors.rb'
|
80
|
+
- 'lib/harvestdor/purl_xml.rb'
|
81
|
+
- 'lib/harvestdor/version.rb'
|
82
|
+
|
83
|
+
# Offense count: 2
|
84
|
+
# Cop supports --auto-correct.
|
85
|
+
Style/EmptyLines:
|
86
|
+
Exclude:
|
87
|
+
- 'Rakefile'
|
88
|
+
- 'lib/harvestdor/client.rb'
|
89
|
+
|
90
|
+
# Offense count: 4
|
91
|
+
# Cop supports --auto-correct.
|
92
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
93
|
+
Style/EmptyLinesAroundBlockBody:
|
94
|
+
Exclude:
|
95
|
+
- 'harvestdor.gemspec'
|
96
|
+
- 'spec/harvestdor_client_spec.rb'
|
97
|
+
- 'spec/purl_xml_spec.rb'
|
98
|
+
|
99
|
+
# Offense count: 2
|
100
|
+
# Cop supports --auto-correct.
|
101
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
102
|
+
Style/EmptyLinesAroundClassBody:
|
103
|
+
Exclude:
|
104
|
+
- 'lib/harvestdor/client.rb'
|
105
|
+
|
106
|
+
# Offense count: 2
|
107
|
+
# Cop supports --auto-correct.
|
108
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
109
|
+
Style/EmptyLinesAroundModuleBody:
|
110
|
+
Exclude:
|
111
|
+
- 'lib/harvestdor.rb'
|
112
|
+
- 'lib/harvestdor/purl_xml.rb'
|
113
|
+
|
114
|
+
# Offense count: 1
|
115
|
+
# Cop supports --auto-correct.
|
116
|
+
Style/EmptyLiteral:
|
117
|
+
Exclude:
|
118
|
+
- 'spec/purl_xml_spec.rb'
|
119
|
+
|
120
|
+
# Offense count: 11
|
121
|
+
# Cop supports --auto-correct.
|
122
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles, UseHashRocketsWithSymbolValues.
|
123
|
+
Style/HashSyntax:
|
124
|
+
Enabled: false
|
125
|
+
|
126
|
+
# Offense count: 2
|
127
|
+
# Cop supports --auto-correct.
|
128
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
129
|
+
Style/IndentHash:
|
130
|
+
Enabled: false
|
131
|
+
|
132
|
+
# Offense count: 3
|
133
|
+
# Cop supports --auto-correct.
|
134
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
135
|
+
Style/IndentationConsistency:
|
136
|
+
Exclude:
|
137
|
+
- 'harvestdor.gemspec'
|
138
|
+
|
139
|
+
# Offense count: 2
|
140
|
+
# Cop supports --auto-correct.
|
141
|
+
Style/LeadingCommentSpace:
|
142
|
+
Exclude:
|
143
|
+
- 'spec/spec_helper.rb'
|
144
|
+
|
145
|
+
# Offense count: 15
|
146
|
+
# Cop supports --auto-correct.
|
147
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
148
|
+
Style/MethodDefParentheses:
|
149
|
+
Enabled: false
|
150
|
+
|
151
|
+
# Offense count: 1
|
152
|
+
# Cop supports --auto-correct.
|
153
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
154
|
+
Style/MultilineOperationIndentation:
|
155
|
+
Enabled: false
|
156
|
+
|
157
|
+
# Offense count: 2
|
158
|
+
# Cop supports --auto-correct.
|
159
|
+
# Configuration parameters: PreferredDelimiters.
|
160
|
+
Style/PercentLiteralDelimiters:
|
161
|
+
Exclude:
|
162
|
+
- 'harvestdor.gemspec'
|
163
|
+
|
164
|
+
# Offense count: 13
|
165
|
+
# Configuration parameters: SupportedStyles.
|
166
|
+
Style/RaiseArgs:
|
167
|
+
EnforcedStyle: compact
|
168
|
+
|
169
|
+
# Offense count: 1
|
170
|
+
# Cop supports --auto-correct.
|
171
|
+
Style/RedundantBegin:
|
172
|
+
Exclude:
|
173
|
+
- 'lib/harvestdor/purl_xml.rb'
|
174
|
+
|
175
|
+
# Offense count: 7
|
176
|
+
# Cop supports --auto-correct.
|
177
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
178
|
+
Style/SignalException:
|
179
|
+
Exclude:
|
180
|
+
- 'lib/harvestdor/purl_xml.rb'
|
181
|
+
|
182
|
+
# Offense count: 2
|
183
|
+
# Cop supports --auto-correct.
|
184
|
+
Style/SpaceAfterComma:
|
185
|
+
Exclude:
|
186
|
+
- 'lib/harvestdor/purl_xml.rb'
|
187
|
+
|
188
|
+
# Offense count: 1
|
189
|
+
# Cop supports --auto-correct.
|
190
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
191
|
+
Style/SpaceBeforeBlockBraces:
|
192
|
+
Enabled: false
|
193
|
+
|
194
|
+
# Offense count: 1
|
195
|
+
# Cop supports --auto-correct.
|
196
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters.
|
197
|
+
Style/SpaceInsideBlockBraces:
|
198
|
+
Enabled: false
|
199
|
+
|
200
|
+
# Offense count: 2
|
201
|
+
# Cop supports --auto-correct.
|
202
|
+
Style/SpaceInsideBrackets:
|
203
|
+
Exclude:
|
204
|
+
- 'Rakefile'
|
205
|
+
|
206
|
+
# Offense count: 10
|
207
|
+
# Cop supports --auto-correct.
|
208
|
+
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SupportedStyles.
|
209
|
+
Style/SpaceInsideHashLiteralBraces:
|
210
|
+
Enabled: false
|
211
|
+
|
212
|
+
# Offense count: 1
|
213
|
+
# Cop supports --auto-correct.
|
214
|
+
Style/SpecialGlobalVars:
|
215
|
+
Exclude:
|
216
|
+
- 'harvestdor.gemspec'
|
217
|
+
|
218
|
+
# Offense count: 85
|
219
|
+
# Cop supports --auto-correct.
|
220
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
221
|
+
Style/StringLiterals:
|
222
|
+
Enabled: false
|
223
|
+
|
224
|
+
# Offense count: 3
|
225
|
+
# Cop supports --auto-correct.
|
226
|
+
Style/Tab:
|
227
|
+
Exclude:
|
228
|
+
- 'harvestdor.gemspec'
|
229
|
+
|
230
|
+
# Offense count: 9
|
231
|
+
# Cop supports --auto-correct.
|
232
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
233
|
+
Style/TrailingBlankLines:
|
234
|
+
Exclude:
|
235
|
+
- 'Gemfile'
|
236
|
+
- 'Rakefile'
|
237
|
+
- 'lib/harvestdor.rb'
|
238
|
+
- 'lib/harvestdor/client.rb'
|
239
|
+
- 'lib/harvestdor/errors.rb'
|
240
|
+
- 'lib/harvestdor/purl_xml.rb'
|
241
|
+
- 'spec/harvestdor_client_spec.rb'
|
242
|
+
- 'spec/purl_xml_spec.rb'
|
243
|
+
- 'spec/spec_helper.rb'
|
244
|
+
|
245
|
+
# Offense count: 2
|
246
|
+
# Cop supports --auto-correct.
|
247
|
+
Style/UnneededPercentQ:
|
248
|
+
Exclude:
|
249
|
+
- 'harvestdor.gemspec'
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.rdoc
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
= Harvestdor
|
2
2
|
{<img src="https://travis-ci.org/sul-dlss/harvestdor.svg?branch=master" alt="Build Status" />}[https://travis-ci.org/sul-dlss/harvestdor] {<img src="https://coveralls.io/repos/sul-dlss/harvestdor/badge.png" alt="Coverage Status" />}[https://coveralls.io/r/sul-dlss/harvestdor] {<img src="https://gemnasium.com/sul-dlss/harvestdor.svg" alt="Dependency Status" />}[https://gemnasium.com/sul-dlss/harvestdor] {<img src="https://badge.fury.io/rb/harvestdor.svg" alt="Gem Version" />}[http://badge.fury.io/rb/harvestdor]
|
3
3
|
|
4
|
-
A Gem to harvest
|
4
|
+
A Gem to harvest data from a Stanford Purl page, with convenience methods for
|
5
|
+
getting Nokogiri::XML::Document objects and raising errors when pieces are missing
|
5
6
|
|
6
7
|
== Installation
|
7
8
|
|
@@ -27,73 +28,34 @@ Or install it yourself as:
|
|
27
28
|
:log_dir => File.join(File.dirname(__FILE__), "..", "logs"),
|
28
29
|
:log_name => 'harvestdor.log',
|
29
30
|
:purl => 'http://purl.stanford.edu',
|
30
|
-
:http_options => { 'ssl' => {
|
31
|
-
'verify' => false
|
32
|
-
},
|
31
|
+
:http_options => { 'ssl' => {
|
32
|
+
'verify' => false
|
33
|
+
},
|
33
34
|
'request' => {
|
34
35
|
'timeout' => 60, # open/read timeout (seconds)
|
35
36
|
'open_timeout' => 60 # connection open timeout (seconds)
|
36
37
|
}
|
37
|
-
}
|
38
|
-
:oai_repository_url => 'https://dor-oaiprovider-prod.stanford.edu/oai', # The OAI repository to connect to
|
39
|
-
:oai_client_debug => false,
|
40
|
-
:default_metadata_prefix => 'mods',
|
41
|
-
:default_from_date => '2012-12-01', # default value is nil
|
42
|
-
:default_until_date => '2014-12-01', # default value is nil
|
43
|
-
:default_set => 'is_governed_by_hy787xj5878', # default value is nil
|
38
|
+
}
|
44
39
|
})
|
45
40
|
|
46
41
|
==== Option 1: use a yaml file
|
47
42
|
|
48
|
-
for contents of yml -- see spec/config/
|
43
|
+
for contents of yml -- see spec/config/example.yml
|
49
44
|
|
50
45
|
client = Harvestdor::Client.new({:config_yml_path => path_to_my_yml})
|
51
|
-
client.
|
52
|
-
# do stuff with the druid, e.g.
|
53
|
-
# cm = client.content_metadata(druid)
|
54
|
-
# mods = client.mods(druid)
|
55
|
-
# create solr doc from mods and cm
|
56
|
-
# write solr doc to your app's index
|
57
|
-
end
|
46
|
+
client.mods('oo111oo2222')
|
58
47
|
|
59
48
|
==== Option 2: pass in non-default configurations as a hash
|
60
49
|
|
61
|
-
client = Harvestdor::Client.new({:
|
62
|
-
client.
|
63
|
-
# do stuff with the druid, e.g.
|
64
|
-
# cm = client.content_metadata(druid)
|
65
|
-
# mods = client.mods(druid)
|
66
|
-
# create solr doc from mods and cm
|
67
|
-
# write solr doc to your app's index
|
68
|
-
end
|
50
|
+
client = Harvestdor::Client.new({:purl => 'http://my_purl.org'})
|
51
|
+
client.mods('oo111oo2222')
|
69
52
|
|
70
53
|
==== Option 3: set the attributes explicitly in your code
|
71
54
|
|
72
55
|
client = Harvestdor::Client.new
|
73
|
-
client.config.
|
74
|
-
client.
|
75
|
-
# do stuff with the OAI rec, e.g.
|
76
|
-
# manipulate metadata into solr doc
|
77
|
-
# manipulate stuff from record.about into solr doc
|
78
|
-
# write solr doc to your app's index
|
79
|
-
end
|
80
|
-
|
81
|
-
==== Option 4: set the OAI parameters as params in call to Harvestdor::Client.harvest_xx
|
82
|
-
|
83
|
-
client = Harvestdor::Client.new({oai_repository_url = 'http://my_oai.org'})
|
84
|
-
client.oai_headers(:metadataPrefix => 'foo', :from => '2012-11-27', :set => 'is_governed_by_hy787xj5878') do | id |
|
85
|
-
# do stuff with the druid
|
86
|
-
end
|
87
|
-
|
88
|
-
=== OAI Harvesting
|
56
|
+
client.config.purl = 'http://my_purl.org'
|
57
|
+
client.mods('oo111oo2222')
|
89
58
|
|
90
|
-
Harvestdor::Client.druids_via_oai gets enumerated druids for the records in your specified set / date range (druids are not preceded by 'druid:')
|
91
|
-
|
92
|
-
Harvestdor::Client.oai_records gets enumerated OAI record objects in your specified set / data range, with the metadata format you indicated
|
93
|
-
|
94
|
-
You can also get these as arrays:
|
95
|
-
|
96
|
-
druid_array = client.druids_via_oai(:metadataPrefix => 'foo', :from => '(last_harvested_date)', :set => 'asdfasdf')
|
97
59
|
|
98
60
|
=== XML from PURL pages
|
99
61
|
|
@@ -126,17 +88,10 @@ Similarly for
|
|
126
88
|
|
127
89
|
You can also do this from a Harvestdor::Client object, and it will use the purl from the Client.config:
|
128
90
|
|
129
|
-
client = Harvestdor::Client.new({:
|
91
|
+
client = Harvestdor::Client.new({purl: 'http://thisone.org'})
|
130
92
|
client.identity_metadata('bb375wb8869')
|
131
93
|
|
132
94
|
|
133
|
-
=== TODO: Last Harvested Datestamp (Incremental Harvests)
|
134
|
-
|
135
|
-
Harvestdor::Client.last_datestamp
|
136
|
-
|
137
|
-
persist this information with your app for incremental harvests
|
138
|
-
|
139
|
-
|
140
95
|
== Contributing
|
141
96
|
|
142
97
|
# Fork it
|
data/Rakefile
CHANGED
@@ -3,9 +3,6 @@ require "bundler/gem_tasks"
|
|
3
3
|
require 'rake'
|
4
4
|
require 'bundler'
|
5
5
|
|
6
|
-
require 'rspec/core/rake_task'
|
7
|
-
require 'yard'
|
8
|
-
require 'yard/rake/yardoc_task'
|
9
6
|
|
10
7
|
begin
|
11
8
|
Bundler.setup(:default, :development)
|
@@ -15,13 +12,15 @@ rescue Bundler::BundlerError => e
|
|
15
12
|
exit e.status_code
|
16
13
|
end
|
17
14
|
|
18
|
-
task :default => :
|
15
|
+
task :default => [:rspec, :rubocop]
|
19
16
|
|
20
|
-
desc "run continuous integration suite (tests, coverage, docs)"
|
17
|
+
desc "run continuous integration suite (tests, coverage, docs)"
|
21
18
|
task :ci => [:rspec, :doc]
|
22
19
|
|
23
20
|
task :spec => :rspec
|
24
21
|
|
22
|
+
require 'rspec/core/rake_task'
|
23
|
+
|
25
24
|
desc "run specs EXCEPT integration specs"
|
26
25
|
RSpec::Core::RakeTask.new(:spec_fast) do |spec|
|
27
26
|
spec.rspec_opts = ["-c", "-f progress", "--tty", "-t ~integration", "-r ./spec/spec_helper.rb"]
|
@@ -31,7 +30,12 @@ RSpec::Core::RakeTask.new(:rspec) do |spec|
|
|
31
30
|
spec.rspec_opts = ["-c", "-f progress", "--tty", "-r ./spec/spec_helper.rb"]
|
32
31
|
end
|
33
32
|
|
33
|
+
require 'rubocop/rake_task'
|
34
|
+
RuboCop::RakeTask.new(:rubocop)
|
35
|
+
|
34
36
|
# Use yard to build docs
|
37
|
+
require 'yard'
|
38
|
+
require 'yard/rake/yardoc_task'
|
35
39
|
begin
|
36
40
|
project_root = File.expand_path(File.dirname(__FILE__))
|
37
41
|
doc_dest_dir = File.join(project_root, 'doc')
|
@@ -46,5 +50,5 @@ rescue LoadError
|
|
46
50
|
task :doc do
|
47
51
|
abort "Please install the YARD gem to generate rdoc."
|
48
52
|
end
|
49
|
-
end
|
53
|
+
end
|
50
54
|
|
data/harvestdor.gemspec
CHANGED
@@ -8,34 +8,29 @@ Gem::Specification.new do |gem|
|
|
8
8
|
gem.version = Harvestdor::VERSION
|
9
9
|
gem.authors = ["Naomi Dushay"]
|
10
10
|
gem.email = ["ndushay@stanford.edu"]
|
11
|
-
gem.description = %q{Harvest DOR object metadata
|
11
|
+
gem.description = %q{Harvest DOR object metadata from a Stanford public purl page}
|
12
12
|
gem.summary = %q{Harvest DOR object metadata}
|
13
|
-
gem.homepage = "
|
13
|
+
gem.homepage = ""
|
14
14
|
|
15
15
|
gem.files = `git ls-files`.split($/)
|
16
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
17
|
gem.test_files = gem.files.grep(%r{^spec/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
|
-
|
20
|
-
gem.add_dependency 'oai', '~> 0.3.0'
|
19
|
+
|
21
20
|
gem.add_dependency 'faraday', '>= 0.9.0'
|
22
21
|
gem.add_dependency 'confstruct'
|
23
22
|
gem.add_dependency 'nokogiri'
|
24
|
-
|
25
|
-
# Runtime dependencies
|
26
|
-
# gem.add_runtime_dependency 'nokogiri'
|
27
23
|
|
28
24
|
# Development dependencies
|
29
|
-
# Bundler will install these gems too if you've checked out solrmarc-wrapper source from git and run 'bundle install'
|
30
|
-
# It will not add these as dependencies if you require solrmarc-wrapper for other projects
|
31
25
|
gem.add_development_dependency "rake"
|
32
26
|
# docs
|
33
27
|
gem.add_development_dependency "rdoc"
|
34
28
|
gem.add_development_dependency "yard"
|
35
29
|
# tests
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
30
|
+
gem.add_development_dependency 'rspec'
|
31
|
+
gem.add_development_dependency 'coveralls'
|
32
|
+
gem.add_development_dependency 'rubocop'
|
33
|
+
gem.add_development_dependency 'rubocop-rspec'
|
34
|
+
gem.add_development_dependency 'vcr'
|
35
|
+
gem.add_development_dependency 'webmock'
|
41
36
|
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
module Harvestdor
|
2
|
+
# convenience methods for getting Nokogiri::XML::Document objects for pieces
|
3
|
+
# of the purl public xml, and errors when pieces are missing
|
4
|
+
class Client
|
5
|
+
|
6
|
+
# Set default values for the construction of Harvestdor::Client objects
|
7
|
+
def self.default_config
|
8
|
+
@class_config ||= Confstruct::Configuration.new({
|
9
|
+
:log_dir => LOG_DIR_DEFAULT,
|
10
|
+
:log_name => LOG_NAME_DEFAULT,
|
11
|
+
:purl => PURL_DEFAULT,
|
12
|
+
:http_options => HTTP_OPTIONS_DEFAULT
|
13
|
+
})
|
14
|
+
end
|
15
|
+
|
16
|
+
# Global, memoized, lazily initialized instance of a logger
|
17
|
+
# @param [String] log_dir directory in which the log file is written
|
18
|
+
# @param [String] log_name name of log file
|
19
|
+
def self.logger(log_dir, log_name)
|
20
|
+
Dir.mkdir(log_dir) unless File.directory?(log_dir)
|
21
|
+
@logger ||= Logger.new(File.join(log_dir, log_name), 'daily')
|
22
|
+
end
|
23
|
+
|
24
|
+
|
25
|
+
# Initialize a new instance of Harvestdor::Client
|
26
|
+
# @param [Hash] options
|
27
|
+
# @example
|
28
|
+
# client = Harvestdor::Client.new({ # Example with all possible options
|
29
|
+
# :log_dir => File.join(File.dirname(__FILE__), "..", "logs"),
|
30
|
+
# :log_name => 'harvestdor.log',
|
31
|
+
# :purl => 'http://purl.stanford.edu',
|
32
|
+
# :http_options => { 'ssl' => {
|
33
|
+
# 'verify' => false
|
34
|
+
# },
|
35
|
+
# 'request' => {
|
36
|
+
# 'timeout' => 30, # open/read timeout (seconds)
|
37
|
+
# 'open_timeout' => 30 # connection open timeout (seconds)
|
38
|
+
# }
|
39
|
+
# },
|
40
|
+
# })
|
41
|
+
def initialize options = {}
|
42
|
+
config.configure(YAML.load_file(options[:config_yml_path])) if options[:config_yml_path]
|
43
|
+
config.configure options
|
44
|
+
yield(config) if block_given?
|
45
|
+
end
|
46
|
+
|
47
|
+
def config
|
48
|
+
@config ||= Confstruct::Configuration.new(self.class.default_config)
|
49
|
+
end
|
50
|
+
|
51
|
+
def logger
|
52
|
+
@logger ||= self.class.logger(config.log_dir, config.log_name)
|
53
|
+
end
|
54
|
+
|
55
|
+
# the MODS metadata for this fedora object, from the purl server
|
56
|
+
# @param [String] druid e.g. ab123cd4567, in the purl url
|
57
|
+
# @return [Nokogiri::XML::Document] the MODS metadata for the fedora object
|
58
|
+
def mods druid
|
59
|
+
Harvestdor.mods(druid, config.purl)
|
60
|
+
end
|
61
|
+
|
62
|
+
# the public xml for this fedora object, from the purl xml
|
63
|
+
# @param [String] druid e.g. ab123cd4567, in the purl url
|
64
|
+
# @return [Nokogiri::XML::Document] the public xml for the fedora object
|
65
|
+
def public_xml druid
|
66
|
+
Harvestdor.public_xml(druid, config.purl)
|
67
|
+
end
|
68
|
+
|
69
|
+
# the contentMetadata for this fedora object, from the purl xml
|
70
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
71
|
+
# a Nokogiri::XML::Document containing the public_xml for an object
|
72
|
+
# @return [Nokogiri::XML::Document] the contentMetadata for the fedora object
|
73
|
+
def content_metadata object
|
74
|
+
Harvestdor.content_metadata(object, config.purl)
|
75
|
+
end
|
76
|
+
|
77
|
+
# the identityMetadata for this fedora object, from the purl xml
|
78
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
79
|
+
# a Nokogiri::XML::Document containing the public_xml for an object
|
80
|
+
# @return [Nokogiri::XML::Document] the identityMetadata for the fedora object
|
81
|
+
def identity_metadata object
|
82
|
+
Harvestdor.identity_metadata(object, config.purl)
|
83
|
+
end
|
84
|
+
|
85
|
+
# the rightsMetadata for this fedora object, from the purl xml
|
86
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
87
|
+
# a Nokogiri::XML::Document containing the public_xml for an object
|
88
|
+
# @return [Nokogiri::XML::Document] the rightsMetadata for the fedora object
|
89
|
+
def rights_metadata object
|
90
|
+
Harvestdor.rights_metadata(object, config.purl)
|
91
|
+
end
|
92
|
+
|
93
|
+
# the RDF for this fedora object, from the purl xml
|
94
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
95
|
+
# a Nokogiri::XML::Document containing the public_xml for an object
|
96
|
+
# @return [Nokogiri::XML::Document] the RDF for the fedora object
|
97
|
+
def rdf object
|
98
|
+
Harvestdor.rdf(object, config.purl)
|
99
|
+
end
|
100
|
+
|
101
|
+
# the Dublin Core for this fedora object, from the purl xml
|
102
|
+
# @param [Object] object a String containing a druid (e.g. ab123cd4567), or
|
103
|
+
# a Nokogiri::XML::Document containing the public_xml for an object
|
104
|
+
# @return [Nokogiri::XML::Document] the dc for the fedora object
|
105
|
+
def dc object
|
106
|
+
Harvestdor.dc(object, config.purl)
|
107
|
+
end
|
108
|
+
|
109
|
+
end # class Client
|
110
|
+
end
|
data/lib/harvestdor/errors.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
module Harvestdor
|
2
2
|
module Errors
|
3
|
-
MissingPurlPage = Class.new(StandardError)
|
4
|
-
MissingMods = Class.new(StandardError)
|
5
|
-
MissingPublicXml = Class.new(StandardError)
|
6
|
-
MissingContentMetadata = Class.new(StandardError)
|
7
|
-
MissingIdentityMetadata = Class.new(StandardError)
|
8
|
-
MissingRightsMetadata = Class.new(StandardError)
|
9
|
-
MissingRDF = Class.new(StandardError)
|
10
|
-
MissingDC = Class.new(StandardError)
|
3
|
+
MissingPurlPage = Class.new(StandardError)
|
4
|
+
MissingMods = Class.new(StandardError)
|
5
|
+
MissingPublicXml = Class.new(StandardError)
|
6
|
+
MissingContentMetadata = Class.new(StandardError)
|
7
|
+
MissingIdentityMetadata = Class.new(StandardError)
|
8
|
+
MissingRightsMetadata = Class.new(StandardError)
|
9
|
+
MissingRDF = Class.new(StandardError)
|
10
|
+
MissingDC = Class.new(StandardError)
|
11
11
|
end
|
12
12
|
end
|