duracloud-client 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +52 -3
- data/lib/duracloud/abstract_entity.rb +92 -0
- data/lib/duracloud/chunked_content.rb +35 -0
- data/lib/duracloud/client.rb +4 -4
- data/lib/duracloud/content.rb +78 -98
- data/lib/duracloud/content_manifest.rb +88 -0
- data/lib/duracloud/manifest.rb +34 -15
- data/lib/duracloud/properties.rb +1 -1
- data/lib/duracloud/request.rb +6 -5
- data/lib/duracloud/rest_methods.rb +8 -8
- data/lib/duracloud/space.rb +8 -10
- data/lib/duracloud/tsv.rb +27 -4
- data/lib/duracloud/version.rb +1 -1
- data/lib/duracloud.rb +3 -0
- data/spec/fixtures/content_manifest.xml +32 -0
- data/spec/support/shared_examples_for_tsv.rb +23 -0
- data/spec/unit/audit_log_spec.rb +5 -15
- data/spec/unit/bit_integrity_report_spec.rb +5 -15
- data/spec/unit/content_manifest_spec.rb +17 -0
- data/spec/unit/content_spec.rb +117 -28
- data/spec/unit/manifest_spec.rb +5 -14
- metadata +11 -4
- data/lib/duracloud/has_properties.rb +0 -52
- data/lib/duracloud/persistence.rb +0 -59
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'active_model'
|
3
|
+
|
4
|
+
module Duracloud
  #
  # The manifest of a chunked content item.
  #
  # The manifest is itself stored as content (identified by +manifest_id+)
  # in a space; its body is an XML document with a +sourceContent+ element
  # describing the original item and one +chunk+ element per stored chunk.
  #
  # Messages this class does not define itself are forwarded to the
  # underlying manifest Content object (see #method_missing).
  #
  class ContentManifest
    include ActiveModel::Model

    validates_presence_of :space_id, :manifest_id

    attr_accessor :space_id, :manifest_id, :store_id

    # Builds a manifest from the given attributes and eagerly resolves its
    # content, so that any error raised while loading the content's
    # properties surfaces here rather than on first use.
    # @param kwargs [Hash] attributes (:space_id, :manifest_id, :store_id)
    # @return [Duracloud::ContentManifest]
    def self.find(**kwargs)
      new(**kwargs).tap do |manifest|
        manifest.content
      end
    end

    # The content object holding the manifest document. It is created and
    # has its properties loaded on first access, then memoized.
    # @return [Duracloud::Content]
    def content
      @content ||= Content.new(space_id: space_id, content_id: manifest_id, store_id: store_id).tap do |c|
        c.load_properties
      end
    end

    # @return [Duracloud::ContentManifest::Source] memoized view of the
    #   source content described by the manifest.
    def source
      @source ||= Source.new(self)
    end

    # @return [String] the manifest XML, i.e. the body of the downloaded
    #   manifest content (memoized).
    def xml
      @xml ||= content.download.body
    end

    protected

    # Forwards any message the manifest content publicly responds to.
    def method_missing(name, *args, &block)
      if content.respond_to?(name)
        content.send(name, *args, &block)
      else
        super
      end
    end

    # Keeps #respond_to? / #method consistent with #method_missing.
    #
    # Delegation is skipped until both IDs are set, so that probing a
    # manifest that is still being initialised does not build and load
    # the content object.
    def respond_to_missing?(name, include_private = false)
      return super if space_id.nil? || manifest_id.nil?

      # Mirror the public-only check performed by #method_missing.
      content.respond_to?(name) || super
    end

    #
    # Read-only view of the source (un-chunked) content described by a
    # manifest, backed by the parsed manifest XML.
    #
    class Source
      attr_reader :manifest

      # @param manifest [Duracloud::ContentManifest]
      def initialize(manifest)
        @manifest = manifest
      end

      # @return [Nokogiri::XML::Document] the parsed manifest (memoized).
      def doc
        @doc ||= Nokogiri::XML(manifest.xml)
      end

      # @return [String] text of the +md5+ element under +sourceContent+.
      def md5
        doc.css("sourceContent md5").text
      end

      # @return [String] the +contentId+ attribute of +sourceContent+.
      # NOTE(review): raises NoMethodError when the document has no
      #   +sourceContent+ element -- confirm whether callers rely on that.
      def content_id
        doc.css("sourceContent").first["contentId"]
      end

      # @return [Integer] the +byteSize+ of the source content.
      def size
        doc.css("sourceContent byteSize").text.to_i
      end

      # @return [String] the +mimetype+ of the source content.
      def content_type
        doc.css("sourceContent mimetype").text
      end

      # Downloads every chunk in manifest order, handing the given block
      # to each chunk's own #download.
      def download(&block)
        chunks.each do |chunk|
          chunk.download(&block)
        end
      end

      # Chunks are looked up one at a time as the enumerator is consumed,
      # passing each chunk's manifest MD5 along to Content.find.
      # @return [Enumerator] yielding the result of Content.find per chunk
      def chunks
        Enumerator.new do |e|
          doc.css("chunk").each do |chunk_xml|
            e << Content.find(space_id: manifest.space_id,
                              content_id: chunk_xml["chunkId"],
                              store_id: manifest.store_id,
                              md5: chunk_xml.css("md5").text)
          end
        end
      end
    end

  end
end
|
data/lib/duracloud/manifest.rb
CHANGED
@@ -2,35 +2,54 @@ module Duracloud
|
|
2
2
|
class Manifest
|
3
3
|
include TSV
|
4
4
|
|
5
|
+
TSV_FORMAT = "TSV"
|
6
|
+
BAGIT_FORMAT = "BAGIT"
|
7
|
+
|
5
8
|
attr_reader :space_id, :store_id
|
6
9
|
|
7
10
|
def initialize(space_id, store_id = nil)
|
8
11
|
@space_id = space_id
|
9
12
|
@store_id = store_id
|
10
|
-
@tsv_response = nil
|
11
|
-
@bagit_response = nil
|
12
13
|
end
|
13
14
|
|
14
|
-
|
15
|
-
|
15
|
+
# Returns the manifest in TSV format,
|
16
|
+
# downloading from DuraCloud if not pre-loaded.
|
17
|
+
# @yield [String] chunk of the manifest, if block given.
|
18
|
+
# @return [Duracloud::Response, String, IO] the response,
|
19
|
+
# if downloaded, or the pre-loaded TSV.
|
20
|
+
# @raise [Duracloud::NotFoundError]
|
21
|
+
def tsv(&block)
  # A pre-loaded TSV source takes precedence over a fresh download.
  return super if tsv_source?

  download(TSV_FORMAT, &block)
end
|
17
24
|
|
18
|
-
|
19
|
-
|
25
|
+
# Downloads the manifest in BAGIT format.
|
26
|
+
# @yield [String] chunk of the manifest, if block given.
|
27
|
+
# @return [Duracloud::Response] the response.
|
28
|
+
# @raise [Duracloud::NotFoundError]
|
29
|
+
def bagit(&block)
|
30
|
+
download(BAGIT_FORMAT, &block)
|
20
31
|
end
|
21
32
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
33
|
+
# Downloads the manifest
|
34
|
+
# @yield [String] chunk of the manifest, if block given.
|
35
|
+
# @param format [Symbol, String] the format of the manifest.
|
36
|
+
# Defaults to "TSV".
|
37
|
+
# @return [Duracloud::Response, String] the response, if block
|
38
|
+
# given, or the manifest content, if no block.
|
39
|
+
# @raise [Duracloud::NotFoundError]
|
40
|
+
def download(format = TSV_FORMAT, &block)
  # Accept symbols or lower-case strings by normalizing to upper case.
  requested_format = format.to_s.upcase

  # With a block the response itself is returned (the block is passed on
  # to get_response); without one, only the body is returned.
  return get_response(requested_format, &block) if block_given?

  get_response(requested_format).body
end
|
27
48
|
|
28
|
-
|
29
|
-
@bagit_response ||= get_response("BAGIT")
|
30
|
-
end
|
49
|
+
private
|
31
50
|
|
32
|
-
def get_response(format)
|
33
|
-
Client.get_manifest(space_id, query(format))
|
51
|
+
def get_response(format, &block)
|
52
|
+
Client.get_manifest(space_id, query(format), &block)
|
34
53
|
end
|
35
54
|
|
36
55
|
def query(format)
|
data/lib/duracloud/properties.rb
CHANGED
@@ -33,7 +33,7 @@ module Duracloud
|
|
33
33
|
# @param prop [String] the property name
|
34
34
|
# @return [Boolean]
|
35
35
|
def self.property?(prop)
|
36
|
-
duracloud_property?(prop)
|
36
|
+
duracloud_property?(prop)
|
37
37
|
end
|
38
38
|
|
39
39
|
# Filter the hash of properties, selecting only the properties valid
|
data/lib/duracloud/request.rb
CHANGED
@@ -16,24 +16,25 @@ module Duracloud
|
|
16
16
|
set_options(options.dup)
|
17
17
|
end
|
18
18
|
|
19
|
-
def execute
|
20
|
-
response_class.new(
|
19
|
+
def execute(&block)
|
20
|
+
response_class.new original_response(&block)
|
21
21
|
end
|
22
22
|
|
23
23
|
private
|
24
24
|
|
25
|
-
def original_response
|
25
|
+
def original_response(&block)
|
26
26
|
connection.send(http_method,
|
27
27
|
url,
|
28
28
|
body: body,
|
29
29
|
query: query,
|
30
|
-
header: headers
|
30
|
+
header: headers,
|
31
|
+
&block)
|
31
32
|
end
|
32
33
|
|
33
34
|
def set_options(options)
|
34
35
|
@body = options.delete(:body)
|
35
36
|
@headers = options.delete(:headers)
|
36
|
-
query
|
37
|
+
query = options.delete(:query) || {}
|
37
38
|
# Treat other keywords args as query params and ignore empty params
|
38
39
|
@query = query.merge(options).reject { |k, v| v.to_s.empty? }
|
39
40
|
end
|
@@ -33,8 +33,8 @@ module Duracloud
|
|
33
33
|
durastore(:delete, space_id, **query)
|
34
34
|
end
|
35
35
|
|
36
|
-
def get_content(space_id, content_id, **options)
|
37
|
-
durastore_content(:get, space_id, content_id, **options)
|
36
|
+
def get_content(space_id, content_id, **options, &block)
|
37
|
+
durastore_content(:get, space_id, content_id, **options, &block)
|
38
38
|
end
|
39
39
|
|
40
40
|
def get_content_properties(space_id, content_id, **options)
|
@@ -61,8 +61,8 @@ module Duracloud
|
|
61
61
|
durastore(:get, "audit/#{space_id}", **query)
|
62
62
|
end
|
63
63
|
|
64
|
-
def get_manifest(space_id, **query)
|
65
|
-
durastore(:get, "manifest/#{space_id}", **query)
|
64
|
+
def get_manifest(space_id, **query, &block)
|
65
|
+
durastore(:get, "manifest/#{space_id}", **query, &block)
|
66
66
|
end
|
67
67
|
|
68
68
|
def get_bit_integrity_report(space_id, **query)
|
@@ -85,14 +85,14 @@ module Duracloud
|
|
85
85
|
|
86
86
|
private
|
87
87
|
|
88
|
-
def durastore(*args)
|
89
|
-
execute(DurastoreRequest, *args)
|
88
|
+
def durastore(*args, &block)
|
89
|
+
execute(DurastoreRequest, *args, &block)
|
90
90
|
end
|
91
91
|
|
92
|
-
def durastore_content(http_method, space_id, content_id, **options)
|
92
|
+
def durastore_content(http_method, space_id, content_id, **options, &block)
|
93
93
|
escaped_content_id = content_id.gsub(/%/, "%25").gsub(/ /, "%20")
|
94
94
|
url = [ space_id, escaped_content_id ].join("/")
|
95
|
-
durastore(http_method, url, **options)
|
95
|
+
durastore(http_method, url, **options, &block)
|
96
96
|
end
|
97
97
|
|
98
98
|
end
|
data/lib/duracloud/space.rb
CHANGED
@@ -5,9 +5,7 @@ module Duracloud
|
|
5
5
|
#
|
6
6
|
# A "space" within a DuraCloud account.
|
7
7
|
#
|
8
|
-
class Space
|
9
|
-
include Persistence
|
10
|
-
include HasProperties
|
8
|
+
class Space < AbstractEntity
|
11
9
|
|
12
10
|
after_save :reset_acls
|
13
11
|
|
@@ -119,7 +117,7 @@ module Duracloud
|
|
119
117
|
end
|
120
118
|
end
|
121
119
|
|
122
|
-
|
120
|
+
attr_accessor :space_id, :store_id
|
123
121
|
alias_method :id, :space_id
|
124
122
|
|
125
123
|
after_save :reset_acls
|
@@ -128,8 +126,7 @@ module Duracloud
|
|
128
126
|
# @param space_id [String] the space ID
|
129
127
|
# @param store_id [String] the store ID (optional)
|
130
128
|
def initialize(space_id, store_id = nil)
|
131
|
-
|
132
|
-
@store_id = store_id
|
129
|
+
super(space_id: space_id, store_id: store_id)
|
133
130
|
yield self if block_given?
|
134
131
|
end
|
135
132
|
|
@@ -226,6 +223,11 @@ module Duracloud
|
|
226
223
|
|
227
224
|
private
|
228
225
|
|
226
|
+
def do_load_properties
|
227
|
+
response = Client.get_space_properties(id, **query)
|
228
|
+
self.properties = response.headers
|
229
|
+
end
|
230
|
+
|
229
231
|
def reset_acls
|
230
232
|
@acls = nil
|
231
233
|
end
|
@@ -244,10 +246,6 @@ module Duracloud
|
|
244
246
|
SpaceProperties
|
245
247
|
end
|
246
248
|
|
247
|
-
def get_properties_response
|
248
|
-
Client.get_space_properties(id, **query)
|
249
|
-
end
|
250
|
-
|
251
249
|
def do_delete
|
252
250
|
Client.delete_space(id, **query)
|
253
251
|
end
|
data/lib/duracloud/tsv.rb
CHANGED
@@ -3,6 +3,8 @@ require "csv"
|
|
3
3
|
module Duracloud
|
4
4
|
module TSV
|
5
5
|
|
6
|
+
CHUNK_SIZE = 1024 * 16
|
7
|
+
|
6
8
|
def csv
|
7
9
|
@csv ||= CSV.new(tsv, csv_options)
|
8
10
|
end
|
@@ -20,16 +22,36 @@ module Duracloud
|
|
20
22
|
csv.rewind
|
21
23
|
end
|
22
24
|
|
23
|
-
def tsv
|
24
|
-
|
25
|
+
def tsv(&block)
  # Nothing to read until a source has been loaded.
  return unless tsv_source?

  begin
    tsv_source.rewind

    # Without a block, hand back the whole source in one read.
    return tsv_source.read unless block_given?

    # With a block, yield CHUNK_SIZE pieces until the source is exhausted.
    until (piece = tsv_source.read(CHUNK_SIZE)).nil?
      yield piece
    end
  ensure
    # Always leave the source positioned at the start for the next reader.
    tsv_source.rewind
  end
end
|
26
40
|
|
27
41
|
def load_tsv(io_or_str)
|
28
|
-
@
|
42
|
+
@tsv_source = io_or_str.is_a?(String) ? StringIO.new(io_or_str, "rb") : io_or_str
|
43
|
+
end
|
44
|
+
|
45
|
+
def tsv_source
|
46
|
+
@tsv_source
|
47
|
+
end
|
48
|
+
|
49
|
+
def tsv_source?
|
50
|
+
!!@tsv_source
|
29
51
|
end
|
30
52
|
|
31
53
|
def load_tsv_file(path)
|
32
|
-
load_tsv
|
54
|
+
load_tsv File.new(path, "rb")
|
33
55
|
end
|
34
56
|
|
35
57
|
def to_s
|
@@ -42,6 +64,7 @@ module Duracloud
|
|
42
64
|
{ col_sep: "\t",
|
43
65
|
quote_char: "`",
|
44
66
|
headers: true,
|
67
|
+
return_headers: false,
|
45
68
|
header_converters: header_converters,
|
46
69
|
}
|
47
70
|
end
|
data/lib/duracloud/version.rb
CHANGED
data/lib/duracloud.rb
CHANGED
@@ -2,12 +2,15 @@ require "duracloud/version"
|
|
2
2
|
require "duracloud/error"
|
3
3
|
|
4
4
|
module Duracloud
|
5
|
+
autoload :AbstractEntity, "duracloud/abstract_entity"
|
5
6
|
autoload :AuditLog, "duracloud/audit_log"
|
6
7
|
autoload :BitIntegrityReport, "duracloud/bit_integrity_report"
|
8
|
+
autoload :ChunkedContent, "duracloud/chunked_content"
|
7
9
|
autoload :Client, "duracloud/client"
|
8
10
|
autoload :Configuration, "duracloud/configuration"
|
9
11
|
autoload :Connection, "duracloud/connection"
|
10
12
|
autoload :Content, "duracloud/content"
|
13
|
+
autoload :ContentManifest, "duracloud/content_manifest"
|
11
14
|
autoload :ContentProperties, "duracloud/content_properties"
|
12
15
|
autoload :DurastoreRequest, "duracloud/durastore_request"
|
13
16
|
autoload :ErrorHandler, "duracloud/error_handler"
|
@@ -0,0 +1,32 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<dur:chunksManifest xmlns:dur="duracloud.org">
|
3
|
+
<header schemaVersion="0.2">
|
4
|
+
<sourceContent contentId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0">
|
5
|
+
<mimetype>application/octet-stream</mimetype>
|
6
|
+
<byteSize>4227858432</byteSize>
|
7
|
+
<md5>164e9aee34c0c42915716e11d5d539b5</md5>
|
8
|
+
</sourceContent>
|
9
|
+
</header>
|
10
|
+
<chunks>
|
11
|
+
<chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0000" index="0">
|
12
|
+
<byteSize>1000000000</byteSize>
|
13
|
+
<md5>8a7d5beb2523fb5e4d7c921096be50a9</md5>
|
14
|
+
</chunk>
|
15
|
+
<chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0001" index="1">
|
16
|
+
<byteSize>1000000000</byteSize>
|
17
|
+
<md5>e37115d4da0e187130ab645dee4f14ed</md5>
|
18
|
+
</chunk>
|
19
|
+
<chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0002" index="2">
|
20
|
+
<byteSize>1000000000</byteSize>
|
21
|
+
<md5>e37115d4da0e187130ab645dee4f14ed</md5>
|
22
|
+
</chunk>
|
23
|
+
<chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0003" index="3">
|
24
|
+
<byteSize>1000000000</byteSize>
|
25
|
+
<md5>93e9a4d242a9fb89796b98060094910d</md5>
|
26
|
+
</chunk>
|
27
|
+
<chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0004" index="4">
|
28
|
+
<byteSize>227858432</byteSize>
|
29
|
+
<md5>db0124ee56298ff7c7ac17be4ef14871</md5>
|
30
|
+
</chunk>
|
31
|
+
</chunks>
|
32
|
+
</dur:chunksManifest>
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Shared examples for classes that include Duracloud::TSV.
#
# The including spec must define:
#   subject - an instance of the class under test
#   path    - path to a TSV fixture file
RSpec.shared_examples "a TSV" do

  describe "#load_tsv" do
    it "loads a string" do
      tsv = File.read(path)
      subject.load_tsv(tsv)
      expect(subject.tsv).to eq(tsv)
    end

    it "loads an IO" do
      # Compare against a binary read so the expectation does not depend
      # on the default external encoding.
      tsv = File.binread(path)
      # Pass a real IO (not the String) so the IO branch of #load_tsv is
      # exercised; the block form closes the file afterwards.
      File.open(path, "rb") do |io|
        subject.load_tsv(io)
        expect(subject.tsv).to eq(tsv)
      end
    end
  end

  describe "#load_tsv_file" do
    specify {
      subject.load_tsv_file(path)
      expect(subject.tsv).to eq(File.read(path))
    }
  end

end
|
data/spec/unit/audit_log_spec.rb
CHANGED
@@ -1,9 +1,13 @@
|
|
1
|
+
require 'support/shared_examples_for_tsv'
|
2
|
+
|
1
3
|
module Duracloud
|
2
4
|
RSpec.describe AuditLog do
|
3
5
|
|
6
|
+
subject { described_class.new("myspace") }
|
7
|
+
|
4
8
|
let(:path) { File.expand_path('../../fixtures/audit_log.tsv', __FILE__) }
|
5
9
|
|
6
|
-
|
10
|
+
it_behaves_like "a TSV"
|
7
11
|
|
8
12
|
describe "#csv" do
|
9
13
|
before {
|
@@ -17,19 +21,5 @@ module Duracloud
|
|
17
21
|
}
|
18
22
|
end
|
19
23
|
|
20
|
-
describe "#load_tsv" do
|
21
|
-
it "loads a string" do
|
22
|
-
tsv = File.read(path)
|
23
|
-
subject.load_tsv(tsv)
|
24
|
-
expect(subject.tsv).to eq(tsv)
|
25
|
-
end
|
26
|
-
it "loads an IO" do
|
27
|
-
tsv = File.read(path)
|
28
|
-
tsv_io = File.new(path, "rb")
|
29
|
-
subject.load_tsv(tsv)
|
30
|
-
expect(subject.tsv.to_s).to eq(tsv)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
24
|
end
|
35
25
|
end
|