duracloud-client 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +52 -3
- data/lib/duracloud/abstract_entity.rb +92 -0
- data/lib/duracloud/chunked_content.rb +35 -0
- data/lib/duracloud/client.rb +4 -4
- data/lib/duracloud/content.rb +78 -98
- data/lib/duracloud/content_manifest.rb +88 -0
- data/lib/duracloud/manifest.rb +34 -15
- data/lib/duracloud/properties.rb +1 -1
- data/lib/duracloud/request.rb +6 -5
- data/lib/duracloud/rest_methods.rb +8 -8
- data/lib/duracloud/space.rb +8 -10
- data/lib/duracloud/tsv.rb +27 -4
- data/lib/duracloud/version.rb +1 -1
- data/lib/duracloud.rb +3 -0
- data/spec/fixtures/content_manifest.xml +32 -0
- data/spec/support/shared_examples_for_tsv.rb +23 -0
- data/spec/unit/audit_log_spec.rb +5 -15
- data/spec/unit/bit_integrity_report_spec.rb +5 -15
- data/spec/unit/content_manifest_spec.rb +17 -0
- data/spec/unit/content_spec.rb +117 -28
- data/spec/unit/manifest_spec.rb +5 -14
- metadata +11 -4
- data/lib/duracloud/has_properties.rb +0 -52
- data/lib/duracloud/persistence.rb +0 -59
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'active_model'
|
3
|
+
|
4
|
+
module Duracloud
  # A manifest stored as a content item in a DuraCloud space.
  #
  # The manifest itself is fetched through a Content object built from
  # +space_id+, +manifest_id+ (used as the content ID) and +store_id+.
  # Its body is XML that is parsed by Source for a "sourceContent"
  # element and a list of "chunk" elements.
  #
  # Messages this class does not define are forwarded to the underlying
  # Content object when it responds to them.
  class ContentManifest
    include ActiveModel::Model

    validates_presence_of :space_id, :manifest_id

    attr_accessor :space_id, :manifest_id, :store_id

    # Builds a manifest and immediately loads the properties of its
    # underlying content, so any error raised by that load surfaces here.
    # @param kwargs [Hash] attributes (:space_id, :manifest_id, :store_id)
    # @return [Duracloud::ContentManifest]
    def self.find(**kwargs)
      new(**kwargs).tap do |manifest|
        manifest.content
      end
    end

    # The Content object holding the manifest; built once and memoized.
    # Properties are loaded on first access.
    # @return [Duracloud::Content]
    def content
      @content ||= Content.new(space_id: space_id, content_id: manifest_id, store_id: store_id).tap do |c|
        c.load_properties
      end
    end

    # Accessor for what the manifest XML says about the source content
    # and its chunks (memoized).
    # @return [Duracloud::ContentManifest::Source]
    def source
      @source ||= Source.new(self)
    end

    # The body of the downloaded manifest content (memoized).
    # @return [String] presumably the raw manifest XML - it is handed
    #   to Nokogiri::XML by Source#doc.
    def xml
      @xml ||= content.download.body
    end

    protected

    # Forwards unknown messages to the underlying content when it
    # publicly responds to them; otherwise falls through to the default
    # NoMethodError behavior.
    def method_missing(name, *args, &block)
      if content.respond_to?(name)
        # public_send mirrors the public-only respond_to? check above.
        content.public_send(name, *args, &block)
      else
        super
      end
    end

    # Keeps respond_to?/method consistent with the delegation performed
    # by method_missing. Note that this consults #content, which loads
    # the content properties on first access.
    def respond_to_missing?(name, include_private = false)
      content.respond_to?(name) || super
    end

    # Read-only view of the manifest XML.
    class Source
      attr_reader :manifest

      # @param manifest [Duracloud::ContentManifest]
      def initialize(manifest)
        @manifest = manifest
      end

      # The parsed manifest XML (memoized).
      # @return [Nokogiri::XML::Document]
      def doc
        @doc ||= Nokogiri::XML(manifest.xml)
      end

      # @return [String] text of "sourceContent md5" ("" if absent)
      def md5
        doc.css("sourceContent md5").text
      end

      # @return [String, nil] the "contentId" attribute of the first
      #   "sourceContent" element, or nil when there is no such element.
      def content_id
        node = doc.css("sourceContent").first
        node && node["contentId"]
      end

      # @return [Integer] "sourceContent byteSize" converted with to_i
      #   (0 if absent or non-numeric)
      def size
        doc.css("sourceContent byteSize").text.to_i
      end

      # @return [String] text of "sourceContent mimetype" ("" if absent)
      def content_type
        doc.css("sourceContent mimetype").text
      end

      # Downloads every chunk in document order, handing the block to
      # each chunk's own download call.
      def download(&block)
        chunks.each do |chunk|
          chunk.download(&block)
        end
      end

      # Lazily looks up the Content for each "chunk" element; each
      # Content.find call happens only as the enumerator is consumed.
      # @return [Enumerator] yielding the result of Content.find per chunk
      def chunks
        Enumerator.new do |e|
          doc.css("chunk").each do |chunk_xml|
            e << Content.find(space_id: manifest.space_id,
                              content_id: chunk_xml["chunkId"],
                              store_id: manifest.store_id,
                              md5: chunk_xml.css("md5").text)
          end
        end
      end
    end

  end
end
|
data/lib/duracloud/manifest.rb
CHANGED
@@ -2,35 +2,54 @@ module Duracloud
|
|
2
2
|
class Manifest
|
3
3
|
include TSV
|
4
4
|
|
5
|
+
TSV_FORMAT = "TSV"
|
6
|
+
BAGIT_FORMAT = "BAGIT"
|
7
|
+
|
5
8
|
attr_reader :space_id, :store_id
|
6
9
|
|
7
10
|
def initialize(space_id, store_id = nil)
|
8
11
|
@space_id = space_id
|
9
12
|
@store_id = store_id
|
10
|
-
@tsv_response = nil
|
11
|
-
@bagit_response = nil
|
12
13
|
end
|
13
14
|
|
14
|
-
|
15
|
-
|
15
|
+
# Returns the manifest in TSV format,
|
16
|
+
# downloading from DuraCloud if not pre-loaded.
|
17
|
+
# @yield [String] chunk of the manifest, if block given.
|
18
|
+
# @return [Duracloud::Response, String, IO] the response,
|
19
|
+
# if downloaded, or the pre-loaded TSV.
|
20
|
+
# @raise [Duracloud::NotFoundError]
|
21
|
+
def tsv(&block)
|
22
|
+
tsv_source? ? super : download(TSV_FORMAT, &block)
|
16
23
|
end
|
17
24
|
|
18
|
-
|
19
|
-
|
25
|
+
# Downloads the manifest in BAGIT format.
|
26
|
+
# @yield [String] chunk of the manifest, if block given.
|
27
|
+
# @return [Duracloud::Response] the response.
|
28
|
+
# @raise [Duracloud::NotFoundError]
|
29
|
+
def bagit(&block)
|
30
|
+
download(BAGIT_FORMAT, &block)
|
20
31
|
end
|
21
32
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
33
|
+
# Downloads the manifest
|
34
|
+
# @yield [String] chunk of the manifest, if block given.
|
35
|
+
# @param format [Symbol, String] the format of the manifest.
|
36
|
+
# Defaults to "TSV".
|
37
|
+
# @return [Duracloud::Response, String] the response, if block
|
38
|
+
# given, or the manifest content, if no block.
|
39
|
+
# @raise [Duracloud::NotFoundError]
|
40
|
+
def download(format = TSV_FORMAT, &block)
|
41
|
+
fmt = format.to_s.upcase
|
42
|
+
if block_given?
|
43
|
+
get_response(fmt, &block)
|
44
|
+
else
|
45
|
+
get_response(fmt).body
|
46
|
+
end
|
26
47
|
end
|
27
48
|
|
28
|
-
|
29
|
-
@bagit_response ||= get_response("BAGIT")
|
30
|
-
end
|
49
|
+
private
|
31
50
|
|
32
|
-
def get_response(format)
|
33
|
-
Client.get_manifest(space_id, query(format))
|
51
|
+
def get_response(format, &block)
|
52
|
+
Client.get_manifest(space_id, query(format), &block)
|
34
53
|
end
|
35
54
|
|
36
55
|
def query(format)
|
data/lib/duracloud/properties.rb
CHANGED
@@ -33,7 +33,7 @@ module Duracloud
|
|
33
33
|
# @param prop [String] the property name
|
34
34
|
# @return [Boolean]
|
35
35
|
def self.property?(prop)
|
36
|
-
duracloud_property?(prop)
|
36
|
+
duracloud_property?(prop)
|
37
37
|
end
|
38
38
|
|
39
39
|
# Filter the hash of properties, selecting only the properties valid
|
data/lib/duracloud/request.rb
CHANGED
@@ -16,24 +16,25 @@ module Duracloud
|
|
16
16
|
set_options(options.dup)
|
17
17
|
end
|
18
18
|
|
19
|
-
def execute
|
20
|
-
response_class.new(
|
19
|
+
def execute(&block)
|
20
|
+
response_class.new original_response(&block)
|
21
21
|
end
|
22
22
|
|
23
23
|
private
|
24
24
|
|
25
|
-
def original_response
|
25
|
+
def original_response(&block)
|
26
26
|
connection.send(http_method,
|
27
27
|
url,
|
28
28
|
body: body,
|
29
29
|
query: query,
|
30
|
-
header: headers
|
30
|
+
header: headers,
|
31
|
+
&block)
|
31
32
|
end
|
32
33
|
|
33
34
|
def set_options(options)
|
34
35
|
@body = options.delete(:body)
|
35
36
|
@headers = options.delete(:headers)
|
36
|
-
query
|
37
|
+
query = options.delete(:query) || {}
|
37
38
|
# Treat other keywords args as query params and ignore empty params
|
38
39
|
@query = query.merge(options).reject { |k, v| v.to_s.empty? }
|
39
40
|
end
|
@@ -33,8 +33,8 @@ module Duracloud
|
|
33
33
|
durastore(:delete, space_id, **query)
|
34
34
|
end
|
35
35
|
|
36
|
-
def get_content(space_id, content_id, **options)
|
37
|
-
durastore_content(:get, space_id, content_id, **options)
|
36
|
+
def get_content(space_id, content_id, **options, &block)
|
37
|
+
durastore_content(:get, space_id, content_id, **options, &block)
|
38
38
|
end
|
39
39
|
|
40
40
|
def get_content_properties(space_id, content_id, **options)
|
@@ -61,8 +61,8 @@ module Duracloud
|
|
61
61
|
durastore(:get, "audit/#{space_id}", **query)
|
62
62
|
end
|
63
63
|
|
64
|
-
def get_manifest(space_id, **query)
|
65
|
-
durastore(:get, "manifest/#{space_id}", **query)
|
64
|
+
def get_manifest(space_id, **query, &block)
|
65
|
+
durastore(:get, "manifest/#{space_id}", **query, &block)
|
66
66
|
end
|
67
67
|
|
68
68
|
def get_bit_integrity_report(space_id, **query)
|
@@ -85,14 +85,14 @@ module Duracloud
|
|
85
85
|
|
86
86
|
private
|
87
87
|
|
88
|
-
def durastore(*args)
|
89
|
-
execute(DurastoreRequest, *args)
|
88
|
+
def durastore(*args, &block)
|
89
|
+
execute(DurastoreRequest, *args, &block)
|
90
90
|
end
|
91
91
|
|
92
|
-
def durastore_content(http_method, space_id, content_id, **options)
|
92
|
+
def durastore_content(http_method, space_id, content_id, **options, &block)
|
93
93
|
escaped_content_id = content_id.gsub(/%/, "%25").gsub(/ /, "%20")
|
94
94
|
url = [ space_id, escaped_content_id ].join("/")
|
95
|
-
durastore(http_method, url, **options)
|
95
|
+
durastore(http_method, url, **options, &block)
|
96
96
|
end
|
97
97
|
|
98
98
|
end
|
data/lib/duracloud/space.rb
CHANGED
@@ -5,9 +5,7 @@ module Duracloud
|
|
5
5
|
#
|
6
6
|
# A "space" within a DuraCloud account.
|
7
7
|
#
|
8
|
-
class Space
|
9
|
-
include Persistence
|
10
|
-
include HasProperties
|
8
|
+
class Space < AbstractEntity
|
11
9
|
|
12
10
|
after_save :reset_acls
|
13
11
|
|
@@ -119,7 +117,7 @@ module Duracloud
|
|
119
117
|
end
|
120
118
|
end
|
121
119
|
|
122
|
-
|
120
|
+
attr_accessor :space_id, :store_id
|
123
121
|
alias_method :id, :space_id
|
124
122
|
|
125
123
|
after_save :reset_acls
|
@@ -128,8 +126,7 @@ module Duracloud
|
|
128
126
|
# @param space_id [String] the space ID
|
129
127
|
# @param store_id [String] the store ID (optional)
|
130
128
|
def initialize(space_id, store_id = nil)
|
131
|
-
|
132
|
-
@store_id = store_id
|
129
|
+
super(space_id: space_id, store_id: store_id)
|
133
130
|
yield self if block_given?
|
134
131
|
end
|
135
132
|
|
@@ -226,6 +223,11 @@ module Duracloud
|
|
226
223
|
|
227
224
|
private
|
228
225
|
|
226
|
+
def do_load_properties
|
227
|
+
response = Client.get_space_properties(id, **query)
|
228
|
+
self.properties = response.headers
|
229
|
+
end
|
230
|
+
|
229
231
|
def reset_acls
|
230
232
|
@acls = nil
|
231
233
|
end
|
@@ -244,10 +246,6 @@ module Duracloud
|
|
244
246
|
SpaceProperties
|
245
247
|
end
|
246
248
|
|
247
|
-
def get_properties_response
|
248
|
-
Client.get_space_properties(id, **query)
|
249
|
-
end
|
250
|
-
|
251
249
|
def do_delete
|
252
250
|
Client.delete_space(id, **query)
|
253
251
|
end
|
data/lib/duracloud/tsv.rb
CHANGED
@@ -3,6 +3,8 @@ require "csv"
|
|
3
3
|
module Duracloud
|
4
4
|
module TSV
|
5
5
|
|
6
|
+
CHUNK_SIZE = 1024 * 16
|
7
|
+
|
6
8
|
def csv
|
7
9
|
@csv ||= CSV.new(tsv, csv_options)
|
8
10
|
end
|
@@ -20,16 +22,36 @@ module Duracloud
|
|
20
22
|
csv.rewind
|
21
23
|
end
|
22
24
|
|
23
|
-
def tsv
|
24
|
-
|
25
|
+
def tsv(&block)
|
26
|
+
return unless tsv_source?
|
27
|
+
begin
|
28
|
+
tsv_source.rewind
|
29
|
+
if block_given?
|
30
|
+
while chunk = tsv_source.read(CHUNK_SIZE)
|
31
|
+
yield chunk
|
32
|
+
end
|
33
|
+
else
|
34
|
+
tsv_source.read
|
35
|
+
end
|
36
|
+
ensure
|
37
|
+
tsv_source.rewind
|
38
|
+
end
|
25
39
|
end
|
26
40
|
|
27
41
|
def load_tsv(io_or_str)
|
28
|
-
@
|
42
|
+
@tsv_source = io_or_str.is_a?(String) ? StringIO.new(io_or_str, "rb") : io_or_str
|
43
|
+
end
|
44
|
+
|
45
|
+
def tsv_source
|
46
|
+
@tsv_source
|
47
|
+
end
|
48
|
+
|
49
|
+
def tsv_source?
|
50
|
+
!!@tsv_source
|
29
51
|
end
|
30
52
|
|
31
53
|
def load_tsv_file(path)
|
32
|
-
load_tsv
|
54
|
+
load_tsv File.new(path, "rb")
|
33
55
|
end
|
34
56
|
|
35
57
|
def to_s
|
@@ -42,6 +64,7 @@ module Duracloud
|
|
42
64
|
{ col_sep: "\t",
|
43
65
|
quote_char: "`",
|
44
66
|
headers: true,
|
67
|
+
return_headers: false,
|
45
68
|
header_converters: header_converters,
|
46
69
|
}
|
47
70
|
end
|
data/lib/duracloud/version.rb
CHANGED
data/lib/duracloud.rb
CHANGED
@@ -2,12 +2,15 @@ require "duracloud/version"
|
|
2
2
|
require "duracloud/error"
|
3
3
|
|
4
4
|
module Duracloud
|
5
|
+
autoload :AbstractEntity, "duracloud/abstract_entity"
|
5
6
|
autoload :AuditLog, "duracloud/audit_log"
|
6
7
|
autoload :BitIntegrityReport, "duracloud/bit_integrity_report"
|
8
|
+
autoload :ChunkedContent, "duracloud/chunked_content"
|
7
9
|
autoload :Client, "duracloud/client"
|
8
10
|
autoload :Configuration, "duracloud/configuration"
|
9
11
|
autoload :Connection, "duracloud/connection"
|
10
12
|
autoload :Content, "duracloud/content"
|
13
|
+
autoload :ContentManifest, "duracloud/content_manifest"
|
11
14
|
autoload :ContentProperties, "duracloud/content_properties"
|
12
15
|
autoload :DurastoreRequest, "duracloud/durastore_request"
|
13
16
|
autoload :ErrorHandler, "duracloud/error_handler"
|
@@ -0,0 +1,32 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<dur:chunksManifest xmlns:dur="duracloud.org">
|
3
|
+
<header schemaVersion="0.2">
|
4
|
+
<sourceContent contentId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0">
|
5
|
+
<mimetype>application/octet-stream</mimetype>
|
6
|
+
<byteSize>4227858432</byteSize>
|
7
|
+
<md5>164e9aee34c0c42915716e11d5d539b5</md5>
|
8
|
+
</sourceContent>
|
9
|
+
</header>
|
10
|
+
<chunks>
|
11
|
+
<chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0000" index="0">
|
12
|
+
<byteSize>1000000000</byteSize>
|
13
|
+
<md5>8a7d5beb2523fb5e4d7c921096be50a9</md5>
|
14
|
+
</chunk>
|
15
|
+
<chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0001" index="1">
|
16
|
+
<byteSize>1000000000</byteSize>
|
17
|
+
<md5>e37115d4da0e187130ab645dee4f14ed</md5>
|
18
|
+
</chunk>
|
19
|
+
<chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0002" index="2">
|
20
|
+
<byteSize>1000000000</byteSize>
|
21
|
+
<md5>e37115d4da0e187130ab645dee4f14ed</md5>
|
22
|
+
</chunk>
|
23
|
+
<chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0003" index="3">
|
24
|
+
<byteSize>1000000000</byteSize>
|
25
|
+
<md5>93e9a4d242a9fb89796b98060094910d</md5>
|
26
|
+
</chunk>
|
27
|
+
<chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0004" index="4">
|
28
|
+
<byteSize>227858432</byteSize>
|
29
|
+
<md5>db0124ee56298ff7c7ac17be4ef14871</md5>
|
30
|
+
</chunk>
|
31
|
+
</chunks>
|
32
|
+
</dur:chunksManifest>
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Shared examples for objects exposing #load_tsv, #load_tsv_file and #tsv.
# The including example group must define `subject` and a `path` to a
# TSV fixture file.
RSpec.shared_examples "a TSV" do

  describe "#load_tsv" do
    it "loads a string" do
      tsv = File.read(path)
      subject.load_tsv(tsv)
      expect(subject.tsv).to eq(tsv)
    end

    it "loads an IO" do
      # Read the expectation in binary mode so it matches, byte for byte
      # and in encoding, what is read back from the binary-mode IO below.
      tsv = File.binread(path)
      # Pass a real IO (not a String) so the IO branch of #load_tsv is
      # exercised; the block form closes the handle afterwards.
      File.open(path, "rb") do |io|
        subject.load_tsv(io)
        expect(subject.tsv).to eq(tsv)
      end
    end
  end

  describe "#load_tsv_file" do
    specify {
      subject.load_tsv_file(path)
      expect(subject.tsv).to eq(File.read(path))
    }
  end

end
|
data/spec/unit/audit_log_spec.rb
CHANGED
@@ -1,9 +1,13 @@
|
|
1
|
+
require 'support/shared_examples_for_tsv'
|
2
|
+
|
1
3
|
module Duracloud
|
2
4
|
RSpec.describe AuditLog do
|
3
5
|
|
6
|
+
subject { described_class.new("myspace") }
|
7
|
+
|
4
8
|
let(:path) { File.expand_path('../../fixtures/audit_log.tsv', __FILE__) }
|
5
9
|
|
6
|
-
|
10
|
+
it_behaves_like "a TSV"
|
7
11
|
|
8
12
|
describe "#csv" do
|
9
13
|
before {
|
@@ -17,19 +21,5 @@ module Duracloud
|
|
17
21
|
}
|
18
22
|
end
|
19
23
|
|
20
|
-
describe "#load_tsv" do
|
21
|
-
it "loads a string" do
|
22
|
-
tsv = File.read(path)
|
23
|
-
subject.load_tsv(tsv)
|
24
|
-
expect(subject.tsv).to eq(tsv)
|
25
|
-
end
|
26
|
-
it "loads an IO" do
|
27
|
-
tsv = File.read(path)
|
28
|
-
tsv_io = File.new(path, "rb")
|
29
|
-
subject.load_tsv(tsv)
|
30
|
-
expect(subject.tsv.to_s).to eq(tsv)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
24
|
end
|
35
25
|
end
|