duracloud-client 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,88 @@
1
+ require 'nokogiri'
2
+ require 'active_model'
3
+
4
+ module Duracloud
5
+ class ContentManifest
6
+ include ActiveModel::Model
7
+
8
+ validates_presence_of :space_id, :manifest_id
9
+
10
+ attr_accessor :space_id, :manifest_id, :store_id
11
+
12
+ def self.find(**kwargs)
13
+ new(**kwargs).tap do |manifest|
14
+ manifest.content
15
+ end
16
+ end
17
+
18
+ def content
19
+ @content ||= Content.new(space_id: space_id, content_id: manifest_id, store_id: store_id).tap do |c|
20
+ c.load_properties
21
+ end
22
+ end
23
+
24
+ def source
25
+ @source ||= Source.new(self)
26
+ end
27
+
28
+ def xml
29
+ @xml ||= content.download.body
30
+ end
31
+
32
+ protected
33
+
34
+ def method_missing(name, *args, &block)
35
+ if content.respond_to?(name)
36
+ content.send(name, *args, &block)
37
+ else
38
+ super
39
+ end
40
+ end
41
+
42
+ class Source
43
+ attr_reader :manifest
44
+
45
+ def initialize(manifest)
46
+ @manifest = manifest
47
+ end
48
+
49
+ def doc
50
+ @doc ||= Nokogiri::XML(manifest.xml)
51
+ end
52
+
53
+ def md5
54
+ doc.css("sourceContent md5").text
55
+ end
56
+
57
+ def content_id
58
+ doc.css("sourceContent").first["contentId"]
59
+ end
60
+
61
+ def size
62
+ doc.css("sourceContent byteSize").text.to_i
63
+ end
64
+
65
+ def content_type
66
+ doc.css("sourceContent mimetype").text
67
+ end
68
+
69
+ def download(&block)
70
+ chunks.each do |chunk|
71
+ chunk.download(&block)
72
+ end
73
+ end
74
+
75
+ def chunks
76
+ Enumerator.new do |e|
77
+ doc.css("chunk").each do |chunk_xml|
78
+ e << Content.find(space_id: manifest.space_id,
79
+ content_id: chunk_xml["chunkId"],
80
+ store_id: manifest.store_id,
81
+ md5: chunk_xml.css("md5").text)
82
+ end
83
+ end
84
+ end
85
+ end
86
+
87
+ end
88
+ end
@@ -2,35 +2,54 @@ module Duracloud
2
2
  class Manifest
3
3
  include TSV
4
4
 
5
+ TSV_FORMAT = "TSV"
6
+ BAGIT_FORMAT = "BAGIT"
7
+
5
8
  attr_reader :space_id, :store_id
6
9
 
7
10
  def initialize(space_id, store_id = nil)
8
11
  @space_id = space_id
9
12
  @store_id = store_id
10
- @tsv_response = nil
11
- @bagit_response = nil
12
13
  end
13
14
 
14
- def tsv
15
- super || tsv_response.body
15
+ # Returns the manifest in TSV format,
16
+ # downloading from DuraCloud if not pre-loaded.
17
+ # @yield [String] chunk of the manifest, if block given.
18
+ # @return [Duracloud::Response, String, IO] the response,
19
+ # if downloaded, or the pre-loaded TSV.
20
+ # @raise [Duracloud::NotFoundError]
21
+ def tsv(&block)
22
+ tsv_source? ? super : download(TSV_FORMAT, &block)
16
23
  end
17
24
 
18
- def bagit
19
- bagit_response.body
25
+ # Downloads the manifest in BAGIT format.
26
+ # @yield [String] chunk of the manifest, if block given.
27
+ # @return [Duracloud::Response, String] the response, if block given, or the manifest content, if no block.
28
+ # @raise [Duracloud::NotFoundError]
29
+ def bagit(&block)
30
+ download(BAGIT_FORMAT, &block)
20
31
  end
21
32
 
22
- private
23
-
24
- def tsv_response
25
- @tsv_response ||= get_response("TSV")
33
+ # Downloads the manifest in the given format.
34
+ # @yield [String] chunk of the manifest, if block given.
35
+ # @param format [Symbol, String] the format of the manifest.
36
+ # Defaults to "TSV".
37
+ # @return [Duracloud::Response, String] the response, if block
38
+ # given, or the manifest content, if no block.
39
+ # @raise [Duracloud::NotFoundError]
40
+ def download(format = TSV_FORMAT, &block)
41
+ fmt = format.to_s.upcase
42
+ if block_given?
43
+ get_response(fmt, &block)
44
+ else
45
+ get_response(fmt).body
46
+ end
26
47
  end
27
48
 
28
- def bagit_response
29
- @bagit_response ||= get_response("BAGIT")
30
- end
49
+ private
31
50
 
32
- def get_response(format)
33
- Client.get_manifest(space_id, query(format))
51
+ def get_response(format, &block)
52
+ Client.get_manifest(space_id, query(format), &block)
34
53
  end
35
54
 
36
55
  def query(format)
@@ -33,7 +33,7 @@ module Duracloud
33
33
  # @param prop [String] the property name
34
34
  # @return [Boolean]
35
35
  def self.property?(prop)
36
- duracloud_property?(prop) && !internal_property?(prop)
36
+ duracloud_property?(prop)
37
37
  end
38
38
 
39
39
  # Filter the hash of properties, selecting only the properties valid
@@ -16,24 +16,25 @@ module Duracloud
16
16
  set_options(options.dup)
17
17
  end
18
18
 
19
- def execute
20
- response_class.new(original_response)
19
+ def execute(&block)
20
+ response_class.new original_response(&block)
21
21
  end
22
22
 
23
23
  private
24
24
 
25
- def original_response
25
+ def original_response(&block)
26
26
  connection.send(http_method,
27
27
  url,
28
28
  body: body,
29
29
  query: query,
30
- header: headers)
30
+ header: headers,
31
+ &block)
31
32
  end
32
33
 
33
34
  def set_options(options)
34
35
  @body = options.delete(:body)
35
36
  @headers = options.delete(:headers)
36
- query = options.delete(:query) || {}
37
+ query = options.delete(:query) || {}
37
38
  # Treat other keywords args as query params and ignore empty params
38
39
  @query = query.merge(options).reject { |k, v| v.to_s.empty? }
39
40
  end
@@ -33,8 +33,8 @@ module Duracloud
33
33
  durastore(:delete, space_id, **query)
34
34
  end
35
35
 
36
- def get_content(space_id, content_id, **options)
37
- durastore_content(:get, space_id, content_id, **options)
36
+ def get_content(space_id, content_id, **options, &block)
37
+ durastore_content(:get, space_id, content_id, **options, &block)
38
38
  end
39
39
 
40
40
  def get_content_properties(space_id, content_id, **options)
@@ -61,8 +61,8 @@ module Duracloud
61
61
  durastore(:get, "audit/#{space_id}", **query)
62
62
  end
63
63
 
64
- def get_manifest(space_id, **query)
65
- durastore(:get, "manifest/#{space_id}", **query)
64
+ def get_manifest(space_id, **query, &block)
65
+ durastore(:get, "manifest/#{space_id}", **query, &block)
66
66
  end
67
67
 
68
68
  def get_bit_integrity_report(space_id, **query)
@@ -85,14 +85,14 @@ module Duracloud
85
85
 
86
86
  private
87
87
 
88
- def durastore(*args)
89
- execute(DurastoreRequest, *args)
88
+ def durastore(*args, &block)
89
+ execute(DurastoreRequest, *args, &block)
90
90
  end
91
91
 
92
- def durastore_content(http_method, space_id, content_id, **options)
92
+ def durastore_content(http_method, space_id, content_id, **options, &block)
93
93
  escaped_content_id = content_id.gsub(/%/, "%25").gsub(/ /, "%20")
94
94
  url = [ space_id, escaped_content_id ].join("/")
95
- durastore(http_method, url, **options)
95
+ durastore(http_method, url, **options, &block)
96
96
  end
97
97
 
98
98
  end
@@ -5,9 +5,7 @@ module Duracloud
5
5
  #
6
6
  # A "space" within a DuraCloud account.
7
7
  #
8
- class Space
9
- include Persistence
10
- include HasProperties
8
+ class Space < AbstractEntity
11
9
 
12
10
  after_save :reset_acls
13
11
 
@@ -119,7 +117,7 @@ module Duracloud
119
117
  end
120
118
  end
121
119
 
122
- attr_reader :space_id, :store_id
120
+ attr_accessor :space_id, :store_id
123
121
  alias_method :id, :space_id
124
122
 
125
123
  after_save :reset_acls
@@ -128,8 +126,7 @@ module Duracloud
128
126
  # @param space_id [String] the space ID
129
127
  # @param store_id [String] the store ID (optional)
130
128
  def initialize(space_id, store_id = nil)
131
- @space_id = space_id
132
- @store_id = store_id
129
+ super(space_id: space_id, store_id: store_id)
133
130
  yield self if block_given?
134
131
  end
135
132
 
@@ -226,6 +223,11 @@ module Duracloud
226
223
 
227
224
  private
228
225
 
226
+ def do_load_properties
227
+ response = Client.get_space_properties(id, **query)
228
+ self.properties = response.headers
229
+ end
230
+
229
231
  def reset_acls
230
232
  @acls = nil
231
233
  end
@@ -244,10 +246,6 @@ module Duracloud
244
246
  SpaceProperties
245
247
  end
246
248
 
247
- def get_properties_response
248
- Client.get_space_properties(id, **query)
249
- end
250
-
251
249
  def do_delete
252
250
  Client.delete_space(id, **query)
253
251
  end
data/lib/duracloud/tsv.rb CHANGED
@@ -3,6 +3,8 @@ require "csv"
3
3
  module Duracloud
4
4
  module TSV
5
5
 
6
+ CHUNK_SIZE = 1024 * 16
7
+
6
8
  def csv
7
9
  @csv ||= CSV.new(tsv, csv_options)
8
10
  end
@@ -20,16 +22,36 @@ module Duracloud
20
22
  csv.rewind
21
23
  end
22
24
 
23
- def tsv
24
- @tsv
25
+ def tsv(&block)
26
+ return unless tsv_source?
27
+ begin
28
+ tsv_source.rewind
29
+ if block_given?
30
+ while chunk = tsv_source.read(CHUNK_SIZE)
31
+ yield chunk
32
+ end
33
+ else
34
+ tsv_source.read
35
+ end
36
+ ensure
37
+ tsv_source.rewind
38
+ end
25
39
  end
26
40
 
27
41
  def load_tsv(io_or_str)
28
- @tsv = io_or_str
42
+ @tsv_source = io_or_str.is_a?(String) ? StringIO.new(io_or_str, "rb") : io_or_str
43
+ end
44
+
45
+ def tsv_source
46
+ @tsv_source
47
+ end
48
+
49
+ def tsv_source?
50
+ !!@tsv_source
29
51
  end
30
52
 
31
53
  def load_tsv_file(path)
32
- load_tsv(File.new(path, "rb"))
54
+ load_tsv File.new(path, "rb")
33
55
  end
34
56
 
35
57
  def to_s
@@ -42,6 +64,7 @@ module Duracloud
42
64
  { col_sep: "\t",
43
65
  quote_char: "`",
44
66
  headers: true,
67
+ return_headers: false,
45
68
  header_converters: header_converters,
46
69
  }
47
70
  end
@@ -1,3 +1,3 @@
1
1
  module Duracloud
2
- VERSION = "0.3.0"
2
+ VERSION = "0.4.0"
3
3
  end
data/lib/duracloud.rb CHANGED
@@ -2,12 +2,15 @@ require "duracloud/version"
2
2
  require "duracloud/error"
3
3
 
4
4
  module Duracloud
5
+ autoload :AbstractEntity, "duracloud/abstract_entity"
5
6
  autoload :AuditLog, "duracloud/audit_log"
6
7
  autoload :BitIntegrityReport, "duracloud/bit_integrity_report"
8
+ autoload :ChunkedContent, "duracloud/chunked_content"
7
9
  autoload :Client, "duracloud/client"
8
10
  autoload :Configuration, "duracloud/configuration"
9
11
  autoload :Connection, "duracloud/connection"
10
12
  autoload :Content, "duracloud/content"
13
+ autoload :ContentManifest, "duracloud/content_manifest"
11
14
  autoload :ContentProperties, "duracloud/content_properties"
12
15
  autoload :DurastoreRequest, "duracloud/durastore_request"
13
16
  autoload :ErrorHandler, "duracloud/error_handler"
@@ -0,0 +1,32 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <dur:chunksManifest xmlns:dur="duracloud.org">
3
+ <header schemaVersion="0.2">
4
+ <sourceContent contentId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0">
5
+ <mimetype>application/octet-stream</mimetype>
6
+ <byteSize>4227858432</byteSize>
7
+ <md5>164e9aee34c0c42915716e11d5d539b5</md5>
8
+ </sourceContent>
9
+ </header>
10
+ <chunks>
11
+ <chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0000" index="0">
12
+ <byteSize>1000000000</byteSize>
13
+ <md5>8a7d5beb2523fb5e4d7c921096be50a9</md5>
14
+ </chunk>
15
+ <chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0001" index="1">
16
+ <byteSize>1000000000</byteSize>
17
+ <md5>e37115d4da0e187130ab645dee4f14ed</md5>
18
+ </chunk>
19
+ <chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0002" index="2">
20
+ <byteSize>1000000000</byteSize>
21
+ <md5>e37115d4da0e187130ab645dee4f14ed</md5>
22
+ </chunk>
23
+ <chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0003" index="3">
24
+ <byteSize>1000000000</byteSize>
25
+ <md5>93e9a4d242a9fb89796b98060094910d</md5>
26
+ </chunk>
27
+ <chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0004" index="4">
28
+ <byteSize>227858432</byteSize>
29
+ <md5>db0124ee56298ff7c7ac17be4ef14871</md5>
30
+ </chunk>
31
+ </chunks>
32
+ </dur:chunksManifest>
@@ -0,0 +1,23 @@
1
+ RSpec.shared_examples "a TSV" do
2
+
3
+ describe "#load_tsv" do
4
+ it "loads a string" do
5
+ tsv = File.read(path)
6
+ subject.load_tsv(tsv)
7
+ expect(subject.tsv).to eq(tsv)
8
+ end
9
+ it "loads an IO" do
10
+ tsv = File.read(path)
11
+ subject.load_tsv(tsv)
12
+ expect(subject.tsv).to eq(tsv)
13
+ end
14
+ end
15
+
16
+ describe "#load_tsv_file" do
17
+ specify {
18
+ subject.load_tsv_file(path)
19
+ expect(subject.tsv).to eq(File.read(path))
20
+ }
21
+ end
22
+
23
+ end
@@ -1,9 +1,13 @@
1
+ require 'support/shared_examples_for_tsv'
2
+
1
3
  module Duracloud
2
4
  RSpec.describe AuditLog do
3
5
 
6
+ subject { described_class.new("myspace") }
7
+
4
8
  let(:path) { File.expand_path('../../fixtures/audit_log.tsv', __FILE__) }
5
9
 
6
- subject { described_class.new("myspace") }
10
+ it_behaves_like "a TSV"
7
11
 
8
12
  describe "#csv" do
9
13
  before {
@@ -17,19 +21,5 @@ module Duracloud
17
21
  }
18
22
  end
19
23
 
20
- describe "#load_tsv" do
21
- it "loads a string" do
22
- tsv = File.read(path)
23
- subject.load_tsv(tsv)
24
- expect(subject.tsv).to eq(tsv)
25
- end
26
- it "loads an IO" do
27
- tsv = File.read(path)
28
- tsv_io = File.new(path, "rb")
29
- subject.load_tsv(tsv)
30
- expect(subject.tsv.to_s).to eq(tsv)
31
- end
32
- end
33
-
34
24
  end
35
25
  end