duracloud-client 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,88 @@
1
+ require 'nokogiri'
2
+ require 'active_model'
3
+
4
module Duracloud
  # A DuraCloud content item holding an XML chunks manifest, i.e. the
  # document describing a source file and the chunks it is stored as.
  #
  # Methods not defined here are forwarded to the underlying Content
  # object (see #method_missing).
  class ContentManifest
    include ActiveModel::Model

    validates_presence_of :space_id, :manifest_id

    attr_accessor :space_id, :manifest_id, :store_id

    # Builds a manifest from the given attributes and eagerly loads the
    # backing content, so that a load failure surfaces here rather than
    # on first use.
    # @param kwargs [Hash] attributes (:space_id, :manifest_id, :store_id)
    # @return [Duracloud::ContentManifest]
    def self.find(**kwargs)
      new(**kwargs).tap(&:content)
    end

    # The Content object that stores the manifest XML. Created on first
    # access, with its properties loaded, and memoized afterwards.
    # @return [Duracloud::Content]
    def content
      @content ||= Content.new(space_id: space_id,
                               content_id: manifest_id,
                               store_id: store_id).tap(&:load_properties)
    end

    # Accessor for the source-file description parsed from the manifest.
    # @return [Duracloud::ContentManifest::Source]
    def source
      @source ||= Source.new(self)
    end

    # The raw manifest document: the body of the downloaded content,
    # memoized after the first download.
    def xml
      @xml ||= content.download.body
    end

    # View of the manifest XML: the <sourceContent> header fields and the
    # list of <chunk> elements.
    class Source
      attr_reader :manifest

      # @param manifest [Duracloud::ContentManifest]
      def initialize(manifest)
        @manifest = manifest
      end

      # @return [Nokogiri::XML::Document] the parsed manifest (memoized)
      def doc
        @doc ||= Nokogiri::XML(manifest.xml)
      end

      # @return [String] text of the <md5> element under <sourceContent>
      def md5
        doc.css("sourceContent md5").text
      end

      # @return [String] the contentId attribute of <sourceContent>
      def content_id
        doc.css("sourceContent").first["contentId"]
      end

      # @return [Integer] the <byteSize> under <sourceContent>, via to_i
      def size
        doc.css("sourceContent byteSize").text.to_i
      end

      # @return [String] text of the <mimetype> element under <sourceContent>
      def content_type
        doc.css("sourceContent mimetype").text
      end

      # Downloads every chunk in document order, handing the given block
      # to each chunk's own #download.
      def download(&block)
        chunks.each do |chunk|
          chunk.download(&block)
        end
      end

      # Lazily looks up the Content for each <chunk> element; the lookup
      # for a chunk happens only when the enumerator reaches it.
      # @return [Enumerator] yielding the result of Content.find per chunk
      def chunks
        Enumerator.new do |e|
          doc.css("chunk").each do |chunk_xml|
            e << Content.find(space_id: manifest.space_id,
                              content_id: chunk_xml["chunkId"],
                              store_id: manifest.store_id,
                              md5: chunk_xml.css("md5").text)
          end
        end
      end
    end

    protected

    # Forwards any method the content object responds to; everything
    # else falls through to the default NoMethodError behaviour.
    def method_missing(name, *args, &block)
      if content.respond_to?(name)
        content.send(name, *args, &block)
      else
        super
      end
    end

    # Keeps respond_to? and #method in step with #method_missing.
    # The check is made against the Content class, not the instance, so
    # probing (e.g. implicit-conversion checks) never triggers loading
    # the content.
    def respond_to_missing?(name, include_private = false)
      Content.public_method_defined?(name) || super
    end

  end
end
@@ -2,35 +2,54 @@ module Duracloud
2
2
  class Manifest
3
3
  include TSV
4
4
 
5
+ TSV_FORMAT = "TSV"
6
+ BAGIT_FORMAT = "BAGIT"
7
+
5
8
  attr_reader :space_id, :store_id
6
9
 
7
10
  def initialize(space_id, store_id = nil)
8
11
  @space_id = space_id
9
12
  @store_id = store_id
10
- @tsv_response = nil
11
- @bagit_response = nil
12
13
  end
13
14
 
14
- def tsv
15
- super || tsv_response.body
15
+ # Returns the manifest in TSV format,
16
+ # downloading from DuraCloud if not pre-loaded.
17
+ # @yield [String] chunk of the manifest, if block given.
18
+ # @return [Duracloud::Response, String, IO] the response,
19
+ # if downloaded, or the pre-loaded TSV.
20
+ # @raise [Duracloud::NotFoundError]
21
+ def tsv(&block)
22
+ tsv_source? ? super : download(TSV_FORMAT, &block)
16
23
  end
17
24
 
18
- def bagit
19
- bagit_response.body
25
+ # Downloads the manifest in BAGIT format.
26
+ # @yield [String] chunk of the manifest, if block given.
27
+ # @return [Duracloud::Response] the response.
28
+ # @raise [Duracloud::NotFoundError]
29
+ def bagit(&block)
30
+ download(BAGIT_FORMAT, &block)
20
31
  end
21
32
 
22
- private
23
-
24
- def tsv_response
25
- @tsv_response ||= get_response("TSV")
33
+ # Downloads the manifest.
34
+ # @yield [String] chunk of the manifest, if block given.
35
+ # @param format [Symbol, String] the format of the manifest.
36
+ # Defaults to "TSV".
37
+ # @return [Duracloud::Response, String] the response, if block
38
+ # given, or the manifest content, if no block.
39
+ # @raise [Duracloud::NotFoundError]
40
+ def download(format = TSV_FORMAT, &block)
41
+ fmt = format.to_s.upcase
42
+ if block_given?
43
+ get_response(fmt, &block)
44
+ else
45
+ get_response(fmt).body
46
+ end
26
47
  end
27
48
 
28
- def bagit_response
29
- @bagit_response ||= get_response("BAGIT")
30
- end
49
+ private
31
50
 
32
- def get_response(format)
33
- Client.get_manifest(space_id, query(format))
51
+ def get_response(format, &block)
52
+ Client.get_manifest(space_id, query(format), &block)
34
53
  end
35
54
 
36
55
  def query(format)
@@ -33,7 +33,7 @@ module Duracloud
33
33
  # @param prop [String] the property name
34
34
  # @return [Boolean]
35
35
  def self.property?(prop)
36
- duracloud_property?(prop) && !internal_property?(prop)
36
+ duracloud_property?(prop)
37
37
  end
38
38
 
39
39
  # Filter the hash of properties, selecting only the properties valid
@@ -16,24 +16,25 @@ module Duracloud
16
16
  set_options(options.dup)
17
17
  end
18
18
 
19
- def execute
20
- response_class.new(original_response)
19
+ def execute(&block)
20
+ response_class.new original_response(&block)
21
21
  end
22
22
 
23
23
  private
24
24
 
25
- def original_response
25
+ def original_response(&block)
26
26
  connection.send(http_method,
27
27
  url,
28
28
  body: body,
29
29
  query: query,
30
- header: headers)
30
+ header: headers,
31
+ &block)
31
32
  end
32
33
 
33
34
  def set_options(options)
34
35
  @body = options.delete(:body)
35
36
  @headers = options.delete(:headers)
36
- query = options.delete(:query) || {}
37
+ query = options.delete(:query) || {}
37
38
  # Treat other keywords args as query params and ignore empty params
38
39
  @query = query.merge(options).reject { |k, v| v.to_s.empty? }
39
40
  end
@@ -33,8 +33,8 @@ module Duracloud
33
33
  durastore(:delete, space_id, **query)
34
34
  end
35
35
 
36
- def get_content(space_id, content_id, **options)
37
- durastore_content(:get, space_id, content_id, **options)
36
+ def get_content(space_id, content_id, **options, &block)
37
+ durastore_content(:get, space_id, content_id, **options, &block)
38
38
  end
39
39
 
40
40
  def get_content_properties(space_id, content_id, **options)
@@ -61,8 +61,8 @@ module Duracloud
61
61
  durastore(:get, "audit/#{space_id}", **query)
62
62
  end
63
63
 
64
- def get_manifest(space_id, **query)
65
- durastore(:get, "manifest/#{space_id}", **query)
64
+ def get_manifest(space_id, **query, &block)
65
+ durastore(:get, "manifest/#{space_id}", **query, &block)
66
66
  end
67
67
 
68
68
  def get_bit_integrity_report(space_id, **query)
@@ -85,14 +85,14 @@ module Duracloud
85
85
 
86
86
  private
87
87
 
88
- def durastore(*args)
89
- execute(DurastoreRequest, *args)
88
+ def durastore(*args, &block)
89
+ execute(DurastoreRequest, *args, &block)
90
90
  end
91
91
 
92
- def durastore_content(http_method, space_id, content_id, **options)
92
+ def durastore_content(http_method, space_id, content_id, **options, &block)
93
93
  escaped_content_id = content_id.gsub(/%/, "%25").gsub(/ /, "%20")
94
94
  url = [ space_id, escaped_content_id ].join("/")
95
- durastore(http_method, url, **options)
95
+ durastore(http_method, url, **options, &block)
96
96
  end
97
97
 
98
98
  end
@@ -5,9 +5,7 @@ module Duracloud
5
5
  #
6
6
  # A "space" within a DuraCloud account.
7
7
  #
8
- class Space
9
- include Persistence
10
- include HasProperties
8
+ class Space < AbstractEntity
11
9
 
12
10
  after_save :reset_acls
13
11
 
@@ -119,7 +117,7 @@ module Duracloud
119
117
  end
120
118
  end
121
119
 
122
- attr_reader :space_id, :store_id
120
+ attr_accessor :space_id, :store_id
123
121
  alias_method :id, :space_id
124
122
 
125
123
  after_save :reset_acls
@@ -128,8 +126,7 @@ module Duracloud
128
126
  # @param space_id [String] the space ID
129
127
  # @param store_id [String] the store ID (optional)
130
128
  def initialize(space_id, store_id = nil)
131
- @space_id = space_id
132
- @store_id = store_id
129
+ super(space_id: space_id, store_id: store_id)
133
130
  yield self if block_given?
134
131
  end
135
132
 
@@ -226,6 +223,11 @@ module Duracloud
226
223
 
227
224
  private
228
225
 
226
+ def do_load_properties
227
+ response = Client.get_space_properties(id, **query)
228
+ self.properties = response.headers
229
+ end
230
+
229
231
  def reset_acls
230
232
  @acls = nil
231
233
  end
@@ -244,10 +246,6 @@ module Duracloud
244
246
  SpaceProperties
245
247
  end
246
248
 
247
- def get_properties_response
248
- Client.get_space_properties(id, **query)
249
- end
250
-
251
249
  def do_delete
252
250
  Client.delete_space(id, **query)
253
251
  end
data/lib/duracloud/tsv.rb CHANGED
@@ -3,6 +3,8 @@ require "csv"
3
3
  module Duracloud
4
4
  module TSV
5
5
 
6
+ CHUNK_SIZE = 1024 * 16
7
+
6
8
  def csv
7
9
  @csv ||= CSV.new(tsv, csv_options)
8
10
  end
@@ -20,16 +22,36 @@ module Duracloud
20
22
  csv.rewind
21
23
  end
22
24
 
23
- def tsv
24
- @tsv
25
+ def tsv(&block)
26
+ return unless tsv_source?
27
+ begin
28
+ tsv_source.rewind
29
+ if block_given?
30
+ while chunk = tsv_source.read(CHUNK_SIZE)
31
+ yield chunk
32
+ end
33
+ else
34
+ tsv_source.read
35
+ end
36
+ ensure
37
+ tsv_source.rewind
38
+ end
25
39
  end
26
40
 
27
41
  def load_tsv(io_or_str)
28
- @tsv = io_or_str
42
+ @tsv_source = io_or_str.is_a?(String) ? StringIO.new(io_or_str, "rb") : io_or_str
43
+ end
44
+
45
+ def tsv_source
46
+ @tsv_source
47
+ end
48
+
49
+ def tsv_source?
50
+ !!@tsv_source
29
51
  end
30
52
 
31
53
  def load_tsv_file(path)
32
- load_tsv(File.new(path, "rb"))
54
+ load_tsv File.new(path, "rb")
33
55
  end
34
56
 
35
57
  def to_s
@@ -42,6 +64,7 @@ module Duracloud
42
64
  { col_sep: "\t",
43
65
  quote_char: "`",
44
66
  headers: true,
67
+ return_headers: false,
45
68
  header_converters: header_converters,
46
69
  }
47
70
  end
@@ -1,3 +1,3 @@
1
1
  module Duracloud
2
- VERSION = "0.3.0"
2
+ VERSION = "0.4.0"
3
3
  end
data/lib/duracloud.rb CHANGED
@@ -2,12 +2,15 @@ require "duracloud/version"
2
2
  require "duracloud/error"
3
3
 
4
4
  module Duracloud
5
+ autoload :AbstractEntity, "duracloud/abstract_entity"
5
6
  autoload :AuditLog, "duracloud/audit_log"
6
7
  autoload :BitIntegrityReport, "duracloud/bit_integrity_report"
8
+ autoload :ChunkedContent, "duracloud/chunked_content"
7
9
  autoload :Client, "duracloud/client"
8
10
  autoload :Configuration, "duracloud/configuration"
9
11
  autoload :Connection, "duracloud/connection"
10
12
  autoload :Content, "duracloud/content"
13
+ autoload :ContentManifest, "duracloud/content_manifest"
11
14
  autoload :ContentProperties, "duracloud/content_properties"
12
15
  autoload :DurastoreRequest, "duracloud/durastore_request"
13
16
  autoload :ErrorHandler, "duracloud/error_handler"
@@ -0,0 +1,32 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <dur:chunksManifest xmlns:dur="duracloud.org">
3
+ <header schemaVersion="0.2">
4
+ <sourceContent contentId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0">
5
+ <mimetype>application/octet-stream</mimetype>
6
+ <byteSize>4227858432</byteSize>
7
+ <md5>164e9aee34c0c42915716e11d5d539b5</md5>
8
+ </sourceContent>
9
+ </header>
10
+ <chunks>
11
+ <chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0000" index="0">
12
+ <byteSize>1000000000</byteSize>
13
+ <md5>8a7d5beb2523fb5e4d7c921096be50a9</md5>
14
+ </chunk>
15
+ <chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0001" index="1">
16
+ <byteSize>1000000000</byteSize>
17
+ <md5>e37115d4da0e187130ab645dee4f14ed</md5>
18
+ </chunk>
19
+ <chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0002" index="2">
20
+ <byteSize>1000000000</byteSize>
21
+ <md5>e37115d4da0e187130ab645dee4f14ed</md5>
22
+ </chunk>
23
+ <chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0003" index="3">
24
+ <byteSize>1000000000</byteSize>
25
+ <md5>93e9a4d242a9fb89796b98060094910d</md5>
26
+ </chunk>
27
+ <chunk chunkId="datastreamStore/8/b/d4/info%3Afedora%2Fduke%3A447146%2Fcontent%2Fcontent.0.dura-chunk-0004" index="4">
28
+ <byteSize>227858432</byteSize>
29
+ <md5>db0124ee56298ff7c7ac17be4ef14871</md5>
30
+ </chunk>
31
+ </chunks>
32
+ </dur:chunksManifest>
@@ -0,0 +1,23 @@
1
# Shared examples for objects exposing the TSV loading API
# (#load_tsv, #load_tsv_file and #tsv).
#
# The including spec must provide:
#   subject - the object under test
#   path    - path of a TSV fixture file
RSpec.shared_examples "a TSV" do

  describe "#load_tsv" do
    it "loads a string" do
      tsv = File.read(path)
      subject.load_tsv(tsv)
      expect(subject.tsv).to eq(tsv)
    end

    it "loads an IO" do
      # Pass an actual IO object, not a String, so that this example
      # covers a different input type than "loads a string".
      File.open(path, "rb") do |io|
        subject.load_tsv(io)
        # The IO is opened in binary mode, so compare against a binary read.
        expect(subject.tsv).to eq(File.binread(path))
      end
    end
  end

  describe "#load_tsv_file" do
    specify do
      subject.load_tsv_file(path)
      expect(subject.tsv).to eq(File.read(path))
    end
  end

end
@@ -1,9 +1,13 @@
1
+ require 'support/shared_examples_for_tsv'
2
+
1
3
  module Duracloud
2
4
  RSpec.describe AuditLog do
3
5
 
6
+ subject { described_class.new("myspace") }
7
+
4
8
  let(:path) { File.expand_path('../../fixtures/audit_log.tsv', __FILE__) }
5
9
 
6
- subject { described_class.new("myspace") }
10
+ it_behaves_like "a TSV"
7
11
 
8
12
  describe "#csv" do
9
13
  before {
@@ -17,19 +21,5 @@ module Duracloud
17
21
  }
18
22
  end
19
23
 
20
- describe "#load_tsv" do
21
- it "loads a string" do
22
- tsv = File.read(path)
23
- subject.load_tsv(tsv)
24
- expect(subject.tsv).to eq(tsv)
25
- end
26
- it "loads an IO" do
27
- tsv = File.read(path)
28
- tsv_io = File.new(path, "rb")
29
- subject.load_tsv(tsv)
30
- expect(subject.tsv.to_s).to eq(tsv)
31
- end
32
- end
33
-
34
24
  end
35
25
  end