duracloud-client 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +0 -1
- data/README.md +25 -0
- data/bin/duracloud +5 -0
- data/lib/duracloud/chunked_content.rb +5 -0
- data/lib/duracloud/command.rb +128 -0
- data/lib/duracloud/commands.rb +35 -0
- data/lib/duracloud/content.rb +10 -1
- data/lib/duracloud/error.rb +1 -0
- data/lib/duracloud/manifest.rb +8 -0
- data/lib/duracloud/response.rb +5 -1
- data/lib/duracloud/sync_validation.rb +66 -0
- data/lib/duracloud/version.rb +1 -1
- data/lib/duracloud.rb +3 -0
- data/spec/unit/content_spec.rb +3 -0
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7ea05804282811db4ddae6b49a04e87f30f3eec7
|
4
|
+
data.tar.gz: 8ab80ee402b9ff8f17dde4e6aeb445af13af77d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b67d6ac687c02438b8814b54ba3b2eec6e84ce1db77370aacd76789e8f22298af0370eda6a666dfc392533515db25608d4490d99819fa0ac8afc1203e823030
|
7
|
+
data.tar.gz: b832eed79711148ef5a0ab4be82d6f7d29186138ef66c403b3b99b71ca504e7f55975f840ee9fc64451848f6876629a182058ee155e165270eace05feb3da711
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -121,6 +121,25 @@ foo8
|
|
121
121
|
=> ["ark:/99999/fk4zzzz", "foo", "foo2", "foo22", "foo3", "foo5", "foo7", "foo8"]
|
122
122
|
```
|
123
123
|
|
124
|
+
#### Sync Validation
|
125
|
+
|
126
|
+
*New in version 0.6.0*
|
127
|
+
|
128
|
+
Sync validation is the process of comparing the files in a local content directory with content in a DuraCloud space in order to confirm that all local content has been successfully synced. This functionality is NOT part of the DuraCloud REST API and is provided AS IS (per the license terms).
|
129
|
+
|
130
|
+
Assumptions:
|
131
|
+
- The external program `md5deep` is installed and available on the user's path. See http://md5deep.sourceforge.net/ for `md5deep` documentation and download packages.
|
132
|
+
- The content IDs in the target DuraCloud space match the relative paths of files in the source content directory -- i.e., no support for "prefixes" (this may change in the future).
|
133
|
+
|
134
|
+
Process:
|
135
|
+
- The space manifest is downloaded from DuraCloud and converted to the expected input format for `md5deep` (two columns: md5 hash and file path, separated by two spaces).
|
136
|
+
- `md5deep` is run against the content directory in "non-matching" mode (-X) with the converted manifest as the list of "known hashes".
|
137
|
+
- Non-matching files from the `md5deep` run are re-checked individually by calling `Duracloud::Content.exist?`. This pass will account for content synced after `md5deep` started as well as files that have been chunked in DuraCloud.
|
138
|
+
|
139
|
+
```ruby
|
140
|
+
Duracloud::SyncValidation.call(space_id: 'foo', content_dir: '/var/foo/bar')
|
141
|
+
```
|
142
|
+
|
124
143
|
### Content
|
125
144
|
|
126
145
|
#### Create a new content item and store it in DuraCloud
|
@@ -299,6 +318,12 @@ D, [2016-05-19T15:39:33.538448 #29974] DEBUG -- : Duracloud::Client GET https://
|
|
299
318
|
=> #<CSV::Table mode:col_or_row row_count:8>
|
300
319
|
```
|
301
320
|
|
321
|
+
### Command Line Interface
|
322
|
+
|
323
|
+
*New in version 0.6.0*
|
324
|
+
|
325
|
+
The `bin/` directory of the gem now includes an executable `duracloud`. Use `-h/--help` to display usage. If the gem was installed with `bundler` you may need to run `bundle exec bin/duracloud`.
|
326
|
+
|
302
327
|
## Versioning
|
303
328
|
|
304
329
|
We endeavor to follow semantic versioning. In particular, versions < 1.0 may introduce backward-incompatible changes without notice. Use at your own risk. Version 1.0 signals a stable API.
|
data/bin/duracloud
ADDED
@@ -17,6 +17,10 @@ module Duracloud
|
|
17
17
|
@manifest
|
18
18
|
end
|
19
19
|
|
20
|
+
def chunked?
|
21
|
+
true
|
22
|
+
end
|
23
|
+
|
20
24
|
private
|
21
25
|
|
22
26
|
def do_load_properties
|
@@ -29,6 +33,7 @@ module Duracloud
|
|
29
33
|
end
|
30
34
|
self.properties = manifest.properties.dup
|
31
35
|
self.content_type = manifest.source.content_type
|
36
|
+
self.size = manifest.source.size
|
32
37
|
end
|
33
38
|
|
34
39
|
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'active_model'
|
3
|
+
|
4
|
+
module Duracloud
|
5
|
+
class Command
|
6
|
+
include ActiveModel::Model
|
7
|
+
include Commands
|
8
|
+
|
9
|
+
COMMANDS = Commands.public_instance_methods.map(&:to_s)
|
10
|
+
USAGE = "Usage: duracloud [#{COMMANDS.join('|')}] [options]"
|
11
|
+
HELP = "Type 'duracloud --help' for usage."
|
12
|
+
|
13
|
+
attr_accessor :command, :user, :password, :host, :port,
|
14
|
+
:space_id, :store_id, :content_id,
|
15
|
+
:content_type, :md5,
|
16
|
+
:content_dir, :format,
|
17
|
+
:logging
|
18
|
+
|
19
|
+
def self.error!(reason)
|
20
|
+
STDERR.puts reason
|
21
|
+
STDERR.puts HELP
|
22
|
+
exit(false)
|
23
|
+
end
|
24
|
+
|
25
|
+
def self.call(*args)
|
26
|
+
options = {}
|
27
|
+
|
28
|
+
parser = OptionParser.new do |opts|
|
29
|
+
opts.banner = USAGE
|
30
|
+
|
31
|
+
opts.on("-h", "--help",
|
32
|
+
"Prints help") do
|
33
|
+
puts opts
|
34
|
+
exit
|
35
|
+
end
|
36
|
+
|
37
|
+
opts.on("-H", "--host HOST",
|
38
|
+
"DuraCloud host") do |v|
|
39
|
+
options[:host] = v
|
40
|
+
end
|
41
|
+
|
42
|
+
opts.on("-P", "--port PORT",
|
43
|
+
"DuraCloud port") do |v|
|
44
|
+
options[:port] = v
|
45
|
+
end
|
46
|
+
|
47
|
+
opts.on("-u", "--user USER",
|
48
|
+
"DuraCloud user") do |v|
|
49
|
+
options[:user] = v
|
50
|
+
end
|
51
|
+
|
52
|
+
opts.on("-p", "--password PASSWORD",
|
53
|
+
"DuraCloud password") do |v|
|
54
|
+
options[:password] = v
|
55
|
+
end
|
56
|
+
|
57
|
+
opts.on("-l", "--[no-]logging",
|
58
|
+
"Enable/disable logging to STDERR") do |v|
|
59
|
+
options[:logging] = v
|
60
|
+
end
|
61
|
+
|
62
|
+
opts.on("-s", "--space-id SPACE_ID",
|
63
|
+
"DuraCloud space ID") do |v|
|
64
|
+
options[:space_id] = v
|
65
|
+
end
|
66
|
+
|
67
|
+
opts.on("-i", "--store-id STORE_ID",
|
68
|
+
"DuraCloud store ID") do |v|
|
69
|
+
options[:store_id] = v
|
70
|
+
end
|
71
|
+
|
72
|
+
opts.on("-c", "--content-id CONTENT_ID",
|
73
|
+
"DuraCloud content ID") do |v|
|
74
|
+
options[:content_id] = v
|
75
|
+
end
|
76
|
+
|
77
|
+
opts.on("-m", "--md5 MD5",
|
78
|
+
"MD5 digest of content to store or retrieve") do |v|
|
79
|
+
options[:md5] = v
|
80
|
+
end
|
81
|
+
|
82
|
+
opts.on("-b", "--bagit",
|
83
|
+
"Get manifest in BAGIT format (default is TSV)") do
|
84
|
+
options[:format] = Manifest::BAGIT_FORMAT
|
85
|
+
end
|
86
|
+
|
87
|
+
opts.on("-d", "--content-dir CONTENT_DIR",
|
88
|
+
"Local content directory") do |v|
|
89
|
+
options[:content_dir] = v
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
command = args.shift if COMMANDS.include?(args.first)
|
94
|
+
parser.parse!(args)
|
95
|
+
|
96
|
+
new(options).execute(command)
|
97
|
+
rescue CommandError, OptionParser::ParseError => e
|
98
|
+
error!(e.message)
|
99
|
+
end
|
100
|
+
|
101
|
+
def execute(command)
|
102
|
+
unless COMMANDS.include?(command)
|
103
|
+
raise CommandError, "Invalid command: #{command}."
|
104
|
+
end
|
105
|
+
begin
|
106
|
+
configure_client
|
107
|
+
send(command)
|
108
|
+
rescue Error => e
|
109
|
+
STDERR.puts e.message
|
110
|
+
exit(false)
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
private
|
115
|
+
|
116
|
+
def configure_client
|
117
|
+
Client.configure do |config|
|
118
|
+
config.user = user if user
|
119
|
+
config.password = password if password
|
120
|
+
config.host = host if host
|
121
|
+
config.port = port if port
|
122
|
+
|
123
|
+
config.silence_logging! unless logging
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
128
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Duracloud
|
2
|
+
module Commands
|
3
|
+
|
4
|
+
def validate
|
5
|
+
SyncValidation.call(space_id: space_id, store_id: store_id, content_dir: content_dir)
|
6
|
+
end
|
7
|
+
|
8
|
+
def manifest
|
9
|
+
Manifest.download(space_id, store_id, format: format) do |chunk|
|
10
|
+
print chunk
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def properties
|
15
|
+
proplist = content_id ? content_properties : space_properties
|
16
|
+
STDOUT.puts proplist
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
def content_properties
|
22
|
+
content = Content.find(space_id: space_id, store_id: store_id, content_id: content_id, md5: md5)
|
23
|
+
proplist = content.properties.map { |k, v| "#{k}: #{v}" }
|
24
|
+
proplist << "MD5: #{content.md5}"
|
25
|
+
proplist << "Size: #{content.size} (#{content.human_size})"
|
26
|
+
proplist << "Chunked?: #{content.chunked?}"
|
27
|
+
end
|
28
|
+
|
29
|
+
def space_properties
|
30
|
+
space = Space.find(space_id, store_id)
|
31
|
+
space.properties.map { |k, v| "#{k}: #{v}" }
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
data/lib/duracloud/content.rb
CHANGED
@@ -46,7 +46,7 @@ module Duracloud
|
|
46
46
|
end
|
47
47
|
|
48
48
|
attr_accessor :space_id, :content_id, :store_id,
|
49
|
-
:body, :md5, :content_type
|
49
|
+
:body, :md5, :content_type, :size
|
50
50
|
alias_method :id, :content_id
|
51
51
|
validates_presence_of :space_id, :content_id
|
52
52
|
|
@@ -100,6 +100,14 @@ module Duracloud
|
|
100
100
|
copied
|
101
101
|
end
|
102
102
|
|
103
|
+
def chunked?
|
104
|
+
false
|
105
|
+
end
|
106
|
+
|
107
|
+
def human_size
|
108
|
+
ActiveSupport::NumberHelper.number_to_human_size(size, prefix: :si)
|
109
|
+
end
|
110
|
+
|
103
111
|
private
|
104
112
|
|
105
113
|
def store
|
@@ -159,6 +167,7 @@ module Duracloud
|
|
159
167
|
end
|
160
168
|
self.properties = response.headers
|
161
169
|
self.content_type = response.content_type
|
170
|
+
self.size = response.size
|
162
171
|
end
|
163
172
|
|
164
173
|
def do_delete
|
data/lib/duracloud/error.rb
CHANGED
data/lib/duracloud/manifest.rb
CHANGED
@@ -13,6 +13,14 @@ module Duracloud
|
|
13
13
|
|
14
14
|
attr_reader :space_id, :store_id
|
15
15
|
|
16
|
+
def self.download(*args, **kwargs, &block)
|
17
|
+
new(*args).download(**kwargs, &block)
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.download_generated(*args, **kwargs, &block)
|
21
|
+
new(*args).download_generated(**kwargs, &block)
|
22
|
+
end
|
23
|
+
|
16
24
|
def initialize(space_id, store_id = nil)
|
17
25
|
@space_id = space_id
|
18
26
|
@store_id = store_id
|
data/lib/duracloud/response.rb
CHANGED
@@ -8,7 +8,7 @@ module Duracloud
|
|
8
8
|
|
9
9
|
delegate [:header, :body, :code, :ok?, :redirect?, :status, :reason] => :original_response,
|
10
10
|
:content_type => :header,
|
11
|
-
|
11
|
+
:empty? => :body
|
12
12
|
|
13
13
|
def_delegator :header, :request_uri, :url
|
14
14
|
def_delegator :header, :request_method
|
@@ -39,5 +39,9 @@ module Duracloud
|
|
39
39
|
def md5
|
40
40
|
header["content-md5"].first
|
41
41
|
end
|
42
|
+
|
43
|
+
def size
|
44
|
+
header["content-length"].first.to_i rescue nil
|
45
|
+
end
|
42
46
|
end
|
43
47
|
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'active_model'
|
2
|
+
require 'tempfile'
|
3
|
+
require 'csv'
|
4
|
+
|
5
|
+
module Duracloud
|
6
|
+
class SyncValidation
|
7
|
+
include ActiveModel::Model
|
8
|
+
|
9
|
+
TWO_SPACES = ' '
|
10
|
+
MD5_CSV_OPTS = { col_sep: TWO_SPACES }.freeze
|
11
|
+
MANIFEST_CSV_OPTS = { col_sep: "\t", headers: true, return_headers: false }.freeze
|
12
|
+
|
13
|
+
attr_accessor :space_id, :content_dir, :store_id
|
14
|
+
|
15
|
+
def self.call(*args)
|
16
|
+
new(*args).call
|
17
|
+
end
|
18
|
+
|
19
|
+
def call
|
20
|
+
Tempfile.open("#{space_id}-manifest") do |manifest|
|
21
|
+
Manifest.download(space_id, store_id) do |chunk|
|
22
|
+
manifest.write(chunk)
|
23
|
+
end
|
24
|
+
manifest.close
|
25
|
+
|
26
|
+
# convert manifest into md5deep format
|
27
|
+
Tempfile.open("#{space_id}-md5") do |md5_list|
|
28
|
+
CSV.foreach(manifest.path, MANIFEST_CSV_OPTS) do |row|
|
29
|
+
md5_list.puts [ row[2], row[1] ].join(TWO_SPACES)
|
30
|
+
end
|
31
|
+
md5_list.close
|
32
|
+
|
33
|
+
# run md5deep to find files not listed in the manifest
|
34
|
+
Tempfile.open("#{space_id}-audit") do |audit|
|
35
|
+
audit.close
|
36
|
+
pid = spawn("md5deep", "-X", md5_list.path, "-l", "-r", ".", chdir: content_dir, out: audit.path)
|
37
|
+
Process.wait(pid)
|
38
|
+
case $?.exitstatus
|
39
|
+
when 0
|
40
|
+
true
|
41
|
+
when 1, 2
|
42
|
+
failures = []
|
43
|
+
CSV.foreach(audit.path, MD5_CSV_OPTS) do |md5, path|
|
44
|
+
content_id = path.sub(/^\.\//, "")
|
45
|
+
begin
|
46
|
+
if !Duracloud::Content.exist?(space_id: space_id, store_id: store_id, content_id: content_id, md5: md5)
|
47
|
+
failures << [ "MISSING", md5, content_id ].join("\t")
|
48
|
+
end
|
49
|
+
rescue MessageDigestError => e
|
50
|
+
failures << [ "CHANGED", md5, content_id ].join("\t")
|
51
|
+
end
|
52
|
+
end
|
53
|
+
STDOUT.puts failures
|
54
|
+
failures.empty?
|
55
|
+
when 64
|
56
|
+
raise Error, "md5deep user error."
|
57
|
+
when 128
|
58
|
+
raise Error, "md5deep internal error."
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
end
|
data/lib/duracloud/version.rb
CHANGED
data/lib/duracloud.rb
CHANGED
@@ -7,6 +7,8 @@ module Duracloud
|
|
7
7
|
autoload :BitIntegrityReport, "duracloud/bit_integrity_report"
|
8
8
|
autoload :ChunkedContent, "duracloud/chunked_content"
|
9
9
|
autoload :Client, "duracloud/client"
|
10
|
+
autoload :Command, "duracloud/command"
|
11
|
+
autoload :Commands, "duracloud/commands"
|
10
12
|
autoload :Configuration, "duracloud/configuration"
|
11
13
|
autoload :Connection, "duracloud/connection"
|
12
14
|
autoload :Content, "duracloud/content"
|
@@ -25,5 +27,6 @@ module Duracloud
|
|
25
27
|
autoload :SpaceAcls, "duracloud/space_acls"
|
26
28
|
autoload :SpaceProperties, "duracloud/space_properties"
|
27
29
|
autoload :Store, "duracloud/store"
|
30
|
+
autoload :SyncValidation, "duracloud/sync_validation"
|
28
31
|
autoload :TSV, "duracloud/tsv"
|
29
32
|
end
|
data/spec/unit/content_spec.rb
CHANGED
@@ -10,6 +10,7 @@ module Duracloud
|
|
10
10
|
describe "and it is not chunked" do
|
11
11
|
before { stub_request(:head, url) }
|
12
12
|
it { is_expected.to be_a described_class }
|
13
|
+
it { is_expected.to_not be_chunked }
|
13
14
|
end
|
14
15
|
describe "and it is chunked" do
|
15
16
|
let(:manifest_xml) { File.read(File.expand_path("../../fixtures/content_manifest.xml", __FILE__)) }
|
@@ -20,7 +21,9 @@ module Duracloud
|
|
20
21
|
end
|
21
22
|
it { is_expected.to be_a described_class }
|
22
23
|
its(:md5) { is_expected.to eq "164e9aee34c0c42915716e11d5d539b5" }
|
24
|
+
its(:size) { is_expected.to eq 4227858432 }
|
23
25
|
its(:content_type) { is_expected.to eq "application/octet-stream" }
|
26
|
+
it { is_expected.to be_chunked }
|
24
27
|
end
|
25
28
|
end
|
26
29
|
describe "when it does not exist" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: duracloud-client
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Chandek-Stark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hashie
|
@@ -145,7 +145,8 @@ dependencies:
|
|
145
145
|
description: Ruby client for communicating with DuraCloud
|
146
146
|
email:
|
147
147
|
- dchandekstark@gmail.com
|
148
|
-
executables:
|
148
|
+
executables:
|
149
|
+
- duracloud
|
149
150
|
extensions: []
|
150
151
|
extra_rdoc_files: []
|
151
152
|
files:
|
@@ -156,6 +157,7 @@ files:
|
|
156
157
|
- LICENSE
|
157
158
|
- README.md
|
158
159
|
- Rakefile
|
160
|
+
- bin/duracloud
|
159
161
|
- duracloud.gemspec
|
160
162
|
- gemfiles/Gemfile.activemodel-4.2
|
161
163
|
- gemfiles/Gemfile.activemodel-5.0
|
@@ -166,6 +168,8 @@ files:
|
|
166
168
|
- lib/duracloud/bit_integrity_report.rb
|
167
169
|
- lib/duracloud/chunked_content.rb
|
168
170
|
- lib/duracloud/client.rb
|
171
|
+
- lib/duracloud/command.rb
|
172
|
+
- lib/duracloud/commands.rb
|
169
173
|
- lib/duracloud/configuration.rb
|
170
174
|
- lib/duracloud/connection.rb
|
171
175
|
- lib/duracloud/content.rb
|
@@ -183,6 +187,7 @@ files:
|
|
183
187
|
- lib/duracloud/space_acls.rb
|
184
188
|
- lib/duracloud/space_properties.rb
|
185
189
|
- lib/duracloud/store.rb
|
190
|
+
- lib/duracloud/sync_validation.rb
|
186
191
|
- lib/duracloud/tsv.rb
|
187
192
|
- lib/duracloud/version.rb
|
188
193
|
- spec/fixtures/audit_log.tsv
|