duracloud-client 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 60ab1ee918d1618f1f9fe31e0bad7461246a1f9c
4
- data.tar.gz: 4430b99f0f2a1d98845139e26113179610f7ac57
3
+ metadata.gz: 7ea05804282811db4ddae6b49a04e87f30f3eec7
4
+ data.tar.gz: 8ab80ee402b9ff8f17dde4e6aeb445af13af77d0
5
5
  SHA512:
6
- metadata.gz: 62eeb59e57a5898783fc310aa848819779aa33eaf7c29b88296d01ff76210f459a4ad37c2c4c872a7871ab0027a757537e24e78fa2aacf6f45d27427d25e8bf3
7
- data.tar.gz: 24006ad94935c937a5e3a9188b4c42946e9cf485ceb3e643fb4ca14531805e70ff77b341ca4f2ef90c97cac06094dd4727e01ca6e1064ecb8dca546714e45f98
6
+ metadata.gz: 0b67d6ac687c02438b8814b54ba3b2eec6e84ce1db77370aacd76789e8f22298af0370eda6a666dfc392533515db25608d4490d99819fa0ac8afc1203e823030
7
+ data.tar.gz: b832eed79711148ef5a0ab4be82d6f7d29186138ef66c403b3b99b71ca504e7f55975f840ee9fc64451848f6876629a182058ee155e165270eace05feb3da711
data/Gemfile CHANGED
@@ -1,5 +1,4 @@
1
1
  source 'https://rubygems.org'
2
- ruby '2.3.1'
3
2
 
4
3
  # Specify your gem's dependencies in duracloud.gemspec
5
4
  gemspec
data/README.md CHANGED
@@ -121,6 +121,25 @@ foo8
121
121
  => ["ark:/99999/fk4zzzz", "foo", "foo2", "foo22", "foo3", "foo5", "foo7", "foo8"]
122
122
  ```
123
123
 
124
+ #### Sync Validation
125
+
126
+ *New in version 0.6.0*
127
+
128
+ Sync validation is the process of comparing the files in a local content directory with content in a DuraCloud space in order to confirm that all local content has been successfully synced. This functionality is NOT part of the DuraCloud REST API and is provided AS IS (per the license terms).
129
+
130
+ Assumptions:
131
+ - The external program `md5deep` is installed and available on the user's path. See http://md5deep.sourceforge.net/ for `md5deep` documentation and download packages.
132
+ - The content IDs in the target DuraCloud space match the relative paths of files in the source content directory -- i.e., no support for "prefixes" (this may change in the future).
133
+
134
+ Process:
135
+ - The space manifest is downloaded from DuraCloud and converted to the expected input format for `md5deep` (two columns: md5 hash and file path, separated by two spaces).
136
+ - `md5deep` is run against the content directory in "non-matching" mode (-X) with the converted manifest as the list of "known hashes".
137
+ - Non-matching files from the `md5deep` run are re-checked individually by calling `Duracloud::Content.exist?`. This pass will account for content synced after `md5deep` started as well as files that have been chunked in DuraCloud.
138
+
139
+ ```ruby
140
+ Duracloud::SyncValidation.call(space_id: 'foo', content_dir: '/var/foo/bar')
141
+ ```
142
+
124
143
  ### Content
125
144
 
126
145
  #### Create a new content item and store it in DuraCloud
@@ -299,6 +318,12 @@ D, [2016-05-19T15:39:33.538448 #29974] DEBUG -- : Duracloud::Client GET https://
299
318
  => #<CSV::Table mode:col_or_row row_count:8>
300
319
  ```
301
320
 
321
+ ### Command Line Interface
322
+
323
+ *New in version 0.6.0*
324
+
325
+ The `bin/` directory of the gem now includes an executable `duracloud`. Use `-h/--help` to display usage. If the gem was installed with `bundler` you may need to run `bundle exec bin/duracloud`.
326
+
302
327
  ## Versioning
303
328
 
304
329
  We endeavor to follow semantic versioning. In particular, versions < 1.0 may introduce backward-incompatible changes without notice. Use at your own risk. Version 1.0 signals a stable API.
data/bin/duracloud ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'duracloud'
4
+
5
+ Duracloud::Command.call(*ARGV)
@@ -17,6 +17,10 @@ module Duracloud
17
17
  @manifest
18
18
  end
19
19
 
20
+ def chunked?
21
+ true
22
+ end
23
+
20
24
  private
21
25
 
22
26
  def do_load_properties
@@ -29,6 +33,7 @@ module Duracloud
29
33
  end
30
34
  self.properties = manifest.properties.dup
31
35
  self.content_type = manifest.source.content_type
36
+ self.size = manifest.source.size
32
37
  end
33
38
 
34
39
  end
@@ -0,0 +1,128 @@
1
+ require 'optparse'
2
+ require 'active_model'
3
+
4
+ module Duracloud
5
+ class Command
6
+ include ActiveModel::Model
7
+ include Commands
8
+
9
+ COMMANDS = Commands.public_instance_methods.map(&:to_s)
10
+ USAGE = "Usage: duracloud [#{COMMANDS.join('|')}] [options]"
11
+ HELP = "Type 'duracloud --help' for usage."
12
+
13
+ attr_accessor :command, :user, :password, :host, :port,
14
+ :space_id, :store_id, :content_id,
15
+ :content_type, :md5,
16
+ :content_dir, :format,
17
+ :logging
18
+
19
+ def self.error!(reason)
20
+ STDERR.puts reason
21
+ STDERR.puts HELP
22
+ exit(false)
23
+ end
24
+
25
+ def self.call(*args)
26
+ options = {}
27
+
28
+ parser = OptionParser.new do |opts|
29
+ opts.banner = USAGE
30
+
31
+ opts.on("-h", "--help",
32
+ "Prints help") do
33
+ puts opts
34
+ exit
35
+ end
36
+
37
+ opts.on("-H", "--host HOST",
38
+ "DuraCloud host") do |v|
39
+ options[:host] = v
40
+ end
41
+
42
+ opts.on("-P", "--port PORT",
43
+ "DuraCloud port") do |v|
44
+ options[:port] = v
45
+ end
46
+
47
+ opts.on("-u", "--user USER",
48
+ "DuraCloud user") do |v|
49
+ options[:user] = v
50
+ end
51
+
52
+ opts.on("-p", "--password PASSWORD",
53
+ "DuraCloud password") do |v|
54
+ options[:password] = v
55
+ end
56
+
57
+ opts.on("-l", "--[no-]logging",
58
+ "Enable/disable logging to STDERR") do |v|
59
+ options[:logging] = v
60
+ end
61
+
62
+ opts.on("-s", "--space-id SPACE_ID",
63
+ "DuraCloud space ID") do |v|
64
+ options[:space_id] = v
65
+ end
66
+
67
+ opts.on("-i", "--store-id STORE_ID",
68
+ "DuraCloud store ID") do |v|
69
+ options[:store_id] = v
70
+ end
71
+
72
+ opts.on("-c", "--content-id CONTENT_ID",
73
+ "DuraCloud content ID") do |v|
74
+ options[:content_id] = v
75
+ end
76
+
77
+ opts.on("-m", "--md5 MD5",
78
+ "MD5 digest of content to store or retrieve") do |v|
79
+ options[:md5] = v
80
+ end
81
+
82
+ opts.on("-b", "--bagit",
83
+ "Get manifest in BAGIT format (default is TSV)") do
84
+ options[:format] = Manifest::BAGIT_FORMAT
85
+ end
86
+
87
+ opts.on("-d", "--content-dir CONTENT_DIR",
88
+ "Local content directory") do |v|
89
+ options[:content_dir] = v
90
+ end
91
+ end
92
+
93
+ command = args.shift if COMMANDS.include?(args.first)
94
+ parser.parse!(args)
95
+
96
+ new(options).execute(command)
97
+ rescue CommandError, OptionParser::ParseError => e
98
+ error!(e.message)
99
+ end
100
+
101
+ def execute(command)
102
+ unless COMMANDS.include?(command)
103
+ raise CommandError, "Invalid command: #{command}."
104
+ end
105
+ begin
106
+ configure_client
107
+ send(command)
108
+ rescue Error => e
109
+ STDERR.puts e.message
110
+ exit(false)
111
+ end
112
+ end
113
+
114
+ private
115
+
116
+ def configure_client
117
+ Client.configure do |config|
118
+ config.user = user if user
119
+ config.password = password if password
120
+ config.host = host if host
121
+ config.port = port if port
122
+
123
+ config.silence_logging! unless logging
124
+ end
125
+ end
126
+
127
+ end
128
+ end
@@ -0,0 +1,35 @@
1
+ module Duracloud
2
+ module Commands
3
+
4
+ def validate
5
+ SyncValidation.call(space_id: space_id, store_id: store_id, content_dir: content_dir)
6
+ end
7
+
8
+ def manifest
9
+ Manifest.download(space_id, store_id, format: format) do |chunk|
10
+ print chunk
11
+ end
12
+ end
13
+
14
+ def properties
15
+ proplist = content_id ? content_properties : space_properties
16
+ STDOUT.puts proplist
17
+ end
18
+
19
+ private
20
+
21
+ def content_properties
22
+ content = Content.find(space_id: space_id, store_id: store_id, content_id: content_id, md5: md5)
23
+ proplist = content.properties.map { |k, v| "#{k}: #{v}" }
24
+ proplist << "MD5: #{content.md5}"
25
+ proplist << "Size: #{content.size} (#{content.human_size})"
26
+ proplist << "Chunked?: #{content.chunked?}"
27
+ end
28
+
29
+ def space_properties
30
+ space = Space.find(space_id, store_id)
31
+ space.properties.map { |k, v| "#{k}: #{v}" }
32
+ end
33
+
34
+ end
35
+ end
@@ -46,7 +46,7 @@ module Duracloud
46
46
  end
47
47
 
48
48
  attr_accessor :space_id, :content_id, :store_id,
49
- :body, :md5, :content_type
49
+ :body, :md5, :content_type, :size
50
50
  alias_method :id, :content_id
51
51
  validates_presence_of :space_id, :content_id
52
52
 
@@ -100,6 +100,14 @@ module Duracloud
100
100
  copied
101
101
  end
102
102
 
103
+ def chunked?
104
+ false
105
+ end
106
+
107
+ def human_size
108
+ ActiveSupport::NumberHelper.number_to_human_size(size, prefix: :si)
109
+ end
110
+
103
111
  private
104
112
 
105
113
  def store
@@ -159,6 +167,7 @@ module Duracloud
159
167
  end
160
168
  self.properties = response.headers
161
169
  self.content_type = response.content_type
170
+ self.size = response.size
162
171
  end
163
172
 
164
173
  def do_delete
@@ -5,4 +5,5 @@ module Duracloud
5
5
  class BadRequestError < Error; end
6
6
  class ConflictError < Error; end
7
7
  class MessageDigestError < Error; end
8
+ class CommandError < Error; end
8
9
  end
@@ -13,6 +13,14 @@ module Duracloud
13
13
 
14
14
  attr_reader :space_id, :store_id
15
15
 
16
+ def self.download(*args, **kwargs, &block)
17
+ new(*args).download(**kwargs, &block)
18
+ end
19
+
20
+ def self.download_generated(*args, **kwargs, &block)
21
+ new(*args).download_generated(**kwargs, &block)
22
+ end
23
+
16
24
  def initialize(space_id, store_id = nil)
17
25
  @space_id = space_id
18
26
  @store_id = store_id
@@ -8,7 +8,7 @@ module Duracloud
8
8
 
9
9
  delegate [:header, :body, :code, :ok?, :redirect?, :status, :reason] => :original_response,
10
10
  :content_type => :header,
11
- [:size, :empty?] => :body
11
+ :empty? => :body
12
12
 
13
13
  def_delegator :header, :request_uri, :url
14
14
  def_delegator :header, :request_method
@@ -39,5 +39,9 @@ module Duracloud
39
39
  def md5
40
40
  header["content-md5"].first
41
41
  end
42
+
43
+ def size
44
+ header["content-length"].first.to_i rescue nil
45
+ end
42
46
  end
43
47
  end
@@ -0,0 +1,66 @@
1
+ require 'active_model'
2
+ require 'tempfile'
3
+ require 'csv'
4
+
5
+ module Duracloud
6
+ class SyncValidation
7
+ include ActiveModel::Model
8
+
9
+ TWO_SPACES = ' '
10
+ MD5_CSV_OPTS = { col_sep: TWO_SPACES }.freeze
11
+ MANIFEST_CSV_OPTS = { col_sep: "\t", headers: true, return_headers: false }.freeze
12
+
13
+ attr_accessor :space_id, :content_dir, :store_id
14
+
15
+ def self.call(*args)
16
+ new(*args).call
17
+ end
18
+
19
+ def call
20
+ Tempfile.open("#{space_id}-manifest") do |manifest|
21
+ Manifest.download(space_id, store_id) do |chunk|
22
+ manifest.write(chunk)
23
+ end
24
+ manifest.close
25
+
26
+ # convert manifest into md5deep format
27
+ Tempfile.open("#{space_id}-md5") do |md5_list|
28
+ CSV.foreach(manifest.path, MANIFEST_CSV_OPTS) do |row|
29
+ md5_list.puts [ row[2], row[1] ].join(TWO_SPACES)
30
+ end
31
+ md5_list.close
32
+
33
+ # run md5deep to find files not listed in the manifest
34
+ Tempfile.open("#{space_id}-audit") do |audit|
35
+ audit.close
36
+ pid = spawn("md5deep", "-X", md5_list.path, "-l", "-r", ".", chdir: content_dir, out: audit.path)
37
+ Process.wait(pid)
38
+ case $?.exitstatus
39
+ when 0
40
+ true
41
+ when 1, 2
42
+ failures = []
43
+ CSV.foreach(audit.path, MD5_CSV_OPTS) do |md5, path|
44
+ content_id = path.sub(/^\.\//, "")
45
+ begin
46
+ if !Duracloud::Content.exist?(space_id: space_id, store_id: store_id, content_id: content_id, md5: md5)
47
+ failures << [ "MISSING", md5, content_id ].join("\t")
48
+ end
49
+ rescue MessageDigestError => e
50
+ failures << [ "CHANGED", md5, content_id ].join("\t")
51
+ end
52
+ end
53
+ STDOUT.puts failures
54
+ failures.empty?
55
+ when 64
56
+ raise Error, "md5deep user error."
57
+ when 128
58
+ raise Error, "md5deep internal error."
59
+ end
60
+ end
61
+ end
62
+ end
63
+ end
64
+
65
+ end
66
+ end
@@ -1,3 +1,3 @@
1
1
  module Duracloud
2
- VERSION = "0.5.0"
2
+ VERSION = "0.6.0"
3
3
  end
data/lib/duracloud.rb CHANGED
@@ -7,6 +7,8 @@ module Duracloud
7
7
  autoload :BitIntegrityReport, "duracloud/bit_integrity_report"
8
8
  autoload :ChunkedContent, "duracloud/chunked_content"
9
9
  autoload :Client, "duracloud/client"
10
+ autoload :Command, "duracloud/command"
11
+ autoload :Commands, "duracloud/commands"
10
12
  autoload :Configuration, "duracloud/configuration"
11
13
  autoload :Connection, "duracloud/connection"
12
14
  autoload :Content, "duracloud/content"
@@ -25,5 +27,6 @@ module Duracloud
25
27
  autoload :SpaceAcls, "duracloud/space_acls"
26
28
  autoload :SpaceProperties, "duracloud/space_properties"
27
29
  autoload :Store, "duracloud/store"
30
+ autoload :SyncValidation, "duracloud/sync_validation"
28
31
  autoload :TSV, "duracloud/tsv"
29
32
  end
@@ -10,6 +10,7 @@ module Duracloud
10
10
  describe "and it is not chunked" do
11
11
  before { stub_request(:head, url) }
12
12
  it { is_expected.to be_a described_class }
13
+ it { is_expected.to_not be_chunked }
13
14
  end
14
15
  describe "and it is chunked" do
15
16
  let(:manifest_xml) { File.read(File.expand_path("../../fixtures/content_manifest.xml", __FILE__)) }
@@ -20,7 +21,9 @@ module Duracloud
20
21
  end
21
22
  it { is_expected.to be_a described_class }
22
23
  its(:md5) { is_expected.to eq "164e9aee34c0c42915716e11d5d539b5" }
24
+ its(:size) { is_expected.to eq 4227858432 }
23
25
  its(:content_type) { is_expected.to eq "application/octet-stream" }
26
+ it { is_expected.to be_chunked }
24
27
  end
25
28
  end
26
29
  describe "when it does not exist" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: duracloud-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Chandek-Stark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-07 00:00:00.000000000 Z
11
+ date: 2017-06-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hashie
@@ -145,7 +145,8 @@ dependencies:
145
145
  description: Ruby client for communicating with DuraCloud
146
146
  email:
147
147
  - dchandekstark@gmail.com
148
- executables: []
148
+ executables:
149
+ - duracloud
149
150
  extensions: []
150
151
  extra_rdoc_files: []
151
152
  files:
@@ -156,6 +157,7 @@ files:
156
157
  - LICENSE
157
158
  - README.md
158
159
  - Rakefile
160
+ - bin/duracloud
159
161
  - duracloud.gemspec
160
162
  - gemfiles/Gemfile.activemodel-4.2
161
163
  - gemfiles/Gemfile.activemodel-5.0
@@ -166,6 +168,8 @@ files:
166
168
  - lib/duracloud/bit_integrity_report.rb
167
169
  - lib/duracloud/chunked_content.rb
168
170
  - lib/duracloud/client.rb
171
+ - lib/duracloud/command.rb
172
+ - lib/duracloud/commands.rb
169
173
  - lib/duracloud/configuration.rb
170
174
  - lib/duracloud/connection.rb
171
175
  - lib/duracloud/content.rb
@@ -183,6 +187,7 @@ files:
183
187
  - lib/duracloud/space_acls.rb
184
188
  - lib/duracloud/space_properties.rb
185
189
  - lib/duracloud/store.rb
190
+ - lib/duracloud/sync_validation.rb
186
191
  - lib/duracloud/tsv.rb
187
192
  - lib/duracloud/version.rb
188
193
  - spec/fixtures/audit_log.tsv