duracloud-client 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +14 -1
- data/lib/duracloud.rb +5 -1
- data/lib/duracloud/cli.rb +29 -107
- data/lib/duracloud/command_options.rb +120 -0
- data/lib/duracloud/commands.rb +40 -0
- data/lib/duracloud/commands/command.rb +6 -2
- data/lib/duracloud/commands/count.rb +15 -0
- data/lib/duracloud/commands/download_manifest.rb +0 -2
- data/lib/duracloud/commands/find.rb +16 -0
- data/lib/duracloud/commands/find_item.rb +16 -0
- data/lib/duracloud/commands/find_items.rb +22 -0
- data/lib/duracloud/commands/find_missing_items.rb +15 -0
- data/lib/duracloud/commands/find_space.rb +12 -0
- data/lib/duracloud/commands/get_storage_report.rb +16 -0
- data/lib/duracloud/commands/get_storage_report_for_all_spaces.rb +12 -0
- data/lib/duracloud/commands/get_storage_report_for_space.rb +10 -0
- data/lib/duracloud/commands/get_storage_report_for_store.rb +10 -0
- data/lib/duracloud/commands/list_content_ids.rb +11 -0
- data/lib/duracloud/commands/list_items.rb +17 -0
- data/lib/duracloud/commands/sync.rb +0 -2
- data/lib/duracloud/commands/validate.rb +2 -3
- data/lib/duracloud/content.rb +12 -3
- data/lib/duracloud/fast_sync_validation.rb +42 -0
- data/lib/duracloud/rest_methods.rb +15 -0
- data/lib/duracloud/storage_report.rb +33 -0
- data/lib/duracloud/storage_reports.rb +52 -0
- data/lib/duracloud/sync_validation.rb +122 -39
- data/lib/duracloud/version.rb +1 -1
- data/spec/unit/cli_spec.rb +59 -15
- data/spec/unit/client_spec.rb +24 -0
- data/spec/unit/content_spec.rb +17 -7
- data/spec/unit/storage_report_spec.rb +15 -0
- data/spec/unit/storage_reports_spec.rb +45 -0
- metadata +23 -3
- data/lib/duracloud/commands/get_properties.rb +0 -27
@@ -0,0 +1,16 @@
|
|
1
|
+
module Duracloud::Commands
  # Command that looks up a single content item and prints its properties,
  # one "name: value" line per property, to standard output.
  class FindItem < Command

    # Finds the content item identified by the command's space, store,
    # content ID and MD5 options, then prints its stored properties followed
    # by its MD5, size and chunked flag.
    #
    # @return [Hash] the printed properties
    def call
      item = Duracloud::Content.find(space_id: space_id, store_id: store_id, content_id: content_id, md5: md5)
      stored = item.properties
      extras = { "MD5" => item.md5, "Size" => item.size, "Chunked" => item.chunked? }
      # `merge` builds a new hash, so the item's own properties are untouched.
      stored.merge(extras).each { |name, value| puts "#{name}: #{value}" }
    end

  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module Duracloud::Commands
  # Command that reads content IDs (and optional MD5 values) from a CSV file
  # and writes a CSV report of the matching content items to standard output.
  class FindItems < Command

    # Content attributes written for each item, in column order.
    HEADERS = %i( content_id md5 size content_type modified )

    # Reads `infile` row by row (column 0: content ID, column 1: MD5) and
    # writes one CSV line per item found, preceded by a header line.
    # Lookup failures are reported on standard error and do not stop the run.
    def call
      CSV.instance($stdout, headers: HEADERS, write_headers: true) do |out|
        CSV.foreach(infile, headers: false) { |row| report(out, row) }
      end
    end

    private

    # Looks up the item named by one input row and appends it to the report.
    def report(out, row)
      item = Duracloud::Content.find(space_id: space_id, store_id: store_id, content_id: row[0], md5: row[1])
      out << HEADERS.map { |field| item.send(field) }
    rescue Duracloud::NotFoundError, Duracloud::MessageDigestError => e
      $stderr.puts "ERROR: Content ID #{row[0]} -- #{e.message}"
    end

  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# CSV is used below; require it here like the sibling CSV-based commands do,
# rather than relying on another file having loaded it first.
require "csv"

module Duracloud::Commands
  # Command that reads content IDs (and optional MD5 values) from a CSV file
  # and echoes to standard output the rows whose content does not exist.
  class FindMissingItems < Command

    # Reads `infile` row by row (column 0: content ID, column 1: MD5) and
    # writes each row for which no matching content exists.
    #
    # A message digest mismatch is reported on standard error, in the same
    # format the find-items command uses, instead of aborting the whole run.
    def call
      CSV.instance($stdout, headers: false) do |csv|
        CSV.foreach(infile, headers: false) do |row|
          begin
            unless Duracloud::Content.exist?(space_id: space_id, store_id: store_id, content_id: row[0], md5: row[1])
              csv << row
            end
          rescue Duracloud::MessageDigestError => e
            $stderr.puts "ERROR: Content ID #{row[0]} -- #{e.message}"
          end
        end
      end
    end

  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Duracloud::Commands
  # Command that dispatches to the storage report command matching the
  # options given on the command line.
  class GetStorageReport < Command

    # Picks the per-space command when a space ID is given, the all-spaces
    # command when `all_spaces` is set, and the per-store command otherwise,
    # then runs it with this command's CLI object.
    def call
      target =
        if space_id then GetStorageReportForSpace
        elsif all_spaces then GetStorageReportForAllSpaces
        else GetStorageReportForStore
        end
      target.call(cli)
    end

  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require "csv"
|
2
|
+
|
3
|
+
module Duracloud::Commands
  # Command that writes a CSV listing of the items in a space to standard
  # output.
  class ListItems < Command

    # Item attributes written for each row, in column order.
    HEADERS = %i( content_id md5 size content_type modified )

    # Writes a header line followed by one CSV line per item returned by
    # `Duracloud::Space.items` for the command's space, store and prefix.
    def call
      CSV.instance($stdout, headers: HEADERS, write_headers: true) do |out|
        listing = Duracloud::Space.items(space_id, store_id: store_id, prefix: prefix)
        listing.each { |entry| out << row_for(entry) }
      end
    end

    private

    # Collects the HEADERS attributes of one item into a CSV row.
    def row_for(entry)
      HEADERS.map { |field| entry.send(field) }
    end

  end
end
|
@@ -1,10 +1,9 @@
|
|
1
|
-
require_relative "command"
|
2
|
-
|
3
1
|
module Duracloud::Commands
|
4
2
|
class Validate < Command
|
5
3
|
|
6
4
|
# Runs sync validation for the command's space against the local content
# directory, using the fast (file-name only) validator when `fast` is set
# and the full validator otherwise.
#
# @return whatever the selected validator's `call` returns
def call
  # The full validator lives in the Duracloud namespace; the bare constant
  # `DuracloudSyncValidation` does not exist and raised NameError.
  klass = fast ? Duracloud::FastSyncValidation : Duracloud::SyncValidation
  klass.call(space_id: space_id, store_id: store_id, content_dir: content_dir, work_dir: work_dir)
end
|
9
8
|
|
10
9
|
end
|
data/lib/duracloud/content.rb
CHANGED
@@ -79,21 +79,30 @@ module Duracloud
|
|
79
79
|
|
80
80
|
# Copies this content to another location. Destination keys that are not
# given (`:space_id`, `:store_id`, `:content_id`) default to this content's
# own values. The current instance still represents the original content.
#
# @param args [Hash] destination keys, plus `:force` to allow copying over
#   existing destination content
# @return [Duracloud::Content] the copied content
# @raise [Duracloud::Error] if the destination equals the source, if the
#   destination exists and `:force` is not set, or if the MD5 reported for
#   the copy differs from this content's MD5
def copy(**args)
  overwrite = args[:force]
  dest = args.except(:force)
  dest[:space_id] ||= space_id
  dest[:store_id] ||= store_id
  dest[:content_id] ||= content_id

  raise CopyError, "Destination is the same as the source." if dest == copy_source
  if !overwrite && Content.exist?(**dest)
    raise CopyError, "Destination exists and `:force' option is false."
  end

  request = { storeID: dest[:store_id], headers: copy_headers }
  response = Client.copy_content(dest[:space_id], dest[:content_id], **request)
  unless md5 == response.md5
    raise CopyError, "Message digest of copy does not match source " \
                     "(source: #{md5}; destination: #{response.md5})"
  end

  Content.new(dest.merge(md5: md5))
end
|
94
102
|
|
95
103
|
# @return [Duracloud::Content] the moved content
|
96
104
|
# The current instance still represents the deleted content.
|
105
|
+
# @raise [Duracloud::Error]
|
97
106
|
def move(**args)
|
98
107
|
copied = copy(**args)
|
99
108
|
delete
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require "shellwords"

module Duracloud
  # Sync validation variant that compares file names only: the sorted list of
  # files under the content directory is compared with the sorted content IDs
  # from the manifest, and only names absent from the manifest are rechecked.
  class FastSyncValidation < SyncValidation

    # Extracts the content-id column of the manifest into the converted
    # manifest file, sorted.
    #
    # @return [Boolean, nil] the result of `system`
    def convert_manifest
      # content-id is the 2nd column of the manifest
      # The file name is escaped because it is interpolated into a shell
      # pipeline; it is built from the space ID.
      system("cut -f 2 #{Shellwords.escape(manifest_filename)} | sort", out: converted_manifest_filename)
    end

    # Lists local files, writes those not named in the manifest to the audit
    # file, and rechecks each of them.
    #
    # @return [Boolean] true if nothing is missing or every recheck finds
    #   its content
    # @raise [Error] if the file listing or the comparison command fails
    def audit
      # A failed listing would leave an empty or partial file, which the
      # comparison below would otherwise report as a successful validation.
      unless find_files
        raise Error, "Error listing files in #{content_dir}."
      end
      if system("comm", "-23", find_filename, converted_manifest_filename, out: audit_filename)
        File.empty?(audit_filename) || recheck
      else
        raise Error, "Error comparing #{find_filename} with #{converted_manifest_filename}."
      end
    end

    # Writes the sorted relative paths of all files under the content
    # directory (following symlinks) to the find file.
    #
    # @return [Boolean, nil] truthy only if both commands succeed
    def find_files
      # TODO handle exclude file?
      outfile = File.join(FileUtils.pwd, find_filename)
      # Using a separate command for sort so we get find results incrementally
      # The backslash is doubled so sed receives `\.` (a literal dot).
      system("find -L . -type f | sed -e 's|^\\./||'", chdir: content_dir, out: outfile) &&
        system("sort", "-o", find_filename, find_filename)
    end

    private

    # Lazily checks every content ID listed in the audit file (one per line),
    # without an MD5.
    def do_recheck
      Enumerator.new do |e|
        File.foreach(audit_filename) do |line|
          content_id = line.chomp
          e << check(content_id)
        end
      end
    end

    # Name of the file holding the local file listing.
    def find_filename
      filename("find.txt")
    end

  end
end
|
@@ -108,6 +108,21 @@ module Duracloud
|
|
108
108
|
"The API method 'Perform Task' has not been implemented."
|
109
109
|
end
|
110
110
|
|
111
|
+
# Issues a GET request to the storage report resource of one space.
#
# @param space_id the space to report on
# @param query extra options passed through to `durastore`
# @see https://wiki.duraspace.org/display/DURACLOUDDOC/DuraCloud+REST+API#DuraCloudRESTAPI-GetStorageReportsbySpace
def get_storage_reports_by_space(space_id, **query)
  path = "report/space/#{space_id}"
  durastore(:get, path, **query)
end
|
115
|
+
|
116
|
+
# Issues a GET request to the store-level storage report resource.
#
# @param query extra options passed through to `durastore`
# @see https://wiki.duraspace.org/display/DURACLOUDDOC/DuraCloud+REST+API#DuraCloudRESTAPI-GetStorageReportsbyStore
def get_storage_reports_by_store(**query)
  path = "report/store"
  durastore(:get, path, **query)
end
|
120
|
+
|
121
|
+
# Issues a GET request to the storage report resource covering all spaces
# in a store for the given point in time.
#
# @param epoch_ms value appended to the resource path (named as epoch
#   milliseconds)
# @param query extra options passed through to `durastore`
# @see https://wiki.duraspace.org/display/DURACLOUDDOC/DuraCloud+REST+API#DuraCloudRESTAPI-GetStorageReportsforallSpacesinaStore(inasingleday)
def get_storage_reports_for_all_spaces_in_a_store(epoch_ms, **query)
  path = "report/store/#{epoch_ms}"
  durastore(:get, path, **query)
end
|
125
|
+
|
111
126
|
private
|
112
127
|
|
113
128
|
def durastore(*args, &block)
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'hashie'
|
2
|
+
require 'active_support'
|
3
|
+
|
4
|
+
module Duracloud
  # A single storage report entry. Properties are translated from the
  # camelCase keys of the incoming hash to snake_case names.
  class StorageReport < Hashie::Trash

    property "space_id", from: "spaceId"
    property "store_id", from: "storeId"
    property "byte_count", from: "byteCount"
    property "object_count", from: "objectCount"
    property "account_id", from: "accountId"
    property "timestamp"

    # The report time in UTC, computed once from `timestamp`, which is
    # treated as milliseconds since the epoch.
    #
    # @return [Time]
    def time
      return @time if @time

      epoch_seconds = timestamp / 1000.0
      @time = Time.at(epoch_seconds).utc
    end

    # The byte count formatted by ActiveSupport's human-size helper.
    def human_size
      formatter = ActiveSupport::NumberHelper
      formatter.number_to_human_size(byte_count, prefix: :si)
    end

    # Multi-line, human-readable summary of the report.
    #
    # @return [String]
    def to_s
      <<-EOS
Date: #{time}
Space ID: #{space_id || "(all)"}
Store ID: #{store_id}
Objects: #{object_count}
Total size: #{human_size} (#{byte_count} bytes)
EOS
    end

  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'hashie'
|
3
|
+
|
4
|
+
# Forwardable is used below; require it explicitly instead of relying on
# another library having loaded it.
require 'forwardable'

module Duracloud
  # An enumerable collection of storage reports parsed from the JSON body of
  # a storage report response.
  class StorageReports
    include Enumerable
    extend Forwardable

    # `last` is answered from the fully materialized list of reports.
    delegate :last => :to_a

    # @return the parsed JSON data of the response
    attr_reader :data

    # Fetches the storage reports for one space.
    #
    # @param space_id the space to report on
    # @param query [Hash] options accepted by Params
    # @return [Duracloud::StorageReports]
    def self.by_space(space_id, **query)
      params = Params.new(query)
      response = Client.get_storage_reports_by_space(space_id, **params)
      new(response)
    end

    # Fetches the storage reports for a store.
    #
    # @param query [Hash] options accepted by Params
    # @return [Duracloud::StorageReports]
    def self.by_store(**query)
      params = Params.new(query)
      response = Client.get_storage_reports_by_store(**params)
      new(response)
    end

    # Fetches the storage reports for all spaces in a store at one point in
    # time.
    #
    # @param epoch_ms [Integer, nil] time in milliseconds since the epoch;
    #   defaults to 24 hours before now
    # @param query [Hash] options accepted by Params
    # @return [Duracloud::StorageReports]
    def self.for_all_spaces_in_a_store(epoch_ms = nil, **query)
      epoch_ms ||= (Time.now - (3600 * 24)).to_i * 1000
      params = Params.new(query)
      response = Client.get_storage_reports_for_all_spaces_in_a_store(epoch_ms, **params)
      new(response)
    end

    # @param response an object whose `body` is a JSON document
    def initialize(response)
      @data = JSON.parse(response.body)
    end

    # Yields a StorageReport for each entry of the parsed data.
    #
    # @return [Enumerator] when called without a block, following the usual
    #   Enumerable convention instead of raising LocalJumpError
    def each
      return enum_for(:each) { data.size } unless block_given?

      data.each do |report|
        yield StorageReport.new(report)
      end
    end

    private

    # Translates snake_case query options to the parameter names sent with
    # the request. NOTE: `private` above does not apply to constants, so this
    # class remains reachable from outside.
    class Params < Hashie::Trash
      property :storeID, from: :store_id
      property :groupBy, from: :group_by
      property :start
      property :end
    end

  end
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'active_model'
|
2
2
|
require 'tempfile'
|
3
3
|
require 'csv'
|
4
|
+
require 'fileutils'
|
4
5
|
|
5
6
|
module Duracloud
|
6
7
|
class SyncValidation
|
@@ -10,57 +11,139 @@ module Duracloud
|
|
10
11
|
MD5_CSV_OPTS = { col_sep: TWO_SPACES }.freeze
|
11
12
|
MANIFEST_CSV_OPTS = { col_sep: "\t", headers: true, return_headers: false }.freeze
|
12
13
|
|
13
|
-
|
14
|
+
MISSING = "MISSING"
|
15
|
+
CHANGED = "CHANGED"
|
16
|
+
FOUND = "FOUND"
|
17
|
+
|
18
|
+
attr_accessor :space_id, :content_dir, :store_id, :work_dir, :fast
|
14
19
|
|
15
20
|
# Convenience entry point: builds a validation from the given arguments and
# runs it.
#
# @return the result of the instance's `call`
def self.call(*args)
  validation = new(*args)
  validation.call
end
|
18
23
|
|
24
|
+
# Runs the given block with the current directory changed to `work_dir`
# when one is set, or to a fresh temporary directory (named after the space
# ID) otherwise.
#
# @return the value of the block
def in_work_dir
  return FileUtils.cd(work_dir) { yield } if work_dir

  Dir.mktmpdir("#{space_id}-validation-") do |scratch|
    FileUtils.cd(scratch) { yield }
  end
end
|
33
|
+
|
19
34
|
# Runs the validation inside the working directory: downloads the manifest,
# converts it, then audits the local content against it.
#
# @return the result of `audit`
def call
  in_work_dir do
    download_manifest
    convert_manifest
    outcome = audit
    outcome
  end
end
|
41
|
+
|
42
|
+
# Downloads the manifest for the space and store, writing each received
# chunk to the manifest file.
def download_manifest
  File.open(manifest_filename, "w") do |out|
    Manifest.download(space_id, store_id) { |chunk| out.write(chunk) }
  end
end
|
49
|
+
|
50
|
+
# Converts the downloaded manifest into the converted manifest file, one
# line per manifest row: the third column, two spaces, then the second
# column. The header row is skipped.
def convert_manifest
  File.open(converted_manifest_filename, "w") do |f|
    # Options are splatted as keywords: since Ruby 3.0 a positional hash is
    # no longer converted to keyword arguments, so it would be taken as the
    # file mode.
    CSV.foreach(manifest_filename, **MANIFEST_CSV_OPTS) do |row|
      f.puts [ row[2], row[1] ].join(TWO_SPACES)
    end
  end
end
|
64
57
|
|
58
|
+
# Runs md5deep in negative matching mode over the content directory, writing
# the files that do not match the converted manifest to the audit file, and
# rechecks them when md5deep reports mismatches.
#
# @return [Boolean] true if md5deep reports no mismatch, otherwise the
#   result of `recheck`
# @raise [Error] if md5deep reports an error or exits with an unexpected
#   status
def audit
  outfile = File.join(FileUtils.pwd, audit_filename)
  infile = File.join(FileUtils.pwd, converted_manifest_filename)
  pid = spawn("md5deep", "-X", infile, "-l", "-r", ".", chdir: content_dir, out: outfile)
  _, status = Process.wait2(pid)
  case status.exitstatus
  when 0
    true
  when 1, 2, 3
    # md5deep's status is a bit field: 1 = unused hashes, 2 = unmatched
    # inputs, so 3 means both occurred and is a mismatch, not an error.
    recheck
  when 64, 128
    raise Error, "md5deep error."
  else
    raise Error, "Unknown error."
  end
end
|
74
|
+
|
75
|
+
# Rechecks every audited item, writing each result as a row to the recheck
# output.
#
# @return [Boolean] true if every rechecked item was found
def recheck
  all_found = true
  recheck_file do |csv|
    do_recheck.each do |entry|
      csv << entry.to_a
      # Once a miss is seen the outcome stays false.
      all_found = entry.found? if all_found
    end
  end
  all_found
end
|
85
|
+
|
86
|
+
private
|
87
|
+
|
88
|
+
# Outcome of checking one content item: a status string, the MD5 that was
# checked and the content ID.
CheckResult = Struct.new(:status, :md5, :content_id) do
  # @return [Boolean] whether the status is FOUND
  def found?
    FOUND == status
  end
end
|
93
|
+
|
94
|
+
# Yields a tab-separated CSV writer for recheck results: the recheck file
# when a work directory is set, standard output otherwise.
def recheck_file
  tsv = { col_sep: "\t" }
  return CSV($stdout, **tsv) { |csv| yield(csv) } unless work_dir

  CSV.open(recheck_filename, "w", **tsv) { |csv| yield(csv) }
end
|
101
|
+
|
102
|
+
# Checks whether one content item exists and wraps the outcome.
#
# @param content_id the content ID to look up
# @param md5 optional MD5 to verify; recorded as "-" in the result when
#   absent
# @return [CheckResult] with status FOUND, MISSING, or CHANGED when the
#   lookup raises MessageDigestError
def check(content_id, md5 = nil)
  outcome =
    begin
      exist?(content_id, md5) ? FOUND : MISSING
    rescue MessageDigestError
      CHANGED
    end
  CheckResult.new(outcome, md5 || "-", content_id)
end
|
110
|
+
|
111
|
+
# Asks Duracloud::Content whether content with the given ID (and MD5, when
# given) exists in this validation's space and store.
def exist?(content_id, md5 = nil)
  criteria = { space_id: space_id, store_id: store_id, content_id: content_id, md5: md5 }
  Duracloud::Content.exist?(**criteria)
end
|
114
|
+
|
115
|
+
# Lazily checks every entry of the audit file. Each line holds an MD5 and a
# path; a leading "./" is stripped from the path to form the content ID.
#
# @return [Enumerator] yielding one check result per audit line
def do_recheck
  Enumerator.new do |e|
    # Options are splatted as keywords: since Ruby 3.0 a positional hash is
    # no longer converted to keyword arguments.
    CSV.foreach(audit_filename, **MD5_CSV_OPTS) do |md5, path|
      # \A anchors at the start of the string rather than of any line.
      content_id = path.sub(%r{\A\./}, "")
      e << check(content_id, md5)
    end
  end
end
|
123
|
+
|
124
|
+
# Prefix shared by all work file names: the space ID.
def prefix
  space_id
end

# Builds a work file name by joining the prefix and the suffix with "-".
def filename(suffix)
  "#{prefix}-#{suffix}"
end

# Name of the downloaded manifest file.
def manifest_filename
  filename("manifest.tsv")
end

# Name of the converted manifest file.
def converted_manifest_filename
  filename("converted-manifest.txt")
end

# Name of the audit output file.
def audit_filename
  filename("audit.txt")
end

# Name of the recheck output file.
def recheck_filename
  filename("recheck.txt")
end
|
147
|
+
|
65
148
|
end
|
66
149
|
end
|