duracloud-client 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +14 -1
- data/lib/duracloud.rb +5 -1
- data/lib/duracloud/cli.rb +29 -107
- data/lib/duracloud/command_options.rb +120 -0
- data/lib/duracloud/commands.rb +40 -0
- data/lib/duracloud/commands/command.rb +6 -2
- data/lib/duracloud/commands/count.rb +15 -0
- data/lib/duracloud/commands/download_manifest.rb +0 -2
- data/lib/duracloud/commands/find.rb +16 -0
- data/lib/duracloud/commands/find_item.rb +16 -0
- data/lib/duracloud/commands/find_items.rb +22 -0
- data/lib/duracloud/commands/find_missing_items.rb +15 -0
- data/lib/duracloud/commands/find_space.rb +12 -0
- data/lib/duracloud/commands/get_storage_report.rb +16 -0
- data/lib/duracloud/commands/get_storage_report_for_all_spaces.rb +12 -0
- data/lib/duracloud/commands/get_storage_report_for_space.rb +10 -0
- data/lib/duracloud/commands/get_storage_report_for_store.rb +10 -0
- data/lib/duracloud/commands/list_content_ids.rb +11 -0
- data/lib/duracloud/commands/list_items.rb +17 -0
- data/lib/duracloud/commands/sync.rb +0 -2
- data/lib/duracloud/commands/validate.rb +2 -3
- data/lib/duracloud/content.rb +12 -3
- data/lib/duracloud/fast_sync_validation.rb +42 -0
- data/lib/duracloud/rest_methods.rb +15 -0
- data/lib/duracloud/storage_report.rb +33 -0
- data/lib/duracloud/storage_reports.rb +52 -0
- data/lib/duracloud/sync_validation.rb +122 -39
- data/lib/duracloud/version.rb +1 -1
- data/spec/unit/cli_spec.rb +59 -15
- data/spec/unit/client_spec.rb +24 -0
- data/spec/unit/content_spec.rb +17 -7
- data/spec/unit/storage_report_spec.rb +15 -0
- data/spec/unit/storage_reports_spec.rb +45 -0
- metadata +23 -3
- data/lib/duracloud/commands/get_properties.rb +0 -27
@@ -0,0 +1,16 @@
|
|
1
|
+
module Duracloud::Commands
  # CLI command that looks up one content item and prints its properties.
  class FindItem < Command

    # Finds the content addressed by the command's space, store and content
    # IDs (the +md5+ option is passed through to the lookup) and prints the
    # item's properties plus "MD5", "Size" and "Chunked" entries to standard
    # output, one "key: value" pair per line.
    #
    # @return the merged properties collection that was printed
    def call
      content = Duracloud::Content.find(space_id: space_id, store_id: store_id, content_id: content_id, md5: md5)
      # Non-mutating merge: the content's own properties object is left untouched.
      summary = content.properties.merge("MD5" => content.md5,
                                         "Size" => content.size,
                                         "Chunked" => content.chunked?)
      summary.each do |key, value|
        puts "#{key}: #{value}"
      end
    end

  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module Duracloud::Commands
  # CLI command that looks up every item listed in a CSV input file and writes
  # a CSV report of the items found to standard output.
  class FindItems < Command

    # Content attributes written as report columns, in order.
    HEADERS = %i( content_id md5 size content_type modified ).freeze

    # Reads +infile+ as header-less CSV: column 0 is the content ID and
    # column 1 the MD5 passed to the lookup. Each item found becomes one
    # output row; items that are missing or fail the digest check are
    # reported on standard error and skipped.
    def call
      CSV.instance($stdout, headers: HEADERS, write_headers: true) do |csv|
        CSV.foreach(infile, headers: false) do |row|
          begin
            item = Duracloud::Content.find(space_id: space_id, store_id: store_id, content_id: row[0], md5: row[1])
            csv << HEADERS.map { |header| item.send(header) }
          rescue Duracloud::NotFoundError, Duracloud::MessageDigestError => e
            $stderr.puts "ERROR: Content ID #{row[0]} -- #{e.message}"
          end
        end
      end
    end

  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require "csv"

module Duracloud::Commands
  # CLI command that echoes the rows of a CSV input file whose content does
  # not exist in the target space.
  class FindMissingItems < Command

    # Reads +infile+ as header-less CSV (column 0: content ID, column 1: MD5
    # passed to the existence check) and writes every row for which
    # Duracloud::Content.exist? is falsy to standard output, unchanged.
    def call
      CSV.instance($stdout, headers: false) do |csv|
        CSV.foreach(infile, headers: false) do |row|
          next if Duracloud::Content.exist?(space_id: space_id, store_id: store_id, content_id: row[0], md5: row[1])
          csv << row
        end
      end
    end

  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Duracloud::Commands
  # CLI command that dispatches to the storage-report command matching the
  # options given.
  class GetStorageReport < Command

    # Chooses the concrete command: a space report when +space_id+ is set,
    # an all-spaces report when +all_spaces+ is set, and a store report
    # otherwise. The chosen command class is invoked with this command's +cli+.
    def call
      command_class =
        if space_id
          GetStorageReportForSpace
        elsif all_spaces
          GetStorageReportForAllSpaces
        else
          GetStorageReportForStore
        end
      command_class.call(cli)
    end

  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require "csv"
|
2
|
+
|
3
|
+
module Duracloud::Commands
  # CLI command that writes a CSV listing of the items in a space to
  # standard output.
  class ListItems < Command

    # Item attributes written as report columns, in order.
    HEADERS = %i( content_id md5 size content_type modified ).freeze

    # Iterates the items of +space_id+ (restricted by +store_id+ and
    # +prefix+) and writes one CSV row per item, preceded by a header row.
    def call
      CSV.instance($stdout, headers: HEADERS, write_headers: true) do |csv|
        Duracloud::Space.items(space_id, store_id: store_id, prefix: prefix).each do |item|
          csv << HEADERS.map { |header| item.send(header) }
        end
      end
    end

  end
end
|
@@ -1,10 +1,9 @@
|
|
1
|
-
require_relative "command"
|
2
|
-
|
3
1
|
module Duracloud::Commands
  # CLI command that validates a local content directory against a space.
  class Validate < Command

    # Runs Duracloud::FastSyncValidation when the +fast+ option is truthy and
    # Duracloud::SyncValidation otherwise, passing the space, store, content
    # directory and work directory options through.
    #
    # @return the result of the selected validation's +call+
    def call
      # The constants must be fully qualified: this module is opened with the
      # compact `Duracloud::Commands` form, so `Duracloud` is not in lexical scope.
      klass = fast ? Duracloud::FastSyncValidation : Duracloud::SyncValidation
      klass.call(space_id: space_id, store_id: store_id, content_dir: content_dir, work_dir: work_dir)
    end

  end
end
|
data/lib/duracloud/content.rb
CHANGED
@@ -79,21 +79,30 @@ module Duracloud
|
|
79
79
|
|
80
80
|
# @return [Duracloud::Content] the copied content
|
81
81
|
# The current instance still represents the original content.
|
82
|
+
# @raise [Duracloud::Error]
|
82
83
|
def copy(**args)
  # Destination defaults to this content's own coordinates for any part not given.
  dest = args.except(:force)
  dest[:space_id] ||= space_id
  dest[:store_id] ||= store_id
  dest[:content_id] ||= content_id

  raise CopyError, "Destination is the same as the source." if dest == copy_source

  # Without :force an existing destination is never overwritten.
  if !args[:force] && Content.exist?(**dest)
    raise CopyError, "Destination exists and `:force' option is false."
  end

  options = { storeID: dest[:store_id], headers: copy_headers }
  response = Client.copy_content(dest[:space_id], dest[:content_id], **options)

  # Verify the copy against the source digest before reporting success.
  if md5 != response.md5
    raise CopyError, "Message digest of copy does not match source " \
                     "(source: #{md5}; destination: #{response.md5})"
  end

  Content.new(dest.merge(md5: md5))
end
|
94
102
|
|
95
103
|
# @return [Duracloud::Content] the moved content
|
96
104
|
# The current instance still represents the deleted content.
|
105
|
+
# @raise [Duracloud::Error]
|
97
106
|
def move(**args)
|
98
107
|
copied = copy(**args)
|
99
108
|
delete
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Duracloud
  # Sync validation that compares file names only: the list of files found
  # under the content directory is compared with the content IDs in the
  # manifest, and only names absent from the manifest are rechecked.
  class FastSyncValidation < SyncValidation

    # Writes the sorted content-ID column of the manifest to the converted
    # manifest file.
    #
    # @return [Boolean, nil] result of the last command run (see Kernel#system)
    def convert_manifest
      # content-id is the 2nd column of the manifest.
      # Arguments are passed as an argv list (no shell), so the file names
      # need no quoting; sorting is done in place as in #find_files.
      system("cut", "-f", "2", manifest_filename, out: converted_manifest_filename) &&
        system("sort", "-o", converted_manifest_filename, converted_manifest_filename)
    end

    # Lists the files found locally that are not in the converted manifest
    # (`comm -23`) into the audit file.
    #
    # @return true when the audit file is empty, otherwise the result of +recheck+
    # @raise [Error] if the comparison command fails
    def audit
      find_files
      if system("comm", "-23", find_filename, converted_manifest_filename, out: audit_filename)
        File.empty?(audit_filename) || recheck
      else
        raise Error, "Error comparing #{find_filename} with #{converted_manifest_filename}."
      end
    end

    # Writes the sorted list of regular files under +content_dir+ (symlinks
    # followed, leading "./" stripped) to the find file in the current directory.
    #
    # @return [Boolean, nil] result of the last command run (see Kernel#system)
    def find_files
      # TODO handle exclude file?
      # `find` runs with chdir: content_dir, so the output path must be absolute.
      outfile = File.join(FileUtils.pwd, find_filename)
      # Using a separate command for sort so we get find results incrementally.
      # The backslash is doubled so sed receives a literal `\.`.
      system("find -L . -type f | sed -e 's|^\\./||'", chdir: content_dir, out: outfile) &&
        system("sort", "-o", find_filename, find_filename)
    end

    private

    # @return [Enumerator] yields one check result per line of the audit
    #   file; each line is taken as a content ID and checked without an MD5
    def do_recheck
      Enumerator.new do |e|
        File.foreach(audit_filename) do |line|
          e << check(line.chomp)
        end
      end
    end

    # Name of the file holding the list of local files.
    def find_filename
      filename("find.txt")
    end

  end
end
|
@@ -108,6 +108,21 @@ module Duracloud
|
|
108
108
|
"The API method 'Perform Task' has not been implemented."
|
109
109
|
end
|
110
110
|
|
111
|
+
# @see https://wiki.duraspace.org/display/DURACLOUDDOC/DuraCloud+REST+API#DuraCloudRESTAPI-GetStorageReportsbySpace
|
112
|
+
# Issues a GET to the "report/space/<space_id>" resource via +durastore+.
#
# @param space_id the space ID interpolated into the request path
# @param query keyword options forwarded unchanged to +durastore+
# @return whatever +durastore+ returns
def get_storage_reports_by_space(space_id, **query)
  durastore(:get, "report/space/#{space_id}", **query)
end
|
115
|
+
|
116
|
+
# @see https://wiki.duraspace.org/display/DURACLOUDDOC/DuraCloud+REST+API#DuraCloudRESTAPI-GetStorageReportsbyStore
|
117
|
+
# Issues a GET to the "report/store" resource via +durastore+.
#
# @param query keyword options forwarded unchanged to +durastore+
# @return whatever +durastore+ returns
def get_storage_reports_by_store(**query)
  durastore(:get, "report/store", **query)
end
|
120
|
+
|
121
|
+
# @see https://wiki.duraspace.org/display/DURACLOUDDOC/DuraCloud+REST+API#DuraCloudRESTAPI-GetStorageReportsforallSpacesinaStore(inasingleday)
|
122
|
+
# Issues a GET to the "report/store/<epoch_ms>" resource via +durastore+.
#
# @param epoch_ms the value interpolated into the request path
# @param query keyword options forwarded unchanged to +durastore+
# @return whatever +durastore+ returns
def get_storage_reports_for_all_spaces_in_a_store(epoch_ms, **query)
  durastore(:get, "report/store/#{epoch_ms}", **query)
end
|
125
|
+
|
111
126
|
private
|
112
127
|
|
113
128
|
def durastore(*args, &block)
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'hashie'
|
2
|
+
require 'active_support'
|
3
|
+
|
4
|
+
module Duracloud
  # One storage report entry. Built from a hash with camelCase keys
  # ("spaceId", "byteCount", ...), which are exposed under snake_case names.
  class StorageReport < Hashie::Trash

    property "space_id", from: "spaceId"
    property "store_id", from: "storeId"
    property "byte_count", from: "byteCount"
    property "object_count", from: "objectCount"
    property "account_id", from: "accountId"
    property "timestamp"

    # @return [Time] +timestamp+ converted to a UTC time; the value is
    #   divided by 1000.0, i.e. treated as milliseconds since the epoch
    def time
      @time ||= Time.at(timestamp / 1000.0).utc
    end

    # @return +byte_count+ formatted by ActiveSupport's number_to_human_size
    def human_size
      ActiveSupport::NumberHelper.number_to_human_size(byte_count, prefix: :si)
    end

    # @return [String] multi-line summary; a report without a space ID is
    #   labelled "(all)"
    def to_s
      <<-EOS
Date: #{time}
Space ID: #{space_id || "(all)"}
Store ID: #{store_id}
Objects: #{object_count}
Total size: #{human_size} (#{byte_count} bytes)
EOS
    end

  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'forwardable'
require 'json'
require 'hashie'
|
3
|
+
|
4
|
+
module Duracloud
  # Enumerable collection of StorageReport objects built from the JSON body
  # of a storage report response.
  class StorageReports
    include Enumerable
    extend Forwardable

    # Enumerable provides #first but not #last, so #last goes through #to_a.
    delegate :last => :to_a

    # @return the parsed JSON response body
    attr_reader :data

    # @param space_id the space to report on
    # @param query options accepted by Params (store_id, group_by, start, end)
    # @return [StorageReports]
    def self.by_space(space_id, **query)
      params = Params.new(query)
      response = Client.get_storage_reports_by_space(space_id, **params)
      new(response)
    end

    # @param query options accepted by Params (store_id, group_by, start, end)
    # @return [StorageReports]
    def self.by_store(**query)
      params = Params.new(query)
      response = Client.get_storage_reports_by_store(**params)
      new(response)
    end

    # @param epoch_ms milliseconds since the epoch; defaults to 24 hours
    #   before the current time
    # @param query options accepted by Params (store_id, group_by, start, end)
    # @return [StorageReports]
    def self.for_all_spaces_in_a_store(epoch_ms = nil, **query)
      epoch_ms ||= (Time.now - (3600 * 24)).to_i * 1000
      params = Params.new(query)
      response = Client.get_storage_reports_for_all_spaces_in_a_store(epoch_ms, **params)
      new(response)
    end

    # @param response an object whose +body+ is a JSON document
    def initialize(response)
      @data = JSON.parse(response.body)
    end

    # Yields a StorageReport for each entry of +data+.
    #
    # @return [Enumerator] when called without a block
    def each
      return enum_for(:each) unless block_given?

      data.each do |report|
        yield StorageReport.new(report)
      end
    end

    private

    # Translates snake_case option names into the query parameter names.
    # NOTE(review): a bare `private` does not hide a nested class, so Params
    # remains reachable as StorageReports::Params.
    class Params < Hashie::Trash
      property :storeID, from: :store_id
      property :groupBy, from: :group_by
      property :start
      property :end
    end

  end
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'active_model'
|
2
2
|
require 'tempfile'
|
3
3
|
require 'csv'
|
4
|
+
require 'fileutils'
|
4
5
|
|
5
6
|
module Duracloud
|
6
7
|
class SyncValidation
|
@@ -10,57 +11,139 @@ module Duracloud
|
|
10
11
|
MD5_CSV_OPTS = { col_sep: TWO_SPACES }.freeze
|
11
12
|
MANIFEST_CSV_OPTS = { col_sep: "\t", headers: true, return_headers: false }.freeze
|
12
13
|
|
13
|
-
|
14
|
+
# Status values recorded for each rechecked item.
MISSING = "MISSING".freeze
CHANGED = "CHANGED".freeze
FOUND = "FOUND".freeze
|
17
|
+
|
18
|
+
attr_accessor :space_id, :content_dir, :store_id, :work_dir, :fast
|
14
19
|
|
15
20
|
# Convenience entry point: instantiates with +args+ and runs #call.
#
# @return the result of the instance's #call
def self.call(*args)
  new(*args).call
end
|
18
23
|
|
24
|
+
# Runs the given block with the current directory changed to +work_dir+.
# When no work directory is configured, a temporary directory named after
# the space is created for the duration of the block (Dir.mktmpdir removes
# it afterwards).
#
# @yield with no arguments
# @return the block's result
def in_work_dir
  return FileUtils.cd(work_dir) { yield } if work_dir

  Dir.mktmpdir("#{space_id}-validation-") do |tmpdir|
    FileUtils.cd(tmpdir) { yield }
  end
end
|
33
|
+
|
19
34
|
# Runs the validation inside the work directory: downloads the manifest,
# converts it, then audits the content directory against it.
#
# @return the result of #audit
def call
  in_work_dir do
    download_manifest
    convert_manifest
    audit
  end
end
|
41
|
+
|
42
|
+
# Streams the manifest for the space into the manifest file (relative to
# the current directory), writing each chunk as Manifest.download yields it.
def download_manifest
  File.open(manifest_filename, "w") do |manifest|
    Manifest.download(space_id, store_id) do |chunk|
      manifest.write(chunk)
    end
  end
end
|
49
|
+
|
50
|
+
# Rewrites the downloaded manifest as "<column 2><two spaces><column 1>"
# lines (header row skipped) in the converted manifest file. #audit passes
# that file to md5deep as its list of known hashes.
def convert_manifest
  File.open(converted_manifest_filename, "w") do |f|
    # Options are splatted as keywords: CSV.foreach's second positional
    # parameter is the file mode, not an options hash.
    CSV.foreach(manifest_filename, **MANIFEST_CSV_OPTS) do |row|
      f.puts [ row[2], row[1] ].join(TWO_SPACES)
    end
  end
end
|
64
57
|
|
58
|
+
# Runs md5deep over +content_dir+ with the converted manifest as the list
# of known hashes (-X) and writes its output to the audit file.
#
# @return true when md5deep exits 0, or the result of #recheck when it
#   exits 1 or 2
# @raise [Error] when md5deep exits 64 or 128, or with any other status
def audit
  # md5deep runs with chdir: content_dir, so both paths must be absolute.
  outfile = File.join(FileUtils.pwd, audit_filename)
  infile = File.join(FileUtils.pwd, converted_manifest_filename)
  pid = spawn("md5deep", "-X", infile, "-l", "-r", ".", chdir: content_dir, out: outfile)
  # wait2 returns the status directly instead of relying on the global $?.
  _, status = Process.wait2(pid)
  case status.exitstatus
  when 0
    true
  when 1, 2
    recheck
  when 64, 128
    raise Error, "md5deep error."
  else
    raise Error, "Unknown error."
  end
end
|
74
|
+
|
75
|
+
# Writes one row per result of #do_recheck to the recheck output.
#
# @return [Boolean] true only if every result reports found? (also true
#   when there are no results)
def recheck
  success = true
  recheck_file do |csv|
    do_recheck.each do |result|
      csv << result.to_a
      success &&= result.found?
    end
  end
  success
end
|
85
|
+
|
86
|
+
private
|
87
|
+
|
88
|
+
# Outcome of checking one item: the status constant, the MD5 that was
# checked, and the content ID. #to_a gives the row written by #recheck.
CheckResult = Struct.new(:status, :md5, :content_id) do
  # @return [Boolean] whether the status is FOUND
  def found?
    status == FOUND
  end
end
|
93
|
+
|
94
|
+
# Yields a tab-separated CSV writer for recheck results: the recheck file
# when a work directory is configured, standard output otherwise.
#
# @yieldparam csv [CSV] the writer
def recheck_file
  return CSV.open(recheck_filename, "w", col_sep: "\t") { |csv| yield(csv) } if work_dir

  CSV($stdout, col_sep: "\t") { |csv| yield(csv) }
end
|
101
|
+
|
102
|
+
# Checks one item and classifies it: FOUND when #exist? is truthy, MISSING
# when it is falsy, CHANGED when the check raises MessageDigestError.
#
# @param content_id the content ID to check
# @param md5 the expected MD5, if any; recorded as "-" when not given
# @return [CheckResult]
def check(content_id, md5 = nil)
  status = begin
             exist?(content_id, md5) ? FOUND : MISSING
           rescue MessageDigestError
             CHANGED
           end
  CheckResult.new(status, md5 || "-", content_id)
end
|
110
|
+
|
111
|
+
# Asks Duracloud::Content whether the content exists in this validation's
# space and store.
#
# @param content_id the content ID to look up
# @param md5 optional MD5, passed through to the existence check
# @return the result of Duracloud::Content.exist?
def exist?(content_id, md5 = nil)
  Duracloud::Content.exist?(space_id: space_id, store_id: store_id, content_id: content_id, md5: md5)
end
|
114
|
+
|
115
|
+
# @return [Enumerator] yields one check result per line of the audit file;
#   each line is "<md5><two spaces><path>" and the path's leading "./" is
#   removed to obtain the content ID
def do_recheck
  Enumerator.new do |e|
    # Options are splatted as keywords: CSV.foreach's second positional
    # parameter is the file mode, not an options hash.
    CSV.foreach(audit_filename, **MD5_CSV_OPTS) do |md5, path|
      content_id = path.sub(/\A\.\//, "")
      e << check(content_id, md5)
    end
  end
end
|
123
|
+
|
124
|
+
# Prefix shared by all work file names: the space ID.
def prefix
  space_id
end
|
127
|
+
|
128
|
+
# Builds a work file name of the form "<prefix>-<suffix>".
#
# @param suffix [String] the distinguishing part of the name
# @return [String]
def filename(suffix)
  [ prefix, suffix ].join("-")
end
|
131
|
+
|
132
|
+
# Name of the file the manifest is downloaded to.
def manifest_filename
  filename("manifest.tsv")
end
|
135
|
+
|
136
|
+
# Name of the file holding the converted manifest.
def converted_manifest_filename
  filename("converted-manifest.txt")
end
|
139
|
+
|
140
|
+
# Name of the file the audit output is written to.
def audit_filename
  filename("audit.txt")
end
|
143
|
+
|
144
|
+
# Name of the file recheck results are written to when a work directory is set.
def recheck_filename
  filename("recheck.txt")
end
|
147
|
+
|
65
148
|
end
|
66
149
|
end
|