duracloud-client 0.8.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +14 -1
  3. data/lib/duracloud.rb +5 -1
  4. data/lib/duracloud/cli.rb +29 -107
  5. data/lib/duracloud/command_options.rb +120 -0
  6. data/lib/duracloud/commands.rb +40 -0
  7. data/lib/duracloud/commands/command.rb +6 -2
  8. data/lib/duracloud/commands/count.rb +15 -0
  9. data/lib/duracloud/commands/download_manifest.rb +0 -2
  10. data/lib/duracloud/commands/find.rb +16 -0
  11. data/lib/duracloud/commands/find_item.rb +16 -0
  12. data/lib/duracloud/commands/find_items.rb +22 -0
  13. data/lib/duracloud/commands/find_missing_items.rb +15 -0
  14. data/lib/duracloud/commands/find_space.rb +12 -0
  15. data/lib/duracloud/commands/get_storage_report.rb +16 -0
  16. data/lib/duracloud/commands/get_storage_report_for_all_spaces.rb +12 -0
  17. data/lib/duracloud/commands/get_storage_report_for_space.rb +10 -0
  18. data/lib/duracloud/commands/get_storage_report_for_store.rb +10 -0
  19. data/lib/duracloud/commands/list_content_ids.rb +11 -0
  20. data/lib/duracloud/commands/list_items.rb +17 -0
  21. data/lib/duracloud/commands/sync.rb +0 -2
  22. data/lib/duracloud/commands/validate.rb +2 -3
  23. data/lib/duracloud/content.rb +12 -3
  24. data/lib/duracloud/fast_sync_validation.rb +42 -0
  25. data/lib/duracloud/rest_methods.rb +15 -0
  26. data/lib/duracloud/storage_report.rb +33 -0
  27. data/lib/duracloud/storage_reports.rb +52 -0
  28. data/lib/duracloud/sync_validation.rb +122 -39
  29. data/lib/duracloud/version.rb +1 -1
  30. data/spec/unit/cli_spec.rb +59 -15
  31. data/spec/unit/client_spec.rb +24 -0
  32. data/spec/unit/content_spec.rb +17 -7
  33. data/spec/unit/storage_report_spec.rb +15 -0
  34. data/spec/unit/storage_reports_spec.rb +45 -0
  35. metadata +23 -3
  36. data/lib/duracloud/commands/get_properties.rb +0 -27
@@ -0,0 +1,16 @@
1
module Duracloud::Commands
  # Prints the properties of a single content item, one "key: value" pair
  # per line, followed by its MD5, size and chunked flag.
  class FindItem < Command

    def call
      item = Duracloud::Content.find(space_id: space_id, store_id: store_id, content_id: content_id, md5: md5)
      # Work on a copy so the item's own property hash is left untouched.
      details = item.properties.dup
      details.update("MD5" => item.md5,
                     "Size" => item.size,
                     "Chunked" => item.chunked?)
      details.each { |name, value| puts "#{name}: #{value}" }
    end

  end
end
@@ -0,0 +1,22 @@
1
+ require 'csv'
2
+
3
module Duracloud::Commands
  # Looks up every item listed in the input CSV (content ID in the first
  # column, MD5 in the second) and writes one CSV row per item found to
  # standard output. Lookup failures are reported on standard error.
  class FindItems < Command

    HEADERS = %i[content_id md5 size content_type modified]

    def call
      CSV.instance($stdout, headers: HEADERS, write_headers: true) do |out|
        CSV.foreach(infile, headers: false) do |record|
          begin
            found = Duracloud::Content.find(space_id: space_id, store_id: store_id, content_id: record[0], md5: record[1])
            out << HEADERS.map { |attribute| found.send(attribute) }
          rescue Duracloud::NotFoundError, Duracloud::MessageDigestError => error
            # Keep going: one bad row should not abort the whole listing.
            $stderr.puts "ERROR: Content ID #{record[0]} -- #{error.message}"
          end
        end
      end
    end

  end
end
@@ -0,0 +1,15 @@
1
# CSV is used directly below; require it here, as the sibling FindItems
# and ListItems commands do, rather than relying on load order.
require 'csv'

module Duracloud::Commands
  # Reads the input CSV (content ID in the first column, MD5 in the second)
  # and echoes to standard output every row whose item does not exist in
  # the space.
  class FindMissingItems < Command

    def call
      CSV.instance($stdout, headers: false) do |csv|
        CSV.foreach(infile, headers: false) do |row|
          # NOTE(review): other callers rescue Duracloud::MessageDigestError
          # around Content.exist?/find; if exist? raises it on an MD5
          # mismatch, the error propagates and ends this run -- confirm
          # that is intended.
          unless Duracloud::Content.exist?(space_id: space_id, store_id: store_id, content_id: row[0], md5: row[1])
            csv << row
          end
        end
      end
    end

  end
end
@@ -0,0 +1,12 @@
1
module Duracloud::Commands
  # Prints the properties of a space, one "key: value" pair per line.
  class FindSpace < Command

    def call
      found = Duracloud::Space.find(space_id, store_id)
      found.properties.each { |name, value| puts "#{name}: #{value}" }
    end

  end
end
@@ -0,0 +1,16 @@
1
module Duracloud::Commands
  # Dispatches to the storage report command matching the CLI options:
  # a space ID selects the per-space report, the all-spaces flag selects
  # the all-spaces report, and anything else the per-store report.
  class GetStorageReport < Command

    def call
      handler =
        if space_id
          GetStorageReportForSpace
        elsif all_spaces
          GetStorageReportForAllSpaces
        else
          GetStorageReportForStore
        end
      handler.call(cli)
    end

  end
end
@@ -0,0 +1,12 @@
1
module Duracloud::Commands
  # Prints every report returned for all spaces in the store, each one
  # preceded by a 40-dash separator line.
  class GetStorageReportForAllSpaces < Command

    def call
      separator = "-" * 40
      reports = Duracloud::StorageReports.for_all_spaces_in_a_store(store_id: store_id)
      reports.each do |report|
        puts separator
        puts report.to_s
      end
    end

  end
end
@@ -0,0 +1,10 @@
1
module Duracloud::Commands
  # Prints the last storage report returned for the space.
  class GetStorageReportForSpace < Command

    def call
      latest = Duracloud::StorageReports.by_space(space_id, store_id: store_id).last
      puts latest.to_s
    end

  end
end
@@ -0,0 +1,10 @@
1
module Duracloud::Commands
  # Prints the last storage report returned for the store.
  class GetStorageReportForStore < Command

    def call
      latest = Duracloud::StorageReports.by_store(store_id: store_id).last
      puts latest.to_s
    end

  end
end
@@ -0,0 +1,11 @@
1
module Duracloud::Commands
  # Prints the content IDs in a space, one per line, optionally limited
  # to those matching the prefix option.
  class ListContentIds < Command

    def call
      ids = Duracloud::Space.content_ids(space_id, store_id: store_id, prefix: prefix)
      ids.each { |content_id| puts content_id }
    end

  end
end
@@ -0,0 +1,17 @@
1
+ require "csv"
2
+
3
module Duracloud::Commands
  # Writes a CSV listing (with a header row) of the items in a space to
  # standard output.
  class ListItems < Command

    HEADERS = %i[content_id md5 size content_type modified]

    def call
      CSV.instance($stdout, headers: HEADERS, write_headers: true) do |out|
        listing = Duracloud::Space.items(space_id, store_id: store_id, prefix: prefix)
        listing.each do |entry|
          out << HEADERS.map { |attribute| entry.send(attribute) }
        end
      end
    end

  end
end
@@ -1,5 +1,3 @@
1
- require_relative 'command'
2
-
3
1
  module Duracloud::Commands
4
2
  class Sync < Command
5
3
 
@@ -1,10 +1,9 @@
1
- require_relative "command"
2
-
3
1
  module Duracloud::Commands
4
2
  class Validate < Command
5
3
 
6
4
  def call
7
- Duracloud::SyncValidation.call(space_id: space_id, store_id: store_id, content_dir: content_dir)
5
+ klass = fast ? Duracloud::FastSyncValidation : DuracloudSyncValidation
6
+ klass.call(space_id: space_id, store_id: store_id, content_dir: content_dir, work_dir: work_dir)
8
7
  end
9
8
 
10
9
  end
@@ -79,21 +79,30 @@ module Duracloud
79
79
 
80
80
  # @return [Duracloud::Content] the copied content
81
81
  # The current instance still represents the original content.
82
+ # @raise [Duracloud::Error]
82
83
  def copy(**args)
83
84
  dest = args.except(:force)
84
85
  dest[:space_id] ||= space_id
86
+ dest[:store_id] ||= store_id
85
87
  dest[:content_id] ||= content_id
86
- raise CopyError, "Destination is the same as the source." if dest == copy_source
88
+ if dest == copy_source
89
+ raise CopyError, "Destination is the same as the source."
90
+ end
87
91
  if !args[:force] && Content.exist?(**dest)
88
- raise CopyError, "Destination exists and :false option is false."
92
+ raise CopyError, "Destination exists and `:force' option is false."
89
93
  end
90
94
  options = { storeID: dest[:store_id], headers: copy_headers }
91
- Client.copy_content(dest[:space_id], dest[:content_id], **options)
95
+ response = Client.copy_content(dest[:space_id], dest[:content_id], **options)
96
+ if md5 != response.md5
97
+ raise CopyError, "Message digest of copy does not match source " \
98
+ "(source: #{md5}; destination: #{response.md5})"
99
+ end
92
100
  Content.new(dest.merge(md5: md5))
93
101
  end
94
102
 
95
103
  # @return [Duracloud::Content] the moved content
96
104
  # The current instance still represents the deleted content.
105
+ # @raise [Duracloud::Error]
97
106
  def move(**args)
98
107
  copied = copy(**args)
99
108
  delete
@@ -0,0 +1,42 @@
1
module Duracloud
  # Variant of SyncValidation that compares file names instead of running
  # md5deep: the sorted list of files under the content directory is
  # compared with the sorted content IDs from the manifest, and only the
  # names absent from the manifest are rechecked individually.
  class FastSyncValidation < SyncValidation

    # Writes the sorted content-id column of the manifest to the converted
    # manifest file.
    #
    # The commands are run with argument lists rather than through a shell
    # string, so the file names are never interpreted by the shell.
    #
    # @return [Boolean, nil] truthy when both commands succeeded
    def convert_manifest
      # content-id is the 2nd column of the manifest
      system("cut", "-f", "2", manifest_filename, out: converted_manifest_filename) &&
        system("sort", "-o", converted_manifest_filename, converted_manifest_filename)
    end

    # Lists the local files and rechecks those not named in the manifest.
    #
    # @return [Boolean] true when nothing needed rechecking, otherwise the
    #   result of the recheck
    # @raise [Error] if listing or comparing the files fails
    def audit
      # A failed listing would make the comparison below meaningless.
      unless find_files
        raise Error, "Error listing files in #{content_dir}."
      end
      # comm -23 keeps the lines found only in the first (local) file list.
      if system("comm", "-23", find_filename, converted_manifest_filename, out: audit_filename)
        File.empty?(audit_filename) || recheck
      else
        raise Error, "Error comparing #{find_filename} with #{converted_manifest_filename}."
      end
    end

    # Writes the sorted relative paths of all files under the content
    # directory (following symlinks) to the find file.
    #
    # @return [Boolean, nil] truthy when both commands succeeded
    def find_files
      # TODO handle exclude file?
      # find runs inside content_dir, so the output path must be absolute.
      outfile = File.join(FileUtils.pwd, find_filename)
      # Using a separate command for sort so we get find results incrementally.
      # The backslash is doubled so that sed receives a literal `\.`; a
      # single backslash before `.` is dropped by a double-quoted string.
      system("find -L . -type f | sed -e 's|^\\./||'", chdir: content_dir, out: outfile) &&
        system("sort", "-o", find_filename, find_filename)
    end

    private

    # Yields a check result for each content ID listed in the audit file.
    # No MD5 is available here, so each check is made without one.
    def do_recheck
      Enumerator.new do |e|
        File.foreach(audit_filename) do |line|
          content_id = line.chomp
          e << check(content_id)
        end
      end
    end

    def find_filename
      filename("find.txt")
    end

  end
end
@@ -108,6 +108,21 @@ module Duracloud
108
108
  "The API method 'Perform Task' has not been implemented."
109
109
  end
110
110
 
111
+ # @see https://wiki.duraspace.org/display/DURACLOUDDOC/DuraCloud+REST+API#DuraCloudRESTAPI-GetStorageReportsbySpace
112
+ def get_storage_reports_by_space(space_id, **query)
113
+ durastore(:get, "report/space/#{space_id}", **query)
114
+ end
115
+
116
+ # @see https://wiki.duraspace.org/display/DURACLOUDDOC/DuraCloud+REST+API#DuraCloudRESTAPI-GetStorageReportsbyStore
117
+ def get_storage_reports_by_store(**query)
118
+ durastore(:get, "report/store", **query)
119
+ end
120
+
121
+ # @see https://wiki.duraspace.org/display/DURACLOUDDOC/DuraCloud+REST+API#DuraCloudRESTAPI-GetStorageReportsforallSpacesinaStore(inasingleday)
122
+ def get_storage_reports_for_all_spaces_in_a_store(epoch_ms, **query)
123
+ durastore(:get, "report/store/#{epoch_ms}", **query)
124
+ end
125
+
111
126
  private
112
127
 
113
128
  def durastore(*args, &block)
@@ -0,0 +1,33 @@
1
+ require 'hashie'
2
+ require 'active_support'
3
+
4
module Duracloud
  # A single storage report. Camel-cased keys from the source data are
  # translated to snake-cased properties.
  class StorageReport < Hashie::Trash

    property "space_id", from: "spaceId"
    property "store_id", from: "storeId"
    property "byte_count", from: "byteCount"
    property "object_count", from: "objectCount"
    property "account_id", from: "accountId"
    property "timestamp"

    # The report timestamp as a UTC Time. The timestamp is divided by
    # 1000 before conversion, i.e. it is treated as milliseconds.
    def time
      return @time if @time
      seconds = timestamp / 1000.0
      @time = Time.at(seconds).utc
    end

    # The byte count formatted as a human-readable size.
    def human_size
      helper = ActiveSupport::NumberHelper
      helper.number_to_human_size(byte_count, prefix: :si)
    end

    # A multi-line, human-readable summary of the report.
    def to_s
      <<-EOS
Date: #{time}
Space ID: #{space_id || "(all)"}
Store ID: #{store_id}
Objects: #{object_count}
Total size: #{human_size} (#{byte_count} bytes)
      EOS
    end

  end
end
@@ -0,0 +1,52 @@
1
require 'forwardable'
require 'json'
require 'hashie'
3
+
4
module Duracloud
  # An enumerable collection of StorageReport objects built from the JSON
  # body of a storage report response.
  class StorageReports
    include Enumerable
    extend Forwardable

    # Enumerable provides no #last, so it is delegated to the array form.
    delegate :last => :to_a

    # @return the parsed JSON response body
    attr_reader :data

    # @param space_id the space to report on
    # @param query report parameters (see Params for the accepted keys)
    # @return [StorageReports]
    def self.by_space(space_id, **query)
      params = Params.new(query)
      response = Client.get_storage_reports_by_space(space_id, **params)
      new(response)
    end

    # @param query report parameters (see Params for the accepted keys)
    # @return [StorageReports]
    def self.by_store(**query)
      params = Params.new(query)
      response = Client.get_storage_reports_by_store(**params)
      new(response)
    end

    # @param epoch_ms [Integer, nil] report date in epoch milliseconds;
    #   defaults to 24 hours before the current time
    # @param query report parameters (see Params for the accepted keys)
    # @return [StorageReports]
    def self.for_all_spaces_in_a_store(epoch_ms = nil, **query)
      epoch_ms ||= (Time.now - (3600 * 24)).to_i * 1000
      params = Params.new(query)
      response = Client.get_storage_reports_for_all_spaces_in_a_store(epoch_ms, **params)
      new(response)
    end

    # @param response an object whose `body` is a JSON document
    def initialize(response)
      @data = JSON.parse(response.body)
    end

    # Yields a StorageReport for each entry in the parsed data.
    #
    # @return [Enumerator] when called without a block
    def each
      # Follow the usual Enumerable convention instead of raising
      # LocalJumpError when no block is given.
      return to_enum(:each) unless block_given?

      data.each do |report|
        yield StorageReport.new(report)
      end
    end

    private

    # Maps snake-cased query options onto the camel-cased parameter names.
    # Ruby's `private` keyword does not apply to constants, so this class
    # remains reachable as StorageReports::Params.
    class Params < Hashie::Trash
      property :storeID, from: :store_id
      property :groupBy, from: :group_by
      property :start
      property :end
    end

  end
end
@@ -1,6 +1,7 @@
1
1
  require 'active_model'
2
2
  require 'tempfile'
3
3
  require 'csv'
4
+ require 'fileutils'
4
5
 
5
6
  module Duracloud
6
7
  class SyncValidation
@@ -10,57 +11,139 @@ module Duracloud
10
11
  MD5_CSV_OPTS = { col_sep: TWO_SPACES }.freeze
11
12
  MANIFEST_CSV_OPTS = { col_sep: "\t", headers: true, return_headers: false }.freeze
12
13
 
13
- attr_accessor :space_id, :content_dir, :store_id
14
+ MISSING = "MISSING"
15
+ CHANGED = "CHANGED"
16
+ FOUND = "FOUND"
17
+
18
+ attr_accessor :space_id, :content_dir, :store_id, :work_dir, :fast
14
19
 
15
20
  def self.call(*args)
16
21
  new(*args).call
17
22
  end
18
23
 
24
+ def in_work_dir
25
+ if work_dir
26
+ FileUtils.cd(work_dir) { yield }
27
+ else
28
+ Dir.mktmpdir("#{space_id}-validation-") do |tmpdir|
29
+ FileUtils.cd(tmpdir) { yield }
30
+ end
31
+ end
32
+ end
33
+
19
34
    # Runs the validation inside the work directory: downloads the
    # manifest, converts it, then audits the content directory against it.
    #
    # @return the result of the audit step
    def call
      in_work_dir do
        download_manifest
        convert_manifest
        audit
      end
    end
41
+
42
+ def download_manifest
43
+ File.open(manifest_filename, "w") do |manifest|
21
44
  Manifest.download(space_id, store_id) do |chunk|
22
45
  manifest.write(chunk)
23
46
  end
24
- manifest.close
25
-
26
- # convert manifest into md5deep format
27
- Tempfile.open("#{space_id}-md5") do |md5_list|
28
- CSV.foreach(manifest.path, MANIFEST_CSV_OPTS) do |row|
29
- md5_list.puts [ row[2], row[1] ].join(TWO_SPACES)
30
- end
31
- md5_list.close
32
-
33
- # run md5deep to find files not listed in the manifest
34
- Tempfile.open("#{space_id}-audit") do |audit|
35
- audit.close
36
- pid = spawn("md5deep", "-X", md5_list.path, "-l", "-r", ".", chdir: content_dir, out: audit.path)
37
- Process.wait(pid)
38
- case $?.exitstatus
39
- when 0
40
- true
41
- when 1, 2
42
- failures = []
43
- CSV.foreach(audit.path, MD5_CSV_OPTS) do |md5, path|
44
- content_id = path.sub(/^\.\//, "")
45
- begin
46
- if !Duracloud::Content.exist?(space_id: space_id, store_id: store_id, content_id: content_id, md5: md5)
47
- failures << [ "MISSING", md5, content_id ].join("\t")
48
- end
49
- rescue MessageDigestError => e
50
- failures << [ "CHANGED", md5, content_id ].join("\t")
51
- end
52
- end
53
- STDOUT.puts failures
54
- failures.empty?
55
- when 64
56
- raise Error, "md5deep user error."
57
- when 128
58
- raise Error, "md5deep internal error."
59
- end
60
- end
47
+ end
48
+ end
49
+
50
+ def convert_manifest
51
+ File.open(converted_manifest_filename, "w") do |f|
52
+ CSV.foreach(manifest_filename, MANIFEST_CSV_OPTS) do |row|
53
+ f.puts [ row[2], row[1] ].join(TWO_SPACES)
61
54
  end
62
55
  end
63
56
  end
64
57
 
58
+ def audit
59
+ outfile = File.join(FileUtils.pwd, audit_filename)
60
+ infile = File.join(FileUtils.pwd, converted_manifest_filename)
61
+ pid = spawn("md5deep", "-X", infile, "-l", "-r", ".", chdir: content_dir, out: outfile)
62
+ Process.wait(pid)
63
+ case $?.exitstatus
64
+ when 0
65
+ true
66
+ when 1, 2
67
+ recheck
68
+ when 64, 128
69
+ raise Error, "md5deep error."
70
+ else
71
+ raise Error, "Unknown error."
72
+ end
73
+ end
74
+
75
+ def recheck
76
+ success = true
77
+ recheck_file do |csv|
78
+ do_recheck.each do |result|
79
+ csv << result.to_a
80
+ success &&= result.found?
81
+ end
82
+ end
83
+ success
84
+ end
85
+
86
+ private
87
+
88
    # Outcome of checking one content item: its status (compared against
    # FOUND below), the MD5 used for the check, and the content ID.
    CheckResult = Struct.new(:status, :md5, :content_id) do
      # True when the status equals FOUND.
      def found?
        status == FOUND
      end
    end
93
+
94
+ def recheck_file
95
+ if work_dir
96
+ CSV.open(recheck_filename, "w", col_sep: "\t") { |csv| yield(csv) }
97
+ else
98
+ CSV($stdout, col_sep: "\t") { |csv| yield(csv) }
99
+ end
100
+ end
101
+
102
+ def check(content_id, md5 = nil)
103
+ status = begin
104
+ exist?(content_id, md5) ? FOUND : MISSING
105
+ rescue MessageDigestError => e
106
+ CHANGED
107
+ end
108
+ CheckResult.new(status, md5 || "-", content_id)
109
+ end
110
+
111
+ def exist?(content_id, md5 = nil)
112
+ Duracloud::Content.exist?(space_id: space_id, store_id: store_id, content_id: content_id, md5: md5)
113
+ end
114
+
115
+ def do_recheck
116
+ Enumerator.new do |e|
117
+ CSV.foreach(audit_filename, MD5_CSV_OPTS) do |md5, path|
118
+ content_id = path.sub(/^\.\//, "")
119
+ e << check(content_id, md5)
120
+ end
121
+ end
122
+ end
123
+
124
    # Prefix shared by all working file names: the space ID.
    def prefix
      space_id
    end
127
+
128
+ def filename(suffix)
129
+ [ prefix, suffix ].join("-")
130
+ end
131
+
132
    # Name of the file holding the downloaded manifest.
    def manifest_filename
      filename("manifest.tsv")
    end
135
+
136
    # Name of the file holding the converted manifest.
    def converted_manifest_filename
      filename("converted-manifest.txt")
    end
139
+
140
    # Name of the file receiving the audit output.
    def audit_filename
      filename("audit.txt")
    end
143
+
144
    # Name of the file receiving the recheck results.
    def recheck_filename
      filename("recheck.txt")
    end
147
+
65
148
  end
66
149
  end