duracloud-client 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +14 -1
  3. data/lib/duracloud.rb +5 -1
  4. data/lib/duracloud/cli.rb +29 -107
  5. data/lib/duracloud/command_options.rb +120 -0
  6. data/lib/duracloud/commands.rb +40 -0
  7. data/lib/duracloud/commands/command.rb +6 -2
  8. data/lib/duracloud/commands/count.rb +15 -0
  9. data/lib/duracloud/commands/download_manifest.rb +0 -2
  10. data/lib/duracloud/commands/find.rb +16 -0
  11. data/lib/duracloud/commands/find_item.rb +16 -0
  12. data/lib/duracloud/commands/find_items.rb +22 -0
  13. data/lib/duracloud/commands/find_missing_items.rb +15 -0
  14. data/lib/duracloud/commands/find_space.rb +12 -0
  15. data/lib/duracloud/commands/get_storage_report.rb +16 -0
  16. data/lib/duracloud/commands/get_storage_report_for_all_spaces.rb +12 -0
  17. data/lib/duracloud/commands/get_storage_report_for_space.rb +10 -0
  18. data/lib/duracloud/commands/get_storage_report_for_store.rb +10 -0
  19. data/lib/duracloud/commands/list_content_ids.rb +11 -0
  20. data/lib/duracloud/commands/list_items.rb +17 -0
  21. data/lib/duracloud/commands/sync.rb +0 -2
  22. data/lib/duracloud/commands/validate.rb +2 -3
  23. data/lib/duracloud/content.rb +12 -3
  24. data/lib/duracloud/fast_sync_validation.rb +42 -0
  25. data/lib/duracloud/rest_methods.rb +15 -0
  26. data/lib/duracloud/storage_report.rb +33 -0
  27. data/lib/duracloud/storage_reports.rb +52 -0
  28. data/lib/duracloud/sync_validation.rb +122 -39
  29. data/lib/duracloud/version.rb +1 -1
  30. data/spec/unit/cli_spec.rb +59 -15
  31. data/spec/unit/client_spec.rb +24 -0
  32. data/spec/unit/content_spec.rb +17 -7
  33. data/spec/unit/storage_report_spec.rb +15 -0
  34. data/spec/unit/storage_reports_spec.rb +45 -0
  35. metadata +23 -3
  36. data/lib/duracloud/commands/get_properties.rb +0 -27
@@ -0,0 +1,16 @@
1
module Duracloud::Commands
  # Command that looks up one content item and prints its properties,
  # plus its MD5, size and chunked flag, one "name: value" pair per line.
  class FindItem < Command

    # Finds the content addressed by the command's space, store and content
    # IDs (and MD5) and writes the merged property list with `puts`.
    def call
      item = Duracloud::Content.find(
        space_id: space_id,
        store_id: store_id,
        content_id: content_id,
        md5: md5
      )
      # Work on a copy so the content object's own properties stay untouched.
      details = item.properties.dup
      details.merge!("MD5" => item.md5,
                     "Size" => item.size,
                     "Chunked" => item.chunked?)
      details.each { |name, value| puts "#{name}: #{value}" }
    end

  end
end
@@ -0,0 +1,22 @@
1
+ require 'csv'
2
+
3
module Duracloud::Commands
  # Command that looks up every item named in a CSV input file and writes
  # a CSV report of the items found to standard output.
  class FindItems < Command

    # Item attributes reported, in column order; also the CSV header row.
    HEADERS = %i( content_id md5 size content_type modified )

    # Reads `infile` row by row, using column 0 as the content ID and
    # column 1 as the MD5. Items that cannot be found, or whose digest does
    # not match, are reported on standard error and skipped.
    def call
      CSV.instance($stdout, headers: HEADERS, write_headers: true) do |report|
        CSV.foreach(infile, headers: false) do |row|
          wanted_id = row[0]
          wanted_md5 = row[1]
          begin
            found = Duracloud::Content.find(space_id: space_id, store_id: store_id, content_id: wanted_id, md5: wanted_md5)
            report << HEADERS.map { |attribute| found.send(attribute) }
          rescue Duracloud::NotFoundError, Duracloud::MessageDigestError => e
            $stderr.puts "ERROR: Content ID #{wanted_id} -- #{e.message}"
          end
        end
      end
    end

  end
end
@@ -0,0 +1,15 @@
1
# This command uses CSV directly, so load it here as the sibling CSV-based
# commands (FindItems, ListItems) do instead of relying on load order.
require "csv"

module Duracloud::Commands
  # Command that filters a CSV input file down to the rows whose content
  # does not exist in the space, echoing those rows to standard output.
  class FindMissingItems < Command

    # Reads `infile` row by row, using column 0 as the content ID and
    # column 1 as the MD5, and writes each row for which
    # `Duracloud::Content.exist?` returns false.
    #
    # NOTE(review): errors raised by `exist?` are not rescued here, so a
    # digest mismatch presumably aborts the run -- confirm that is intended.
    def call
      CSV.instance($stdout, headers: false) do |csv|
        CSV.foreach(infile, headers: false) do |row|
          unless Duracloud::Content.exist?(space_id: space_id, store_id: store_id, content_id: row[0], md5: row[1])
            csv << row
          end
        end
      end
    end

  end
end
@@ -0,0 +1,12 @@
1
module Duracloud::Commands
  # Command that prints the properties of a single space.
  class FindSpace < Command

    # Looks up the space by `space_id` and `store_id` and writes each of
    # its properties as a "name: value" line.
    def call
      found = Duracloud::Space.find(space_id, store_id)
      found.properties.each { |name, value| puts "#{name}: #{value}" }
    end

  end
end
@@ -0,0 +1,16 @@
1
module Duracloud::Commands
  # Entry-point command for storage reports; picks the concrete report
  # command from the options given and hands the CLI object to it.
  class GetStorageReport < Command

    # Dispatch order: a space ID selects the per-space report, otherwise
    # the `all_spaces` flag selects the all-spaces report, otherwise the
    # per-store report is used.
    def call
      handler = case
                when space_id   then GetStorageReportForSpace
                when all_spaces then GetStorageReportForAllSpaces
                else                 GetStorageReportForStore
                end
      handler.call(cli)
    end

  end
end
@@ -0,0 +1,12 @@
1
module Duracloud::Commands
  # Command that prints one storage report per space in a store.
  class GetStorageReportForAllSpaces < Command

    # Fetches the reports for all spaces in the store identified by
    # `store_id` and prints each one preceded by a 40-dash rule.
    def call
      reports = Duracloud::StorageReports.for_all_spaces_in_a_store(store_id: store_id)
      reports.each do |space_report|
        puts "-"*40
        puts space_report.to_s
      end
    end

  end
end
@@ -0,0 +1,10 @@
1
module Duracloud::Commands
  # Command that prints a storage report for one space.
  class GetStorageReportForSpace < Command

    # Fetches the storage reports for `space_id` and prints the last one
    # in the returned collection.
    def call
      latest = Duracloud::StorageReports.by_space(space_id, store_id: store_id).last
      puts latest.to_s
    end

  end
end
@@ -0,0 +1,10 @@
1
module Duracloud::Commands
  # Command that prints a storage report for a whole store.
  class GetStorageReportForStore < Command

    # Fetches the storage reports for the store identified by `store_id`
    # and prints the last one in the returned collection.
    def call
      latest = Duracloud::StorageReports.by_store(store_id: store_id).last
      puts latest.to_s
    end

  end
end
@@ -0,0 +1,11 @@
1
module Duracloud::Commands
  # Command that prints the content IDs of a space, one per line.
  class ListContentIds < Command

    # Enumerates the content IDs of `space_id`, passing `store_id` and
    # `prefix` through to `Duracloud::Space.content_ids`.
    def call
      ids = Duracloud::Space.content_ids(space_id, store_id: store_id, prefix: prefix)
      ids.each { |content_id| puts content_id }
    end

  end
end
@@ -0,0 +1,17 @@
1
+ require "csv"
2
+
3
module Duracloud::Commands
  # Command that writes a CSV listing of a space's items to standard output.
  class ListItems < Command

    # Item attributes reported, in column order; also the CSV header row.
    HEADERS = %i( content_id md5 size content_type modified )

    # Emits one CSV row per item returned by `Duracloud::Space.items` for
    # `space_id`, passing `store_id` and `prefix` through.
    def call
      CSV.instance($stdout, headers: HEADERS, write_headers: true) do |listing|
        space_items = Duracloud::Space.items(space_id, store_id: store_id, prefix: prefix)
        space_items.each do |entry|
          listing << HEADERS.map { |attribute| entry.send(attribute) }
        end
      end
    end

  end
end
@@ -1,5 +1,3 @@
1
- require_relative 'command'
2
-
3
1
  module Duracloud::Commands
4
2
  class Sync < Command
5
3
 
@@ -1,10 +1,9 @@
1
- require_relative "command"
2
-
3
1
  module Duracloud::Commands
4
2
  class Validate < Command
5
3
 
6
4
  def call
7
- Duracloud::SyncValidation.call(space_id: space_id, store_id: store_id, content_dir: content_dir)
5
+ klass = fast ? Duracloud::FastSyncValidation : DuracloudSyncValidation
6
+ klass.call(space_id: space_id, store_id: store_id, content_dir: content_dir, work_dir: work_dir)
8
7
  end
9
8
 
10
9
  end
@@ -79,21 +79,30 @@ module Duracloud
79
79
 
80
80
  # @return [Duracloud::Content] the copied content
81
81
  # The current instance still represents the original content.
82
+ # @raise [Duracloud::Error]
82
83
  def copy(**args)
83
84
  dest = args.except(:force)
84
85
  dest[:space_id] ||= space_id
86
+ dest[:store_id] ||= store_id
85
87
  dest[:content_id] ||= content_id
86
- raise CopyError, "Destination is the same as the source." if dest == copy_source
88
+ if dest == copy_source
89
+ raise CopyError, "Destination is the same as the source."
90
+ end
87
91
  if !args[:force] && Content.exist?(**dest)
88
- raise CopyError, "Destination exists and :false option is false."
92
+ raise CopyError, "Destination exists and `:force' option is false."
89
93
  end
90
94
  options = { storeID: dest[:store_id], headers: copy_headers }
91
- Client.copy_content(dest[:space_id], dest[:content_id], **options)
95
+ response = Client.copy_content(dest[:space_id], dest[:content_id], **options)
96
+ if md5 != response.md5
97
+ raise CopyError, "Message digest of copy does not match source " \
98
+ "(source: #{md5}; destination: #{response.md5})"
99
+ end
92
100
  Content.new(dest.merge(md5: md5))
93
101
  end
94
102
 
95
103
  # @return [Duracloud::Content] the moved content
96
104
  # The current instance still represents the deleted content.
105
+ # @raise [Duracloud::Error]
97
106
  def move(**args)
98
107
  copied = copy(**args)
99
108
  delete
@@ -0,0 +1,42 @@
1
module Duracloud
  # Sync validation variant that compares file names only: the sorted list
  # of files under the content directory is compared with the sorted list of
  # content IDs from the manifest, and only names absent from the manifest
  # are rechecked individually.
  class FastSyncValidation < SyncValidation

    # Writes the sorted content IDs of the manifest to the converted
    # manifest file.
    #
    # Both commands are run with argument vectors rather than through a
    # shell string, so the file name is never interpreted by a shell.
    #
    # @return [Boolean, nil] result of the last `system` call that ran
    def convert_manifest
      # content-id is the 2nd column of the manifest
      system("cut", "-f", "2", manifest_filename, out: converted_manifest_filename) &&
        system("sort", "-o", converted_manifest_filename, converted_manifest_filename)
    end

    # Lists local files, then uses `comm -23` to keep the names that appear
    # only in the local listing.
    #
    # @return [Boolean] true when nothing is left to recheck, otherwise the
    #   result of `recheck`
    # @raise [Error] if the file listing or the comparison fails
    def audit
      # A failed or unsorted listing would make the comparison meaningless,
      # so stop here instead of feeding it to comm.
      unless find_files
        raise Error, "Error listing files in #{content_dir}."
      end
      if system("comm", "-23", find_filename, converted_manifest_filename, out: audit_filename)
        File.empty?(audit_filename) || recheck
      else
        raise Error, "Error comparing #{find_filename} with #{converted_manifest_filename}."
      end
    end

    # Writes the sorted relative paths of all files under `content_dir`
    # to the find file in the current directory.
    #
    # @return [Boolean, nil] result of the last `system` call that ran
    def find_files
      # TODO handle exclude file?
      outfile = File.join(FileUtils.pwd, find_filename)
      # Using a separate command for sort so we get find results incrementally.
      # The backslash is doubled because "\." in a double-quoted Ruby string
      # is just "."; sed must receive `^\./` to match a literal dot.
      system("find -L . -type f | sed -e 's|^\\./||'", chdir: content_dir, out: outfile) &&
        system("sort", "-o", find_filename, find_filename)
    end

    private

    # Lazily checks each content ID listed in the audit file (one per line).
    def do_recheck
      Enumerator.new do |e|
        File.foreach(audit_filename) do |line|
          content_id = line.chomp
          e << check(content_id)
        end
      end
    end

    def find_filename
      filename("find.txt")
    end

  end
end
@@ -108,6 +108,21 @@ module Duracloud
108
108
  "The API method 'Perform Task' has not been implemented."
109
109
  end
110
110
 
111
# Sends a durastore GET request for the storage reports of one space.
# Extra keyword arguments are forwarded to `durastore` unchanged.
# @see https://wiki.duraspace.org/display/DURACLOUDDOC/DuraCloud+REST+API#DuraCloudRESTAPI-GetStorageReportsbySpace
def get_storage_reports_by_space(space_id, **query)
  report_path = "report/space/#{space_id}"
  durastore(:get, report_path, **query)
end
115
+
116
# Sends a durastore GET request for the storage reports of a store.
# Keyword arguments are forwarded to `durastore` unchanged.
# @see https://wiki.duraspace.org/display/DURACLOUDDOC/DuraCloud+REST+API#DuraCloudRESTAPI-GetStorageReportsbyStore
def get_storage_reports_by_store(**query)
  report_path = "report/store"
  durastore(:get, report_path, **query)
end
120
+
121
# Sends a durastore GET request for the reports of all spaces in a store,
# with `epoch_ms` appended to the request path. Extra keyword arguments
# are forwarded to `durastore` unchanged.
# @see https://wiki.duraspace.org/display/DURACLOUDDOC/DuraCloud+REST+API#DuraCloudRESTAPI-GetStorageReportsforallSpacesinaStore(inasingleday)
def get_storage_reports_for_all_spaces_in_a_store(epoch_ms, **query)
  report_path = "report/store/#{epoch_ms}"
  durastore(:get, report_path, **query)
end
125
+
111
126
  private
112
127
 
113
128
  def durastore(*args, &block)
@@ -0,0 +1,33 @@
1
+ require 'hashie'
2
+ require 'active_support'
3
+
4
module Duracloud
  # One storage report entry. Camel-cased keys of the incoming hash are
  # translated to snake-cased properties.
  class StorageReport < Hashie::Trash

    property "space_id", from: "spaceId"
    property "store_id", from: "storeId"
    property "byte_count", from: "byteCount"
    property "object_count", from: "objectCount"
    property "account_id", from: "accountId"
    property "timestamp"

    # Report time in UTC, treating `timestamp` as milliseconds since the
    # epoch. The value is computed once and cached.
    def time
      return @time if @time
      epoch_seconds = timestamp / 1000.0
      @time = Time.at(epoch_seconds).utc
    end

    # `byte_count` formatted by ActiveSupport's human-size helper.
    def human_size
      formatter = ActiveSupport::NumberHelper
      formatter.number_to_human_size(byte_count, prefix: :si)
    end

    # Multi-line summary of the report; "(all)" stands in for a missing
    # space ID.
    def to_s
      <<-EOS
Date: #{time}
Space ID: #{space_id || "(all)"}
Store ID: #{store_id}
Objects: #{object_count}
Total size: #{human_size} (#{byte_count} bytes)
      EOS
    end

  end
end
@@ -0,0 +1,52 @@
1
require 'json'
require 'hashie'
# Forwardable is extended below, so load it explicitly instead of relying
# on another file having required it first.
require 'forwardable'

module Duracloud
  # Enumerable collection of storage reports parsed from a JSON response
  # body; each element is yielded as a StorageReport.
  class StorageReports
    include Enumerable
    extend Forwardable

    # Enumerable has no #last, so it is taken from the materialized array.
    delegate :last => :to_a

    # @return the parsed JSON payload of the response
    attr_reader :data

    # Reports for one space.
    # @param space_id the space to report on
    # @param query options accepted by Params (store_id, group_by, start, end)
    # @return [StorageReports]
    def self.by_space(space_id, **query)
      params = Params.new(query)
      response = Client.get_storage_reports_by_space(space_id, **params)
      new(response)
    end

    # Reports for a store.
    # @param query options accepted by Params (store_id, group_by, start, end)
    # @return [StorageReports]
    def self.by_store(**query)
      params = Params.new(query)
      response = Client.get_storage_reports_by_store(**params)
      new(response)
    end

    # Reports for all spaces in a store.
    # @param epoch_ms time in milliseconds since the epoch; defaults to
    #   24 hours before the current time
    # @param query options accepted by Params (store_id, group_by, start, end)
    # @return [StorageReports]
    def self.for_all_spaces_in_a_store(epoch_ms = nil, **query)
      epoch_ms ||= (Time.now - (3600 * 24)).to_i * 1000
      params = Params.new(query)
      response = Client.get_storage_reports_for_all_spaces_in_a_store(epoch_ms, **params)
      new(response)
    end

    # @param response an object whose #body is a JSON document
    def initialize(response)
      @data = JSON.parse(response.body)
    end

    # Yields a StorageReport for every entry in the parsed data.
    # @return [Enumerator] when called without a block
    def each
      # Without this guard a block-less call failed with LocalJumpError.
      return enum_for(:each) unless block_given?
      data.each do |report|
        yield StorageReport.new(report)
      end
    end

    # Maps snake-cased option names onto the request parameter names.
    # A bare `private` used to precede this class; it has no effect on
    # constants, so it was dropped and Params stays reachable as before.
    class Params < Hashie::Trash
      property :storeID, from: :store_id
      property :groupBy, from: :group_by
      property :start
      property :end
    end

  end
end
@@ -1,6 +1,7 @@
1
1
  require 'active_model'
2
2
  require 'tempfile'
3
3
  require 'csv'
4
+ require 'fileutils'
4
5
 
5
6
  module Duracloud
6
7
  class SyncValidation
@@ -10,57 +11,139 @@ module Duracloud
10
11
  MD5_CSV_OPTS = { col_sep: TWO_SPACES }.freeze
11
12
  MANIFEST_CSV_OPTS = { col_sep: "\t", headers: true, return_headers: false }.freeze
12
13
 
13
- attr_accessor :space_id, :content_dir, :store_id
14
+ MISSING = "MISSING"
15
+ CHANGED = "CHANGED"
16
+ FOUND = "FOUND"
17
+
18
+ attr_accessor :space_id, :content_dir, :store_id, :work_dir, :fast
14
19
 
15
20
  def self.call(*args)
16
21
  new(*args).call
17
22
  end
18
23
 
24
# Runs the given block with the current directory switched to `work_dir`
# when one is set, otherwise to a temporary directory created through
# `Dir.mktmpdir` with a name prefixed by the space ID.
# @return the value of the block
def in_work_dir
  return FileUtils.cd(work_dir) { yield } if work_dir

  Dir.mktmpdir("#{space_id}-validation-") do |scratch_dir|
    FileUtils.cd(scratch_dir) { yield }
  end
end
33
+
19
34
  def call
20
- Tempfile.open("#{space_id}-manifest") do |manifest|
35
+ in_work_dir do
36
+ download_manifest
37
+ convert_manifest
38
+ audit
39
+ end
40
+ end
41
+
42
# Streams the manifest of the space into the manifest file, writing each
# chunk as it is yielded by Manifest.download.
def download_manifest
  File.open(manifest_filename, "w") do |out|
    Manifest.download(space_id, store_id) { |chunk| out.write(chunk) }
  end
end
49
+
50
# Converts the downloaded manifest into the converted manifest file, one
# line per manifest row: column 2, two spaces, column 1.
def convert_manifest
  File.open(converted_manifest_filename, "w") do |f|
    # Options are splatted as keywords: current CSV versions take the
    # second positional argument of CSV.foreach as the file mode, so a
    # positional options hash no longer works.
    CSV.foreach(manifest_filename, **MANIFEST_CSV_OPTS) do |row|
      f.puts [ row[2], row[1] ].join(TWO_SPACES)
    end
  end
end
64
57
 
58
# Runs md5deep in negative matching mode (-X) over the content directory,
# using the converted manifest as the list of known hashes and sending its
# output to the audit file.
#
# @return [Boolean] true when md5deep exits 0, otherwise the result of #recheck
# @raise [Error] when md5deep reports an error or an unexpected status
def audit
  # Absolute paths, because md5deep itself runs inside content_dir.
  outfile = File.join(FileUtils.pwd, audit_filename)
  infile = File.join(FileUtils.pwd, converted_manifest_filename)
  pid = spawn("md5deep", "-X", infile, "-l", "-r", ".", chdir: content_dir, out: outfile)
  Process.wait(pid)
  case $?.exitstatus
  when 0
    true
  when 1, 2, 3
    # Per the md5deep manual the exit status is a bit field: 1 = unused
    # hashes, 2 = unmatched inputs, and 3 means both occurred in one run.
    recheck
  when 64, 128
    raise Error, "md5deep error."
  else
    raise Error, "Unknown error."
  end
end
74
+
75
# Writes every recheck result as a row of the recheck output.
# @return [Boolean] true only if every result was found
def recheck
  all_found = true
  recheck_file do |csv|
    do_recheck.each do |outcome|
      csv << outcome.to_a
      all_found = false unless outcome.found?
    end
  end
  all_found
end
85
+
86
+ private
87
+
88
# Outcome of checking one content item: a status constant, the MD5 that
# was checked and the content ID.
CheckResult = Struct.new(:status, :md5, :content_id) do
  # @return [Boolean] whether the status equals FOUND
  def found?
    FOUND == status
  end
end
93
+
94
# Yields a tab-separated CSV writer: the recheck file when a work
# directory is set, standard output otherwise.
def recheck_file
  return CSV($stdout, col_sep: "\t") { |out| yield(out) } unless work_dir

  CSV.open(recheck_filename, "w", col_sep: "\t") { |out| yield(out) }
end
101
+
102
# Checks one content item and wraps the outcome in a CheckResult.
# Status is FOUND or MISSING according to #exist?, or CHANGED when the
# lookup raises MessageDigestError. A missing MD5 is recorded as "-".
def check(content_id, md5 = nil)
  outcome =
    begin
      if exist?(content_id, md5)
        FOUND
      else
        MISSING
      end
    rescue MessageDigestError
      CHANGED
    end
  CheckResult.new(outcome, md5 || "-", content_id)
end
110
+
111
# Asks Duracloud::Content whether the content exists in this validation's
# space and store, passing the MD5 along.
def exist?(content_id, md5 = nil)
  lookup = { space_id: space_id, store_id: store_id, content_id: content_id, md5: md5 }
  Duracloud::Content.exist?(**lookup)
end
114
+
115
# Lazily checks every entry of the audit file. Each line is read as an
# MD5 and a path; a leading "./" is stripped from the path to obtain the
# content ID.
# @return [Enumerator] yielding the result of #check for each entry
def do_recheck
  Enumerator.new do |e|
    # Options are splatted as keywords: current CSV versions take the
    # second positional argument of CSV.foreach as the file mode, so a
    # positional options hash no longer works.
    CSV.foreach(audit_filename, **MD5_CSV_OPTS) do |md5, path|
      content_id = path.sub(/^\.\//, "")
      e << check(content_id, md5)
    end
  end
end
123
+
124
# Prefix shared by all working files: the space ID.
def prefix
  space_id
end
127
+
128
# Builds a working file name of the form "<prefix>-<suffix>".
def filename(suffix)
  "#{prefix}-#{suffix}"
end
131
+
132
# Name of the file the manifest is downloaded to.
def manifest_filename
  filename "manifest.tsv"
end
135
+
136
# Name of the file holding the converted manifest.
def converted_manifest_filename
  filename "converted-manifest.txt"
end
139
+
140
# Name of the file that receives the audit output.
def audit_filename
  filename "audit.txt"
end
143
+
144
# Name of the file that receives recheck results when a work directory
# is in use.
def recheck_filename
  filename "recheck.txt"
end
147
+
65
148
  end
66
149
  end