sdr-replication 0.4.2 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/replication/sdr_object_version.rb +2 -2
- data/lib/sdr_replication.rb +1 -12
- metadata +59 -48
- data/lib/replication/bagit_bag.rb +0 -353
- data/lib/replication/file_fixity.rb +0 -98
- data/lib/replication/fixity.rb +0 -155
- data/lib/replication/operating_system.rb +0 -33
- data/lib/replication/tarfile.rb +0 -160
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97ad8544df9baab2d69065f1ef6612eae515fc78
|
4
|
+
data.tar.gz: a81bee33c44cd1fa8a28176fa938891bdaecdad1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05d4d43b5ba1abc93bfd450eb3d93645d3a7d0685d905e1a1541bc1eafde910005af98755bbd9344074bc82f5882b0786a9ec31b1bc0f0428bd586fbc0bd1428
|
7
|
+
data.tar.gz: 27f7d6ca7037724abae331f4f3f16cd4fec143f42f2c292b00503a9acd166869cdf26289fd6dde186b66f92c26f038c858ec33d09099b68e111cbaa5e595c79c
|
data/lib/replication/sdr_object_version.rb
CHANGED
@@ -152,7 +152,7 @@ module Replication
|
|
152
152
|
# @return [Replica] Copy the object version into a BagIt Bag in tarfile format
|
153
153
|
def create_replica
|
154
154
|
replica = self.replica
|
155
|
-
bag = BagitBag.create_bag(replica.bag_pathname)
|
155
|
+
bag = Archive::BagitBag.create_bag(replica.bag_pathname)
|
156
156
|
bag.bag_checksum_types = [:sha256]
|
157
157
|
bag.add_payload_tarfile("#{replica.replica_id}.tar",version_pathname, storage_object.object_pathname.parent)
|
158
158
|
bag.write_bag_info_txt
|
@@ -163,4 +163,4 @@ module Replication
|
|
163
163
|
|
164
164
|
end
|
165
165
|
|
166
|
-
end
|
166
|
+
end
|
data/lib/sdr_replication.rb
CHANGED
@@ -1,27 +1,16 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'bundler/setup'
|
3
3
|
Bundler.setup
|
4
|
-
require '
|
5
|
-
require 'find'
|
6
|
-
require 'json/pure'
|
4
|
+
require 'archive-utils'
|
7
5
|
require 'moab_stanford'
|
8
|
-
require 'pathname'
|
9
|
-
require 'rest-client'
|
10
|
-
require 'systemu'
|
11
|
-
|
12
6
|
|
13
7
|
# The classes used for SDR Replication workflows
|
14
8
|
module Replication
|
15
9
|
end
|
16
10
|
|
17
11
|
require 'replication/archive_catalog'
|
18
|
-
require 'replication/bagit_bag'
|
19
|
-
require 'replication/file_fixity'
|
20
|
-
require 'replication/fixity'
|
21
|
-
require 'replication/operating_system'
|
22
12
|
require 'replication/replica'
|
23
13
|
require 'replication/sdr_object'
|
24
14
|
require 'replication/sdr_object_version'
|
25
|
-
require 'replication/tarfile'
|
26
15
|
include Replication
|
27
16
|
|
metadata
CHANGED
@@ -1,29 +1,44 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sdr-replication
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.2
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
+
- Darren Weber
|
7
8
|
- Richard Anderson
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2014-
|
12
|
+
date: 2014-10-15 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: json_pure
|
15
16
|
requirement: !ruby/object:Gem::Requirement
|
16
17
|
requirements:
|
17
|
-
- - "
|
18
|
+
- - "~>"
|
18
19
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
20
|
+
version: '1.8'
|
20
21
|
type: :runtime
|
21
22
|
prerelease: false
|
22
23
|
version_requirements: !ruby/object:Gem::Requirement
|
23
24
|
requirements:
|
24
|
-
- - "
|
25
|
+
- - "~>"
|
25
26
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
27
|
+
version: '1.8'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rest-client
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '1.7'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '1.7'
|
27
42
|
- !ruby/object:Gem::Dependency
|
28
43
|
name: moab-versioning
|
29
44
|
requirement: !ruby/object:Gem::Requirement
|
@@ -39,152 +54,148 @@ dependencies:
|
|
39
54
|
- !ruby/object:Gem::Version
|
40
55
|
version: '1.3'
|
41
56
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
57
|
+
name: archive-utils
|
43
58
|
requirement: !ruby/object:Gem::Requirement
|
44
59
|
requirements:
|
45
|
-
- - "
|
60
|
+
- - "~>"
|
46
61
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
62
|
+
version: '0.0'
|
48
63
|
type: :runtime
|
49
64
|
prerelease: false
|
50
65
|
version_requirements: !ruby/object:Gem::Requirement
|
51
66
|
requirements:
|
52
|
-
- - "
|
67
|
+
- - "~>"
|
53
68
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
69
|
+
version: '0.0'
|
55
70
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
71
|
+
name: pry
|
57
72
|
requirement: !ruby/object:Gem::Requirement
|
58
73
|
requirements:
|
59
|
-
- - "
|
74
|
+
- - "~>"
|
60
75
|
- !ruby/object:Gem::Version
|
61
76
|
version: '0'
|
62
|
-
type: :
|
77
|
+
type: :development
|
63
78
|
prerelease: false
|
64
79
|
version_requirements: !ruby/object:Gem::Requirement
|
65
80
|
requirements:
|
66
|
-
- - "
|
81
|
+
- - "~>"
|
67
82
|
- !ruby/object:Gem::Version
|
68
83
|
version: '0'
|
69
84
|
- !ruby/object:Gem::Dependency
|
70
85
|
name: rake
|
71
86
|
requirement: !ruby/object:Gem::Requirement
|
72
87
|
requirements:
|
73
|
-
- - "
|
88
|
+
- - "~>"
|
74
89
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
90
|
+
version: '10'
|
76
91
|
type: :development
|
77
92
|
prerelease: false
|
78
93
|
version_requirements: !ruby/object:Gem::Requirement
|
79
94
|
requirements:
|
80
|
-
- - "
|
95
|
+
- - "~>"
|
81
96
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
97
|
+
version: '10'
|
83
98
|
- !ruby/object:Gem::Dependency
|
84
99
|
name: awesome_print
|
85
100
|
requirement: !ruby/object:Gem::Requirement
|
86
101
|
requirements:
|
87
|
-
- - "
|
102
|
+
- - "~>"
|
88
103
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
104
|
+
version: '1'
|
90
105
|
type: :development
|
91
106
|
prerelease: false
|
92
107
|
version_requirements: !ruby/object:Gem::Requirement
|
93
108
|
requirements:
|
94
|
-
- - "
|
109
|
+
- - "~>"
|
95
110
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
111
|
+
version: '1'
|
97
112
|
- !ruby/object:Gem::Dependency
|
98
113
|
name: equivalent-xml
|
99
114
|
requirement: !ruby/object:Gem::Requirement
|
100
115
|
requirements:
|
101
|
-
- - "
|
116
|
+
- - "~>"
|
102
117
|
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
118
|
+
version: '0.5'
|
104
119
|
type: :development
|
105
120
|
prerelease: false
|
106
121
|
version_requirements: !ruby/object:Gem::Requirement
|
107
122
|
requirements:
|
108
|
-
- - "
|
123
|
+
- - "~>"
|
109
124
|
- !ruby/object:Gem::Version
|
110
|
-
version: '0'
|
125
|
+
version: '0.5'
|
111
126
|
- !ruby/object:Gem::Dependency
|
112
127
|
name: fakeweb
|
113
128
|
requirement: !ruby/object:Gem::Requirement
|
114
129
|
requirements:
|
115
|
-
- - "
|
130
|
+
- - "~>"
|
116
131
|
- !ruby/object:Gem::Version
|
117
|
-
version: '
|
132
|
+
version: '1'
|
118
133
|
type: :development
|
119
134
|
prerelease: false
|
120
135
|
version_requirements: !ruby/object:Gem::Requirement
|
121
136
|
requirements:
|
122
|
-
- - "
|
137
|
+
- - "~>"
|
123
138
|
- !ruby/object:Gem::Version
|
124
|
-
version: '
|
139
|
+
version: '1'
|
125
140
|
- !ruby/object:Gem::Dependency
|
126
141
|
name: rspec
|
127
142
|
requirement: !ruby/object:Gem::Requirement
|
128
143
|
requirements:
|
129
144
|
- - "~>"
|
130
145
|
- !ruby/object:Gem::Version
|
131
|
-
version: 2.
|
146
|
+
version: '2.0'
|
132
147
|
type: :development
|
133
148
|
prerelease: false
|
134
149
|
version_requirements: !ruby/object:Gem::Requirement
|
135
150
|
requirements:
|
136
151
|
- - "~>"
|
137
152
|
- !ruby/object:Gem::Version
|
138
|
-
version: 2.
|
153
|
+
version: '2.0'
|
139
154
|
- !ruby/object:Gem::Dependency
|
140
155
|
name: simplecov
|
141
156
|
requirement: !ruby/object:Gem::Requirement
|
142
157
|
requirements:
|
143
158
|
- - "~>"
|
144
159
|
- !ruby/object:Gem::Version
|
145
|
-
version: 0.7
|
160
|
+
version: '0.7'
|
146
161
|
type: :development
|
147
162
|
prerelease: false
|
148
163
|
version_requirements: !ruby/object:Gem::Requirement
|
149
164
|
requirements:
|
150
165
|
- - "~>"
|
151
166
|
- !ruby/object:Gem::Version
|
152
|
-
version: 0.7
|
167
|
+
version: '0.7'
|
153
168
|
- !ruby/object:Gem::Dependency
|
154
169
|
name: yard
|
155
170
|
requirement: !ruby/object:Gem::Requirement
|
156
171
|
requirements:
|
157
|
-
- - "
|
172
|
+
- - "~>"
|
158
173
|
- !ruby/object:Gem::Version
|
159
|
-
version: '0'
|
174
|
+
version: '0.8'
|
160
175
|
type: :development
|
161
176
|
prerelease: false
|
162
177
|
version_requirements: !ruby/object:Gem::Requirement
|
163
178
|
requirements:
|
164
|
-
- - "
|
179
|
+
- - "~>"
|
165
180
|
- !ruby/object:Gem::Version
|
166
|
-
version: '0'
|
181
|
+
version: '0.8'
|
167
182
|
description: Contains classes to archive and retrieve digital object version content
|
168
183
|
and metadata
|
169
184
|
email:
|
170
|
-
-
|
185
|
+
- darren.weber@stanford.edu
|
171
186
|
executables: []
|
172
187
|
extensions: []
|
173
188
|
extra_rdoc_files: []
|
174
189
|
files:
|
175
190
|
- lib/libdir.rb
|
176
191
|
- lib/replication/archive_catalog.rb
|
177
|
-
- lib/replication/bagit_bag.rb
|
178
|
-
- lib/replication/file_fixity.rb
|
179
|
-
- lib/replication/fixity.rb
|
180
|
-
- lib/replication/operating_system.rb
|
181
192
|
- lib/replication/replica.rb
|
182
193
|
- lib/replication/sdr_object.rb
|
183
194
|
- lib/replication/sdr_object_version.rb
|
184
|
-
- lib/replication/tarfile.rb
|
185
195
|
- lib/sdr_replication.rb
|
186
|
-
homepage:
|
187
|
-
licenses:
|
196
|
+
homepage: https://github.com/sul-dlss/sdr-replication
|
197
|
+
licenses:
|
198
|
+
- Apache-2.0
|
188
199
|
metadata: {}
|
189
200
|
post_install_message:
|
190
201
|
rdoc_options: []
|
@@ -202,7 +213,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
202
213
|
version: 2.2.1
|
203
214
|
requirements: []
|
204
215
|
rubyforge_project:
|
205
|
-
rubygems_version: 2.4.
|
216
|
+
rubygems_version: 2.4.2
|
206
217
|
signing_key:
|
207
218
|
specification_version: 4
|
208
219
|
summary: Core methods for support of SDR Preservation Core replication
|
data/lib/replication/bagit_bag.rb
DELETED
@@ -1,353 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../libdir')
|
2
|
-
require 'sdr_replication'
|
3
|
-
|
4
|
-
module Replication
|
5
|
-
|
6
|
-
# A BagIt bag contains a structured copy of a digital object for storage, transfer, or replication
|
7
|
-
# @see https://tools.ietf.org/html/draft-kunze-bagit-10
|
8
|
-
# This class can be used to create, parse, or validate a bag instance
|
9
|
-
#
|
10
|
-
# @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
|
11
|
-
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
12
|
-
class BagitBag
|
13
|
-
|
14
|
-
# @param [Pathname,String] pathname The location of the bag home directory
|
15
|
-
# @return [BagitBag] Initialize a new bag, create home and payload folders, write bagit.txt file
|
16
|
-
def BagitBag.create_bag(pathname)
|
17
|
-
bag = BagitBag.new
|
18
|
-
bag.bag_pathname = pathname
|
19
|
-
bag.payload_pathname.mkpath
|
20
|
-
bag.write_bagit_txt
|
21
|
-
bag
|
22
|
-
end
|
23
|
-
|
24
|
-
# @param [Pathname,String] pathname The location of the bag home directory
|
25
|
-
# @return [BagitBag] Open an existing bag, raising an error if the bag home directory or its bagit.txt file is missing
|
26
|
-
def BagitBag.open_bag(pathname)
|
27
|
-
bag = BagitBag.new
|
28
|
-
bag.bag_pathname = pathname
|
29
|
-
raise "No bag found at #{bag.bag_pathname}" unless bag.bag_pathname.exist?
|
30
|
-
bagit_txt = bag.bag_pathname.join("bagit.txt")
|
31
|
-
raise "No bagit.txt file found at #{bagit_txt}" unless bagit_txt.exist?
|
32
|
-
bag
|
33
|
-
end
|
34
|
-
|
35
|
-
# @return [Pathname] The location of the bag home directory
|
36
|
-
def bag_pathname
|
37
|
-
@bag_pathname
|
38
|
-
end
|
39
|
-
|
40
|
-
# @param [Pathname,String] pathname The location of the bag home directory
|
41
|
-
# @return [Void] Set the location of the bag home directory
|
42
|
-
def bag_pathname=(pathname)
|
43
|
-
@bag_pathname = Pathname(pathname)
|
44
|
-
end
|
45
|
-
|
46
|
-
# @return [Pathname] The location of the bag data directory
|
47
|
-
def payload_pathname
|
48
|
-
bag_pathname.join('data')
|
49
|
-
end
|
50
|
-
|
51
|
-
# @return [Pathname] Generate the bagit.txt tag file
|
52
|
-
def write_bagit_txt
|
53
|
-
bagit_txt = bag_pathname.join("bagit.txt")
|
54
|
-
bagit_txt.open('w') do |f|
|
55
|
-
f.puts "Tag-File-Character-Encoding: UTF-8"
|
56
|
-
f.puts "BagIt-Version: 0.97"
|
57
|
-
end
|
58
|
-
bagit_txt
|
59
|
-
end
|
60
|
-
|
61
|
-
# @return [Hash<String,String>] A hash containing the properties documented in the bagit.txt tagfile
|
62
|
-
def read_bagit_txt
|
63
|
-
properties = Hash.new
|
64
|
-
bagit_txt = bag_pathname.join("bagit.txt")
|
65
|
-
bagit_txt.readlines.each do |line|
|
66
|
-
line.chomp!.strip!
|
67
|
-
key,value = line.split(':',2)
|
68
|
-
properties[key.strip] = value.strip if value
|
69
|
-
end
|
70
|
-
properties
|
71
|
-
end
|
72
|
-
|
73
|
-
# @return [Array<Symbol>] The list of checksum types to be used when generating fixity data
|
74
|
-
def bag_checksum_types
|
75
|
-
@bag_checksum_types ||= Fixity.default_checksum_types
|
76
|
-
end
|
77
|
-
|
78
|
-
# @param [Object] types The list of checksum types to be used when generating fixity data
|
79
|
-
# @return [Void] Set the list of checksum types to be used when generating fixity data
|
80
|
-
def bag_checksum_types=(*types)
|
81
|
-
@bag_checksum_types = Fixity.validate_checksum_types(*types)
|
82
|
-
end
|
83
|
-
|
84
|
-
# @param [Symbol] link_mode Specifies whether to :copy, :link, or :symlink the files to the payload directory
|
85
|
-
# @param [Pathname] source_dir The source location of the directory whose contents are to be bagged
|
86
|
-
# @return [Pathname] Generate file_fixity_hash and send it to #add_files_to_payload
|
87
|
-
def add_dir_to_payload (link_mode, source_dir)
|
88
|
-
file_fixity_hash = Fixity.generate_checksums(source_dir, source_dir.find ,bag_checksum_types)
|
89
|
-
add_files_to_payload(link_mode, source_dir, file_fixity_hash)
|
90
|
-
payload_pathname
|
91
|
-
end
|
92
|
-
|
93
|
-
# @param [Symbol] link_mode Specifies whether to :copy, :link, or :symlink the files to the payload directory
|
94
|
-
# @param [Pathname] source_basepath The source location of the directory whose contents are to be ingested
|
95
|
-
# @param [Hash<String,FileFixity>] file_fixity_hash The list of files (with fixity data) to be added to the payload
|
96
|
-
# @return [Pathname] Copy or link the files specified in the file_fixity_hash to the payload directory,
|
97
|
-
# then update the payload manifest files
|
98
|
-
def add_files_to_payload(link_mode, source_basepath, file_fixity_hash)
|
99
|
-
file_fixity_hash.keys.each do |file_id|
|
100
|
-
source_pathname = source_basepath.join(file_id)
|
101
|
-
target_pathname = payload_pathname.join(file_id)
|
102
|
-
copy_file(link_mode, source_pathname, target_pathname)
|
103
|
-
end
|
104
|
-
write_manifest_checksums('manifest', add_data_prefix(file_fixity_hash))
|
105
|
-
payload_pathname
|
106
|
-
end
|
107
|
-
|
108
|
-
# @param [Hash<String,FileFixity>] file_fixity_hash key is file_id, values are Fixity objects containing checksums
|
109
|
-
# @return [Hash<String,FileFixity>] A revised hash with file_id paths prefixed with 'data/'
|
110
|
-
def add_data_prefix(file_fixity_hash)
|
111
|
-
new_hash = Hash.new
|
112
|
-
file_fixity_hash.values.each do |fixity|
|
113
|
-
fixity.file_id = "data/#{fixity.file_id}"
|
114
|
-
new_hash[fixity.file_id] = fixity
|
115
|
-
end
|
116
|
-
new_hash
|
117
|
-
end
|
118
|
-
|
119
|
-
# @param [Symbol] link_mode Specifies whether to :copy, :link, or :symlink the files to the payload directory
|
120
|
-
# @param [Pathname] source_pathname The source location of the file to be ingested
|
121
|
-
# @param [Pathname] target_pathname The target location of the file (its parent directory is created if needed)
|
122
|
-
# @return [Pathname] link or copy the specified file from source location to the target location
|
123
|
-
def copy_file(link_mode, source_pathname, target_pathname)
|
124
|
-
target_pathname.parent.mkpath
|
125
|
-
case link_mode
|
126
|
-
when :copy, nil
|
127
|
-
FileUtils.copy(source_pathname.to_s, target_pathname.to_s) # automatically dereferences symlinks
|
128
|
-
when :link
|
129
|
-
FileUtils.link(source_pathname.to_s, target_pathname.to_s) #, :force => true (false is default)
|
130
|
-
when :symlink
|
131
|
-
FileUtils.symlink(source_pathname.to_s, target_pathname.to_s) #, :force => true (false is default)
|
132
|
-
else
|
133
|
-
raise "Invalid link_mode: #{link_mode}, expected one of [:copy,:link,:symlink]"
|
134
|
-
end
|
135
|
-
target_pathname
|
136
|
-
end
|
137
|
-
|
138
|
-
# @param [Pathname,String] source_fullpath The location of the directory whose content will be tarred
|
139
|
-
# @param [Pathname,String] source_basepath The location of the directory to change to before doing the tar create
|
140
|
-
# @return [Tarfile] Create a tar archive of a directory into the payload directory,
|
141
|
-
# generating checksums in parallel processes and recording those checksums in the payload manifests
|
142
|
-
def add_payload_tarfile(tarfile_id,source_fullpath, source_basepath)
|
143
|
-
tarfile = Tarfile.new
|
144
|
-
tarfile.source_basepath = Pathname(source_basepath)
|
145
|
-
tarfile.source_fullpath = Pathname(source_fullpath)
|
146
|
-
tarfile.tarfile_basepath = payload_pathname
|
147
|
-
tarfile.tarfile_fullpath = payload_pathname.join("#{tarfile_id}")
|
148
|
-
tarfile.create_tarfile
|
149
|
-
file_fixity_hash = Fixity.generate_checksums(bag_pathname,[tarfile.tarfile_fullpath],bag_checksum_types)
|
150
|
-
write_manifest_checksums('manifest', file_fixity_hash)
|
151
|
-
tarfile
|
152
|
-
end
|
153
|
-
|
154
|
-
# @return [Pathname] Generate the bag-info.txt tag file to record the payload size
|
155
|
-
def write_bag_info_txt
|
156
|
-
payload_size = bag_payload_size
|
157
|
-
bag_info_txt = bag_pathname.join("bag-info.txt")
|
158
|
-
bag_info_txt.open('w') do |f|
|
159
|
-
f.puts "External-Identifier: #{bag_pathname.basename}"
|
160
|
-
f.puts "Payload-Oxum: #{payload_size[:bytes]}.#{payload_size[:files]}"
|
161
|
-
f.puts "Bag-Size: #{bag_size_human(payload_size[:bytes])}"
|
162
|
-
end
|
163
|
-
bag_info_txt
|
164
|
-
end
|
165
|
-
|
166
|
-
# @return [Hash<Symbol,Integer>] A hash containing the payload size in bytes, and the number of files,
|
167
|
-
# derived from the payload directory contents
|
168
|
-
def bag_payload_size
|
169
|
-
payload_pathname.find.select{|f| f.file?}.inject({bytes: 0, files: 0}) do |hash,file|
|
170
|
-
hash[:bytes] += file.size
|
171
|
-
hash[:files] += 1
|
172
|
-
hash
|
173
|
-
end
|
174
|
-
end
|
175
|
-
|
176
|
-
# @param [Integer] bytes The total number of bytes in the payload
|
177
|
-
# @return [String] Human-readable rendition of the total payload size
|
178
|
-
def bag_size_human(bytes)
|
179
|
-
count = 0
|
180
|
-
size = bytes
|
181
|
-
while ( size >= 1024 and count < 4 )
|
182
|
-
size /= 1024.0
|
183
|
-
count += 1
|
184
|
-
end
|
185
|
-
if (count == 0)
|
186
|
-
return sprintf("%d B", size)
|
187
|
-
else
|
188
|
-
return sprintf("%.2f %s", size, %w[B KB MB GB TB][count] )
|
189
|
-
end
|
190
|
-
end
|
191
|
-
|
192
|
-
# @return [Hash<String,String>] A hash containing the properties documented in the bag-info.txt tagfile
|
193
|
-
def read_bag_info_txt
|
194
|
-
properties = Hash.new
|
195
|
-
bag_info = bag_pathname.join("bag-info.txt")
|
196
|
-
bag_info.readlines.each do |line|
|
197
|
-
line.chomp!.strip!
|
198
|
-
key,value = line.split(':',2)
|
199
|
-
properties[key.strip] = value.strip if value
|
200
|
-
end
|
201
|
-
properties
|
202
|
-
end
|
203
|
-
|
204
|
-
# @return [Hash<Symbol,Integer>] A hash containing the payload size in bytes, and the number of files,
|
205
|
-
# derived from the Payload-Oxum property
|
206
|
-
def info_payload_size
|
207
|
-
info = read_bag_info_txt
|
208
|
-
size_array = info['Payload-Oxum'].split('.')
|
209
|
-
size_hash = {:bytes => size_array[0].to_i, :files => size_array[1].to_i}
|
210
|
-
size_hash
|
211
|
-
end
|
212
|
-
|
213
|
-
# @return [Boolean] Compare the actual measured payload size against the value recorded in bag-info.txt
|
214
|
-
def verify_payload_size
|
215
|
-
info_size = info_payload_size
|
216
|
-
bag_size = bag_payload_size
|
217
|
-
if info_size != bag_size
|
218
|
-
raise "Failed payload size verification! Expected: #{info_size}, Found: #{bag_size}"
|
219
|
-
end
|
220
|
-
true
|
221
|
-
end
|
222
|
-
|
223
|
-
# @return [Hash<String,FileFixity>] create hash containing ids and checksums for all files in the bag's root directory
|
224
|
-
def generate_tagfile_checksums
|
225
|
-
# get list of all files in the bag home dir, except those starting with 'tagmanifest'
|
226
|
-
tagfiles = bag_pathname.children.reject{|file| file.basename.to_s.start_with?('tagmanifest')}
|
227
|
-
# generate checksums, using bag home dir as the base directory for file ids (per bagit spec)
|
228
|
-
Fixity.generate_checksums(bag_pathname, tagfiles, bag_checksum_types )
|
229
|
-
end
|
230
|
-
|
231
|
-
# @return [Hash<String,FileFixity>] create hash containing ids and checksums for all files in the bag's payload
|
232
|
-
def generate_payload_checksums
|
233
|
-
# get list of all files in the data directory
|
234
|
-
path_list = payload_pathname.find
|
235
|
-
# generate checksums, but use bag home dir as the base directory for file ids (per bagit spec)
|
236
|
-
Fixity.generate_checksums(bag_pathname, path_list, bag_checksum_types)
|
237
|
-
end
|
238
|
-
|
239
|
-
# @param [String] manifest_type The type of manifest file ('manifest' or 'tagmanifest') to be updated
|
240
|
-
# @param [Hash<String,FileFixity>] file_fixity_hash A hash containing file ids and fixity data
|
241
|
-
# @param [String] open_mode The file open mode (default is 'a')
|
242
|
-
# @return [Hash<Symbol,Pathname>] Update each of the manifests with data from the file_fixity_hash
|
243
|
-
def write_manifest_checksums(manifest_type, file_fixity_hash, open_mode='a')
|
244
|
-
manifests = Hash.new
|
245
|
-
self.bag_checksum_types.each do |checksum_type|
|
246
|
-
manifest_pathname = bag_pathname.join("#{manifest_type}-#{checksum_type}.txt")
|
247
|
-
manifest_file = manifest_pathname.open(open_mode)
|
248
|
-
file_fixity_hash.values.each do |fixity|
|
249
|
-
checksum = fixity.get_checksum(checksum_type)
|
250
|
-
manifest_file.puts("#{checksum} #{fixity.file_id}") if checksum
|
251
|
-
end
|
252
|
-
manifest_file.close
|
253
|
-
manifests[checksum_type] = manifest_pathname
|
254
|
-
end
|
255
|
-
manifests
|
256
|
-
end
|
257
|
-
|
258
|
-
# @param [String] manifest_type The type of manifest file ('manifest' or 'tagmanifest') to be read
|
259
|
-
# @return [Hash<String,FileFixity>] A hash containing file ids and fixity data derived from the manifest files
|
260
|
-
def read_manifest_files(manifest_type)
|
261
|
-
file_fixity_hash = Hash.new
|
262
|
-
checksum_type_list = Array.new
|
263
|
-
Fixity.valid_checksum_ids.each do |checksum_type|
|
264
|
-
manifest_pathname = bag_pathname.join("#{manifest_type}-#{checksum_type}.txt")
|
265
|
-
if manifest_pathname.file?
|
266
|
-
checksum_type_list << checksum_type
|
267
|
-
manifest_pathname.readlines.each do |line|
|
268
|
-
line.chomp!.strip!
|
269
|
-
checksum,file_id = line.split(/[\s*]+/,2)
|
270
|
-
file_fixity = file_fixity_hash[file_id] || FileFixity.new(file_id: file_id)
|
271
|
-
file_fixity.set_checksum(checksum_type,checksum)
|
272
|
-
file_fixity_hash[file_id] = file_fixity
|
273
|
-
end
|
274
|
-
end
|
275
|
-
end
|
276
|
-
self.bag_checksum_types = self.bag_checksum_types | checksum_type_list
|
277
|
-
file_fixity_hash
|
278
|
-
end
|
279
|
-
|
280
|
-
# @return [Boolean] Compare fixity data from the tag manifest files against the values measured by digesting the files
|
281
|
-
def verify_tagfile_manifests
|
282
|
-
manifest_type = 'tagmanifest'
|
283
|
-
manifest_fixity_hash = read_manifest_files(manifest_type)
|
284
|
-
bag_fixity_hash = generate_tagfile_checksums
|
285
|
-
verify_manifests(manifest_type, manifest_fixity_hash, bag_fixity_hash)
|
286
|
-
end
|
287
|
-
|
288
|
-
# @return [Boolean] Compare fixity data from the payload manifest files against the values measured by digesting the files
|
289
|
-
def verify_payload_manifests
|
290
|
-
manifest_type = 'manifest'
|
291
|
-
manifest_fixity_hash = read_manifest_files(manifest_type)
|
292
|
-
bag_fixity_hash = generate_payload_checksums
|
293
|
-
verify_manifests(manifest_type, manifest_fixity_hash, bag_fixity_hash)
|
294
|
-
end
|
295
|
-
|
296
|
-
# @param [String] manifest_type The type of manifest file ('manifest' or 'tagmanifest') to be read
|
297
|
-
# @param [Hash<String,FileFixity>] manifest_fixity_hash A hash containing file ids and fixity data derived from the manifest files
|
298
|
-
# @param [Hash<String,FileFixity>] bag_fixity_hash A hash containing file ids and fixity data derived from the actual files
|
299
|
-
# @return [Boolean] Compare fixity data from the manifest files against the values measured by digesting the files,
|
300
|
-
# returning true if equal or raising an exception if not equal
|
301
|
-
def verify_manifests(manifest_type, manifest_fixity_hash, bag_fixity_hash)
|
302
|
-
diff = manifest_diff(manifest_fixity_hash, bag_fixity_hash)
|
303
|
-
if diff.size > 0
|
304
|
-
raise "Failed #{manifest_type} verification! Differences: \n#{diff.inspect}"
|
305
|
-
end
|
306
|
-
true
|
307
|
-
end
|
308
|
-
|
309
|
-
# @param [Hash<String,FileFixity>] manifest_fixity_hash A hash containing file ids and fixity data derived from the manifest files
|
310
|
-
# @param [Hash<String,FileFixity>] bag_fixity_hash A hash containing file ids and fixity data derived from the actual files
|
311
|
-
# @return [Hash] A report of the differences between the fixity data from the manifest files
|
312
|
-
# against the values measured by digesting the files
|
313
|
-
def manifest_diff(manifest_fixity_hash, bag_fixity_hash)
|
314
|
-
diff = Hash.new
|
315
|
-
(manifest_fixity_hash.keys | bag_fixity_hash.keys).each do |file_id|
|
316
|
-
manifest_fixity = manifest_fixity_hash[file_id] || FileFixity.new(file_id: file_id)
|
317
|
-
bag_fixity = bag_fixity_hash[file_id] || FileFixity.new(file_id: file_id)
|
318
|
-
if manifest_fixity != bag_fixity
|
319
|
-
diff[file_id] = manifest_fixity.diff(bag_fixity,'manifest','bag')
|
320
|
-
end
|
321
|
-
end
|
322
|
-
diff
|
323
|
-
end
|
324
|
-
|
325
|
-
# @return [Boolean] Validate the bag containing the digital object
|
326
|
-
def verify_bag
|
327
|
-
verify_bag_structure
|
328
|
-
verify_tagfile_manifests
|
329
|
-
verify_payload_size
|
330
|
-
verify_payload_manifests
|
331
|
-
true
|
332
|
-
end
|
333
|
-
|
334
|
-
# @return [Boolean] Test the existence of expected files, return true if files exist, raise exception if not
|
335
|
-
def verify_bag_structure
|
336
|
-
required_files = ['data','bagit.txt','bag-info.txt','manifest-sha256.txt','tagmanifest-sha256.txt']
|
337
|
-
required_files.each{|filename| verify_pathname(bag_pathname.join(filename))}
|
338
|
-
optional_files = []
|
339
|
-
true
|
340
|
-
end
|
341
|
-
|
342
|
-
# @param [Pathname] pathname The file whose existence should be verified
|
343
|
-
# @return [Boolean] Test the existence of the specified path. Return true if file exists, raise exception if not
|
344
|
-
def verify_pathname(pathname)
|
345
|
-
raise "#{pathname.basename} not found at #{pathname}" unless pathname.exist?
|
346
|
-
true
|
347
|
-
end
|
348
|
-
|
349
|
-
|
350
|
-
end
|
351
|
-
|
352
|
-
|
353
|
-
end
|
data/lib/replication/file_fixity.rb
DELETED
@@ -1,98 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../libdir')
|
2
|
-
require 'sdr_replication'
|
3
|
-
|
4
|
-
module Replication
|
5
|
-
|
6
|
-
# The fixity properties of a file, used to determine file content equivalence.
|
7
|
-
# Placing this data in a class by itself facilitates using the MD5, SHA1, etc checksums (and optionally the file size)
|
8
|
-
# as a single key when doing comparisons against other file instances. The design assumes that this file fixity
|
9
|
-
# is sufficiently unique to act as a comparator for determining file equality or verifying checksum manifests.
|
10
|
-
#
|
11
|
-
# @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
|
12
|
-
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
13
|
-
class FileFixity
|
14
|
-
|
15
|
-
# @param [Hash<Symbol,Object>] options Key,Value pairs specifying initial values of attributes
|
16
|
-
def initialize(options=nil)
|
17
|
-
@checksums=Hash.new
|
18
|
-
options = {} if options.nil?
|
19
|
-
options.each do |key,value|
|
20
|
-
#instance_variable_set("@#{key}", value)
|
21
|
-
send "#{key}=", value
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
# @return [String] The name of the file, relative to its base directory
|
26
|
-
# (for payload files, path relative to the data folder. For tag files, path relative to the bag home folder)
|
27
|
-
attr_accessor :file_id
|
28
|
-
|
29
|
-
# @return [Integer] The size of the file in bytes
|
30
|
-
attr_accessor :bytes
|
31
|
-
|
32
|
-
# @return [Hash<Symbol,String>] The MD5, SHA1, SHA256, etc checksum values of the file
|
33
|
-
attr_accessor :checksums
|
34
|
-
|
35
|
-
# @param [Symbol,String] type The type of checksum (e.g. :md5, :sha1, :sha256)
|
36
|
-
# @return [String] The value of the file digest
|
37
|
-
def get_checksum(type)
|
38
|
-
checksum_type = type.to_s.downcase.to_sym
|
39
|
-
self.checksums[checksum_type]
|
40
|
-
end
|
41
|
-
|
42
|
-
# @param type [Symbol,String] The type of checksum
|
43
|
-
# @param value [String] value of the file digest
|
44
|
-
# @return [void] Set the value for the specified checksum type in the checksum hash
|
45
|
-
def set_checksum(type,value)
|
46
|
-
checksum_type = type.to_s.downcase.to_sym
|
47
|
-
Fixity.validate_checksum_types(checksum_type)
|
48
|
-
self.checksums[checksum_type] = value
|
49
|
-
end
|
50
|
-
|
51
|
-
# @param other [FileFixity] The other file fixity being compared to this fixity
|
52
|
-
# @return [Boolean] Returns true if self and other have comparable fixity data.
|
53
|
-
def eql?(other)
|
54
|
-
matching_checksum_types = self.checksums.keys & other.checksums.keys
|
55
|
-
return false if matching_checksum_types.size == 0
|
56
|
-
matching_checksum_types.each do |type|
|
57
|
-
return false if self.checksums[type] != other.checksums[type]
|
58
|
-
end
|
59
|
-
true
|
60
|
-
end
|
61
|
-
|
62
|
-
# (see #eql?)
|
63
|
-
def ==(other)
|
64
|
-
eql?(other)
|
65
|
-
end
|
66
|
-
|
67
|
-
# @return [Fixnum] Compute a hash-code for the fixity value array.
|
68
|
-
# Two file instances with the same content will have the same hash code (and will compare using eql?).
|
69
|
-
# @note The hash and eql? methods override the methods inherited from Object.
|
70
|
-
# These methods ensure that instances of this class can be used as Hash keys. See
|
71
|
-
# * {http://www.paulbutcher.com/2007/10/navigating-the-equality-maze/}
|
72
|
-
# * {http://techbot.me/2011/05/ruby-basics-equality-operators-ruby/}
|
73
|
-
# Also overridden is {#==} so that equality tests in other contexts will also return the expected result.
|
74
|
-
def hash
|
75
|
-
[self.file_id].hash
|
76
|
-
end
|
77
|
-
|
78
|
-
# @param [FileFixity] other The other FileFixity object being compared to this one
|
79
|
-
# @param [String] left The label to use for values from this base FileFixity object
|
80
|
-
# @param [String] right The label to use for values from the other FileFixity object
|
81
|
-
# @return [Hash<Symbol,Hash<String,String>>] details of the checksum differences between fixity objects
|
82
|
-
def diff(other,left='base',right='other')
  # Prefer the checksum types both objects have in common ...
  types_to_compare = checksums.keys & other.checksums.keys
  # ... but if they share none, report every type found on either side
  types_to_compare = checksums.keys | other.checksums.keys if types_to_compare.empty?
  differences = types_to_compare.each_with_object({}) do |type, found|
    base_value = checksums[type]
    other_value = other.checksums[type]
    # Record only the types whose values disagree, keyed by the caller-supplied labels
    found[type] = { left => base_value, right => other_value } unless base_value == other_value
  end
  # Callers receive nil (not an empty Hash) when nothing differs
  differences.empty? ? nil : differences
end
|
95
|
-
|
96
|
-
end
|
97
|
-
|
98
|
-
end
|
data/lib/replication/fixity.rb
DELETED
@@ -1,155 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../libdir')
|
2
|
-
require 'sdr_replication'
|
3
|
-
|
4
|
-
module Replication
|
5
|
-
|
6
|
-
# A Struct to hold properties of a given checksum digest type
|
7
|
-
ChecksumType = Struct.new(:id, :hex_length, :names)
|
8
|
-
|
9
|
-
# A helper class that facilites the generation and processing of checksums
|
10
|
-
#
|
11
|
-
# @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
|
12
|
-
# All rights reserved. See {file:LICENSE.rdoc} for details.
|
13
|
-
class Fixity
|
14
|
-
|
15
|
-
@@default_checksum_types = [:sha1, :sha256]
|
16
|
-
|
17
|
-
# @return [Array<Symbol>] The list of checksum types to be used when generating fixity data
|
18
|
-
def Fixity.default_checksum_types
|
19
|
-
@@default_checksum_types
|
20
|
-
end
|
21
|
-
|
22
|
-
# @param [Array<Symbol>] types The list of checksum types to be used when generating fixity data
|
23
|
-
# @return [Void] Set the list of checksum types to be used when generating fixity data
|
24
|
-
def Fixity.default_checksum_types=(*types)
|
25
|
-
@@default_checksum_types = Fixity.validate_checksum_types(*types)
|
26
|
-
end
|
27
|
-
|
28
|
-
@@valid_checksum_types = [
|
29
|
-
ChecksumType.new(:md5, 32, ['MD5']),
|
30
|
-
ChecksumType.new(:sha1, 40, ['SHA-1', 'SHA1']),
|
31
|
-
ChecksumType.new(:sha256, 64, ['SHA-256', 'SHA256']),
|
32
|
-
ChecksumType.new(:sha384, 96, ['SHA-384', 'SHA384']),
|
33
|
-
ChecksumType.new(:sha512, 128, ['SHA-512', 'SHA512'])
|
34
|
-
]
|
35
|
-
|
36
|
-
# @return [Array<ChecksumType>] The list of allowed ChecksumType structs containing the type's properties
|
37
|
-
def Fixity.valid_checksum_types
|
38
|
-
@@valid_checksum_types
|
39
|
-
end
|
40
|
-
|
41
|
-
# @return [Array<Symbol>] The list of allowed checksum types
|
42
|
-
def Fixity.valid_checksum_ids
|
43
|
-
@@valid_checksum_types.map { |type| type.id }
|
44
|
-
end
|
45
|
-
|
46
|
-
# @param [Array<Symbol>] types The list of checksum types being specified by the caller
|
47
|
-
# @return [Object] The list of specified checksum types after being checked for validity
|
48
|
-
def Fixity.validate_checksum_types(*types)
|
49
|
-
checksum_types = types.flatten
|
50
|
-
invalid_types = checksum_types - valid_checksum_ids
|
51
|
-
raise "Invalid digest type specified: #{invalid_types.inspect}" unless invalid_types.empty?
|
52
|
-
checksum_types
|
53
|
-
end
|
54
|
-
|
55
|
-
# @param [Array<Symbol>] checksum_types The list of checksum types being specified by the caller
|
56
|
-
# @return [Array<Digest::Class>] The list of digest implementation objects that will generate the checksums
|
57
|
-
def Fixity.get_digesters(checksum_types=@@default_checksum_types)
|
58
|
-
checksum_types.inject(Hash.new) do |digesters, checksum_type|
|
59
|
-
case checksum_type
|
60
|
-
when :md5
|
61
|
-
digesters[checksum_type] = Digest::MD5.new
|
62
|
-
when :sha1
|
63
|
-
digesters[checksum_type] = Digest::SHA1.new
|
64
|
-
when :sha256
|
65
|
-
digesters[checksum_type] = Digest::SHA2.new(256)
|
66
|
-
when :sha384
|
67
|
-
digesters[checksum_type] = Digest::SHA2.new(384)
|
68
|
-
when :sha512
|
69
|
-
digesters[checksum_type] = Digest::SHA2.new(512)
|
70
|
-
else
|
71
|
-
raise "Unrecognized checksum type: #{checksum_type}"
|
72
|
-
end
|
73
|
-
digesters
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
# @param pathname [Pathname] The location of the file to be digested
|
78
|
-
# @param [Object] base_pathname The base directory from which relative paths (file IDS) will be derived
|
79
|
-
# @param [Object] checksum_types The list of checksum types being specified by the caller (or default list)
|
80
|
-
# @return [FileFixity] Generate a FileFixity instance containing fixity properties measured from of a physical file
|
81
|
-
def Fixity.fixity_from_file(pathname, base_pathname, checksum_types=@@default_checksum_types)
|
82
|
-
file_fixity = FileFixity.new
|
83
|
-
file_fixity.file_id = pathname.relative_path_from(base_pathname).to_s
|
84
|
-
file_fixity.bytes = pathname.size
|
85
|
-
digesters = Fixity.get_digesters(checksum_types)
|
86
|
-
pathname.open("r") do |stream|
|
87
|
-
while buffer = stream.read(8192)
|
88
|
-
digesters.values.each { |digest| digest.update(buffer) }
|
89
|
-
end
|
90
|
-
end
|
91
|
-
digesters.each { |checksum_type, digest| file_fixity.checksums[checksum_type] = digest.hexdigest }
|
92
|
-
file_fixity
|
93
|
-
end
|
94
|
-
|
95
|
-
# @param [Pathname] base_pathname The directory path used as the base for deriving relative paths (file IDs)
|
96
|
-
# @param [Array<Pathname>] path_list The list of pathnames for files whose fixity will be generated
|
97
|
-
# @return [Hash<String,FileFixity>] A hash containing file ids and fixity data derived from the actual files
|
98
|
-
def Fixity.generate_checksums(base_pathname, path_list, checksum_types=@@default_checksum_types)
|
99
|
-
path_list = base_pathname.find if path_list.nil?
|
100
|
-
file_fixity_hash = Hash.new
|
101
|
-
path_list.select{|pathname| pathname.file?}.each do |file|
|
102
|
-
file_fixity = Fixity.fixity_from_file(file, base_pathname, checksum_types)
|
103
|
-
file_fixity_hash[file_fixity.file_id] = file_fixity
|
104
|
-
end
|
105
|
-
file_fixity_hash
|
106
|
-
end
|
107
|
-
|
108
|
-
# @param [Integer] length The length of the checksum value in hex format
|
109
|
-
# @return [ChecksumType] The ChecksumType struct that contains the properties of the matching checksum type
|
110
|
-
def Fixity.type_for_length(length)
|
111
|
-
@@valid_checksum_types.select {|type| type.hex_length == length}.first
|
112
|
-
end
|
113
|
-
|
114
|
-
# @param [Object] file_id The filename or relative path of the file from its base directory
|
115
|
-
# @param [Object] checksum_values The digest values of the file
|
116
|
-
# @return [FileFixity] Generate a FileFixity instance containing fixity properties supplied by the caller
|
117
|
-
def Fixity.fixity_from_checksum_values(file_id, checksum_values)
|
118
|
-
file_fixity = FileFixity.new
|
119
|
-
file_fixity.file_id = file_id
|
120
|
-
checksum_values.each do |digest|
|
121
|
-
checksum_type = Fixity.type_for_length(digest.length)
|
122
|
-
file_fixity.checksums[checksum_type.id] = digest
|
123
|
-
end
|
124
|
-
file_fixity
|
125
|
-
end
|
126
|
-
|
127
|
-
# @param [Hash<String,FileFixity>] file_fixity_hash A hash containing file ids and fixity data derived from the manifest files
|
128
|
-
# @return [Hash<String,Hash<Symbol,String] A hash containing file ids and checksum data derived from the file_fixity_hash
|
129
|
-
def Fixity.file_checksum_hash(file_fixity_hash)
|
130
|
-
checksum_hash = Hash.new
|
131
|
-
file_fixity_hash.values.each{|file| checksum_hash[file.file_id] = file.checksums}
|
132
|
-
checksum_hash
|
133
|
-
end
|
134
|
-
|
135
|
-
# @param [Symbol,String] checksum_type The type of checksum digest to be generated
|
136
|
-
# @param [Pathname,String] file_pathname The location of the file to digest
|
137
|
-
# @return [String] The operating system shell command that will generate the checksum digest value
|
138
|
-
def Fixity.openssl_digest_command(checksum_type,file_pathname)
|
139
|
-
command = "openssl dgst -#{checksum_type} #{file_pathname}"
|
140
|
-
command
|
141
|
-
end
|
142
|
-
|
143
|
-
# @param [Symbol,String] checksum_type The type of checksum digest to be generated
|
144
|
-
# @param [Pathname,String] file_pathname The location of the file to digest
|
145
|
-
# @return [String] The checksum digest value for the file
|
146
|
-
def Fixity.openssl_digest(checksum_type,file_pathname)
|
147
|
-
command = openssl_digest_command(checksum_type,file_pathname)
|
148
|
-
stdout = OperatingSystem.execute(command)
|
149
|
-
checksum = stdout.scan(/[A-Za-z0-9]+/).last
|
150
|
-
checksum
|
151
|
-
end
|
152
|
-
|
153
|
-
end
|
154
|
-
|
155
|
-
end
|
@@ -1,33 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../libdir')
|
2
|
-
require 'sdr_replication'
|
3
|
-
|
4
|
-
module Replication

  # A wrapper class around the systemu gem that is used for shelling out to the operating system
  # and executing a command
  #
  # @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
  #   All rights reserved.  See {file:LICENSE.rdoc} for details.
  class OperatingSystem

    # Executes a system command in a subprocess.
    # The method will return stdout from the command if execution was successful.
    # The method will raise an exception if execution fails.
    # The exception's message will contain the explanation of the failure.
    # @param [String] command the command to be executed
    # @return [String] stdout from the command if execution was successful
    # @raise [RuntimeError] if the command exits with a non-zero status or could not be run at all
    def OperatingSystem.execute(command)
      status, stdout, stderr = systemu(command)
      # A non-zero exit status is routed through the rescue clause below for uniform reporting
      raise stderr if status.exitstatus != 0
      stdout
    rescue => e
      # When systemu itself raised, nothing was assigned and stderr is still nil;
      # report the underlying exception's message instead of failing on nil.split
      error_text = stderr.nil? ? e.message.to_s : stderr
      msg = "Command failed to execute: [#{command}] caused by <STDERR = #{error_text.split($/).join('; ')}>"
      msg << " STDOUT = #{stdout.split($/).join('; ')}" if (stdout && (stdout.length > 0))
      raise msg
    end

  end

end
|
data/lib/replication/tarfile.rb
DELETED
@@ -1,160 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../libdir')
|
2
|
-
require 'sdr_replication'
|
3
|
-
|
4
|
-
require 'shellwords'

module Replication

  # A tar archive file containing a set of digital object files
  #
  # @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
  #   All rights reserved.  See {file:LICENSE.rdoc} for details.
  class Tarfile

    # @return [String] create archive of the specified format
    #  * gnu = GNU tar 1.13.x format
    #  * posix = POSIX 1003.1-2001 (pax) format
    attr_accessor :format

    # @return [Boolean] Follow symlinks and archive the files they point to
    attr_accessor :dereference

    # @return [Boolean] Verify that files were copied faithfully
    attr_accessor :verify

    # @return [Boolean] Create/list/extract multi-volume archive (not yet implemented)
    attr_accessor :multi_volume

    # @param [Hash<Symbol,Object>] options Key,Value pairs specifying initial values of attributes
    # @return [Tarfile] Initialize a new Tarfile object
    def initialize(options=nil)
      options={} if options.nil?
      # set defaults
      @format=:posix
      @dereference = true
      @verify = false
      @multi_volume = false
      # override defaults, going through the setter methods so path values get normalized
      options.each do |key,value|
        send "#{key}=", value
      end
    end

    # @return [Pathname] The full path of the ancestor dir in which the tar file resides
    # @raise [RuntimeError] if the basepath has not been set
    def tarfile_basepath
      raise "Tarfile basepath is nil" unless @tarfile_basepath
      @tarfile_basepath
    end

    # @param [Pathname,String] basepath The full path of the ancestor dir in which the tar file resides
    # @return [Void] Set the full path of the ancestor dir in which the tar file resides
    def tarfile_basepath=(basepath)
      raise "No pathname specified" unless basepath
      @tarfile_basepath = Pathname(basepath).expand_path
    end

    # @return [Pathname] the full path of the tar archive file to be created or extracted from
    def tarfile_fullpath
      @tarfile_fullpath
    end

    # @param [Pathname,String] fullpath The full path of tar file
    # @return [Void] Sets the full path of tar file
    def tarfile_fullpath=(fullpath)
      @tarfile_fullpath = Pathname(fullpath).expand_path
    end

    # @return [String] The id (path relative to basepath) of the tar file
    def tarfile_relative_path
      @tarfile_fullpath.relative_path_from(@tarfile_basepath).to_s
    end

    # @return [Pathname] The full path of the source file or directory being archived
    # @raise [RuntimeError] if the source path has not been set
    def source_fullpath
      raise "Source pathname is nil" unless @source_pathname
      @source_pathname
    end

    # @param [Pathname,String] source The full path of the source file or directory being archived
    # @return [Void] Set the full path of the source file or directory being archived
    def source_fullpath=(source)
      raise "No pathname specified" unless source
      @source_pathname = Pathname(source).expand_path
    end

    # @return [Pathname, nil] The directory that is the basis of relative paths (nil if never set)
    def source_basepath
      @source_basepath
    end

    # @param [Pathname,String] base The directory that is the basis of relative paths
    # @return [Void] Set the base path of the source file or directory being archived
    def source_basepath=(base)
      raise "No pathname specified" unless base
      @source_basepath = Pathname(base).expand_path
    end

    # @return [Pathname] The relative path from the source base directory to the source directory
    def source_relative_path
      source_fullpath.relative_path_from(source_basepath)
    end

    # @return [String] The shell command string to be used to create the tarfile
    def create_cmd
      command = "tar --create --file=#{shell_escape(tarfile_fullpath)} --format=#{@format} "
      command << "--dereference " if @dereference
      command << "--verify " if @verify
      if source_basepath
        # Archive member names are made relative to the base directory
        command << "--directory=#{single_quote(source_basepath)} "
        command << shell_escape(source_relative_path)
      else
        # Without a base directory there is nothing to be relative to; archive the full path
        # (previously this case raised while computing the relative path)
        command << shell_escape(source_fullpath)
      end
      command
    end

    # @return [Tarfile] Shell out to the operating system and create the tar archive file
    def create_tarfile
      command = create_cmd
      OperatingSystem.execute(command)
      self
    end

    # @return [String] The shell command that will list the tarfile's contents
    def list_cmd
      "tar --list --file=#{shell_escape(tarfile_fullpath)} "
    end

    # @return [String] The list of the tarfile's contents
    def list_tarfile
      command = list_cmd
      OperatingSystem.execute(command)
    end

    # @return [Pathname] The location of the directory into which the tarfile should be extracted
    # @raise [RuntimeError] if the target path has not been set
    def target_pathname
      raise "Target pathname is nil" unless @target_pathname
      @target_pathname
    end

    # @param [Pathname,String] target The location of the directory into which the tarfile should be extracted
    # @return [Void] Set the location of the directory into which the tarfile should be extracted
    def target_pathname=(target)
      raise "No target pathname specified" unless target
      @target_pathname = Pathname(target).expand_path
    end

    # @return [String] The shell command that will extract the tarfile's contents
    # @raise [RuntimeError] if the target path has not been set
    def extract_cmd
      command = "tar --extract --file=#{shell_escape(tarfile_fullpath)} "
      command << "--directory=#{single_quote(target_pathname)} " if target_pathname
      command
    end

    # @return [String] Shell out to the operating system and extract the tar archive file
    def extract_tarfile
      command = extract_cmd
      OperatingSystem.execute(command)
    end

    private

    # @param [Pathname,String,nil] path A path to be placed, unquoted, into a shell command
    # @return [String] The path with shell metacharacters backslash-escaped
    #   (ordinary paths are returned unchanged; nil yields an empty string)
    def shell_escape(path)
      path.nil? ? '' : Shellwords.escape(path.to_s)
    end

    # @param [Pathname,String] path A path to be placed inside single quotes in a shell command
    # @return [String] The path wrapped in single quotes, with embedded single quotes made safe
    def single_quote(path)
      # Block form of gsub: a replacement *string* containing \' would be read as a back-reference
      "'" + path.to_s.gsub("'") { "'\\''" } + "'"
    end

  end

end
|