sdr-replication 0.4.2 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c04fc63fa050cdbb2c45b3100ce8026ef01cb046
4
- data.tar.gz: bd7d2e898a1989494086e08186e68dcd1a02b8ca
3
+ metadata.gz: 97ad8544df9baab2d69065f1ef6612eae515fc78
4
+ data.tar.gz: a81bee33c44cd1fa8a28176fa938891bdaecdad1
5
5
  SHA512:
6
- metadata.gz: c0a39e7551eddff9a371594663fb097ca435d123e904a03e399b3be52a2f73fad4564c672b999e3cf562d558076e233a534f69237ac47787752eac044c29c861
7
- data.tar.gz: 59e27ee6f9765c65054f57bab410e3e3d716bf775903c2d92e05ba0597c1a7f28355f2ef5438c5200d052fcfdadd73aa36ba81fe9c3a1e59d702c938bd0f2c00
6
+ metadata.gz: 05d4d43b5ba1abc93bfd450eb3d93645d3a7d0685d905e1a1541bc1eafde910005af98755bbd9344074bc82f5882b0786a9ec31b1bc0f0428bd586fbc0bd1428
7
+ data.tar.gz: 27f7d6ca7037724abae331f4f3f16cd4fec143f42f2c292b00503a9acd166869cdf26289fd6dde186b66f92c26f038c858ec33d09099b68e111cbaa5e595c79c
@@ -152,7 +152,7 @@ module Replication
152
152
  # @return [Replica] Copy the object version into a BagIt Bag in tarfile format
153
153
  def create_replica
154
154
  replica = self.replica
155
- bag = BagitBag.create_bag(replica.bag_pathname)
155
+ bag = Archive::BagitBag.create_bag(replica.bag_pathname)
156
156
  bag.bag_checksum_types = [:sha256]
157
157
  bag.add_payload_tarfile("#{replica.replica_id}.tar",version_pathname, storage_object.object_pathname.parent)
158
158
  bag.write_bag_info_txt
@@ -163,4 +163,4 @@ module Replication
163
163
 
164
164
  end
165
165
 
166
- end
166
+ end
@@ -1,27 +1,16 @@
1
1
  require 'rubygems'
2
2
  require 'bundler/setup'
3
3
  Bundler.setup
4
- require 'digest'
5
- require 'find'
6
- require 'json/pure'
4
+ require 'archive-utils'
7
5
  require 'moab_stanford'
8
- require 'pathname'
9
- require 'rest-client'
10
- require 'systemu'
11
-
12
6
 
13
7
  # The classes used for SDR Replication workflows
14
8
  module Replication
15
9
  end
16
10
 
17
11
  require 'replication/archive_catalog'
18
- require 'replication/bagit_bag'
19
- require 'replication/file_fixity'
20
- require 'replication/fixity'
21
- require 'replication/operating_system'
22
12
  require 'replication/replica'
23
13
  require 'replication/sdr_object'
24
14
  require 'replication/sdr_object_version'
25
- require 'replication/tarfile'
26
15
  include Replication
27
16
 
metadata CHANGED
@@ -1,29 +1,44 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sdr-replication
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
+ - Darren Weber
7
8
  - Richard Anderson
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2014-08-01 00:00:00.000000000 Z
12
+ date: 2014-10-15 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: json_pure
15
16
  requirement: !ruby/object:Gem::Requirement
16
17
  requirements:
17
- - - ">="
18
+ - - "~>"
18
19
  - !ruby/object:Gem::Version
19
- version: '0'
20
+ version: '1.8'
20
21
  type: :runtime
21
22
  prerelease: false
22
23
  version_requirements: !ruby/object:Gem::Requirement
23
24
  requirements:
24
- - - ">="
25
+ - - "~>"
25
26
  - !ruby/object:Gem::Version
26
- version: '0'
27
+ version: '1.8'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rest-client
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '1.7'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '1.7'
27
42
  - !ruby/object:Gem::Dependency
28
43
  name: moab-versioning
29
44
  requirement: !ruby/object:Gem::Requirement
@@ -39,152 +54,148 @@ dependencies:
39
54
  - !ruby/object:Gem::Version
40
55
  version: '1.3'
41
56
  - !ruby/object:Gem::Dependency
42
- name: systemu
57
+ name: archive-utils
43
58
  requirement: !ruby/object:Gem::Requirement
44
59
  requirements:
45
- - - ">="
60
+ - - "~>"
46
61
  - !ruby/object:Gem::Version
47
- version: '0'
62
+ version: '0.0'
48
63
  type: :runtime
49
64
  prerelease: false
50
65
  version_requirements: !ruby/object:Gem::Requirement
51
66
  requirements:
52
- - - ">="
67
+ - - "~>"
53
68
  - !ruby/object:Gem::Version
54
- version: '0'
69
+ version: '0.0'
55
70
  - !ruby/object:Gem::Dependency
56
- name: rest-client
71
+ name: pry
57
72
  requirement: !ruby/object:Gem::Requirement
58
73
  requirements:
59
- - - ">="
74
+ - - "~>"
60
75
  - !ruby/object:Gem::Version
61
76
  version: '0'
62
- type: :runtime
77
+ type: :development
63
78
  prerelease: false
64
79
  version_requirements: !ruby/object:Gem::Requirement
65
80
  requirements:
66
- - - ">="
81
+ - - "~>"
67
82
  - !ruby/object:Gem::Version
68
83
  version: '0'
69
84
  - !ruby/object:Gem::Dependency
70
85
  name: rake
71
86
  requirement: !ruby/object:Gem::Requirement
72
87
  requirements:
73
- - - ">="
88
+ - - "~>"
74
89
  - !ruby/object:Gem::Version
75
- version: '0'
90
+ version: '10'
76
91
  type: :development
77
92
  prerelease: false
78
93
  version_requirements: !ruby/object:Gem::Requirement
79
94
  requirements:
80
- - - ">="
95
+ - - "~>"
81
96
  - !ruby/object:Gem::Version
82
- version: '0'
97
+ version: '10'
83
98
  - !ruby/object:Gem::Dependency
84
99
  name: awesome_print
85
100
  requirement: !ruby/object:Gem::Requirement
86
101
  requirements:
87
- - - ">="
102
+ - - "~>"
88
103
  - !ruby/object:Gem::Version
89
- version: '0'
104
+ version: '1'
90
105
  type: :development
91
106
  prerelease: false
92
107
  version_requirements: !ruby/object:Gem::Requirement
93
108
  requirements:
94
- - - ">="
109
+ - - "~>"
95
110
  - !ruby/object:Gem::Version
96
- version: '0'
111
+ version: '1'
97
112
  - !ruby/object:Gem::Dependency
98
113
  name: equivalent-xml
99
114
  requirement: !ruby/object:Gem::Requirement
100
115
  requirements:
101
- - - ">="
116
+ - - "~>"
102
117
  - !ruby/object:Gem::Version
103
- version: '0'
118
+ version: '0.5'
104
119
  type: :development
105
120
  prerelease: false
106
121
  version_requirements: !ruby/object:Gem::Requirement
107
122
  requirements:
108
- - - ">="
123
+ - - "~>"
109
124
  - !ruby/object:Gem::Version
110
- version: '0'
125
+ version: '0.5'
111
126
  - !ruby/object:Gem::Dependency
112
127
  name: fakeweb
113
128
  requirement: !ruby/object:Gem::Requirement
114
129
  requirements:
115
- - - ">="
130
+ - - "~>"
116
131
  - !ruby/object:Gem::Version
117
- version: '0'
132
+ version: '1'
118
133
  type: :development
119
134
  prerelease: false
120
135
  version_requirements: !ruby/object:Gem::Requirement
121
136
  requirements:
122
- - - ">="
137
+ - - "~>"
123
138
  - !ruby/object:Gem::Version
124
- version: '0'
139
+ version: '1'
125
140
  - !ruby/object:Gem::Dependency
126
141
  name: rspec
127
142
  requirement: !ruby/object:Gem::Requirement
128
143
  requirements:
129
144
  - - "~>"
130
145
  - !ruby/object:Gem::Version
131
- version: 2.14.1
146
+ version: '2.0'
132
147
  type: :development
133
148
  prerelease: false
134
149
  version_requirements: !ruby/object:Gem::Requirement
135
150
  requirements:
136
151
  - - "~>"
137
152
  - !ruby/object:Gem::Version
138
- version: 2.14.1
153
+ version: '2.0'
139
154
  - !ruby/object:Gem::Dependency
140
155
  name: simplecov
141
156
  requirement: !ruby/object:Gem::Requirement
142
157
  requirements:
143
158
  - - "~>"
144
159
  - !ruby/object:Gem::Version
145
- version: 0.7.1
160
+ version: '0.7'
146
161
  type: :development
147
162
  prerelease: false
148
163
  version_requirements: !ruby/object:Gem::Requirement
149
164
  requirements:
150
165
  - - "~>"
151
166
  - !ruby/object:Gem::Version
152
- version: 0.7.1
167
+ version: '0.7'
153
168
  - !ruby/object:Gem::Dependency
154
169
  name: yard
155
170
  requirement: !ruby/object:Gem::Requirement
156
171
  requirements:
157
- - - ">="
172
+ - - "~>"
158
173
  - !ruby/object:Gem::Version
159
- version: '0'
174
+ version: '0.8'
160
175
  type: :development
161
176
  prerelease: false
162
177
  version_requirements: !ruby/object:Gem::Requirement
163
178
  requirements:
164
- - - ">="
179
+ - - "~>"
165
180
  - !ruby/object:Gem::Version
166
- version: '0'
181
+ version: '0.8'
167
182
  description: Contains classes to archive and retrieve digital object version content
168
183
  and metadata
169
184
  email:
170
- - rnanders@stanford.edu
185
+ - darren.weber@stanford.edu
171
186
  executables: []
172
187
  extensions: []
173
188
  extra_rdoc_files: []
174
189
  files:
175
190
  - lib/libdir.rb
176
191
  - lib/replication/archive_catalog.rb
177
- - lib/replication/bagit_bag.rb
178
- - lib/replication/file_fixity.rb
179
- - lib/replication/fixity.rb
180
- - lib/replication/operating_system.rb
181
192
  - lib/replication/replica.rb
182
193
  - lib/replication/sdr_object.rb
183
194
  - lib/replication/sdr_object_version.rb
184
- - lib/replication/tarfile.rb
185
195
  - lib/sdr_replication.rb
186
- homepage:
187
- licenses: []
196
+ homepage: https://github.com/sul-dlss/sdr-replication
197
+ licenses:
198
+ - Apache-2.0
188
199
  metadata: {}
189
200
  post_install_message:
190
201
  rdoc_options: []
@@ -202,7 +213,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
202
213
  version: 2.2.1
203
214
  requirements: []
204
215
  rubyforge_project:
205
- rubygems_version: 2.4.1
216
+ rubygems_version: 2.4.2
206
217
  signing_key:
207
218
  specification_version: 4
208
219
  summary: Core methods for support of SDR Preservation Core replication
@@ -1,353 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../libdir')
2
- require 'sdr_replication'
3
-
4
- module Replication
5
-
6
- # A BagIt bag contains a structured copy of a digital object for storage, transfer, or replication
7
- # @see https://tools.ietf.org/html/draft-kunze-bagit-10
8
- # This class can be used to create, parse, or validate a bag instance
9
- #
10
- # @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
11
- # All rights reserved. See {file:LICENSE.rdoc} for details.
12
- class BagitBag
13
-
14
- # @param [Pathname,String] pathname The location of the bag home directory
15
- # @return [BagitBag] Initialize a new bag, create home and payload folders, write bagit.txt file
16
- def BagitBag.create_bag(pathname)
17
- bag = BagitBag.new
18
- bag.bag_pathname = pathname
19
- bag.payload_pathname.mkpath
20
- bag.write_bagit_txt
21
- bag
22
- end
23
-
24
- # @param [Pathname,String] pathname The location of the bag home directory
25
- # @return [BagitBag] Initialize a new bag, create home and payload folders, write bagit.txt file
26
- def BagitBag.open_bag(pathname)
27
- bag = BagitBag.new
28
- bag.bag_pathname = pathname
29
- raise "No bag found at #{bag.bag_pathname}" unless bag.bag_pathname.exist?
30
- bagit_txt = bag.bag_pathname.join("bagit.txt")
31
- raise "No bagit.txt file found at #{bagit_txt}" unless bagit_txt.exist?
32
- bag
33
- end
34
-
35
- # @return [Pathname] The location of the bag home directory
36
- def bag_pathname
37
- @bag_pathname
38
- end
39
-
40
- # @param [Pathname,String] pathname The location of the bag home directory
41
- # @return [Void] Set the location of the bag home directory
42
- def bag_pathname=(pathname)
43
- @bag_pathname = Pathname(pathname)
44
- end
45
-
46
- # @return [Pathname] The location of the bag data directory
47
- def payload_pathname
48
- bag_pathname.join('data')
49
- end
50
-
51
- # @return [Pathname] Generate the bagit.txt tag file
52
- def write_bagit_txt
53
- bagit_txt = bag_pathname.join("bagit.txt")
54
- bagit_txt.open('w') do |f|
55
- f.puts "Tag-File-Character-Encoding: UTF-8"
56
- f.puts "BagIt-Version: 0.97"
57
- end
58
- bagit_txt
59
- end
60
-
61
- # @return [Hash<String,String] A hash containing the properties documented in the bagit.txt tagfile
62
- def read_bagit_txt
63
- properties = Hash.new
64
- bagit_txt = bag_pathname.join("bagit.txt")
65
- bagit_txt.readlines.each do |line|
66
- line.chomp!.strip!
67
- key,value = line.split(':',2)
68
- properties[key.strip] = value.strip if value
69
- end
70
- properties
71
- end
72
-
73
- # @return [Array<Symbol>] The list of checksum types to be used when generating fixity data
74
- def bag_checksum_types
75
- @bag_checksum_types ||= Fixity.default_checksum_types
76
- end
77
-
78
- # @param [Object] types The list of checksum types to be used when generating fixity data
79
- # @return [Void] Set the list of checksum types to be used when generating fixity data
80
- def bag_checksum_types=(*types)
81
- @bag_checksum_types = Fixity.validate_checksum_types(*types)
82
- end
83
-
84
- # @param [Symbol] link_mode Specifies whether to :copy, :link, or :symlink the files to the payload directory
85
- # @param [Pathname] source_dir The source location of the directory whose contents are to be bagged
86
- # @return [Pathname] Generate file_fixity_hash and send it to #add_files_to_payload
87
- def add_dir_to_payload (link_mode, source_dir)
88
- file_fixity_hash = Fixity.generate_checksums(source_dir, source_dir.find ,bag_checksum_types)
89
- add_files_to_payload(link_mode, source_dir, file_fixity_hash)
90
- payload_pathname
91
- end
92
-
93
- # @param [Symbol] link_mode Specifies whether to :copy, :link, or :symlink the files to the payload directory
94
- # @param [Pathname] source_basepath The source location of the directory whose contents are to be ingested
95
- # @param [Hash<String,FileFixity>] file_fixity_hash The list of files (with fixity data) to be added to the payload
96
- # @return [Pathname] Copy or link the files specified in the file_fixity_hash to the payload directory,
97
- # then update the payload manifest files
98
- def add_files_to_payload(link_mode, source_basepath, file_fixity_hash)
99
- file_fixity_hash.keys.each do |file_id|
100
- source_pathname = source_basepath.join(file_id)
101
- target_pathname = payload_pathname.join(file_id)
102
- copy_file(link_mode, source_pathname, target_pathname)
103
- end
104
- write_manifest_checksums('manifest', add_data_prefix(file_fixity_hash))
105
- payload_pathname
106
- end
107
-
108
- # @param [Hash<String,FileFixity>] file_fixity_hash key is file_id, values are Fixity objects containing checksums
109
- # @return [Hash<String,FileFixity>] A revised hash with file_id paths prefixed with 'data/'
110
- def add_data_prefix(file_fixity_hash)
111
- new_hash = Hash.new
112
- file_fixity_hash.values.each do |fixity|
113
- fixity.file_id = "data/#{fixity.file_id}"
114
- new_hash[fixity.file_id] = fixity
115
- end
116
- new_hash
117
- end
118
-
119
- # @param [Symbol] link_mode Specifies whether to :copy, :link, or :symlink the files to the payload directory
120
- # @param [Pathname] source_pathname The source location of the file to be ingested
121
- # @param [Pathname] target_pathname The location of the directory in which to place the file
122
- # @return [Pathname] link or copy the specified file from source location to the target location
123
- def copy_file(link_mode, source_pathname, target_pathname)
124
- target_pathname.parent.mkpath
125
- case link_mode
126
- when :copy, nil
127
- FileUtils.copy(source_pathname.to_s, target_pathname.to_s) # automatically dereferences symlinks
128
- when :link
129
- FileUtils.link(source_pathname.to_s, target_pathname.to_s) #, :force => true (false is default)
130
- when :symlink
131
- FileUtils.symlink(source_pathname.to_s, target_pathname.to_s) #, :force => true (false is default)
132
- else
133
- raise "Invalid link_mode: #{link_mode}, expected one of [:copy,:link,:symlink]"
134
- end
135
- target_pathname
136
- end
137
-
138
- # @param [Pathname,String] source_fullpath The location of the directory whose content will be tarred
139
- # @param [Pathname,String] source_basepath The location of the directory to change to before doing the tar create
140
- # @return [Tarfile] Create a tar archive of a directory into the payload directory,
141
- # generating checksums in parallel processes and recording those checksums in the payload manifests
142
- def add_payload_tarfile(tarfile_id,source_fullpath, source_basepath)
143
- tarfile = Tarfile.new
144
- tarfile.source_basepath = Pathname(source_basepath)
145
- tarfile.source_fullpath = Pathname(source_fullpath)
146
- tarfile.tarfile_basepath = payload_pathname
147
- tarfile.tarfile_fullpath = payload_pathname.join("#{tarfile_id}")
148
- tarfile.create_tarfile
149
- file_fixity_hash = Fixity.generate_checksums(bag_pathname,[tarfile.tarfile_fullpath],bag_checksum_types)
150
- write_manifest_checksums('manifest', file_fixity_hash)
151
- tarfile
152
- end
153
-
154
- # @return [Pathname] Generate the bag-info.txt tag file to record the payload size
155
- def write_bag_info_txt
156
- payload_size = bag_payload_size
157
- bag_info_txt = bag_pathname.join("bag-info.txt")
158
- bag_info_txt.open('w') do |f|
159
- f.puts "External-Identifier: #{bag_pathname.basename}"
160
- f.puts "Payload-Oxum: #{payload_size[:bytes]}.#{payload_size[:files]}"
161
- f.puts "Bag-Size: #{bag_size_human(payload_size[:bytes])}"
162
- end
163
- bag_info_txt
164
- end
165
-
166
- # @return [Hash<Symbol,Integer>] A hash contining the payload size in bytes, and the number of files,
167
- # derived from the payload directory contents
168
- def bag_payload_size
169
- payload_pathname.find.select{|f| f.file?}.inject({bytes: 0, files: 0}) do |hash,file|
170
- hash[:bytes] += file.size
171
- hash[:files] += 1
172
- hash
173
- end
174
- end
175
-
176
- # @param [Integer] bytes The total number of bytes in the payload
177
- # @return [String] Human-readable rendition of the total payload size
178
- def bag_size_human(bytes)
179
- count = 0
180
- size = bytes
181
- while ( size >= 1024 and count < 4 )
182
- size /= 1024.0
183
- count += 1
184
- end
185
- if (count == 0)
186
- return sprintf("%d B", size)
187
- else
188
- return sprintf("%.2f %s", size, %w[B KB MB GB TB][count] )
189
- end
190
- end
191
-
192
- # @return [Hash<String,String] A hash containing the properties documented in the bag-info.txt tagfile
193
- def read_bag_info_txt
194
- properties = Hash.new
195
- bag_info = bag_pathname.join("bag-info.txt")
196
- bag_info.readlines.each do |line|
197
- line.chomp!.strip!
198
- key,value = line.split(':',2)
199
- properties[key.strip] = value.strip if value
200
- end
201
- properties
202
- end
203
-
204
- # @return [Hash<Symbol,Integer>] A hash contining the payload size in bytes, and the number of files,
205
- # derived from the Payload-Oxum property
206
- def info_payload_size
207
- info = read_bag_info_txt
208
- size_array = info['Payload-Oxum'].split('.')
209
- size_hash = {:bytes => size_array[0].to_i, :files => size_array[1].to_i}
210
- size_hash
211
- end
212
-
213
- # @return [Boolean] Compare the actual measured payload size against the value recorded in bag-info.txt
214
- def verify_payload_size
215
- info_size = info_payload_size
216
- bag_size = bag_payload_size
217
- if info_size != bag_size
218
- raise "Failed payload size verification! Expected: #{info_size}, Found: #{bag_size}"
219
- end
220
- true
221
- end
222
-
223
- # @return [Hash<String,FileFixity>] create hash containing ids and checksums for all files in the bag's root directory
224
- def generate_tagfile_checksums
225
- # get list of all files in the bag home dir, except those starting with 'tagmanifest'
226
- tagfiles = bag_pathname.children.reject{|file| file.basename.to_s.start_with?('tagmanifest')}
227
- # generate checksums, using bag home dir as the base directory for file ids (per bagit spec)
228
- Fixity.generate_checksums(bag_pathname, tagfiles, bag_checksum_types )
229
- end
230
-
231
- # @return [Hash<String,FileFixity>] create hash containing ids and checksums for all files in the bag's payload
232
- def generate_payload_checksums
233
- # get list of all files in the data directory
234
- path_list = payload_pathname.find
235
- # generate checksums, but use bag home dir as the base directory for file ids (per bagit spec)
236
- Fixity.generate_checksums(bag_pathname, path_list, bag_checksum_types)
237
- end
238
-
239
- # @param [String] manifest_type The type of manifest file ('manifest' or 'tagmanifest') to be updated
240
- # @param [Hash<String,FileFixity>] file_fixity_hash A hash containing file ids and fixity data
241
- # @param [String] open_mode The file open mode (default is 'a')
242
- # @return [Hash<Symbol,Pathname] Update each of the manifests with data from the file_fixity_hash
243
- def write_manifest_checksums(manifest_type, file_fixity_hash, open_mode='a')
244
- manifests = Hash.new
245
- self.bag_checksum_types.each do |checksum_type|
246
- manifest_pathname = bag_pathname.join("#{manifest_type}-#{checksum_type}.txt")
247
- manifest_file = manifest_pathname.open(open_mode)
248
- file_fixity_hash.values.each do |fixity|
249
- checksum = fixity.get_checksum(checksum_type)
250
- manifest_file.puts("#{checksum} #{fixity.file_id}") if checksum
251
- end
252
- manifest_file.close
253
- manifests[checksum_type] = manifest_pathname
254
- end
255
- manifests
256
- end
257
-
258
- # @param [String] manifest_type The type of manifest file ('manifest' or 'tagmanifest') to be read
259
- # @return [Hash<String,FileFixity>] A hash containing file ids and fixity data derived from the manifest files
260
- def read_manifest_files(manifest_type)
261
- file_fixity_hash = Hash.new
262
- checksum_type_list = Array.new
263
- Fixity.valid_checksum_ids.each do |checksum_type|
264
- manifest_pathname = bag_pathname.join("#{manifest_type}-#{checksum_type}.txt")
265
- if manifest_pathname.file?
266
- checksum_type_list << checksum_type
267
- manifest_pathname.readlines.each do |line|
268
- line.chomp!.strip!
269
- checksum,file_id = line.split(/[\s*]+/,2)
270
- file_fixity = file_fixity_hash[file_id] || FileFixity.new(file_id: file_id)
271
- file_fixity.set_checksum(checksum_type,checksum)
272
- file_fixity_hash[file_id] = file_fixity
273
- end
274
- end
275
- end
276
- self.bag_checksum_types = self.bag_checksum_types | checksum_type_list
277
- file_fixity_hash
278
- end
279
-
280
- # @return [Boolean] Compare fixity data from the tag manifest files against the values measured by digesting the files
281
- def verify_tagfile_manifests
282
- manifest_type = 'tagmanifest'
283
- manifest_fixity_hash = read_manifest_files(manifest_type)
284
- bag_fixity_hash = generate_tagfile_checksums
285
- verify_manifests(manifest_type, manifest_fixity_hash, bag_fixity_hash)
286
- end
287
-
288
- # @return [Boolean] Compare fixity data from the payload manifest files against the values measured by digesting the files
289
- def verify_payload_manifests
290
- manifest_type = 'manifest'
291
- manifest_fixity_hash = read_manifest_files(manifest_type)
292
- bag_fixity_hash = generate_payload_checksums
293
- verify_manifests(manifest_type, manifest_fixity_hash, bag_fixity_hash)
294
- end
295
-
296
- # @param [String] manifest_type The type of manifest file ('manifest' or 'tagmanifest') to be read
297
- # @param [Hash<String,FileFixity>] manifest_fixity_hash A hash containing file ids and fixity data derived from the manifest files
298
- # @param [Hash<String,FileFixity>] bag_fixity_hash A hash containing file ids and fixity data derived from the actual files
299
- # @return [Boolean] Compare fixity data from the manifest files against the values measured by digesting the files,
300
- # returning true if equal or false if not equal
301
- def verify_manifests(manifest_type, manifest_fixity_hash, bag_fixity_hash)
302
- diff = manifest_diff(manifest_fixity_hash, bag_fixity_hash)
303
- if diff.size > 0
304
- raise "Failed #{manifest_type} verification! Differences: \n#{diff.inspect}"
305
- end
306
- true
307
- end
308
-
309
- # @param [Hash<String,FileFixity>] manifest_fixity_hash A hash containing file ids and fixity data derived from the manifest files
310
- # @param [Hash<String,FileFixity>] bag_fixity_hash A hash containing file ids and fixity data derived from the actual files
311
- # @return [Hash] A report of the differences between the fixity data from the manifest files
312
- # against the values measured by digesting the files
313
- def manifest_diff(manifest_fixity_hash, bag_fixity_hash)
314
- diff = Hash.new
315
- (manifest_fixity_hash.keys | bag_fixity_hash.keys).each do |file_id|
316
- manifest_fixity = manifest_fixity_hash[file_id] || FileFixity.new(file_id: file_id)
317
- bag_fixity = bag_fixity_hash[file_id] || FileFixity.new(file_id: file_id)
318
- if manifest_fixity != bag_fixity
319
- diff[file_id] = manifest_fixity.diff(bag_fixity,'manifest','bag')
320
- end
321
- end
322
- diff
323
- end
324
-
325
- # @return [Boolean] Validate the bag containing the digital object
326
- def verify_bag
327
- verify_bag_structure
328
- verify_tagfile_manifests
329
- verify_payload_size
330
- verify_payload_manifests
331
- true
332
- end
333
-
334
- # @return [Boolean] Test the existence of expected files, return true if files exist, raise exception if not
335
- def verify_bag_structure
336
- required_files = ['data','bagit.txt','bag-info.txt','manifest-sha256.txt','tagmanifest-sha256.txt']
337
- required_files.each{|filename| verify_pathname(bag_pathname.join(filename))}
338
- optional_files = []
339
- true
340
- end
341
-
342
- # @param [Pathname] pathname The file whose existence should be verified
343
- # @return [Boolean] Test the existence of the specified path. Return true if file exists, raise exception if not
344
- def verify_pathname(pathname)
345
- raise "#{pathname.basename} not found at #{pathname}" unless pathname.exist?
346
- true
347
- end
348
-
349
-
350
- end
351
-
352
-
353
- end
@@ -1,98 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../libdir')
2
- require 'sdr_replication'
3
-
4
- module Replication
5
-
6
- # The fixity properties of a file, used to determine file content equivalence.
7
- # Placing this data in a class by itself facilitates using the MD5, SHA1, etc checksums (and optionally the file size)
8
- # as a single key when doing comparisons against other file instances. The design assumes that this file fixity
9
- # is sufficiently unique to act as a comparator for determining file equality or verifying checksum manifests.
10
- #
11
- # @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
12
- # All rights reserved. See {file:LICENSE.rdoc} for details.
13
- class FileFixity
14
-
15
- # @param [Hash<Symbol,Object>] options Key,Value pairs specifying initial values of attributes
16
- def initialize(options=nil)
17
- @checksums=Hash.new
18
- options = {} if options.nil?
19
- options.each do |key,value|
20
- #instance_variable_set("@#{key}", value)
21
- send "#{key}=", value
22
- end
23
- end
24
-
25
- # @return [String] The name of the file, relative to its base directory
26
- # (for payload files, path relative to the data folder. For tag files, path relative to the bag home folder)
27
- attr_accessor :file_id
28
-
29
- # @return [Integer] The size of the file in bytes
30
- attr_accessor :bytes
31
-
32
- # @return [Hash<Symbol,String>] The MD5, SHA1, SHA256, etc checksum values of the file
33
- attr_accessor :checksums
34
-
35
- # @param [Symbol,String] type The type of checksum (e.g. :md5, :sha1, :sha256)
36
- # @return [String] The value of the file digest
37
- def get_checksum(type)
38
- checksum_type = type.to_s.downcase.to_sym
39
- self.checksums[checksum_type]
40
- end
41
-
42
- # @param type [Symbol,String] The type of checksum
43
- # @param value [String] value of the file digest
44
- # @return [void] Set the value for the specified checksum type in the checksum hash
45
- def set_checksum(type,value)
46
- checksum_type = type.to_s.downcase.to_sym
47
- Fixity.validate_checksum_types(checksum_type)
48
- self.checksums[checksum_type] = value
49
- end
50
-
51
- # @param other [FileFixity] The other file fixity being compared to this fixity
52
- # @return [Boolean] Returns true if self and other have comparable fixity data.
53
- def eql?(other)
54
- matching_checksum_types = self.checksums.keys & other.checksums.keys
55
- return false if matching_checksum_types.size == 0
56
- matching_checksum_types.each do |type|
57
- return false if self.checksums[type] != other.checksums[type]
58
- end
59
- true
60
- end
61
-
62
- # (see #eql?)
63
- def ==(other)
64
- eql?(other)
65
- end
66
-
67
- # @return [Fixnum] Compute a hash-code for the fixity value array.
68
- # Two file instances with the same content will have the same hash code (and will compare using eql?).
69
- # @note The hash and eql? methods override the methods inherited from Object.
70
- # These methods ensure that instances of this class can be used as Hash keys. See
71
- # * {http://www.paulbutcher.com/2007/10/navigating-the-equality-maze/}
72
- # * {http://techbot.me/2011/05/ruby-basics-equality-operators-ruby/}
73
- # Also overriden is {#==} so that equality tests in other contexts will also return the expected result.
74
- def hash
75
- [self.file_id].hash
76
- end
77
-
78
- # @param [FileFixity] other The other FileFixity object being compared to this one
79
- # @param [String] left The label to use for values from this base FileFixity object
80
- # @param [String] right he label to use for values from the other FileFixity object
81
- # @return [Hash<symbol,Hash<String,String>] details of the checksum differences between fixity objects
82
- def diff(other,left='base',right='other')
83
- diff_hash = Hash.new
84
- matching_checksum_types = (self.checksums.keys & other.checksums.keys)
85
- matching_checksum_types = (self.checksums.keys | other.checksums.keys) if matching_checksum_types.empty?
86
- matching_checksum_types.each do |type|
87
- base_checksum = self.checksums[type]
88
- other_checksum = other.checksums[type]
89
- if base_checksum != other_checksum
90
- diff_hash[type] = {left => base_checksum, right => other_checksum }
91
- end
92
- end
93
- return diff_hash.size > 0 ? diff_hash : nil
94
- end
95
-
96
- end
97
-
98
- end
@@ -1,155 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../libdir')
2
- require 'sdr_replication'
3
-
4
- module Replication
5
-
6
- # A Struct to hold properties of a given checksum digest type
7
- ChecksumType = Struct.new(:id, :hex_length, :names)
8
-
9
- # A helper class that facilitates the generation and processing of checksums
10
- #
11
- # @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
12
- # All rights reserved. See {file:LICENSE.rdoc} for details.
13
- class Fixity
14
-
15
- @@default_checksum_types = [:sha1, :sha256]
16
-
17
- # @return [Array<Symbol>] The list of checksum types to be used when generating fixity data
18
- def Fixity.default_checksum_types
19
- @@default_checksum_types
20
- end
21
-
22
- # @param [Array<Symbol>] types The list of checksum types to be used when generating fixity data
23
- # @return [Void] Set the list of checksum types to be used when generating fixity data
24
- def Fixity.default_checksum_types=(*types)
25
- @@default_checksum_types = Fixity.validate_checksum_types(*types)
26
- end
27
-
28
- @@valid_checksum_types = [
29
- ChecksumType.new(:md5, 32, ['MD5']),
30
- ChecksumType.new(:sha1, 40, ['SHA-1', 'SHA1']),
31
- ChecksumType.new(:sha256, 64, ['SHA-256', 'SHA256']),
32
- ChecksumType.new(:sha384, 96, ['SHA-384', 'SHA384']),
33
- ChecksumType.new(:sha512, 128, ['SHA-512', 'SHA512'])
34
- ]
35
-
36
- # @return [Array<ChecksumType>] The list of allowed ChecksumType structs containing the type's properties
37
- def Fixity.valid_checksum_types
38
- @@valid_checksum_types
39
- end
40
-
41
- # @return [Array<Symbol>] The list of allowed checksum types
42
- def Fixity.valid_checksum_ids
43
- @@valid_checksum_types.map { |type| type.id }
44
- end
45
-
46
- # @param [Array<Symbol>] types The list of checksum types being specified by the caller
47
- # @return [Object] The list of specified checksum types after being checked for validity
48
- def Fixity.validate_checksum_types(*types)
49
- checksum_types = types.flatten
50
- invalid_types = checksum_types - valid_checksum_ids
51
- raise "Invalid digest type specified: #{invalid_types.inspect}" unless invalid_types.empty?
52
- checksum_types
53
- end
54
-
55
- # @param [Array<Symbol>] checksum_types The list of checksum types being specified by the caller
56
- # @return [Hash<Symbol,Digest::Class>] The digest implementation objects, keyed by checksum type, that will generate the checksums
57
- def Fixity.get_digesters(checksum_types=@@default_checksum_types)
58
- checksum_types.inject(Hash.new) do |digesters, checksum_type|
59
- case checksum_type
60
- when :md5
61
- digesters[checksum_type] = Digest::MD5.new
62
- when :sha1
63
- digesters[checksum_type] = Digest::SHA1.new
64
- when :sha256
65
- digesters[checksum_type] = Digest::SHA2.new(256)
66
- when :sha384
67
- digesters[checksum_type] = Digest::SHA2.new(384)
68
- when :sha512
69
- digesters[checksum_type] = Digest::SHA2.new(512)
70
- else
71
- raise "Unrecognized checksum type: #{checksum_type}"
72
- end
73
- digesters
74
- end
75
- end
76
-
77
- # @param pathname [Pathname] The location of the file to be digested
78
- # @param [Object] base_pathname The base directory from which relative paths (file IDS) will be derived
79
- # @param [Object] checksum_types The list of checksum types being specified by the caller (or default list)
80
- # @return [FileFixity] Generate a FileFixity instance containing fixity properties measured from a physical file
81
- def Fixity.fixity_from_file(pathname, base_pathname, checksum_types=@@default_checksum_types)
82
- file_fixity = FileFixity.new
83
- file_fixity.file_id = pathname.relative_path_from(base_pathname).to_s
84
- file_fixity.bytes = pathname.size
85
- digesters = Fixity.get_digesters(checksum_types)
86
- pathname.open("r") do |stream|
87
- while buffer = stream.read(8192)
88
- digesters.values.each { |digest| digest.update(buffer) }
89
- end
90
- end
91
- digesters.each { |checksum_type, digest| file_fixity.checksums[checksum_type] = digest.hexdigest }
92
- file_fixity
93
- end
94
-
95
- # @param [Pathname] base_pathname The directory path used as the base for deriving relative paths (file IDs)
96
- # @param [Array<Pathname>] path_list The list of pathnames for files whose fixity will be generated
97
- # @return [Hash<String,FileFixity>] A hash containing file ids and fixity data derived from the actual files
98
- def Fixity.generate_checksums(base_pathname, path_list, checksum_types=@@default_checksum_types)
99
- path_list = base_pathname.find if path_list.nil?
100
- file_fixity_hash = Hash.new
101
- path_list.select{|pathname| pathname.file?}.each do |file|
102
- file_fixity = Fixity.fixity_from_file(file, base_pathname, checksum_types)
103
- file_fixity_hash[file_fixity.file_id] = file_fixity
104
- end
105
- file_fixity_hash
106
- end
107
-
108
- # @param [Integer] length The length of the checksum value in hex format
109
- # @return [ChecksumType] The ChecksumType struct that contains the properties of the matching checksum type
110
- def Fixity.type_for_length(length)
111
- @@valid_checksum_types.select {|type| type.hex_length == length}.first
112
- end
113
-
114
- # @param [Object] file_id The filename or relative path of the file from its base directory
115
- # @param [Object] checksum_values The digest values of the file
116
- # @return [FileFixity] Generate a FileFixity instance containing fixity properties supplied by the caller
117
- def Fixity.fixity_from_checksum_values(file_id, checksum_values)
118
- file_fixity = FileFixity.new
119
- file_fixity.file_id = file_id
120
- checksum_values.each do |digest|
121
- checksum_type = Fixity.type_for_length(digest.length)
122
- file_fixity.checksums[checksum_type.id] = digest
123
- end
124
- file_fixity
125
- end
126
-
127
- # @param [Hash<String,FileFixity>] file_fixity_hash A hash containing file ids and fixity data derived from the manifest files
128
- # @return [Hash<String,Hash<Symbol,String>>] A hash containing file ids and checksum data derived from the file_fixity_hash
129
- def Fixity.file_checksum_hash(file_fixity_hash)
130
- checksum_hash = Hash.new
131
- file_fixity_hash.values.each{|file| checksum_hash[file.file_id] = file.checksums}
132
- checksum_hash
133
- end
134
-
135
- # @param [Symbol,String] checksum_type The type of checksum digest to be generated
136
- # @param [Pathname,String] file_pathname The location of the file to digest
137
- # @return [String] The operating system shell command that will generate the checksum digest value
138
- def Fixity.openssl_digest_command(checksum_type,file_pathname)
139
- command = "openssl dgst -#{checksum_type} #{file_pathname}"
140
- command
141
- end
142
-
143
- # @param [Symbol,String] checksum_type The type of checksum digest to be generated
144
- # @param [Pathname,String] file_pathname The location of the file to digest
145
- # @return [String] The checksum digest value for the file
146
- def Fixity.openssl_digest(checksum_type,file_pathname)
147
- command = openssl_digest_command(checksum_type,file_pathname)
148
- stdout = OperatingSystem.execute(command)
149
- checksum = stdout.scan(/[A-Za-z0-9]+/).last
150
- checksum
151
- end
152
-
153
- end
154
-
155
- end
@@ -1,33 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../libdir')
2
- require 'sdr_replication'
3
-
4
- module Replication
5
-
6
- # A wrapper class around the systemu gem that is used for shelling out to the operating system
7
- # and executing a command
8
- #
9
- # @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
10
- # All rights reserved. See {file:LICENSE.rdoc} for details.
11
- class OperatingSystem
12
-
13
- # Executes a system command in a subprocess.
14
- # The method will return stdout from the command if execution was successful.
15
- # The method will raise an exception if execution fails.
16
- # The exception's message will contain the explanation of the failure.
17
- # @param [String] command the command to be executed
18
- # @return [String] stdout from the command if execution was successful
19
- def OperatingSystem.execute(command)
20
- status, stdout, stderr = systemu(command)
21
- if (status.exitstatus != 0)
22
- raise stderr
23
- end
24
- return stdout
25
- rescue
26
- msg = "Command failed to execute: [#{command}] caused by <STDERR = #{stderr.split($/).join('; ')}>"
27
- msg << " STDOUT = #{stdout.split($/).join('; ')}" if (stdout && (stdout.length > 0))
28
- raise msg
29
- end
30
-
31
- end
32
-
33
- end
@@ -1,160 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../libdir')
2
- require 'sdr_replication'
3
-
4
- module Replication
5
-
6
- # A tar archive file containing a set of digital object files
7
- #
8
- # @note Copyright (c) 2014 by The Board of Trustees of the Leland Stanford Junior University.
9
- # All rights reserved. See {file:LICENSE.rdoc} for details.
10
- class Tarfile
11
-
12
- # @return [String] create archive of the specified format
13
- # * gnu = GNU tar 1.13.x format
14
- # * posix = POSIX 1003.1-2001 (pax) format
15
- attr_accessor :format
16
-
17
- # @return [Boolean] Follow symlinks and archive the files they point to
18
- attr_accessor :dereference
19
-
20
- # @return [Boolean] Verify that files were copied faithfully
21
- attr_accessor :verify
22
-
23
- # @return [Boolean] Create/list/extract multi-volume archive (not yet implemented)
24
- attr_accessor :multi_volume
25
-
26
- # @param [Hash<Symbol,Object>] options Key,Value pairs specifying initial values of attributes
27
- # @return [Tarfile] Initialize a new Tarfile object
28
- def initialize(options=nil)
29
- options={} if options.nil?
30
- # set defaults
31
- @format=:posix
32
- @dereference = true
33
- @verify = false
34
- @multi_volume = false
35
- # override defaults
36
- options.each do |key,value|
37
- #instance_variable_set("@#{key}", value)
38
- send "#{key}=", value
39
- end
40
- end
41
-
42
- # @return [Pathname] The full path of the ancestor dir in which the tar file resides
43
- def tarfile_basepath
44
- raise "Tarfile basepath is nil" unless @tarfile_basepath
45
- @tarfile_basepath
46
- end
47
-
48
- # @param [Pathname,String] basepath The full path of the ancestor dir in which the tar file resides
49
- # @return [Void] Set the full path of the ancestor dir in which the tar file resides
50
- def tarfile_basepath=(basepath)
51
- raise "No pathname specified" unless basepath
52
- @tarfile_basepath = Pathname(basepath).expand_path
53
- end
54
-
55
- # @return [Pathname] the full path of the tar archive file to be created or extracted from
56
- def tarfile_fullpath
57
- @tarfile_fullpath
58
- end
59
-
60
- # @param [Pathname,String] fullpath The full path of tar file
61
- # @return [Void] Sets the full path of tar file
62
- def tarfile_fullpath=(fullpath)
63
- @tarfile_fullpath = Pathname(fullpath).expand_path
64
- end
65
-
66
- # @return [String] The id (path relative to basepath) of the tar file
67
- def tarfile_relative_path
68
- @tarfile_fullpath.relative_path_from(@tarfile_basepath).to_s
69
- end
70
-
71
- # @return [Pathname] The full path of the source file or directory being archived
72
- def source_fullpath
73
- raise "Source pathname is nil" unless @source_pathname
74
- @source_pathname
75
- end
76
-
77
- # @param [Pathname,String] source The full path of the source file or directory being archived
78
- # @return [Void] Set the full path of the source file or directory being archived
79
- def source_fullpath=(source)
80
- raise "No pathname specified" unless source
81
- @source_pathname = Pathname(source).expand_path
82
- end
83
-
84
- # @return [Pathname] The directory that is the basis of relative paths
85
- def source_basepath
86
- @source_basepath
87
- end
88
-
89
- # @param [Pathname,String] base The directory that is the basis of relative paths
90
- # @return [Void] Set the base path of the source file or directory being archived
91
- def source_basepath=(base)
92
- raise "No pathname specified" unless base
93
- @source_basepath = Pathname(base).expand_path
94
- end
95
-
96
- # @return [Pathname] The relative path from the source base directory to the source directory
97
- def source_relative_path
98
- source_fullpath.relative_path_from(source_basepath)
99
- end
100
-
101
- # @return [String] The shell command string to be used to create the tarfile
102
- def create_cmd
103
- command = "tar --create --file=#{tarfile_fullpath} --format=#{@format} "
104
- command << "--dereference " if @dereference
105
- command << "--verify " if @verify
106
- command << "--directory='#{source_basepath}' " if source_basepath
107
- command << source_relative_path.to_s
108
- command
109
- end
110
-
111
- # @return [Tarfile] Shell out to the operating system and create the tar archive file
112
- def create_tarfile
113
- command = create_cmd
114
- OperatingSystem.execute(command)
115
- self
116
- end
117
-
118
- # @return [String] The shell command that will list the tarfile's contents
119
- def list_cmd
120
- command = "tar --list --file=#{tarfile_fullpath} "
121
- command
122
- end
123
-
124
- # @return [String] The list of the tarfile's contents
125
- def list_tarfile
126
- command = list_cmd
127
- list = OperatingSystem.execute(command)
128
- list
129
- end
130
-
131
- # @return [Pathname] The location of the directory into which the tarfile should be extracted
132
- def target_pathname
133
- raise "Target pathname is nil" unless @target_pathname
134
- @target_pathname
135
- end
136
-
137
- # @param [Pathname,String] target The location of the directory into which the tarfile should be extracted
138
- # @return [Void] Set the location of the directory into which the tarfile should be extracted
139
- def target_pathname=(target)
140
- raise "No target pathname specified" unless target
141
- @target_pathname = Pathname(target).expand_path
142
- end
143
-
144
- # @return [String] The shell command that will extract the tarfile's contents
145
- def extract_cmd
146
- command = "tar --extract --file=#{tarfile_fullpath} "
147
- command << "--directory='#{target_pathname}' " if target_pathname
148
- command
149
- end
150
-
151
- # @return [String] Shell out to the operating system and extract the tar archive file
152
- def extract_tarfile
153
- command = extract_cmd
154
- stdout = OperatingSystem.execute(command)
155
- stdout
156
- end
157
-
158
- end
159
-
160
- end