ddr-ingesttools 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/convert_dpc_folder.rb +20 -4
- data/config/locales/en.yml +7 -0
- data/ddr-ingesttools.gemspec +1 -0
- data/lib/ddr/ingesttools.rb +5 -0
- data/lib/ddr/ingesttools/checksum_file.rb +28 -0
- data/lib/ddr/ingesttools/dpc_folder_converter/converter.rb +86 -37
- data/lib/ddr/ingesttools/version.rb +1 -1
- data/spec/fixtures/files/bad-checksums-sha1.txt +11 -0
- data/spec/fixtures/files/good-checksums-sha1.txt +11 -0
- data/spec/fixtures/files/manifest-sha1-collection-title.txt +10 -0
- data/spec/fixtures/files/manifest-sha1.txt +2 -0
- data/spec/integration/dpc_folder_converter_spec.rb +136 -26
- data/spec/spec_helper.rb +1 -0
- data/spec/unit/checksum_file_spec.rb +17 -0
- metadata +26 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c8a03cc7a443dbca08bd1c8cb86d6cc70f18050
|
4
|
+
data.tar.gz: 3f1d806ba1c1b8ccde507157372e30c93c1cd0cd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 361c4c511a7bb10f03087412aa700739e188161f9b6650e0c6eb295560bfa1a5ff9d23a8621db46d552e418bae0ff50bd08b7dbda3fabc5263becb3fc7644c16
|
7
|
+
data.tar.gz: a14ed57c5cbb449f02d1c29c1f4cf5df37960152c7a2c8d2717eea2363ae9de05bdf8e2e7e5478a0e0048b3b82d73e7328fcb00ef0a5b199bc49640c93aa9cd7
|
data/bin/convert_dpc_folder.rb
CHANGED
@@ -3,8 +3,9 @@
|
|
3
3
|
require 'ddr/ingesttools'
|
4
4
|
require 'optparse'
|
5
5
|
|
6
|
-
# Parse command line arguments
|
7
6
|
options = {}
|
7
|
+
|
8
|
+
# Parse command line arguments
|
8
9
|
parser = OptionParser.new do |opts|
|
9
10
|
opts.banner = 'Usage: convert_dpc_folder.rb [options]'
|
10
11
|
|
@@ -20,6 +21,20 @@ parser = OptionParser.new do |opts|
|
|
20
21
|
'to use as the local ID of the item of which that file is a component') do |v|
|
21
22
|
options[:item_id_length] = v
|
22
23
|
end
|
24
|
+
|
25
|
+
opts.on('-c', '--checksums [CHECKSUM_FILE]', 'External checksum file') do |v|
|
26
|
+
options[:checksums] = v
|
27
|
+
end
|
28
|
+
|
29
|
+
opts.on('--[no-]copy_files', 'Copy files to target location instead of using a symlink') do |v|
|
30
|
+
options[:copy_files] = v
|
31
|
+
end
|
32
|
+
|
33
|
+
opts.on('--collection_title [TITLE]', 'Title for collection',
|
34
|
+
'required if intending to create a collection-creating Standard Ingest') do |v|
|
35
|
+
options[:collection_title] = v
|
36
|
+
end
|
37
|
+
|
23
38
|
end
|
24
39
|
|
25
40
|
begin
|
@@ -35,6 +50,7 @@ rescue OptionParser::InvalidOption, OptionParser::MissingArgument
|
|
35
50
|
exit(false)
|
36
51
|
end
|
37
52
|
|
38
|
-
|
39
|
-
|
40
|
-
|
53
|
+
converter = Ddr::IngestTools::DpcFolderConverter::Converter.new(options)
|
54
|
+
results = converter.call
|
55
|
+
puts I18n.translate('errors.count', { count: results.errors.size })
|
56
|
+
results.errors.each { |e| puts e }
|
data/ddr-ingesttools.gemspec
CHANGED
data/lib/ddr/ingesttools.rb
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
require_relative 'ingesttools/version'
|
2
2
|
require_relative 'ingesttools/dpc_folder_converter'
|
3
|
+
require_relative 'ingesttools/checksum_file'
|
4
|
+
|
5
|
+
require 'i18n'
|
3
6
|
|
4
7
|
module Ddr
|
5
8
|
module IngestTools
|
6
9
|
|
10
|
+
I18n.load_path = Dir['config/locales/*.yml']
|
11
|
+
|
7
12
|
end
|
8
13
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Ddr::IngestTools
|
2
|
+
class ChecksumFile
|
3
|
+
|
4
|
+
attr_reader :digests
|
5
|
+
|
6
|
+
def initialize(checksum_filepath)
|
7
|
+
@digests = digest_hash(checksum_filepath)
|
8
|
+
end
|
9
|
+
|
10
|
+
def digest(filepath)
|
11
|
+
digests[filepath]
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def digest_hash(checksum_filepath)
|
17
|
+
h = {}
|
18
|
+
File.open(checksum_filepath, 'r') do |file|
|
19
|
+
file.each_line do |line|
|
20
|
+
digest, path = line.chomp.split
|
21
|
+
h[path] = digest
|
22
|
+
end
|
23
|
+
end
|
24
|
+
h
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
@@ -6,54 +6,70 @@ require 'find'
|
|
6
6
|
module Ddr::IngestTools::DpcFolderConverter
|
7
7
|
class Converter
|
8
8
|
|
9
|
-
|
9
|
+
INTERMEDIATE_FILES_DIRNAME = 'intermediate_files'
|
10
|
+
DPC_TARGETS_DIRNAME = 'targets'
|
11
|
+
SIF_TARGETS_DIRNAME = 'dpc_targets'
|
12
|
+
SIF_METADATA_FILENAME = 'metadata.txt'
|
13
|
+
SIF_MANIFEST_SHA1_FILENAME = 'manifest-sha1.txt'
|
10
14
|
|
11
|
-
|
12
|
-
attr_accessor :local_id_metadata
|
15
|
+
Results = Struct.new(:file_map, :errors)
|
13
16
|
|
14
|
-
|
17
|
+
attr_reader :source, :target, :data_dir, :item_id_length, :checksums, :copy_files, :collection_title,
|
18
|
+
:metadata_headers
|
19
|
+
attr_accessor :errors, :file_map, :local_id_metadata, :results
|
20
|
+
|
21
|
+
def initialize(source:, target:, item_id_length:, checksums: nil, copy_files: false, collection_title: nil)
|
15
22
|
@source = source
|
16
23
|
@target = target
|
17
|
-
@data_dir = File.join(target, 'data')
|
18
24
|
@item_id_length = item_id_length
|
19
|
-
@
|
25
|
+
@checksums = checksums
|
26
|
+
@copy_files = copy_files
|
27
|
+
@collection_title = collection_title
|
28
|
+
@metadata_headers = [ 'path', 'local_id' ]
|
29
|
+
@metadata_headers << 'title' unless collection_title.nil?
|
20
30
|
end
|
21
31
|
|
22
32
|
def call
|
23
|
-
|
24
|
-
|
25
|
-
find_target_files(source).each { |file| handle_target(file) }
|
33
|
+
setup
|
34
|
+
scan_files(source)
|
26
35
|
output_metadata
|
27
36
|
bagitup
|
37
|
+
validate_checksums if checksums
|
38
|
+
Results.new(file_map, errors)
|
28
39
|
end
|
29
40
|
|
30
41
|
private
|
31
42
|
|
32
|
-
def
|
33
|
-
|
43
|
+
def setup
|
44
|
+
@data_dir = File.join(target, 'data')
|
45
|
+
@errors = []
|
46
|
+
@file_map = {}
|
47
|
+
@local_id_metadata = {}
|
48
|
+
FileUtils.mkdir_p data_dir
|
34
49
|
end
|
35
50
|
|
36
|
-
def
|
37
|
-
|
38
|
-
Find.find(dir) do |path|
|
39
|
-
Find.prune if path.include?('targets')
|
40
|
-
Find.prune if path.include?('intermediate_files')
|
41
|
-
next unless File.file?(path)
|
42
|
-
next unless included_extensions.include?(File.extname(path))
|
43
|
-
files << path
|
44
|
-
end
|
45
|
-
files
|
51
|
+
def included_extensions
|
52
|
+
Ddr::IngestTools::DpcFolderConverter.config[:included_extensions]
|
46
53
|
end
|
47
54
|
|
48
|
-
def
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
+
def scan_files(dirpath, file_handler='handle_component'.to_sym)
|
56
|
+
Dir.foreach(dirpath).each do |entry|
|
57
|
+
next if [ '.', '..' ].include?(entry)
|
58
|
+
path = File.join(dirpath, entry)
|
59
|
+
if File.directory?(path)
|
60
|
+
if entry == DPC_TARGETS_DIRNAME
|
61
|
+
scan_files(path, :handle_target)
|
62
|
+
elsif entry == INTERMEDIATE_FILES_DIRNAME
|
63
|
+
scan_files(path, :handle_intermediate_file)
|
64
|
+
else
|
65
|
+
scan_files(path, file_handler)
|
66
|
+
end
|
67
|
+
else
|
68
|
+
if included_extensions.include?(File.extname(entry))
|
69
|
+
self.send(file_handler, path)
|
70
|
+
end
|
71
|
+
end
|
55
72
|
end
|
56
|
-
files
|
57
73
|
end
|
58
74
|
|
59
75
|
def handle_component(file)
|
@@ -61,26 +77,45 @@ module Ddr::IngestTools::DpcFolderConverter
|
|
61
77
|
item_id = item_id_length == 0 ? base : base[0, item_id_length]
|
62
78
|
FileUtils.mkdir_p(File.join(data_dir, item_id))
|
63
79
|
local_id_metadata[item_id] = item_id
|
64
|
-
|
80
|
+
handle_file(file, item_id)
|
65
81
|
local_id_metadata[File.join(item_id, File.basename(file))] = base
|
66
82
|
end
|
67
83
|
|
84
|
+
def handle_intermediate_file(file)
|
85
|
+
FileUtils.mkdir_p(File.join(data_dir, INTERMEDIATE_FILES_DIRNAME))
|
86
|
+
handle_file(file, INTERMEDIATE_FILES_DIRNAME)
|
87
|
+
end
|
88
|
+
|
68
89
|
def handle_target(file)
|
69
90
|
base = File.basename(file, File.extname(file))
|
70
|
-
FileUtils.mkdir_p(File.join(data_dir,
|
71
|
-
|
72
|
-
local_id_metadata[File.join(
|
91
|
+
FileUtils.mkdir_p(File.join(data_dir, SIF_TARGETS_DIRNAME))
|
92
|
+
handle_file(file, SIF_TARGETS_DIRNAME)
|
93
|
+
local_id_metadata[File.join(SIF_TARGETS_DIRNAME, File.basename(file))] = base
|
94
|
+
end
|
95
|
+
|
96
|
+
def handle_file(file, folder_name)
|
97
|
+
if copy_files
|
98
|
+
FileUtils.cp file, File.join(data_dir, folder_name)
|
99
|
+
else
|
100
|
+
FileUtils.ln_s file, File.join(data_dir, folder_name)
|
101
|
+
end
|
102
|
+
file_map[file] = File.join(data_dir, folder_name, File.basename(file))
|
73
103
|
end
|
74
104
|
|
75
105
|
def output_metadata
|
76
106
|
metadata_rows = []
|
107
|
+
if collection_title
|
108
|
+
metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, collection_title ])
|
109
|
+
end
|
77
110
|
local_id_metadata.each_pair do |k,v|
|
78
|
-
|
111
|
+
row_elements = [ k, v ]
|
112
|
+
row_elements << nil if collection_title
|
113
|
+
metadata_rows << CSV::Row.new(metadata_headers, row_elements)
|
79
114
|
end
|
80
|
-
File.open(File.join(data_dir,
|
81
|
-
file.puts(
|
115
|
+
File.open(File.join(data_dir, SIF_METADATA_FILENAME), 'w') do |file|
|
116
|
+
file.puts(metadata_headers.join(Ddr::IngestTools::DpcFolderConverter.config[:csv_options][:col_sep]))
|
82
117
|
metadata_rows.each do |row|
|
83
|
-
file.puts(row.to_csv(Ddr::IngestTools::DpcFolderConverter.config[:csv_options])
|
118
|
+
file.puts(row.to_csv(Ddr::IngestTools::DpcFolderConverter.config[:csv_options]))
|
84
119
|
end
|
85
120
|
end
|
86
121
|
end
|
@@ -89,5 +124,19 @@ module Ddr::IngestTools::DpcFolderConverter
|
|
89
124
|
bag = BagIt::Bag.new(target)
|
90
125
|
bag.manifest!
|
91
126
|
end
|
127
|
+
|
128
|
+
def validate_checksums
|
129
|
+
external_checksums = Ddr::IngestTools::ChecksumFile.new(checksums)
|
130
|
+
sif_manifest = Ddr::IngestTools::ChecksumFile.new(File.join(target, SIF_MANIFEST_SHA1_FILENAME))
|
131
|
+
file_map.each do |source_path, target_path|
|
132
|
+
external_checksum = external_checksums.digest(source_path)
|
133
|
+
manifest_path = target_path.sub("#{target}/", '')
|
134
|
+
sif_checksum = sif_manifest.digest(manifest_path)
|
135
|
+
unless external_checksum == sif_checksum
|
136
|
+
errors << I18n.translate('errors.checksum_mismatch', { c1: external_checksum, f1: source_path,
|
137
|
+
c2: sif_checksum, f2: target_path })
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
92
141
|
end
|
93
142
|
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
|
2
|
+
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
|
3
|
+
d0a2f2482783ae3c83d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
|
4
|
+
38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
|
5
|
+
1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
|
6
|
+
c227abc095d3b758ab1c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
|
7
|
+
541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
|
8
|
+
49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
|
9
|
+
260b3c2d20a1726de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
|
10
|
+
a08c4d5a76d1b8735587be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
|
11
|
+
40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
|
@@ -0,0 +1,11 @@
|
|
1
|
+
3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
|
2
|
+
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
|
3
|
+
d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
|
4
|
+
38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
|
5
|
+
1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
|
6
|
+
c227abc095d3b758051c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
|
7
|
+
541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
|
8
|
+
49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
|
9
|
+
260b3c2d20a7126de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
|
10
|
+
a08c4d5a76d1b8734487be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
|
11
|
+
40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
|
@@ -0,0 +1,10 @@
|
|
1
|
+
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f data/abc001/abc001001.tif
|
2
|
+
d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad data/abc001/abc001002.tif
|
3
|
+
38ee72ab417192589f3a54ef1016131c7d7e9e4e data/abc002/abc002001.tif
|
4
|
+
c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
|
5
|
+
541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
|
6
|
+
a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
|
7
|
+
40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
|
8
|
+
49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
|
9
|
+
260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
|
10
|
+
8a7878863fd183436f50060343d3757747772d9f data/metadata.txt
|
@@ -5,4 +5,6 @@ c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
|
|
5
5
|
541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
|
6
6
|
a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
|
7
7
|
40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
|
8
|
+
49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
|
9
|
+
260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
|
8
10
|
913699468893882d1dec463f3df1a405c7f32784 data/metadata.txt
|
@@ -2,10 +2,49 @@ module Ddr::IngestTools::DpcFolderConverter
|
|
2
2
|
|
3
3
|
RSpec.describe Converter do
|
4
4
|
|
5
|
+
shared_examples 'a conversion to standard ingest format' do
|
6
|
+
subject { described_class.new(converter_args) }
|
7
|
+
it 'produces the correct standard ingest format directory' do
|
8
|
+
results = subject.call
|
9
|
+
# Target directory contains all the expected files and only the expected files
|
10
|
+
expect(Array(Find.find(target_directory))).to match_array(expected_files)
|
11
|
+
# Target content files are same as source content files
|
12
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001001.tif'),
|
13
|
+
File.join(source_directory, 'abc001001.tif'))).to be true
|
14
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001002.tif'),
|
15
|
+
File.join(source_directory, 'abc001002.tif'))).to be true
|
16
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'abc002', 'abc002001.tif'),
|
17
|
+
File.join(source_directory, 'abc002001.tif'))).to be true
|
18
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003001.wav'),
|
19
|
+
File.join(source_directory, 'g', 'abc003001.wav'))).to be true
|
20
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003002.wav'),
|
21
|
+
File.join(source_directory, 'g', 'abc003002.wav'))).to be true
|
22
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T001.tif'),
|
23
|
+
File.join(source_directory, 'targets', 'T001.tif'))).to be true
|
24
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T002.tif'),
|
25
|
+
File.join(source_directory, 'targets', 'T002.tif'))).to be true
|
26
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
|
27
|
+
File.join(source_directory, 'intermediate_files', 'abc001001.jpg'))).to be true
|
28
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
|
29
|
+
File.join(source_directory, 'intermediate_files', 'abc002001.jpg'))).to be true
|
30
|
+
# Generated metadata file contains the expected contents
|
31
|
+
metadata_lines = File.readlines(File.join(data_directory, 'metadata.txt')).map(&:chomp)
|
32
|
+
expect(metadata_lines).to match_array(expected_metadata)
|
33
|
+
# Generated manifest contains the expected contents (ignoring line order)
|
34
|
+
generated_manifest = File.readlines(File.join(File.join(target_directory, 'manifest-sha1.txt'))).sort
|
35
|
+
expect(generated_manifest).to match_array(expected_manifest)
|
36
|
+
# Conversion process produces the expected errors
|
37
|
+
expect(results.errors).to match_array(checksum_errors)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
5
41
|
let(:source_directory) { Dir.mktmpdir('dpc') }
|
6
42
|
let(:target_directory) { Dir.mktmpdir('sif') }
|
7
43
|
let(:data_directory) { File.join(target_directory, 'data') }
|
8
44
|
let(:item_id_length) { 6 }
|
45
|
+
let(:checksums_directory) { Dir.mktmpdir('checksums') }
|
46
|
+
let(:checksums) { File.join(checksums_directory, 'checksums-sha1.txt') }
|
47
|
+
let(:converter_args) { { source: source_directory, target: target_directory, item_id_length: item_id_length } }
|
9
48
|
let(:expected_files) { [
|
10
49
|
target_directory,
|
11
50
|
File.join(target_directory, 'bag-info.txt'),
|
@@ -22,6 +61,9 @@ module Ddr::IngestTools::DpcFolderConverter
|
|
22
61
|
File.join(data_directory, 'dpc_targets'),
|
23
62
|
File.join(data_directory, 'dpc_targets', 'T001.tif'),
|
24
63
|
File.join(data_directory, 'dpc_targets', 'T002.tif'),
|
64
|
+
File.join(data_directory, 'intermediate_files'),
|
65
|
+
File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
|
66
|
+
File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
|
25
67
|
File.join(data_directory, 'metadata.txt'),
|
26
68
|
File.join(target_directory, 'manifest-md5.txt'),
|
27
69
|
File.join(target_directory, 'manifest-sha1.txt'),
|
@@ -41,8 +83,9 @@ module Ddr::IngestTools::DpcFolderConverter
|
|
41
83
|
"dpc_targets/T001.tif\tT001",
|
42
84
|
"dpc_targets/T002.tif\tT002"
|
43
85
|
] }
|
44
|
-
|
45
|
-
|
86
|
+
let(:expected_manifest) do
|
87
|
+
File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt')).sort
|
88
|
+
end
|
46
89
|
|
47
90
|
before do
|
48
91
|
File.open(File.join(source_directory, 'Thumbs.db'), 'w') { |f| f.write('Thumbs') }
|
@@ -53,35 +96,102 @@ module Ddr::IngestTools::DpcFolderConverter
|
|
53
96
|
Dir.mkdir(File.join(source_directory,'g'))
|
54
97
|
File.open(File.join(source_directory, 'g', 'abc003001.wav'), 'w') { |f| f.write('abc003001') }
|
55
98
|
File.open(File.join(source_directory, 'g', 'abc003002.wav'), 'w') { |f| f.write('abc003002') }
|
56
|
-
Dir.mkdir(File.join(source_directory,'
|
99
|
+
Dir.mkdir(File.join(source_directory, 'intermediate_files'))
|
100
|
+
File.open(File.join(source_directory, 'intermediate_files', 'abc001001.jpg'), 'w') { |f| f.write('abc001001 jpg')}
|
101
|
+
File.open(File.join(source_directory, 'intermediate_files', 'abc002001.jpg'), 'w') { |f| f.write('abc002001 jpg')}
|
102
|
+
Dir.mkdir(File.join(source_directory, 'targets'))
|
57
103
|
File.open(File.join(source_directory, 'targets', 'T001.tif'), 'w') { |f| f.write('T001') }
|
58
104
|
File.open(File.join(source_directory, 'targets', 'T002.tif'), 'w') { |f| f.write('T002') }
|
59
105
|
end
|
60
106
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
107
|
+
describe 'external checksum files' do
|
108
|
+
describe 'external checksum file' do
|
109
|
+
before do
|
110
|
+
File.open(checksums, 'w') do |f|
|
111
|
+
f << File.open(checksum_file_template).read.gsub('SOURCE_DIRECTORY', source_directory)
|
112
|
+
end
|
113
|
+
converter_args[:checksums] = checksums
|
114
|
+
end
|
115
|
+
describe 'mismatch' do
|
116
|
+
let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'bad-checksums-sha1.txt') }
|
117
|
+
let(:checksum_errors) {
|
118
|
+
[ I18n.translate('errors.checksum_mismatch', { c1: 'd0a2f2482783ae3c83d06f3cdeaa1a306cc043ad',
|
119
|
+
f1: File.join(source_directory, 'abc001002.tif'),
|
120
|
+
c2: 'd0a2f2482783ae3c38d06f3cdeaa1a306cc043ad',
|
121
|
+
f2: File.join(target_directory, 'data/abc001/abc001002.tif') }),
|
122
|
+
I18n.translate('errors.checksum_mismatch', { c1: 'c227abc095d3b758ab1c1c1c9e922494b6b6e0b0',
|
123
|
+
f1: File.join(source_directory, 'g/abc003001.wav'),
|
124
|
+
c2: 'c227abc095d3b758051c1c1c9e922494b6b6e0b0',
|
125
|
+
f2: File.join(target_directory, 'data/abc003/abc003001.wav') }),
|
126
|
+
I18n.translate('errors.checksum_mismatch', { c1: '260b3c2d20a1726de96671d29f73ba09d13b61ba',
|
127
|
+
f1: File.join(source_directory, 'intermediate_files/abc002001.jpg'),
|
128
|
+
c2: '260b3c2d20a7126de96671d29f73ba09d13b61ba',
|
129
|
+
f2: File.join(target_directory, 'data/intermediate_files/abc002001.jpg') }),
|
130
|
+
I18n.translate('errors.checksum_mismatch', { c1: 'a08c4d5a76d1b8735587be6ffcba66a9baf475c4',
|
131
|
+
f1: File.join(source_directory, 'targets/T001.tif'),
|
132
|
+
c2: 'a08c4d5a76d1b8734487be6ffcba66a9baf475c4',
|
133
|
+
f2: File.join(target_directory, 'data/dpc_targets/T001.tif') })
|
134
|
+
]
|
135
|
+
}
|
136
|
+
describe 'files are copied' do
|
137
|
+
before { converter_args[:copy_files] = true }
|
138
|
+
it_behaves_like 'a conversion to standard ingest format'
|
139
|
+
end
|
140
|
+
describe 'files are not copied' do
|
141
|
+
before { converter_args[:copy_files] = false }
|
142
|
+
it_behaves_like 'a conversion to standard ingest format'
|
143
|
+
end
|
144
|
+
end
|
145
|
+
describe 'no mismatch' do
|
146
|
+
let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'good-checksums-sha1.txt') }
|
147
|
+
let(:checksum_errors) { [] }
|
148
|
+
describe 'files are copied' do
|
149
|
+
before { converter_args[:copy_files] = true }
|
150
|
+
it_behaves_like 'a conversion to standard ingest format'
|
151
|
+
end
|
152
|
+
describe 'files are not copied' do
|
153
|
+
before { converter_args[:copy_files] = false }
|
154
|
+
it_behaves_like 'a conversion to standard ingest format'
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
describe 'no external checksum file' do
|
160
|
+
let(:checksum_errors) { [] }
|
161
|
+
describe 'files are copied' do
|
162
|
+
before { converter_args[:copy_files] = true }
|
163
|
+
it_behaves_like 'a conversion to standard ingest format'
|
164
|
+
end
|
165
|
+
describe 'files are not copied' do
|
166
|
+
before { converter_args[:copy_files] = false }
|
167
|
+
it_behaves_like 'a conversion to standard ingest format'
|
168
|
+
end
|
169
|
+
end
|
84
170
|
end
|
85
171
|
|
172
|
+
describe 'collection titles' do
|
173
|
+
let(:checksum_errors) { [] }
|
174
|
+
describe 'collection title provided' do
|
175
|
+
let(:expected_metadata) { [
|
176
|
+
"path\tlocal_id\ttitle",
|
177
|
+
"\t\tTest Collection",
|
178
|
+
"abc001\tabc001\t",
|
179
|
+
"abc002\tabc002\t",
|
180
|
+
"abc003\tabc003\t",
|
181
|
+
"abc001/abc001001.tif\tabc001001\t",
|
182
|
+
"abc001/abc001002.tif\tabc001002\t",
|
183
|
+
"abc002/abc002001.tif\tabc002001\t",
|
184
|
+
"abc003/abc003001.wav\tabc003001\t",
|
185
|
+
"abc003/abc003002.wav\tabc003002\t",
|
186
|
+
"dpc_targets/T001.tif\tT001\t",
|
187
|
+
"dpc_targets/T002.tif\tT002\t"
|
188
|
+
] }
|
189
|
+
let(:expected_manifest) do
|
190
|
+
File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1-collection-title.txt')).sort
|
191
|
+
end
|
192
|
+
before { converter_args[:collection_title] = 'Test Collection' }
|
193
|
+
it_behaves_like 'a conversion to standard ingest format'
|
194
|
+
end
|
195
|
+
end
|
86
196
|
end
|
87
197
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
module Ddr::IngestTools
|
2
|
+
|
3
|
+
RSpec.describe ChecksumFile do
|
4
|
+
|
5
|
+
subject { described_class.new(checksum_filepath) }
|
6
|
+
|
7
|
+
let(:checksum_filepath) { File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt') }
|
8
|
+
|
9
|
+
describe 'digest' do
|
10
|
+
it 'provides the requested digest' do
|
11
|
+
expect(subject.digest('data/abc001/abc001002.tif')).to eq('d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad')
|
12
|
+
expect(subject.digest('not/in/checksum/file.txt')).to be nil
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ddr-ingesttools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Coble
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bagit
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0.4'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: i18n
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.8'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.8'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -83,14 +97,20 @@ files:
|
|
83
97
|
- README.md
|
84
98
|
- Rakefile
|
85
99
|
- bin/convert_dpc_folder.rb
|
100
|
+
- config/locales/en.yml
|
86
101
|
- ddr-ingesttools.gemspec
|
87
102
|
- lib/ddr/ingesttools.rb
|
103
|
+
- lib/ddr/ingesttools/checksum_file.rb
|
88
104
|
- lib/ddr/ingesttools/dpc_folder_converter.rb
|
89
105
|
- lib/ddr/ingesttools/dpc_folder_converter/converter.rb
|
90
106
|
- lib/ddr/ingesttools/version.rb
|
107
|
+
- spec/fixtures/files/bad-checksums-sha1.txt
|
108
|
+
- spec/fixtures/files/good-checksums-sha1.txt
|
109
|
+
- spec/fixtures/files/manifest-sha1-collection-title.txt
|
91
110
|
- spec/fixtures/files/manifest-sha1.txt
|
92
111
|
- spec/integration/dpc_folder_converter_spec.rb
|
93
112
|
- spec/spec_helper.rb
|
113
|
+
- spec/unit/checksum_file_spec.rb
|
94
114
|
homepage: https://github.com/duke-libraries/ddr-ingesttools
|
95
115
|
licenses:
|
96
116
|
- BSD-3-Clause
|
@@ -116,6 +136,10 @@ signing_key:
|
|
116
136
|
specification_version: 4
|
117
137
|
summary: Ruby tools supporting ingest into the Duke Digital Repository.
|
118
138
|
test_files:
|
139
|
+
- spec/fixtures/files/bad-checksums-sha1.txt
|
140
|
+
- spec/fixtures/files/good-checksums-sha1.txt
|
141
|
+
- spec/fixtures/files/manifest-sha1-collection-title.txt
|
119
142
|
- spec/fixtures/files/manifest-sha1.txt
|
120
143
|
- spec/integration/dpc_folder_converter_spec.rb
|
121
144
|
- spec/spec_helper.rb
|
145
|
+
- spec/unit/checksum_file_spec.rb
|