ddr-ingesttools 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/convert_dpc_folder.rb +20 -4
- data/config/locales/en.yml +7 -0
- data/ddr-ingesttools.gemspec +1 -0
- data/lib/ddr/ingesttools.rb +5 -0
- data/lib/ddr/ingesttools/checksum_file.rb +28 -0
- data/lib/ddr/ingesttools/dpc_folder_converter/converter.rb +86 -37
- data/lib/ddr/ingesttools/version.rb +1 -1
- data/spec/fixtures/files/bad-checksums-sha1.txt +11 -0
- data/spec/fixtures/files/good-checksums-sha1.txt +11 -0
- data/spec/fixtures/files/manifest-sha1-collection-title.txt +10 -0
- data/spec/fixtures/files/manifest-sha1.txt +2 -0
- data/spec/integration/dpc_folder_converter_spec.rb +136 -26
- data/spec/spec_helper.rb +1 -0
- data/spec/unit/checksum_file_spec.rb +17 -0
- metadata +26 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c8a03cc7a443dbca08bd1c8cb86d6cc70f18050
|
4
|
+
data.tar.gz: 3f1d806ba1c1b8ccde507157372e30c93c1cd0cd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 361c4c511a7bb10f03087412aa700739e188161f9b6650e0c6eb295560bfa1a5ff9d23a8621db46d552e418bae0ff50bd08b7dbda3fabc5263becb3fc7644c16
|
7
|
+
data.tar.gz: a14ed57c5cbb449f02d1c29c1f4cf5df37960152c7a2c8d2717eea2363ae9de05bdf8e2e7e5478a0e0048b3b82d73e7328fcb00ef0a5b199bc49640c93aa9cd7
|
data/bin/convert_dpc_folder.rb
CHANGED
@@ -3,8 +3,9 @@
|
|
3
3
|
require 'ddr/ingesttools'
|
4
4
|
require 'optparse'
|
5
5
|
|
6
|
-
# Parse command line arguments
|
7
6
|
options = {}
|
7
|
+
|
8
|
+
# Parse command line arguments
|
8
9
|
parser = OptionParser.new do |opts|
|
9
10
|
opts.banner = 'Usage: convert_dpc_folder.rb [options]'
|
10
11
|
|
@@ -20,6 +21,20 @@ parser = OptionParser.new do |opts|
|
|
20
21
|
'to use as the local ID of the item of which that file is a component') do |v|
|
21
22
|
options[:item_id_length] = v
|
22
23
|
end
|
24
|
+
|
25
|
+
opts.on('-c', '--checksums [CHECKSUM_FILE]', 'External checksum file') do |v|
|
26
|
+
options[:checksums] = v
|
27
|
+
end
|
28
|
+
|
29
|
+
opts.on('--[no-]copy_files', 'Copy files to target location instead of using a symlink') do |v|
|
30
|
+
options[:copy_files] = v
|
31
|
+
end
|
32
|
+
|
33
|
+
opts.on('--collection_title [TITLE]', 'Title for collection',
|
34
|
+
'required if intending to create a collection-creating Standard Ingest') do |v|
|
35
|
+
options[:collection_title] = v
|
36
|
+
end
|
37
|
+
|
23
38
|
end
|
24
39
|
|
25
40
|
begin
|
@@ -35,6 +50,7 @@ rescue OptionParser::InvalidOption, OptionParser::MissingArgument
|
|
35
50
|
exit(false)
|
36
51
|
end
|
37
52
|
|
38
|
-
|
39
|
-
|
40
|
-
|
53
|
+
converter = Ddr::IngestTools::DpcFolderConverter::Converter.new(options)
|
54
|
+
results = converter.call
|
55
|
+
puts I18n.translate('errors.count', { count: results.errors.size })
|
56
|
+
results.errors.each { |e| puts e }
|
data/ddr-ingesttools.gemspec
CHANGED
data/lib/ddr/ingesttools.rb
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
require_relative 'ingesttools/version'
|
2
2
|
require_relative 'ingesttools/dpc_folder_converter'
|
3
|
+
require_relative 'ingesttools/checksum_file'
|
4
|
+
|
5
|
+
require 'i18n'
|
3
6
|
|
4
7
|
module Ddr
  module IngestTools
    # Register the locale files shipped with this gem.
    #
    # The glob is anchored to this file's directory (lib/ddr) rather than the
    # process working directory, so the translations are found no matter where
    # the caller runs from. Paths are appended instead of assigned so that
    # locale files registered by a host application are not discarded.
    I18n.load_path += Dir[File.expand_path('../../config/locales/*.yml', __dir__)]
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Ddr
  module IngestTools
    # In-memory index of a checksum listing file.
    #
    # Each non-blank line of the file is expected to hold a digest followed by
    # whitespace and a file path, e.g.
    #
    #   59ec01f979a76b968bc579e5cd0ceb3bcf3e629f data/abc001/abc001001.tif
    #
    # The namespace is opened with nested +module+ statements so this file can
    # be loaded on its own, without relying on another file having already
    # defined Ddr::IngestTools.
    class ChecksumFile
      # @return [Hash{String => String}] file path => digest
      attr_reader :digests

      # @param checksum_filepath [String] path of the checksum listing to read
      # @raise [SystemCallError] if the file cannot be opened (e.g. Errno::ENOENT)
      def initialize(checksum_filepath)
        @digests = digest_hash(checksum_filepath)
      end

      # Look up the digest recorded for a path.
      #
      # @param filepath [String] path exactly as it appears in the checksum file
      # @return [String, nil] the digest, or nil when the path is not listed
      def digest(filepath)
        digests[filepath]
      end

      private

      # Build the path => digest hash from the checksum file.
      #
      # The line is split into at most two fields so that a path containing
      # whitespace is kept intact; lines without a path field (including blank
      # lines) are skipped rather than stored under a nil key.
      def digest_hash(checksum_filepath)
        File.foreach(checksum_filepath).each_with_object({}) do |line, index|
          digest, path = line.chomp.split(' ', 2)
          next if path.nil? || path.empty?

          index[path] = digest
        end
      end
    end
  end
end
|
@@ -6,54 +6,70 @@ require 'find'
|
|
6
6
|
module Ddr::IngestTools::DpcFolderConverter
|
7
7
|
class Converter
|
8
8
|
|
9
|
-
|
9
|
+
INTERMEDIATE_FILES_DIRNAME = 'intermediate_files'
|
10
|
+
DPC_TARGETS_DIRNAME = 'targets'
|
11
|
+
SIF_TARGETS_DIRNAME = 'dpc_targets'
|
12
|
+
SIF_METADATA_FILENAME = 'metadata.txt'
|
13
|
+
SIF_MANIFEST_SHA1_FILENAME = 'manifest-sha1.txt'
|
10
14
|
|
11
|
-
|
12
|
-
attr_accessor :local_id_metadata
|
15
|
+
Results = Struct.new(:file_map, :errors)
|
13
16
|
|
14
|
-
|
17
|
+
attr_reader :source, :target, :data_dir, :item_id_length, :checksums, :copy_files, :collection_title,
|
18
|
+
:metadata_headers
|
19
|
+
attr_accessor :errors, :file_map, :local_id_metadata, :results
|
20
|
+
|
21
|
+
def initialize(source:, target:, item_id_length:, checksums: nil, copy_files: false, collection_title: nil)
|
15
22
|
@source = source
|
16
23
|
@target = target
|
17
|
-
@data_dir = File.join(target, 'data')
|
18
24
|
@item_id_length = item_id_length
|
19
|
-
@
|
25
|
+
@checksums = checksums
|
26
|
+
@copy_files = copy_files
|
27
|
+
@collection_title = collection_title
|
28
|
+
@metadata_headers = [ 'path', 'local_id' ]
|
29
|
+
@metadata_headers << 'title' unless collection_title.nil?
|
20
30
|
end
|
21
31
|
|
22
32
|
def call
|
23
|
-
|
24
|
-
|
25
|
-
find_target_files(source).each { |file| handle_target(file) }
|
33
|
+
setup
|
34
|
+
scan_files(source)
|
26
35
|
output_metadata
|
27
36
|
bagitup
|
37
|
+
validate_checksums if checksums
|
38
|
+
Results.new(file_map, errors)
|
28
39
|
end
|
29
40
|
|
30
41
|
private
|
31
42
|
|
32
|
-
def
|
33
|
-
|
43
|
+
def setup
|
44
|
+
@data_dir = File.join(target, 'data')
|
45
|
+
@errors = []
|
46
|
+
@file_map = {}
|
47
|
+
@local_id_metadata = {}
|
48
|
+
FileUtils.mkdir_p data_dir
|
34
49
|
end
|
35
50
|
|
36
|
-
def
|
37
|
-
|
38
|
-
Find.find(dir) do |path|
|
39
|
-
Find.prune if path.include?('targets')
|
40
|
-
Find.prune if path.include?('intermediate_files')
|
41
|
-
next unless File.file?(path)
|
42
|
-
next unless included_extensions.include?(File.extname(path))
|
43
|
-
files << path
|
44
|
-
end
|
45
|
-
files
|
51
|
+
def included_extensions
|
52
|
+
Ddr::IngestTools::DpcFolderConverter.config[:included_extensions]
|
46
53
|
end
|
47
54
|
|
48
|
-
def
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
+
# Recursively walk +dirpath+ and dispatch every file with an included
# extension to a handler method.
#
# A directory named DPC_TARGETS_DIRNAME switches the handler to
# :handle_target and one named INTERMEDIATE_FILES_DIRNAME switches it to
# :handle_intermediate_file; any other directory inherits the handler that
# was in effect for its parent.
#
# Entries are visited in sorted name order so that the traversal (and
# anything the handlers record in insertion order) does not depend on the
# order in which the filesystem happens to list a directory.
#
# @param dirpath [String] directory to scan
# @param file_handler [Symbol] name of the method called with each file path
def scan_files(dirpath, file_handler = :handle_component)
  (Dir.entries(dirpath) - %w[. ..]).sort.each do |entry|
    path = File.join(dirpath, entry)
    if File.directory?(path)
      handler = case entry
                when DPC_TARGETS_DIRNAME then :handle_target
                when INTERMEDIATE_FILES_DIRNAME then :handle_intermediate_file
                else file_handler
                end
      scan_files(path, handler)
    elsif included_extensions.include?(File.extname(entry))
      # The handlers are private methods, hence send rather than public_send.
      send(file_handler, path)
    end
  end
end
|
58
74
|
|
59
75
|
def handle_component(file)
|
@@ -61,26 +77,45 @@ module Ddr::IngestTools::DpcFolderConverter
|
|
61
77
|
item_id = item_id_length == 0 ? base : base[0, item_id_length]
|
62
78
|
FileUtils.mkdir_p(File.join(data_dir, item_id))
|
63
79
|
local_id_metadata[item_id] = item_id
|
64
|
-
|
80
|
+
handle_file(file, item_id)
|
65
81
|
local_id_metadata[File.join(item_id, File.basename(file))] = base
|
66
82
|
end
|
67
83
|
|
84
|
+
def handle_intermediate_file(file)
|
85
|
+
FileUtils.mkdir_p(File.join(data_dir, INTERMEDIATE_FILES_DIRNAME))
|
86
|
+
handle_file(file, INTERMEDIATE_FILES_DIRNAME)
|
87
|
+
end
|
88
|
+
|
68
89
|
def handle_target(file)
|
69
90
|
base = File.basename(file, File.extname(file))
|
70
|
-
FileUtils.mkdir_p(File.join(data_dir,
|
71
|
-
|
72
|
-
local_id_metadata[File.join(
|
91
|
+
FileUtils.mkdir_p(File.join(data_dir, SIF_TARGETS_DIRNAME))
|
92
|
+
handle_file(file, SIF_TARGETS_DIRNAME)
|
93
|
+
local_id_metadata[File.join(SIF_TARGETS_DIRNAME, File.basename(file))] = base
|
94
|
+
end
|
95
|
+
|
96
|
+
# Place +file+ into the +folder_name+ subdirectory of the data directory,
# either as a copy (when copy_files is truthy) or as a symbolic link, and
# record the source => destination pair in file_map.
#
# The symlink is created against the absolute form of +file+: a relative
# link target is resolved relative to the directory containing the link, so
# linking a relative source path as-is would produce a dangling link.
#
# @param file [String] path of the source file
# @param folder_name [String] name of the destination folder under data_dir
# @return [String] the destination path recorded in file_map
def handle_file(file, folder_name)
  destination_dir = File.join(data_dir, folder_name)
  if copy_files
    FileUtils.cp file, destination_dir
  else
    FileUtils.ln_s File.expand_path(file), destination_dir
  end
  file_map[file] = File.join(destination_dir, File.basename(file))
end
|
74
104
|
|
75
105
|
def output_metadata
|
76
106
|
metadata_rows = []
|
107
|
+
if collection_title
|
108
|
+
metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, collection_title ])
|
109
|
+
end
|
77
110
|
local_id_metadata.each_pair do |k,v|
|
78
|
-
|
111
|
+
row_elements = [ k, v ]
|
112
|
+
row_elements << nil if collection_title
|
113
|
+
metadata_rows << CSV::Row.new(metadata_headers, row_elements)
|
79
114
|
end
|
80
|
-
File.open(File.join(data_dir,
|
81
|
-
file.puts(
|
115
|
+
File.open(File.join(data_dir, SIF_METADATA_FILENAME), 'w') do |file|
|
116
|
+
file.puts(metadata_headers.join(Ddr::IngestTools::DpcFolderConverter.config[:csv_options][:col_sep]))
|
82
117
|
metadata_rows.each do |row|
|
83
|
-
file.puts(row.to_csv(Ddr::IngestTools::DpcFolderConverter.config[:csv_options])
|
118
|
+
file.puts(row.to_csv(Ddr::IngestTools::DpcFolderConverter.config[:csv_options]))
|
84
119
|
end
|
85
120
|
end
|
86
121
|
end
|
@@ -89,5 +124,19 @@ module Ddr::IngestTools::DpcFolderConverter
|
|
89
124
|
bag = BagIt::Bag.new(target)
|
90
125
|
bag.manifest!
|
91
126
|
end
|
127
|
+
|
128
|
+
# Compare the digests in the external checksum file against the digests in
# the generated SHA1 manifest for every file recorded in file_map, appending
# a translated 'errors.checksum_mismatch' message to errors for each pair
# that differs.
#
# External digests are looked up by source path; manifest digests are looked
# up by the target path made relative to +target+. The prefix is built with
# File.join(target, '') so it ends in a separator whether or not +target+
# was given with a trailing slash, and it is only stripped from the start of
# the path.
def validate_checksums
  external_checksums = Ddr::IngestTools::ChecksumFile.new(checksums)
  sif_manifest = Ddr::IngestTools::ChecksumFile.new(File.join(target, SIF_MANIFEST_SHA1_FILENAME))
  target_prefix = File.join(target, '')
  file_map.each do |source_path, target_path|
    external_checksum = external_checksums.digest(source_path)
    manifest_path =
      if target_path.start_with?(target_prefix)
        target_path[target_prefix.length..-1]
      else
        target_path
      end
    sif_checksum = sif_manifest.digest(manifest_path)
    next if external_checksum == sif_checksum

    # Options are passed keyword-style rather than as a positional hash.
    errors << I18n.translate('errors.checksum_mismatch',
                             c1: external_checksum, f1: source_path,
                             c2: sif_checksum, f2: target_path)
  end
end
|
92
141
|
end
|
93
142
|
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
|
2
|
+
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
|
3
|
+
d0a2f2482783ae3c83d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
|
4
|
+
38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
|
5
|
+
1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
|
6
|
+
c227abc095d3b758ab1c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
|
7
|
+
541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
|
8
|
+
49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
|
9
|
+
260b3c2d20a1726de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
|
10
|
+
a08c4d5a76d1b8735587be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
|
11
|
+
40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
|
@@ -0,0 +1,11 @@
|
|
1
|
+
3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
|
2
|
+
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
|
3
|
+
d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
|
4
|
+
38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
|
5
|
+
1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
|
6
|
+
c227abc095d3b758051c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
|
7
|
+
541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
|
8
|
+
49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
|
9
|
+
260b3c2d20a7126de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
|
10
|
+
a08c4d5a76d1b8734487be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
|
11
|
+
40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
|
@@ -0,0 +1,10 @@
|
|
1
|
+
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f data/abc001/abc001001.tif
|
2
|
+
d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad data/abc001/abc001002.tif
|
3
|
+
38ee72ab417192589f3a54ef1016131c7d7e9e4e data/abc002/abc002001.tif
|
4
|
+
c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
|
5
|
+
541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
|
6
|
+
a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
|
7
|
+
40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
|
8
|
+
49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
|
9
|
+
260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
|
10
|
+
8a7878863fd183436f50060343d3757747772d9f data/metadata.txt
|
@@ -5,4 +5,6 @@ c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
|
|
5
5
|
541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
|
6
6
|
a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
|
7
7
|
40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
|
8
|
+
49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
|
9
|
+
260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
|
8
10
|
913699468893882d1dec463f3df1a405c7f32784 data/metadata.txt
|
@@ -2,10 +2,49 @@ module Ddr::IngestTools::DpcFolderConverter
|
|
2
2
|
|
3
3
|
RSpec.describe Converter do
|
4
4
|
|
5
|
+
shared_examples 'a conversion to standard ingest format' do
|
6
|
+
subject { described_class.new(converter_args) }
|
7
|
+
it 'produces the correct standard ingest format directory' do
|
8
|
+
results = subject.call
|
9
|
+
# Target directory contains all the expected files and only the expected files
|
10
|
+
expect(Array(Find.find(target_directory))).to match_array(expected_files)
|
11
|
+
# Target content files are same as source content files
|
12
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001001.tif'),
|
13
|
+
File.join(source_directory, 'abc001001.tif'))).to be true
|
14
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001002.tif'),
|
15
|
+
File.join(source_directory, 'abc001002.tif'))).to be true
|
16
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'abc002', 'abc002001.tif'),
|
17
|
+
File.join(source_directory, 'abc002001.tif'))).to be true
|
18
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003001.wav'),
|
19
|
+
File.join(source_directory, 'g', 'abc003001.wav'))).to be true
|
20
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003002.wav'),
|
21
|
+
File.join(source_directory, 'g', 'abc003002.wav'))).to be true
|
22
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T001.tif'),
|
23
|
+
File.join(source_directory, 'targets', 'T001.tif'))).to be true
|
24
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T002.tif'),
|
25
|
+
File.join(source_directory, 'targets', 'T002.tif'))).to be true
|
26
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
|
27
|
+
File.join(source_directory, 'intermediate_files', 'abc001001.jpg'))).to be true
|
28
|
+
expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
|
29
|
+
File.join(source_directory, 'intermediate_files', 'abc002001.jpg'))).to be true
|
30
|
+
# Generated metadata file contains the expected contents
|
31
|
+
metadata_lines = File.readlines(File.join(data_directory, 'metadata.txt')).map(&:chomp)
|
32
|
+
expect(metadata_lines).to match_array(expected_metadata)
|
33
|
+
# Generated manifest contains the expected contents (ignoring line order)
|
34
|
+
generated_manifest = File.readlines(File.join(File.join(target_directory, 'manifest-sha1.txt'))).sort
|
35
|
+
expect(generated_manifest).to match_array(expected_manifest)
|
36
|
+
# Conversion process produces the expected errors
|
37
|
+
expect(results.errors).to match_array(checksum_errors)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
5
41
|
let(:source_directory) { Dir.mktmpdir('dpc') }
|
6
42
|
let(:target_directory) { Dir.mktmpdir('sif') }
|
7
43
|
let(:data_directory) { File.join(target_directory, 'data') }
|
8
44
|
let(:item_id_length) { 6 }
|
45
|
+
let(:checksums_directory) { Dir.mktmpdir('checksums') }
|
46
|
+
let(:checksums) { File.join(checksums_directory, 'checksums-sha1.txt') }
|
47
|
+
let(:converter_args) { { source: source_directory, target: target_directory, item_id_length: item_id_length } }
|
9
48
|
let(:expected_files) { [
|
10
49
|
target_directory,
|
11
50
|
File.join(target_directory, 'bag-info.txt'),
|
@@ -22,6 +61,9 @@ module Ddr::IngestTools::DpcFolderConverter
|
|
22
61
|
File.join(data_directory, 'dpc_targets'),
|
23
62
|
File.join(data_directory, 'dpc_targets', 'T001.tif'),
|
24
63
|
File.join(data_directory, 'dpc_targets', 'T002.tif'),
|
64
|
+
File.join(data_directory, 'intermediate_files'),
|
65
|
+
File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
|
66
|
+
File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
|
25
67
|
File.join(data_directory, 'metadata.txt'),
|
26
68
|
File.join(target_directory, 'manifest-md5.txt'),
|
27
69
|
File.join(target_directory, 'manifest-sha1.txt'),
|
@@ -41,8 +83,9 @@ module Ddr::IngestTools::DpcFolderConverter
|
|
41
83
|
"dpc_targets/T001.tif\tT001",
|
42
84
|
"dpc_targets/T002.tif\tT002"
|
43
85
|
] }
|
44
|
-
|
45
|
-
|
86
|
+
let(:expected_manifest) do
|
87
|
+
File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt')).sort
|
88
|
+
end
|
46
89
|
|
47
90
|
before do
|
48
91
|
File.open(File.join(source_directory, 'Thumbs.db'), 'w') { |f| f.write('Thumbs') }
|
@@ -53,35 +96,102 @@ module Ddr::IngestTools::DpcFolderConverter
|
|
53
96
|
Dir.mkdir(File.join(source_directory,'g'))
|
54
97
|
File.open(File.join(source_directory, 'g', 'abc003001.wav'), 'w') { |f| f.write('abc003001') }
|
55
98
|
File.open(File.join(source_directory, 'g', 'abc003002.wav'), 'w') { |f| f.write('abc003002') }
|
56
|
-
Dir.mkdir(File.join(source_directory,'
|
99
|
+
Dir.mkdir(File.join(source_directory, 'intermediate_files'))
|
100
|
+
File.open(File.join(source_directory, 'intermediate_files', 'abc001001.jpg'), 'w') { |f| f.write('abc001001 jpg')}
|
101
|
+
File.open(File.join(source_directory, 'intermediate_files', 'abc002001.jpg'), 'w') { |f| f.write('abc002001 jpg')}
|
102
|
+
Dir.mkdir(File.join(source_directory, 'targets'))
|
57
103
|
File.open(File.join(source_directory, 'targets', 'T001.tif'), 'w') { |f| f.write('T001') }
|
58
104
|
File.open(File.join(source_directory, 'targets', 'T002.tif'), 'w') { |f| f.write('T002') }
|
59
105
|
end
|
60
106
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
107
|
+
describe 'external checksum files' do
|
108
|
+
describe 'external checksum file' do
|
109
|
+
before do
|
110
|
+
File.open(checksums, 'w') do |f|
|
111
|
+
f << File.open(checksum_file_template).read.gsub('SOURCE_DIRECTORY', source_directory)
|
112
|
+
end
|
113
|
+
converter_args[:checksums] = checksums
|
114
|
+
end
|
115
|
+
describe 'mismatch' do
|
116
|
+
let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'bad-checksums-sha1.txt') }
|
117
|
+
let(:checksum_errors) {
|
118
|
+
[ I18n.translate('errors.checksum_mismatch', { c1: 'd0a2f2482783ae3c83d06f3cdeaa1a306cc043ad',
|
119
|
+
f1: File.join(source_directory, 'abc001002.tif'),
|
120
|
+
c2: 'd0a2f2482783ae3c38d06f3cdeaa1a306cc043ad',
|
121
|
+
f2: File.join(target_directory, 'data/abc001/abc001002.tif') }),
|
122
|
+
I18n.translate('errors.checksum_mismatch', { c1: 'c227abc095d3b758ab1c1c1c9e922494b6b6e0b0',
|
123
|
+
f1: File.join(source_directory, 'g/abc003001.wav'),
|
124
|
+
c2: 'c227abc095d3b758051c1c1c9e922494b6b6e0b0',
|
125
|
+
f2: File.join(target_directory, 'data/abc003/abc003001.wav') }),
|
126
|
+
I18n.translate('errors.checksum_mismatch', { c1: '260b3c2d20a1726de96671d29f73ba09d13b61ba',
|
127
|
+
f1: File.join(source_directory, 'intermediate_files/abc002001.jpg'),
|
128
|
+
c2: '260b3c2d20a7126de96671d29f73ba09d13b61ba',
|
129
|
+
f2: File.join(target_directory, 'data/intermediate_files/abc002001.jpg') }),
|
130
|
+
I18n.translate('errors.checksum_mismatch', { c1: 'a08c4d5a76d1b8735587be6ffcba66a9baf475c4',
|
131
|
+
f1: File.join(source_directory, 'targets/T001.tif'),
|
132
|
+
c2: 'a08c4d5a76d1b8734487be6ffcba66a9baf475c4',
|
133
|
+
f2: File.join(target_directory, 'data/dpc_targets/T001.tif') })
|
134
|
+
]
|
135
|
+
}
|
136
|
+
describe 'files are copied' do
|
137
|
+
before { converter_args[:copy_files] = true }
|
138
|
+
it_behaves_like 'a conversion to standard ingest format'
|
139
|
+
end
|
140
|
+
describe 'files are not copied' do
|
141
|
+
before { converter_args[:copy_files] = false }
|
142
|
+
it_behaves_like 'a conversion to standard ingest format'
|
143
|
+
end
|
144
|
+
end
|
145
|
+
describe 'no mismatch' do
|
146
|
+
let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'good-checksums-sha1.txt') }
|
147
|
+
let(:checksum_errors) { [] }
|
148
|
+
describe 'files are copied' do
|
149
|
+
before { converter_args[:copy_files] = true }
|
150
|
+
it_behaves_like 'a conversion to standard ingest format'
|
151
|
+
end
|
152
|
+
describe 'files are not copied' do
|
153
|
+
before { converter_args[:copy_files] = false }
|
154
|
+
it_behaves_like 'a conversion to standard ingest format'
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
describe 'no external checksum file' do
|
160
|
+
let(:checksum_errors) { [] }
|
161
|
+
describe 'files are copied' do
|
162
|
+
before { converter_args[:copy_files] = true }
|
163
|
+
it_behaves_like 'a conversion to standard ingest format'
|
164
|
+
end
|
165
|
+
describe 'files are not copied' do
|
166
|
+
before { converter_args[:copy_files] = false }
|
167
|
+
it_behaves_like 'a conversion to standard ingest format'
|
168
|
+
end
|
169
|
+
end
|
84
170
|
end
|
85
171
|
|
172
|
+
describe 'collection titles' do
|
173
|
+
let(:checksum_errors) { [] }
|
174
|
+
describe 'collection title provided' do
|
175
|
+
let(:expected_metadata) { [
|
176
|
+
"path\tlocal_id\ttitle",
|
177
|
+
"\t\tTest Collection",
|
178
|
+
"abc001\tabc001\t",
|
179
|
+
"abc002\tabc002\t",
|
180
|
+
"abc003\tabc003\t",
|
181
|
+
"abc001/abc001001.tif\tabc001001\t",
|
182
|
+
"abc001/abc001002.tif\tabc001002\t",
|
183
|
+
"abc002/abc002001.tif\tabc002001\t",
|
184
|
+
"abc003/abc003001.wav\tabc003001\t",
|
185
|
+
"abc003/abc003002.wav\tabc003002\t",
|
186
|
+
"dpc_targets/T001.tif\tT001\t",
|
187
|
+
"dpc_targets/T002.tif\tT002\t"
|
188
|
+
] }
|
189
|
+
let(:expected_manifest) do
|
190
|
+
File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1-collection-title.txt')).sort
|
191
|
+
end
|
192
|
+
before { converter_args[:collection_title] = 'Test Collection' }
|
193
|
+
it_behaves_like 'a conversion to standard ingest format'
|
194
|
+
end
|
195
|
+
end
|
86
196
|
end
|
87
197
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
module Ddr::IngestTools
|
2
|
+
|
3
|
+
RSpec.describe ChecksumFile do
|
4
|
+
|
5
|
+
subject { described_class.new(checksum_filepath) }
|
6
|
+
|
7
|
+
let(:checksum_filepath) { File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt') }
|
8
|
+
|
9
|
+
describe 'digest' do
|
10
|
+
it 'provides the requested digest' do
|
11
|
+
expect(subject.digest('data/abc001/abc001002.tif')).to eq('d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad')
|
12
|
+
expect(subject.digest('not/in/checksum/file.txt')).to be nil
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ddr-ingesttools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Coble
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bagit
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0.4'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: i18n
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.8'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.8'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -83,14 +97,20 @@ files:
|
|
83
97
|
- README.md
|
84
98
|
- Rakefile
|
85
99
|
- bin/convert_dpc_folder.rb
|
100
|
+
- config/locales/en.yml
|
86
101
|
- ddr-ingesttools.gemspec
|
87
102
|
- lib/ddr/ingesttools.rb
|
103
|
+
- lib/ddr/ingesttools/checksum_file.rb
|
88
104
|
- lib/ddr/ingesttools/dpc_folder_converter.rb
|
89
105
|
- lib/ddr/ingesttools/dpc_folder_converter/converter.rb
|
90
106
|
- lib/ddr/ingesttools/version.rb
|
107
|
+
- spec/fixtures/files/bad-checksums-sha1.txt
|
108
|
+
- spec/fixtures/files/good-checksums-sha1.txt
|
109
|
+
- spec/fixtures/files/manifest-sha1-collection-title.txt
|
91
110
|
- spec/fixtures/files/manifest-sha1.txt
|
92
111
|
- spec/integration/dpc_folder_converter_spec.rb
|
93
112
|
- spec/spec_helper.rb
|
113
|
+
- spec/unit/checksum_file_spec.rb
|
94
114
|
homepage: https://github.com/duke-libraries/ddr-ingesttools
|
95
115
|
licenses:
|
96
116
|
- BSD-3-Clause
|
@@ -116,6 +136,10 @@ signing_key:
|
|
116
136
|
specification_version: 4
|
117
137
|
summary: Ruby tools supporting ingest into the Duke Digital Repository.
|
118
138
|
test_files:
|
139
|
+
- spec/fixtures/files/bad-checksums-sha1.txt
|
140
|
+
- spec/fixtures/files/good-checksums-sha1.txt
|
141
|
+
- spec/fixtures/files/manifest-sha1-collection-title.txt
|
119
142
|
- spec/fixtures/files/manifest-sha1.txt
|
120
143
|
- spec/integration/dpc_folder_converter_spec.rb
|
121
144
|
- spec/spec_helper.rb
|
145
|
+
- spec/unit/checksum_file_spec.rb
|