ddr-ingesttools 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5171561cdd0df54c62ed927f82edbc2f72e07b2a
4
- data.tar.gz: 46e0b9ad341186c05f7558b8b622b5be4b919021
3
+ metadata.gz: 4c8a03cc7a443dbca08bd1c8cb86d6cc70f18050
4
+ data.tar.gz: 3f1d806ba1c1b8ccde507157372e30c93c1cd0cd
5
5
  SHA512:
6
- metadata.gz: bee3fb79556ed7e262be0caa783514181db1abf7384e32946567a83ec2e21e41300cad0c972de24772ddf019a93543f0b2b70ab0d8344d6dc2e1fb1d133e0152
7
- data.tar.gz: a28b353c14b0dc32ae08687dc925d2cf37b72e35b0f10d36e30f943ce23a63e3850a98f4db1707e35d155e0b348da38706b2cbfd5ab1979229284396dea91c6f
6
+ metadata.gz: 361c4c511a7bb10f03087412aa700739e188161f9b6650e0c6eb295560bfa1a5ff9d23a8621db46d552e418bae0ff50bd08b7dbda3fabc5263becb3fc7644c16
7
+ data.tar.gz: a14ed57c5cbb449f02d1c29c1f4cf5df37960152c7a2c8d2717eea2363ae9de05bdf8e2e7e5478a0e0048b3b82d73e7328fcb00ef0a5b199bc49640c93aa9cd7
@@ -3,8 +3,9 @@
3
3
  require 'ddr/ingesttools'
4
4
  require 'optparse'
5
5
 
6
- # Parse command line arguments
7
6
  options = {}
7
+
8
+ # Parse command line arguments
8
9
  parser = OptionParser.new do |opts|
9
10
  opts.banner = 'Usage: convert_dpc_folder.rb [options]'
10
11
 
@@ -20,6 +21,20 @@ parser = OptionParser.new do |opts|
20
21
  'to use as the local ID of the item of which that file is a component') do |v|
21
22
  options[:item_id_length] = v
22
23
  end
24
+
25
+ opts.on('-c', '--checksums [CHECKSUM_FILE]', 'External checksum file') do |v|
26
+ options[:checksums] = v
27
+ end
28
+
29
+ opts.on('--[no-]copy_files', 'Copy files to target location instead of using a symlink') do |v|
30
+ options[:copy_files] = v
31
+ end
32
+
33
+ opts.on('--collection_title [TITLE]', 'Title for collection',
34
+ 'required if intending to create a collection-creating Standard Ingest') do |v|
35
+ options[:collection_title] = v
36
+ end
37
+
23
38
  end
24
39
 
25
40
  begin
@@ -35,6 +50,7 @@ rescue OptionParser::InvalidOption, OptionParser::MissingArgument
35
50
  exit(false)
36
51
  end
37
52
 
38
- converter_args = [ options[:source], options[:target], options[:item_id_length] ]
39
- converter = Ddr::IngestTools::DpcFolderConverter::Converter.new(*converter_args)
40
- converter.call
53
+ converter = Ddr::IngestTools::DpcFolderConverter::Converter.new(options)
54
+ results = converter.call
55
+ puts I18n.translate('errors.count', { count: results.errors.size })
56
+ results.errors.each { |e| puts e }
@@ -0,0 +1,7 @@
1
+ en:
2
+ errors:
3
+ count: "%{count} error(s)"
4
+ checksum_mismatch: |
5
+ ***** Checksum mismatch:
6
+ %{c1} %{f1}
7
+ %{c2} %{f2}
@@ -19,6 +19,7 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_dependency "bagit", "~> 0.4"
22
+ spec.add_dependency "i18n", "~> 0.8"
22
23
 
23
24
  spec.add_development_dependency "bundler", "~> 1.14"
24
25
  spec.add_development_dependency "rake", "~> 12.0"
@@ -1,8 +1,13 @@
1
1
  require_relative 'ingesttools/version'
2
2
  require_relative 'ingesttools/dpc_folder_converter'
3
+ require_relative 'ingesttools/checksum_file'
4
+
5
+ require 'i18n'
3
6
 
# Top-level namespace shared by the Ddr gems.
module Ddr
  # Namespace for the ingest tools. Loading this file registers the gem's
  # bundled locale files (config/locales/*.yml) with I18n.
  module IngestTools
    # Resolve the locale directory relative to this source file, not the
    # process working directory, so translations are found no matter where the
    # gem is run from. Append (rather than assign) so locale paths that other
    # code has already registered with I18n are preserved.
    I18n.load_path += Dir[File.expand_path('../../config/locales/*.yml', __dir__)]
  end
end
@@ -0,0 +1,28 @@
# Declared with nested modules (instead of `module Ddr::IngestTools`) so this
# file loads even when nothing has defined `Ddr` yet.
module Ddr
  module IngestTools
    # Read-only lookup over a checksum manifest in which each line holds a
    # digest, whitespace, and then a file path (e.g. a BagIt manifest-sha1.txt).
    class ChecksumFile
      # Hash mapping file path (String) => digest (String).
      attr_reader :digests

      # @param checksum_filepath [String] path of the checksum file to parse
      def initialize(checksum_filepath)
        @digests = digest_hash(checksum_filepath)
      end

      # @param filepath [String] path exactly as it appears in the checksum file
      # @return [String, nil] the recorded digest, or nil when the path is not listed
      def digest(filepath)
        digests[filepath]
      end

      private

      # Parses the checksum file into a path => digest Hash.
      #
      # Only the first run of whitespace separates the digest from the path, so
      # paths containing spaces are kept whole. Blank lines and lines that lack
      # a path are ignored rather than being stored under a nil key.
      def digest_hash(checksum_filepath)
        File.foreach(checksum_filepath).each_with_object({}) do |line, map|
          digest, path = line.strip.split(' ', 2)
          next unless digest && path

          map[path] = digest
        end
      end
    end
  end
end
@@ -6,54 +6,70 @@ require 'find'
6
6
  module Ddr::IngestTools::DpcFolderConverter
7
7
  class Converter
8
8
 
9
- METADATA_HEADERS = [ 'path', 'local_id' ]
9
+ INTERMEDIATE_FILES_DIRNAME = 'intermediate_files'
10
+ DPC_TARGETS_DIRNAME = 'targets'
11
+ SIF_TARGETS_DIRNAME = 'dpc_targets'
12
+ SIF_METADATA_FILENAME = 'metadata.txt'
13
+ SIF_MANIFEST_SHA1_FILENAME = 'manifest-sha1.txt'
10
14
 
11
- attr_reader :source, :target, :data_dir, :item_id_length
12
- attr_accessor :local_id_metadata
15
+ Results = Struct.new(:file_map, :errors)
13
16
 
14
- def initialize(source, target, item_id_length)
17
+ attr_reader :source, :target, :data_dir, :item_id_length, :checksums, :copy_files, :collection_title,
18
+ :metadata_headers
19
+ attr_accessor :errors, :file_map, :local_id_metadata, :results
20
+
21
+ def initialize(source:, target:, item_id_length:, checksums: nil, copy_files: false, collection_title: nil)
15
22
  @source = source
16
23
  @target = target
17
- @data_dir = File.join(target, 'data')
18
24
  @item_id_length = item_id_length
19
- @local_id_metadata = {}
25
+ @checksums = checksums
26
+ @copy_files = copy_files
27
+ @collection_title = collection_title
28
+ @metadata_headers = [ 'path', 'local_id' ]
29
+ @metadata_headers << 'title' unless collection_title.nil?
20
30
  end
21
31
 
22
32
  def call
23
- FileUtils.mkdir_p data_dir
24
- find_component_files(source).each { |file| handle_component(file) }
25
- find_target_files(source).each { |file| handle_target(file) }
33
+ setup
34
+ scan_files(source)
26
35
  output_metadata
27
36
  bagitup
37
+ validate_checksums if checksums
38
+ Results.new(file_map, errors)
28
39
  end
29
40
 
30
41
  private
31
42
 
32
- def included_extensions
33
- Ddr::IngestTools::DpcFolderConverter.config[:included_extensions]
43
+ def setup
44
+ @data_dir = File.join(target, 'data')
45
+ @errors = []
46
+ @file_map = {}
47
+ @local_id_metadata = {}
48
+ FileUtils.mkdir_p data_dir
34
49
  end
35
50
 
36
- def find_component_files(dir)
37
- files = []
38
- Find.find(dir) do |path|
39
- Find.prune if path.include?('targets')
40
- Find.prune if path.include?('intermediate_files')
41
- next unless File.file?(path)
42
- next unless included_extensions.include?(File.extname(path))
43
- files << path
44
- end
45
- files
51
+ def included_extensions
52
+ Ddr::IngestTools::DpcFolderConverter.config[:included_extensions]
46
53
  end
47
54
 
48
- def find_target_files(dir)
49
- files = []
50
- Find.find(dir) do |path|
51
- next unless path.include?('targets')
52
- next unless File.file?(path)
53
- next unless included_extensions.include?(File.extname(path))
54
- files << path
55
+ def scan_files(dirpath, file_handler='handle_component'.to_sym)
56
+ Dir.foreach(dirpath).each do |entry|
57
+ next if [ '.', '..' ].include?(entry)
58
+ path = File.join(dirpath, entry)
59
+ if File.directory?(path)
60
+ if entry == DPC_TARGETS_DIRNAME
61
+ scan_files(path, :handle_target)
62
+ elsif entry == INTERMEDIATE_FILES_DIRNAME
63
+ scan_files(path, :handle_intermediate_file)
64
+ else
65
+ scan_files(path, file_handler)
66
+ end
67
+ else
68
+ if included_extensions.include?(File.extname(entry))
69
+ self.send(file_handler, path)
70
+ end
71
+ end
55
72
  end
56
- files
57
73
  end
58
74
 
59
75
  def handle_component(file)
@@ -61,26 +77,45 @@ module Ddr::IngestTools::DpcFolderConverter
61
77
  item_id = item_id_length == 0 ? base : base[0, item_id_length]
62
78
  FileUtils.mkdir_p(File.join(data_dir, item_id))
63
79
  local_id_metadata[item_id] = item_id
64
- FileUtils.cp file, File.join(data_dir, item_id)
80
+ handle_file(file, item_id)
65
81
  local_id_metadata[File.join(item_id, File.basename(file))] = base
66
82
  end
67
83
 
84
+ def handle_intermediate_file(file)
85
+ FileUtils.mkdir_p(File.join(data_dir, INTERMEDIATE_FILES_DIRNAME))
86
+ handle_file(file, INTERMEDIATE_FILES_DIRNAME)
87
+ end
88
+
68
89
  def handle_target(file)
69
90
  base = File.basename(file, File.extname(file))
70
- FileUtils.mkdir_p(File.join(data_dir, 'dpc_targets'))
71
- FileUtils.cp file, File.join(data_dir, 'dpc_targets')
72
- local_id_metadata[File.join('dpc_targets', File.basename(file))] = base
91
+ FileUtils.mkdir_p(File.join(data_dir, SIF_TARGETS_DIRNAME))
92
+ handle_file(file, SIF_TARGETS_DIRNAME)
93
+ local_id_metadata[File.join(SIF_TARGETS_DIRNAME, File.basename(file))] = base
94
+ end
95
+
# Places +file+ into the +folder_name+ subdirectory of the data directory and
# records where it ended up.
#
# file        - path of the source file, as discovered while scanning
# folder_name - name of the (already created) subdirectory under data_dir
#
# The file is copied when copy_files is truthy; otherwise a symlink is created.
# file_map is keyed by the source path exactly as given, so later lookups by
# that path are unaffected by the path expansion done for the symlink.
def handle_file(file, folder_name)
  destination_dir = File.join(data_dir, folder_name)
  if copy_files
    FileUtils.cp file, destination_dir
  else
    # A symlink's target is resolved relative to the directory holding the
    # link, so a relative source path would produce a dangling link. Link to
    # the absolute path instead.
    FileUtils.ln_s File.expand_path(file), destination_dir
  end
  file_map[file] = File.join(destination_dir, File.basename(file))
end
74
104
 
75
105
  def output_metadata
76
106
  metadata_rows = []
107
+ if collection_title
108
+ metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, collection_title ])
109
+ end
77
110
  local_id_metadata.each_pair do |k,v|
78
- metadata_rows << CSV::Row.new(METADATA_HEADERS, [ k, v ])
111
+ row_elements = [ k, v ]
112
+ row_elements << nil if collection_title
113
+ metadata_rows << CSV::Row.new(metadata_headers, row_elements)
79
114
  end
80
- File.open(File.join(data_dir, 'metadata.txt'), 'w') do |file|
81
- file.puts(METADATA_HEADERS.join(Ddr::IngestTools::DpcFolderConverter.config[:csv_options][:col_sep]))
115
+ File.open(File.join(data_dir, SIF_METADATA_FILENAME), 'w') do |file|
116
+ file.puts(metadata_headers.join(Ddr::IngestTools::DpcFolderConverter.config[:csv_options][:col_sep]))
82
117
  metadata_rows.each do |row|
83
- file.puts(row.to_csv(Ddr::IngestTools::DpcFolderConverter.config[:csv_options]).strip)
118
+ file.puts(row.to_csv(Ddr::IngestTools::DpcFolderConverter.config[:csv_options]))
84
119
  end
85
120
  end
86
121
  end
@@ -89,5 +124,19 @@ module Ddr::IngestTools::DpcFolderConverter
89
124
  bag = BagIt::Bag.new(target)
90
125
  bag.manifest!
91
126
  end
127
+
# Compares the externally supplied checksums with the digests recorded in the
# generated SHA-1 manifest and appends one translated 'errors.checksum_mismatch'
# message to errors for every file whose two digests differ. A file missing
# from either checksum source has a nil digest on that side and is therefore
# reported as a mismatch unless it is missing from both.
def validate_checksums
  external_checksums = Ddr::IngestTools::ChecksumFile.new(checksums)
  sif_manifest = Ddr::IngestTools::ChecksumFile.new(File.join(target, SIF_MANIFEST_SHA1_FILENAME))
  # File.join(target, '') yields the target with exactly one trailing
  # separator, whether or not the caller supplied one. Interpolating
  # "#{target}/" produced a doubled separator for a target such as "/tmp/sif/",
  # which never matched and caused every file to be reported as a mismatch.
  target_prefix = File.join(target, '')
  file_map.each do |source_path, target_path|
    external_checksum = external_checksums.digest(source_path)
    # Manifest entries are relative to the target directory; strip the prefix
    # only when it is actually at the start of the path.
    manifest_path =
      if target_path.start_with?(target_prefix)
        target_path[target_prefix.length..-1]
      else
        target_path
      end
    sif_checksum = sif_manifest.digest(manifest_path)
    next if external_checksum == sif_checksum

    # Interpolation values are passed as keywords rather than a positional
    # hash; NOTE(review): presumably needed if i18n is later upgraded to a
    # keyword-argument signature - confirm.
    errors << I18n.translate('errors.checksum_mismatch',
                             c1: external_checksum, f1: source_path,
                             c2: sif_checksum, f2: target_path)
  end
end
92
141
  end
93
142
  end
@@ -1,5 +1,5 @@
1
1
  module Ddr
2
2
  module IngestTools
3
- VERSION = '0.1.0'
3
+ VERSION = '0.2.0'
4
4
  end
5
5
  end
@@ -0,0 +1,11 @@
1
+ 3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
2
+ 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
3
+ d0a2f2482783ae3c83d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
4
+ 38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
5
+ 1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
6
+ c227abc095d3b758ab1c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
7
+ 541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
8
+ 49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
9
+ 260b3c2d20a1726de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
10
+ a08c4d5a76d1b8735587be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
11
+ 40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
@@ -0,0 +1,11 @@
1
+ 3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
2
+ 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
3
+ d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
4
+ 38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
5
+ 1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
6
+ c227abc095d3b758051c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
7
+ 541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
8
+ 49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
9
+ 260b3c2d20a7126de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
10
+ a08c4d5a76d1b8734487be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
11
+ 40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
@@ -0,0 +1,10 @@
1
+ 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f data/abc001/abc001001.tif
2
+ d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad data/abc001/abc001002.tif
3
+ 38ee72ab417192589f3a54ef1016131c7d7e9e4e data/abc002/abc002001.tif
4
+ c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
5
+ 541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
6
+ a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
7
+ 40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
8
+ 49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
9
+ 260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
10
+ 8a7878863fd183436f50060343d3757747772d9f data/metadata.txt
@@ -5,4 +5,6 @@ c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
5
5
  541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
6
6
  a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
7
7
  40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
8
+ 49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
9
+ 260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
8
10
  913699468893882d1dec463f3df1a405c7f32784 data/metadata.txt
@@ -2,10 +2,49 @@ module Ddr::IngestTools::DpcFolderConverter
2
2
 
3
3
  RSpec.describe Converter do
4
4
 
5
+ shared_examples 'a conversion to standard ingest format' do
6
+ subject { described_class.new(converter_args) }
7
+ it 'produces the correct standard ingest format directory' do
8
+ results = subject.call
9
+ # Target directory contains all the expected files and only the expected files
10
+ expect(Array(Find.find(target_directory))).to match_array(expected_files)
11
+ # Target content files are same as source content files
12
+ expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001001.tif'),
13
+ File.join(source_directory, 'abc001001.tif'))).to be true
14
+ expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001002.tif'),
15
+ File.join(source_directory, 'abc001002.tif'))).to be true
16
+ expect(FileUtils.compare_file(File.join(data_directory, 'abc002', 'abc002001.tif'),
17
+ File.join(source_directory, 'abc002001.tif'))).to be true
18
+ expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003001.wav'),
19
+ File.join(source_directory, 'g', 'abc003001.wav'))).to be true
20
+ expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003002.wav'),
21
+ File.join(source_directory, 'g', 'abc003002.wav'))).to be true
22
+ expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T001.tif'),
23
+ File.join(source_directory, 'targets', 'T001.tif'))).to be true
24
+ expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T002.tif'),
25
+ File.join(source_directory, 'targets', 'T002.tif'))).to be true
26
+ expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
27
+ File.join(source_directory, 'intermediate_files', 'abc001001.jpg'))).to be true
28
+ expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
29
+ File.join(source_directory, 'intermediate_files', 'abc002001.jpg'))).to be true
30
+ # Generated metadata file contains the expected contents
31
+ metadata_lines = File.readlines(File.join(data_directory, 'metadata.txt')).map(&:chomp)
32
+ expect(metadata_lines).to match_array(expected_metadata)
33
+ # Generated manifest contains the expected contents (ignoring line order)
34
+ generated_manifest = File.readlines(File.join(File.join(target_directory, 'manifest-sha1.txt'))).sort
35
+ expect(generated_manifest).to match_array(expected_manifest)
36
+ # Conversion process produces the expected errors
37
+ expect(results.errors).to match_array(checksum_errors)
38
+ end
39
+ end
40
+
5
41
  let(:source_directory) { Dir.mktmpdir('dpc') }
6
42
  let(:target_directory) { Dir.mktmpdir('sif') }
7
43
  let(:data_directory) { File.join(target_directory, 'data') }
8
44
  let(:item_id_length) { 6 }
45
+ let(:checksums_directory) { Dir.mktmpdir('checksums') }
46
+ let(:checksums) { File.join(checksums_directory, 'checksums-sha1.txt') }
47
+ let(:converter_args) { { source: source_directory, target: target_directory, item_id_length: item_id_length } }
9
48
  let(:expected_files) { [
10
49
  target_directory,
11
50
  File.join(target_directory, 'bag-info.txt'),
@@ -22,6 +61,9 @@ module Ddr::IngestTools::DpcFolderConverter
22
61
  File.join(data_directory, 'dpc_targets'),
23
62
  File.join(data_directory, 'dpc_targets', 'T001.tif'),
24
63
  File.join(data_directory, 'dpc_targets', 'T002.tif'),
64
+ File.join(data_directory, 'intermediate_files'),
65
+ File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
66
+ File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
25
67
  File.join(data_directory, 'metadata.txt'),
26
68
  File.join(target_directory, 'manifest-md5.txt'),
27
69
  File.join(target_directory, 'manifest-sha1.txt'),
@@ -41,8 +83,9 @@ module Ddr::IngestTools::DpcFolderConverter
41
83
  "dpc_targets/T001.tif\tT001",
42
84
  "dpc_targets/T002.tif\tT002"
43
85
  ] }
44
-
45
- subject { Converter.new(source_directory, target_directory, item_id_length) }
86
+ let(:expected_manifest) do
87
+ File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt')).sort
88
+ end
46
89
 
47
90
  before do
48
91
  File.open(File.join(source_directory, 'Thumbs.db'), 'w') { |f| f.write('Thumbs') }
@@ -53,35 +96,102 @@ module Ddr::IngestTools::DpcFolderConverter
53
96
  Dir.mkdir(File.join(source_directory,'g'))
54
97
  File.open(File.join(source_directory, 'g', 'abc003001.wav'), 'w') { |f| f.write('abc003001') }
55
98
  File.open(File.join(source_directory, 'g', 'abc003002.wav'), 'w') { |f| f.write('abc003002') }
56
- Dir.mkdir(File.join(source_directory,'targets'))
99
+ Dir.mkdir(File.join(source_directory, 'intermediate_files'))
100
+ File.open(File.join(source_directory, 'intermediate_files', 'abc001001.jpg'), 'w') { |f| f.write('abc001001 jpg')}
101
+ File.open(File.join(source_directory, 'intermediate_files', 'abc002001.jpg'), 'w') { |f| f.write('abc002001 jpg')}
102
+ Dir.mkdir(File.join(source_directory, 'targets'))
57
103
  File.open(File.join(source_directory, 'targets', 'T001.tif'), 'w') { |f| f.write('T001') }
58
104
  File.open(File.join(source_directory, 'targets', 'T002.tif'), 'w') { |f| f.write('T002') }
59
105
  end
60
106
 
61
- it 'produces the correct standard ingest format directory' do
62
- subject.call
63
- expect(Array(Find.find(target_directory))).to match_array(expected_files)
64
- expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001001.tif'),
65
- File.join(source_directory, 'abc001001.tif'))).to be true
66
- expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001002.tif'),
67
- File.join(source_directory, 'abc001002.tif'))).to be true
68
- expect(FileUtils.compare_file(File.join(data_directory, 'abc002', 'abc002001.tif'),
69
- File.join(source_directory, 'abc002001.tif'))).to be true
70
- expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003001.wav'),
71
- File.join(source_directory, 'g', 'abc003001.wav'))).to be true
72
- expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003002.wav'),
73
- File.join(source_directory, 'g', 'abc003002.wav'))).to be true
74
- expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T001.tif'),
75
- File.join(source_directory, 'targets', 'T001.tif'))).to be true
76
- expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T002.tif'),
77
- File.join(source_directory, 'targets', 'T002.tif'))).to be true
78
- metadata_lines = File.readlines(File.join(data_directory, 'metadata.txt')).map(&:strip)
79
- expect(metadata_lines).to match_array(expected_metadata)
80
- # expect(FileUtils.compare_file(File.join(target_directory, 'manifest-sha1.txt'),
81
- # File.join(File.dirname(__FILE__), '..', 'fixtures', 'files', 'manifest-sha1.txt'))).to be true
82
- expect(FileUtils.compare_file(File.join(target_directory, 'manifest-sha1.txt'),
83
- File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt'))).to be true
107
+ describe 'external checksum files' do
108
+ describe 'external checksum file' do
109
+ before do
110
+ File.open(checksums, 'w') do |f|
111
+ f << File.open(checksum_file_template).read.gsub('SOURCE_DIRECTORY', source_directory)
112
+ end
113
+ converter_args[:checksums] = checksums
114
+ end
115
+ describe 'mismatch' do
116
+ let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'bad-checksums-sha1.txt') }
117
+ let(:checksum_errors) {
118
+ [ I18n.translate('errors.checksum_mismatch', { c1: 'd0a2f2482783ae3c83d06f3cdeaa1a306cc043ad',
119
+ f1: File.join(source_directory, 'abc001002.tif'),
120
+ c2: 'd0a2f2482783ae3c38d06f3cdeaa1a306cc043ad',
121
+ f2: File.join(target_directory, 'data/abc001/abc001002.tif') }),
122
+ I18n.translate('errors.checksum_mismatch', { c1: 'c227abc095d3b758ab1c1c1c9e922494b6b6e0b0',
123
+ f1: File.join(source_directory, 'g/abc003001.wav'),
124
+ c2: 'c227abc095d3b758051c1c1c9e922494b6b6e0b0',
125
+ f2: File.join(target_directory, 'data/abc003/abc003001.wav') }),
126
+ I18n.translate('errors.checksum_mismatch', { c1: '260b3c2d20a1726de96671d29f73ba09d13b61ba',
127
+ f1: File.join(source_directory, 'intermediate_files/abc002001.jpg'),
128
+ c2: '260b3c2d20a7126de96671d29f73ba09d13b61ba',
129
+ f2: File.join(target_directory, 'data/intermediate_files/abc002001.jpg') }),
130
+ I18n.translate('errors.checksum_mismatch', { c1: 'a08c4d5a76d1b8735587be6ffcba66a9baf475c4',
131
+ f1: File.join(source_directory, 'targets/T001.tif'),
132
+ c2: 'a08c4d5a76d1b8734487be6ffcba66a9baf475c4',
133
+ f2: File.join(target_directory, 'data/dpc_targets/T001.tif') })
134
+ ]
135
+ }
136
+ describe 'files are copied' do
137
+ before { converter_args[:copy_files] = true }
138
+ it_behaves_like 'a conversion to standard ingest format'
139
+ end
140
+ describe 'files are not copied' do
141
+ before { converter_args[:copy_files] = false }
142
+ it_behaves_like 'a conversion to standard ingest format'
143
+ end
144
+ end
145
+ describe 'no mismatch' do
146
+ let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'good-checksums-sha1.txt') }
147
+ let(:checksum_errors) { [] }
148
+ describe 'files are copied' do
149
+ before { converter_args[:copy_files] = true }
150
+ it_behaves_like 'a conversion to standard ingest format'
151
+ end
152
+ describe 'files are not copied' do
153
+ before { converter_args[:copy_files] = false }
154
+ it_behaves_like 'a conversion to standard ingest format'
155
+ end
156
+ end
157
+ end
158
+
159
+ describe 'no external checksum file' do
160
+ let(:checksum_errors) { [] }
161
+ describe 'files are copied' do
162
+ before { converter_args[:copy_files] = true }
163
+ it_behaves_like 'a conversion to standard ingest format'
164
+ end
165
+ describe 'files are not copied' do
166
+ before { converter_args[:copy_files] = false }
167
+ it_behaves_like 'a conversion to standard ingest format'
168
+ end
169
+ end
84
170
  end
85
171
 
172
+ describe 'collection titles' do
173
+ let(:checksum_errors) { [] }
174
+ describe 'collection title provided' do
175
+ let(:expected_metadata) { [
176
+ "path\tlocal_id\ttitle",
177
+ "\t\tTest Collection",
178
+ "abc001\tabc001\t",
179
+ "abc002\tabc002\t",
180
+ "abc003\tabc003\t",
181
+ "abc001/abc001001.tif\tabc001001\t",
182
+ "abc001/abc001002.tif\tabc001002\t",
183
+ "abc002/abc002001.tif\tabc002001\t",
184
+ "abc003/abc003001.wav\tabc003001\t",
185
+ "abc003/abc003002.wav\tabc003002\t",
186
+ "dpc_targets/T001.tif\tT001\t",
187
+ "dpc_targets/T002.tif\tT002\t"
188
+ ] }
189
+ let(:expected_manifest) do
190
+ File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1-collection-title.txt')).sort
191
+ end
192
+ before { converter_args[:collection_title] = 'Test Collection' }
193
+ it_behaves_like 'a conversion to standard ingest format'
194
+ end
195
+ end
86
196
  end
87
197
  end
data/spec/spec_helper.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'ddr/ingesttools'
2
+ require 'i18n'
2
3
 
3
4
  # This file was generated by the `rspec --init` command. Conventionally, all
4
5
  # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
@@ -0,0 +1,17 @@
1
+ module Ddr::IngestTools
2
+
3
+ RSpec.describe ChecksumFile do
4
+
5
+ subject { described_class.new(checksum_filepath) }
6
+
7
+ let(:checksum_filepath) { File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt') }
8
+
9
+ describe 'digest' do
10
+ it 'provides the requested digest' do
11
+ expect(subject.digest('data/abc001/abc001002.tif')).to eq('d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad')
12
+ expect(subject.digest('not/in/checksum/file.txt')).to be nil
13
+ end
14
+ end
15
+ end
16
+
17
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ddr-ingesttools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jim Coble
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-08 00:00:00.000000000 Z
11
+ date: 2017-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bagit
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0.4'
27
+ - !ruby/object:Gem::Dependency
28
+ name: i18n
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.8'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.8'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -83,14 +97,20 @@ files:
83
97
  - README.md
84
98
  - Rakefile
85
99
  - bin/convert_dpc_folder.rb
100
+ - config/locales/en.yml
86
101
  - ddr-ingesttools.gemspec
87
102
  - lib/ddr/ingesttools.rb
103
+ - lib/ddr/ingesttools/checksum_file.rb
88
104
  - lib/ddr/ingesttools/dpc_folder_converter.rb
89
105
  - lib/ddr/ingesttools/dpc_folder_converter/converter.rb
90
106
  - lib/ddr/ingesttools/version.rb
107
+ - spec/fixtures/files/bad-checksums-sha1.txt
108
+ - spec/fixtures/files/good-checksums-sha1.txt
109
+ - spec/fixtures/files/manifest-sha1-collection-title.txt
91
110
  - spec/fixtures/files/manifest-sha1.txt
92
111
  - spec/integration/dpc_folder_converter_spec.rb
93
112
  - spec/spec_helper.rb
113
+ - spec/unit/checksum_file_spec.rb
94
114
  homepage: https://github.com/duke-libraries/ddr-ingesttools
95
115
  licenses:
96
116
  - BSD-3-Clause
@@ -116,6 +136,10 @@ signing_key:
116
136
  specification_version: 4
117
137
  summary: Ruby tools supporting ingest into the Duke Digital Repository.
118
138
  test_files:
139
+ - spec/fixtures/files/bad-checksums-sha1.txt
140
+ - spec/fixtures/files/good-checksums-sha1.txt
141
+ - spec/fixtures/files/manifest-sha1-collection-title.txt
119
142
  - spec/fixtures/files/manifest-sha1.txt
120
143
  - spec/integration/dpc_folder_converter_spec.rb
121
144
  - spec/spec_helper.rb
145
+ - spec/unit/checksum_file_spec.rb