ddr-ingesttools 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5171561cdd0df54c62ed927f82edbc2f72e07b2a
4
- data.tar.gz: 46e0b9ad341186c05f7558b8b622b5be4b919021
3
+ metadata.gz: 4c8a03cc7a443dbca08bd1c8cb86d6cc70f18050
4
+ data.tar.gz: 3f1d806ba1c1b8ccde507157372e30c93c1cd0cd
5
5
  SHA512:
6
- metadata.gz: bee3fb79556ed7e262be0caa783514181db1abf7384e32946567a83ec2e21e41300cad0c972de24772ddf019a93543f0b2b70ab0d8344d6dc2e1fb1d133e0152
7
- data.tar.gz: a28b353c14b0dc32ae08687dc925d2cf37b72e35b0f10d36e30f943ce23a63e3850a98f4db1707e35d155e0b348da38706b2cbfd5ab1979229284396dea91c6f
6
+ metadata.gz: 361c4c511a7bb10f03087412aa700739e188161f9b6650e0c6eb295560bfa1a5ff9d23a8621db46d552e418bae0ff50bd08b7dbda3fabc5263becb3fc7644c16
7
+ data.tar.gz: a14ed57c5cbb449f02d1c29c1f4cf5df37960152c7a2c8d2717eea2363ae9de05bdf8e2e7e5478a0e0048b3b82d73e7328fcb00ef0a5b199bc49640c93aa9cd7
@@ -3,8 +3,9 @@
3
3
  require 'ddr/ingesttools'
4
4
  require 'optparse'
5
5
 
6
- # Parse command line arguments
7
6
  options = {}
7
+
8
+ # Parse command line arguments
8
9
  parser = OptionParser.new do |opts|
9
10
  opts.banner = 'Usage: convert_dpc_folder.rb [options]'
10
11
 
@@ -20,6 +21,20 @@ parser = OptionParser.new do |opts|
20
21
  'to use as the local ID of the item of which that file is a component') do |v|
21
22
  options[:item_id_length] = v
22
23
  end
24
+
25
+ opts.on('-c', '--checksums [CHECKSUM_FILE]', 'External checksum file') do |v|
26
+ options[:checksums] = v
27
+ end
28
+
29
+ opts.on('--[no-]copy_files', 'Copy files to target location instead of using a symlink') do |v|
30
+ options[:copy_files] = v
31
+ end
32
+
33
+ opts.on('--collection_title [TITLE]', 'Title for collection',
34
+ 'required if intending to create a collection-creating Standard Ingest') do |v|
35
+ options[:collection_title] = v
36
+ end
37
+
23
38
  end
24
39
 
25
40
  begin
@@ -35,6 +50,7 @@ rescue OptionParser::InvalidOption, OptionParser::MissingArgument
35
50
  exit(false)
36
51
  end
37
52
 
38
- converter_args = [ options[:source], options[:target], options[:item_id_length] ]
39
- converter = Ddr::IngestTools::DpcFolderConverter::Converter.new(*converter_args)
40
- converter.call
53
+ converter = Ddr::IngestTools::DpcFolderConverter::Converter.new(options)
54
+ results = converter.call
55
+ puts I18n.translate('errors.count', { count: results.errors.size })
56
+ results.errors.each { |e| puts e }
@@ -0,0 +1,7 @@
1
+ en:
2
+ errors:
3
+ count: "%{count} error(s)"
4
+ checksum_mismatch: |
5
+ ***** Checksum mismatch:
6
+ %{c1} %{f1}
7
+ %{c2} %{f2}
@@ -19,6 +19,7 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_dependency "bagit", "~> 0.4"
22
+ spec.add_dependency "i18n", "~> 0.8"
22
23
 
23
24
  spec.add_development_dependency "bundler", "~> 1.14"
24
25
  spec.add_development_dependency "rake", "~> 12.0"
@@ -1,8 +1,13 @@
1
1
  require_relative 'ingesttools/version'
2
2
  require_relative 'ingesttools/dpc_folder_converter'
3
+ require_relative 'ingesttools/checksum_file'
4
+
5
+ require 'i18n'
3
6
 
4
7
module Ddr
  module IngestTools

    # Register this gem's translation files with I18n.
    #
    # The glob is anchored to the gem root (two directories above this file's
    # directory) rather than to the process working directory, so the
    # translations are found no matter where the calling program is started.
    # The paths are appended so that load paths registered by other code are
    # kept.
    I18n.load_path += Dir[File.expand_path('../../config/locales/*.yml', __dir__)]

  end
end
@@ -0,0 +1,28 @@
1
module Ddr
  module IngestTools
    # Read-only lookup table built from a checksum file whose lines have the
    # form "<digest><whitespace><path>".
    class ChecksumFile

      # @return [Hash{String => String}] file path => digest
      attr_reader :digests

      # @param checksum_filepath [String] path of the checksum file to load
      def initialize(checksum_filepath)
        @digests = digest_hash(checksum_filepath)
      end

      # @param filepath [String] a path exactly as it is written in the checksum file
      # @return [String, nil] the digest recorded for +filepath+, or nil if there is none
      def digest(filepath)
        digests[filepath]
      end

      private

      # Parses the checksum file into a path => digest hash.
      #
      # Each line is split on the first run of whitespace only, so a path that
      # itself contains spaces is kept whole. Lines that do not contain both a
      # digest and a path (blank lines, for instance) are ignored.
      def digest_hash(checksum_filepath)
        File.foreach(checksum_filepath).each_with_object({}) do |line, table|
          digest, path = line.chomp.split(' ', 2)
          next if path.nil?
          table[path] = digest
        end
      end

    end
  end
end
@@ -6,54 +6,70 @@ require 'find'
6
6
  module Ddr::IngestTools::DpcFolderConverter
7
7
  class Converter
8
8
 
9
- METADATA_HEADERS = [ 'path', 'local_id' ]
9
+ INTERMEDIATE_FILES_DIRNAME = 'intermediate_files'
10
+ DPC_TARGETS_DIRNAME = 'targets'
11
+ SIF_TARGETS_DIRNAME = 'dpc_targets'
12
+ SIF_METADATA_FILENAME = 'metadata.txt'
13
+ SIF_MANIFEST_SHA1_FILENAME = 'manifest-sha1.txt'
10
14
 
11
- attr_reader :source, :target, :data_dir, :item_id_length
12
- attr_accessor :local_id_metadata
15
+ Results = Struct.new(:file_map, :errors)
13
16
 
14
- def initialize(source, target, item_id_length)
17
+ attr_reader :source, :target, :data_dir, :item_id_length, :checksums, :copy_files, :collection_title,
18
+ :metadata_headers
19
+ attr_accessor :errors, :file_map, :local_id_metadata, :results
20
+
21
+ def initialize(source:, target:, item_id_length:, checksums: nil, copy_files: false, collection_title: nil)
15
22
  @source = source
16
23
  @target = target
17
- @data_dir = File.join(target, 'data')
18
24
  @item_id_length = item_id_length
19
- @local_id_metadata = {}
25
+ @checksums = checksums
26
+ @copy_files = copy_files
27
+ @collection_title = collection_title
28
+ @metadata_headers = [ 'path', 'local_id' ]
29
+ @metadata_headers << 'title' unless collection_title.nil?
20
30
  end
21
31
 
22
32
  def call
23
- FileUtils.mkdir_p data_dir
24
- find_component_files(source).each { |file| handle_component(file) }
25
- find_target_files(source).each { |file| handle_target(file) }
33
+ setup
34
+ scan_files(source)
26
35
  output_metadata
27
36
  bagitup
37
+ validate_checksums if checksums
38
+ Results.new(file_map, errors)
28
39
  end
29
40
 
30
41
  private
31
42
 
32
- def included_extensions
33
- Ddr::IngestTools::DpcFolderConverter.config[:included_extensions]
43
+ def setup
44
+ @data_dir = File.join(target, 'data')
45
+ @errors = []
46
+ @file_map = {}
47
+ @local_id_metadata = {}
48
+ FileUtils.mkdir_p data_dir
34
49
  end
35
50
 
36
- def find_component_files(dir)
37
- files = []
38
- Find.find(dir) do |path|
39
- Find.prune if path.include?('targets')
40
- Find.prune if path.include?('intermediate_files')
41
- next unless File.file?(path)
42
- next unless included_extensions.include?(File.extname(path))
43
- files << path
44
- end
45
- files
51
+ def included_extensions
52
+ Ddr::IngestTools::DpcFolderConverter.config[:included_extensions]
46
53
  end
47
54
 
48
- def find_target_files(dir)
49
- files = []
50
- Find.find(dir) do |path|
51
- next unless path.include?('targets')
52
- next unless File.file?(path)
53
- next unless included_extensions.include?(File.extname(path))
54
- files << path
55
# Recursively walks +dirpath+ and hands every file whose extension is listed
# in +included_extensions+ to a handler method.
#
# A directory named DPC_TARGETS_DIRNAME switches the handler to
# :handle_target for everything beneath it; a directory named
# INTERMEDIATE_FILES_DIRNAME switches it to :handle_intermediate_file. Any
# other directory keeps the handler it was reached with.
#
# Entries are visited in sorted order so that repeated runs over the same
# tree call the handlers in the same sequence (the operating system does not
# guarantee any particular directory order).
#
# @param dirpath [String] directory to scan
# @param file_handler [Symbol] name of the method called with each file path
def scan_files(dirpath, file_handler = :handle_component)
  entries = Dir.entries(dirpath).reject { |entry| ['.', '..'].include?(entry) }.sort
  entries.each do |entry|
    path = File.join(dirpath, entry)
    if File.directory?(path)
      handler = case entry
                when DPC_TARGETS_DIRNAME then :handle_target
                when INTERMEDIATE_FILES_DIRNAME then :handle_intermediate_file
                else file_handler
                end
      scan_files(path, handler)
    elsif included_extensions.include?(File.extname(entry))
      # send (not public_send) because the handlers are private methods
      send(file_handler, path)
    end
  end
end
58
74
 
59
75
  def handle_component(file)
@@ -61,26 +77,45 @@ module Ddr::IngestTools::DpcFolderConverter
61
77
  item_id = item_id_length == 0 ? base : base[0, item_id_length]
62
78
  FileUtils.mkdir_p(File.join(data_dir, item_id))
63
79
  local_id_metadata[item_id] = item_id
64
- FileUtils.cp file, File.join(data_dir, item_id)
80
+ handle_file(file, item_id)
65
81
  local_id_metadata[File.join(item_id, File.basename(file))] = base
66
82
  end
67
83
 
84
+ def handle_intermediate_file(file)
85
+ FileUtils.mkdir_p(File.join(data_dir, INTERMEDIATE_FILES_DIRNAME))
86
+ handle_file(file, INTERMEDIATE_FILES_DIRNAME)
87
+ end
88
+
68
89
  def handle_target(file)
69
90
  base = File.basename(file, File.extname(file))
70
- FileUtils.mkdir_p(File.join(data_dir, 'dpc_targets'))
71
- FileUtils.cp file, File.join(data_dir, 'dpc_targets')
72
- local_id_metadata[File.join('dpc_targets', File.basename(file))] = base
91
+ FileUtils.mkdir_p(File.join(data_dir, SIF_TARGETS_DIRNAME))
92
+ handle_file(file, SIF_TARGETS_DIRNAME)
93
+ local_id_metadata[File.join(SIF_TARGETS_DIRNAME, File.basename(file))] = base
94
+ end
95
+
96
# Places +file+ in the +folder_name+ subdirectory of +data_dir+ and records
# where it went in +file_map+ (source path as given => target path).
#
# When +copy_files+ is truthy the file is copied; otherwise a symbolic link
# is created. The subdirectory must already exist.
#
# @param file [String] path of the source file
# @param folder_name [String] name of the subdirectory of data_dir to place the file in
def handle_file(file, folder_name)
  destination_dir = File.join(data_dir, folder_name)
  if copy_files
    FileUtils.cp file, destination_dir
  else
    # Link to the absolute source path: relative link text would be resolved
    # from the link's own directory, leaving a dangling symlink whenever the
    # source was given relative to the working directory.
    FileUtils.ln_s File.expand_path(file), destination_dir
  end
  file_map[file] = File.join(destination_dir, File.basename(file))
end
74
104
 
75
105
  def output_metadata
76
106
  metadata_rows = []
107
+ if collection_title
108
+ metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, collection_title ])
109
+ end
77
110
  local_id_metadata.each_pair do |k,v|
78
- metadata_rows << CSV::Row.new(METADATA_HEADERS, [ k, v ])
111
+ row_elements = [ k, v ]
112
+ row_elements << nil if collection_title
113
+ metadata_rows << CSV::Row.new(metadata_headers, row_elements)
79
114
  end
80
- File.open(File.join(data_dir, 'metadata.txt'), 'w') do |file|
81
- file.puts(METADATA_HEADERS.join(Ddr::IngestTools::DpcFolderConverter.config[:csv_options][:col_sep]))
115
+ File.open(File.join(data_dir, SIF_METADATA_FILENAME), 'w') do |file|
116
+ file.puts(metadata_headers.join(Ddr::IngestTools::DpcFolderConverter.config[:csv_options][:col_sep]))
82
117
  metadata_rows.each do |row|
83
- file.puts(row.to_csv(Ddr::IngestTools::DpcFolderConverter.config[:csv_options]).strip)
118
+ file.puts(row.to_csv(Ddr::IngestTools::DpcFolderConverter.config[:csv_options]))
84
119
  end
85
120
  end
86
121
  end
@@ -89,5 +124,19 @@ module Ddr::IngestTools::DpcFolderConverter
89
124
  bag = BagIt::Bag.new(target)
90
125
  bag.manifest!
91
126
  end
127
+
128
# Compares the externally supplied checksums with the SHA-1 manifest found in
# the target directory.
#
# For every source => target pair in +file_map+, the digest listed for the
# source path in the external checksum file is compared with the digest the
# manifest lists for the target path (expressed relative to +target+). Each
# difference appends a translated 'errors.checksum_mismatch' message to
# +errors+; a digest that is present on one side only also counts as a
# difference.
def validate_checksums
  external_checksums = Ddr::IngestTools::ChecksumFile.new(checksums)
  sif_manifest = Ddr::IngestTools::ChecksumFile.new(File.join(target, SIF_MANIFEST_SHA1_FILENAME))
  # File.join(target, '') yields the target with exactly the separator that
  # File.join used when the target paths were built, whether or not the
  # target was supplied with a trailing slash.
  target_prefix = File.join(target, '')
  file_map.each do |source_path, target_path|
    manifest_path = if target_path.start_with?(target_prefix)
                      target_path[target_prefix.length..-1]
                    else
                      target_path
                    end
    external_checksum = external_checksums.digest(source_path)
    sif_checksum = sif_manifest.digest(manifest_path)
    next if external_checksum == sif_checksum
    errors << I18n.translate('errors.checksum_mismatch',
                             c1: external_checksum, f1: source_path,
                             c2: sif_checksum, f2: target_path)
  end
end
92
141
  end
93
142
  end
@@ -1,5 +1,5 @@
1
1
  module Ddr
2
2
  module IngestTools
3
- VERSION = '0.1.0'
3
+ VERSION = '0.2.0'
4
4
  end
5
5
  end
@@ -0,0 +1,11 @@
1
+ 3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
2
+ 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
3
+ d0a2f2482783ae3c83d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
4
+ 38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
5
+ 1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
6
+ c227abc095d3b758ab1c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
7
+ 541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
8
+ 49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
9
+ 260b3c2d20a1726de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
10
+ a08c4d5a76d1b8735587be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
11
+ 40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
@@ -0,0 +1,11 @@
1
+ 3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
2
+ 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
3
+ d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
4
+ 38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
5
+ 1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
6
+ c227abc095d3b758051c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
7
+ 541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
8
+ 49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
9
+ 260b3c2d20a7126de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
10
+ a08c4d5a76d1b8734487be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
11
+ 40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
@@ -0,0 +1,10 @@
1
+ 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f data/abc001/abc001001.tif
2
+ d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad data/abc001/abc001002.tif
3
+ 38ee72ab417192589f3a54ef1016131c7d7e9e4e data/abc002/abc002001.tif
4
+ c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
5
+ 541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
6
+ a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
7
+ 40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
8
+ 49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
9
+ 260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
10
+ 8a7878863fd183436f50060343d3757747772d9f data/metadata.txt
@@ -5,4 +5,6 @@ c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
5
5
  541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
6
6
  a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
7
7
  40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
8
+ 49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
9
+ 260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
8
10
  913699468893882d1dec463f3df1a405c7f32784 data/metadata.txt
@@ -2,10 +2,49 @@ module Ddr::IngestTools::DpcFolderConverter
2
2
 
3
3
  RSpec.describe Converter do
4
4
 
5
+ shared_examples 'a conversion to standard ingest format' do
6
+ subject { described_class.new(converter_args) }
7
+ it 'produces the correct standard ingest format directory' do
8
+ results = subject.call
9
+ # Target directory contains all the expected files and only the expected files
10
+ expect(Array(Find.find(target_directory))).to match_array(expected_files)
11
+ # Target content files are same as source content files
12
+ expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001001.tif'),
13
+ File.join(source_directory, 'abc001001.tif'))).to be true
14
+ expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001002.tif'),
15
+ File.join(source_directory, 'abc001002.tif'))).to be true
16
+ expect(FileUtils.compare_file(File.join(data_directory, 'abc002', 'abc002001.tif'),
17
+ File.join(source_directory, 'abc002001.tif'))).to be true
18
+ expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003001.wav'),
19
+ File.join(source_directory, 'g', 'abc003001.wav'))).to be true
20
+ expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003002.wav'),
21
+ File.join(source_directory, 'g', 'abc003002.wav'))).to be true
22
+ expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T001.tif'),
23
+ File.join(source_directory, 'targets', 'T001.tif'))).to be true
24
+ expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T002.tif'),
25
+ File.join(source_directory, 'targets', 'T002.tif'))).to be true
26
+ expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
27
+ File.join(source_directory, 'intermediate_files', 'abc001001.jpg'))).to be true
28
+ expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
29
+ File.join(source_directory, 'intermediate_files', 'abc002001.jpg'))).to be true
30
+ # Generated metadata file contains the expected contents
31
+ metadata_lines = File.readlines(File.join(data_directory, 'metadata.txt')).map(&:chomp)
32
+ expect(metadata_lines).to match_array(expected_metadata)
33
+ # Generated manifest contains the expected contents (ignoring line order)
34
+ generated_manifest = File.readlines(File.join(File.join(target_directory, 'manifest-sha1.txt'))).sort
35
+ expect(generated_manifest).to match_array(expected_manifest)
36
+ # Conversion process produces the expected errors
37
+ expect(results.errors).to match_array(checksum_errors)
38
+ end
39
+ end
40
+
5
41
  let(:source_directory) { Dir.mktmpdir('dpc') }
6
42
  let(:target_directory) { Dir.mktmpdir('sif') }
7
43
  let(:data_directory) { File.join(target_directory, 'data') }
8
44
  let(:item_id_length) { 6 }
45
+ let(:checksums_directory) { Dir.mktmpdir('checksums') }
46
+ let(:checksums) { File.join(checksums_directory, 'checksums-sha1.txt') }
47
+ let(:converter_args) { { source: source_directory, target: target_directory, item_id_length: item_id_length } }
9
48
  let(:expected_files) { [
10
49
  target_directory,
11
50
  File.join(target_directory, 'bag-info.txt'),
@@ -22,6 +61,9 @@ module Ddr::IngestTools::DpcFolderConverter
22
61
  File.join(data_directory, 'dpc_targets'),
23
62
  File.join(data_directory, 'dpc_targets', 'T001.tif'),
24
63
  File.join(data_directory, 'dpc_targets', 'T002.tif'),
64
+ File.join(data_directory, 'intermediate_files'),
65
+ File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
66
+ File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
25
67
  File.join(data_directory, 'metadata.txt'),
26
68
  File.join(target_directory, 'manifest-md5.txt'),
27
69
  File.join(target_directory, 'manifest-sha1.txt'),
@@ -41,8 +83,9 @@ module Ddr::IngestTools::DpcFolderConverter
41
83
  "dpc_targets/T001.tif\tT001",
42
84
  "dpc_targets/T002.tif\tT002"
43
85
  ] }
44
-
45
- subject { Converter.new(source_directory, target_directory, item_id_length) }
86
+ let(:expected_manifest) do
87
+ File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt')).sort
88
+ end
46
89
 
47
90
  before do
48
91
  File.open(File.join(source_directory, 'Thumbs.db'), 'w') { |f| f.write('Thumbs') }
@@ -53,35 +96,102 @@ module Ddr::IngestTools::DpcFolderConverter
53
96
  Dir.mkdir(File.join(source_directory,'g'))
54
97
  File.open(File.join(source_directory, 'g', 'abc003001.wav'), 'w') { |f| f.write('abc003001') }
55
98
  File.open(File.join(source_directory, 'g', 'abc003002.wav'), 'w') { |f| f.write('abc003002') }
56
- Dir.mkdir(File.join(source_directory,'targets'))
99
+ Dir.mkdir(File.join(source_directory, 'intermediate_files'))
100
+ File.open(File.join(source_directory, 'intermediate_files', 'abc001001.jpg'), 'w') { |f| f.write('abc001001 jpg')}
101
+ File.open(File.join(source_directory, 'intermediate_files', 'abc002001.jpg'), 'w') { |f| f.write('abc002001 jpg')}
102
+ Dir.mkdir(File.join(source_directory, 'targets'))
57
103
  File.open(File.join(source_directory, 'targets', 'T001.tif'), 'w') { |f| f.write('T001') }
58
104
  File.open(File.join(source_directory, 'targets', 'T002.tif'), 'w') { |f| f.write('T002') }
59
105
  end
60
106
 
61
- it 'produces the correct standard ingest format directory' do
62
- subject.call
63
- expect(Array(Find.find(target_directory))).to match_array(expected_files)
64
- expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001001.tif'),
65
- File.join(source_directory, 'abc001001.tif'))).to be true
66
- expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001002.tif'),
67
- File.join(source_directory, 'abc001002.tif'))).to be true
68
- expect(FileUtils.compare_file(File.join(data_directory, 'abc002', 'abc002001.tif'),
69
- File.join(source_directory, 'abc002001.tif'))).to be true
70
- expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003001.wav'),
71
- File.join(source_directory, 'g', 'abc003001.wav'))).to be true
72
- expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003002.wav'),
73
- File.join(source_directory, 'g', 'abc003002.wav'))).to be true
74
- expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T001.tif'),
75
- File.join(source_directory, 'targets', 'T001.tif'))).to be true
76
- expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T002.tif'),
77
- File.join(source_directory, 'targets', 'T002.tif'))).to be true
78
- metadata_lines = File.readlines(File.join(data_directory, 'metadata.txt')).map(&:strip)
79
- expect(metadata_lines).to match_array(expected_metadata)
80
- # expect(FileUtils.compare_file(File.join(target_directory, 'manifest-sha1.txt'),
81
- # File.join(File.dirname(__FILE__), '..', 'fixtures', 'files', 'manifest-sha1.txt'))).to be true
82
- expect(FileUtils.compare_file(File.join(target_directory, 'manifest-sha1.txt'),
83
- File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt'))).to be true
107
+ describe 'external checksum files' do
108
+ describe 'external checksum file' do
109
+ before do
110
+ File.open(checksums, 'w') do |f|
111
+ f << File.open(checksum_file_template).read.gsub('SOURCE_DIRECTORY', source_directory)
112
+ end
113
+ converter_args[:checksums] = checksums
114
+ end
115
+ describe 'mismatch' do
116
+ let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'bad-checksums-sha1.txt') }
117
+ let(:checksum_errors) {
118
+ [ I18n.translate('errors.checksum_mismatch', { c1: 'd0a2f2482783ae3c83d06f3cdeaa1a306cc043ad',
119
+ f1: File.join(source_directory, 'abc001002.tif'),
120
+ c2: 'd0a2f2482783ae3c38d06f3cdeaa1a306cc043ad',
121
+ f2: File.join(target_directory, 'data/abc001/abc001002.tif') }),
122
+ I18n.translate('errors.checksum_mismatch', { c1: 'c227abc095d3b758ab1c1c1c9e922494b6b6e0b0',
123
+ f1: File.join(source_directory, 'g/abc003001.wav'),
124
+ c2: 'c227abc095d3b758051c1c1c9e922494b6b6e0b0',
125
+ f2: File.join(target_directory, 'data/abc003/abc003001.wav') }),
126
+ I18n.translate('errors.checksum_mismatch', { c1: '260b3c2d20a1726de96671d29f73ba09d13b61ba',
127
+ f1: File.join(source_directory, 'intermediate_files/abc002001.jpg'),
128
+ c2: '260b3c2d20a7126de96671d29f73ba09d13b61ba',
129
+ f2: File.join(target_directory, 'data/intermediate_files/abc002001.jpg') }),
130
+ I18n.translate('errors.checksum_mismatch', { c1: 'a08c4d5a76d1b8735587be6ffcba66a9baf475c4',
131
+ f1: File.join(source_directory, 'targets/T001.tif'),
132
+ c2: 'a08c4d5a76d1b8734487be6ffcba66a9baf475c4',
133
+ f2: File.join(target_directory, 'data/dpc_targets/T001.tif') })
134
+ ]
135
+ }
136
+ describe 'files are copied' do
137
+ before { converter_args[:copy_files] = true }
138
+ it_behaves_like 'a conversion to standard ingest format'
139
+ end
140
+ describe 'files are not copied' do
141
+ before { converter_args[:copy_files] = false }
142
+ it_behaves_like 'a conversion to standard ingest format'
143
+ end
144
+ end
145
+ describe 'no mismatch' do
146
+ let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'good-checksums-sha1.txt') }
147
+ let(:checksum_errors) { [] }
148
+ describe 'files are copied' do
149
+ before { converter_args[:copy_files] = true }
150
+ it_behaves_like 'a conversion to standard ingest format'
151
+ end
152
+ describe 'files are not copied' do
153
+ before { converter_args[:copy_files] = false }
154
+ it_behaves_like 'a conversion to standard ingest format'
155
+ end
156
+ end
157
+ end
158
+
159
+ describe 'no external checksum file' do
160
+ let(:checksum_errors) { [] }
161
+ describe 'files are copied' do
162
+ before { converter_args[:copy_files] = true }
163
+ it_behaves_like 'a conversion to standard ingest format'
164
+ end
165
+ describe 'files are not copied' do
166
+ before { converter_args[:copy_files] = false }
167
+ it_behaves_like 'a conversion to standard ingest format'
168
+ end
169
+ end
84
170
  end
85
171
 
172
+ describe 'collection titles' do
173
+ let(:checksum_errors) { [] }
174
+ describe 'collection title provided' do
175
+ let(:expected_metadata) { [
176
+ "path\tlocal_id\ttitle",
177
+ "\t\tTest Collection",
178
+ "abc001\tabc001\t",
179
+ "abc002\tabc002\t",
180
+ "abc003\tabc003\t",
181
+ "abc001/abc001001.tif\tabc001001\t",
182
+ "abc001/abc001002.tif\tabc001002\t",
183
+ "abc002/abc002001.tif\tabc002001\t",
184
+ "abc003/abc003001.wav\tabc003001\t",
185
+ "abc003/abc003002.wav\tabc003002\t",
186
+ "dpc_targets/T001.tif\tT001\t",
187
+ "dpc_targets/T002.tif\tT002\t"
188
+ ] }
189
+ let(:expected_manifest) do
190
+ File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1-collection-title.txt')).sort
191
+ end
192
+ before { converter_args[:collection_title] = 'Test Collection' }
193
+ it_behaves_like 'a conversion to standard ingest format'
194
+ end
195
+ end
86
196
  end
87
197
  end
data/spec/spec_helper.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'ddr/ingesttools'
2
+ require 'i18n'
2
3
 
3
4
  # This file was generated by the `rspec --init` command. Conventionally, all
4
5
  # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
@@ -0,0 +1,17 @@
1
+ module Ddr::IngestTools
2
+
3
+ RSpec.describe ChecksumFile do
4
+
5
+ subject { described_class.new(checksum_filepath) }
6
+
7
+ let(:checksum_filepath) { File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt') }
8
+
9
+ describe 'digest' do
10
+ it 'provides the requested digest' do
11
+ expect(subject.digest('data/abc001/abc001002.tif')).to eq('d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad')
12
+ expect(subject.digest('not/in/checksum/file.txt')).to be nil
13
+ end
14
+ end
15
+ end
16
+
17
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ddr-ingesttools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jim Coble
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-08 00:00:00.000000000 Z
11
+ date: 2017-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bagit
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0.4'
27
+ - !ruby/object:Gem::Dependency
28
+ name: i18n
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.8'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.8'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -83,14 +97,20 @@ files:
83
97
  - README.md
84
98
  - Rakefile
85
99
  - bin/convert_dpc_folder.rb
100
+ - config/locales/en.yml
86
101
  - ddr-ingesttools.gemspec
87
102
  - lib/ddr/ingesttools.rb
103
+ - lib/ddr/ingesttools/checksum_file.rb
88
104
  - lib/ddr/ingesttools/dpc_folder_converter.rb
89
105
  - lib/ddr/ingesttools/dpc_folder_converter/converter.rb
90
106
  - lib/ddr/ingesttools/version.rb
107
+ - spec/fixtures/files/bad-checksums-sha1.txt
108
+ - spec/fixtures/files/good-checksums-sha1.txt
109
+ - spec/fixtures/files/manifest-sha1-collection-title.txt
91
110
  - spec/fixtures/files/manifest-sha1.txt
92
111
  - spec/integration/dpc_folder_converter_spec.rb
93
112
  - spec/spec_helper.rb
113
+ - spec/unit/checksum_file_spec.rb
94
114
  homepage: https://github.com/duke-libraries/ddr-ingesttools
95
115
  licenses:
96
116
  - BSD-3-Clause
@@ -116,6 +136,10 @@ signing_key:
116
136
  specification_version: 4
117
137
  summary: Ruby tools supporting ingest into the Duke Digital Repository.
118
138
  test_files:
139
+ - spec/fixtures/files/bad-checksums-sha1.txt
140
+ - spec/fixtures/files/good-checksums-sha1.txt
141
+ - spec/fixtures/files/manifest-sha1-collection-title.txt
119
142
  - spec/fixtures/files/manifest-sha1.txt
120
143
  - spec/integration/dpc_folder_converter_spec.rb
121
144
  - spec/spec_helper.rb
145
+ - spec/unit/checksum_file_spec.rb