ddr-ingesttools 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/bin/mint_manifest_arks.rb +49 -0
- data/config/locales/en.yml +6 -6
- data/ddr-ingesttools.gemspec +3 -2
- data/lib/ddr/ingesttools.rb +1 -2
- data/lib/ddr/ingesttools/manifest_ark_minter.rb +22 -0
- data/lib/ddr/ingesttools/manifest_ark_minter/configuration.rb +13 -0
- data/lib/ddr/ingesttools/manifest_ark_minter/manifest_parser.rb +33 -0
- data/lib/ddr/ingesttools/manifest_ark_minter/manifest_updater.rb +80 -0
- data/lib/ddr/ingesttools/manifest_ark_minter/minter.rb +43 -0
- data/lib/ddr/ingesttools/version.rb +1 -1
- data/spec/fixtures/rdr_importer/configs/default.yml +3 -0
- data/spec/fixtures/rdr_importer/manifests/manifest_with_all_arks.csv +4 -0
- data/spec/fixtures/rdr_importer/manifests/manifest_with_no_arks.csv +4 -0
- data/spec/fixtures/rdr_importer/manifests/manifest_with_some_arks.csv +4 -0
- data/spec/manifest_ark_minter/manifest_parser_spec.rb +38 -0
- data/spec/manifest_ark_minter/manifest_updater_spec.rb +46 -0
- data/spec/manifest_ark_minter/minter_spec.rb +50 -0
- data/spec/spec_helper.rb +1 -0
- metadata +43 -25
- data/bin/convert_dpc_folder.rb +0 -61
- data/lib/ddr/ingesttools/checksum_file.rb +0 -28
- data/lib/ddr/ingesttools/dpc_folder_converter.rb +0 -35
- data/lib/ddr/ingesttools/dpc_folder_converter/converter.rb +0 -151
- data/spec/fixtures/files/bad-checksums-sha1.txt +0 -11
- data/spec/fixtures/files/good-checksums-sha1.txt +0 -11
- data/spec/fixtures/files/manifest-sha1-collection-title_admin_set.txt +0 -10
- data/spec/fixtures/files/manifest-sha1.txt +0 -10
- data/spec/integration/dpc_folder_converter_spec.rb +0 -201
- data/spec/unit/checksum_file_spec.rb +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef121cd2d5211caf9ba9b1beebbbc316d2b7214e
|
4
|
+
data.tar.gz: 922b0ca3eba98b5dc5d1bc07c126c842e1a093cc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 89d1fc5adacf2986a47019e0cca3da6ba2a5cf8054d72a4c1a21b82805ef0a0711abc2569f5367878aa6e8b452d8e79482d63d4ac5fdb28deef459bb3175d820
|
7
|
+
data.tar.gz: 24ebbe3f71fce16d96e032ba6c283e0431ba7f87a3495144ab218908052837699ff0687430eceeffcac7c31a5ab75fda7a677759b685f0583c9257c290482493
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'i18n'
|
4
|
+
require 'ddr/ingesttools'
|
5
|
+
require 'optparse'
|
6
|
+
|
7
|
+
options = {}
|
8
|
+
|
9
|
+
puts I18n.t('marquee')
|
10
|
+
puts I18n.t('suite.name')
|
11
|
+
puts I18n.t('manifest_ark_minter.name')
|
12
|
+
puts I18n.t('marquee')
|
13
|
+
|
14
|
+
# Parse command line arguments
|
15
|
+
parser = OptionParser.new do |opts|
|
16
|
+
opts.banner = 'Usage: mint_manifest_arks.rb [options]'
|
17
|
+
|
18
|
+
opts.on('-c', '--config CONFIG_FILE', 'Path to configuration file') do |v|
|
19
|
+
options[:config] = v
|
20
|
+
end
|
21
|
+
|
22
|
+
opts.on('-m', '--manifest MANIFEST_FILE', 'Path to manifest file for which ARKs are to be minted') do |v|
|
23
|
+
options[:manifest] = v
|
24
|
+
end
|
25
|
+
|
26
|
+
opts.on('-o', '--output OUTPUT_FILE', 'Path to which updated manifest file should be written') do |v|
|
27
|
+
options[:output] = v
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
begin
|
32
|
+
parser.parse!
|
33
|
+
if options['config'].nil?
|
34
|
+
puts I18n.t('manifest_ark_minter.use_default_config_file',
|
35
|
+
default_config_file: Ddr::IngestTools::ManifestArkMinter::ManifestUpdater::DEFAULT_CONFIG_FILE)
|
36
|
+
end
|
37
|
+
mandatory = [ :manifest, :output ]
|
38
|
+
missing = mandatory.select{ |param| options[param].nil? }
|
39
|
+
unless missing.empty?
|
40
|
+
raise OptionParser::MissingArgument.new(missing.join(', '))
|
41
|
+
end
|
42
|
+
rescue OptionParser::InvalidOption, OptionParser::MissingArgument
|
43
|
+
puts $!.to_s
|
44
|
+
puts parser
|
45
|
+
exit(false)
|
46
|
+
end
|
47
|
+
|
48
|
+
updater = Ddr::IngestTools::ManifestArkMinter::ManifestUpdater.new(options)
|
49
|
+
updater.call
|
data/config/locales/en.yml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
en:
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
2
|
+
marquee: ==================================================
|
3
|
+
suite:
|
4
|
+
name: DDR Ingest Tools
|
5
|
+
manifest_ark_minter:
|
6
|
+
name: Manifest ARK Minter
|
7
|
+
use_default_config_file: 'Will use default configuration file: %{default_config_file}'
|
data/ddr-ingesttools.gemspec
CHANGED
@@ -18,10 +18,11 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_dependency "
|
22
|
-
spec.add_dependency "i18n", "~> 0
|
21
|
+
spec.add_dependency "ezid-client", "~> 1.7"
|
22
|
+
spec.add_dependency "i18n", "~> 1.0"
|
23
23
|
|
24
24
|
spec.add_development_dependency "bundler", "~> 1.14"
|
25
|
+
spec.add_development_dependency "byebug"
|
25
26
|
spec.add_development_dependency "rake", "~> 12.0"
|
26
27
|
spec.add_development_dependency "rspec", "~> 3.0"
|
27
28
|
end
|
data/lib/ddr/ingesttools.rb
CHANGED
@@ -0,0 +1,22 @@
|
|
1
|
+
require_relative 'manifest_ark_minter/configuration'
|
2
|
+
require_relative 'manifest_ark_minter/manifest_updater'
|
3
|
+
require_relative 'manifest_ark_minter/manifest_parser'
|
4
|
+
require_relative 'manifest_ark_minter/minter'
|
5
|
+
|
6
|
+
module Ddr::IngestTools
|
7
|
+
module ManifestArkMinter
|
8
|
+
|
9
|
+
class << self
|
10
|
+
attr_writer :configuration
|
11
|
+
|
12
|
+
def configuration
|
13
|
+
@configuration ||= Configuration.new
|
14
|
+
end
|
15
|
+
|
16
|
+
def configure
|
17
|
+
yield(configuration)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module Ddr::IngestTools::ManifestArkMinter
|
4
|
+
class ManifestParser
|
5
|
+
|
6
|
+
attr_reader :manifest_file_path
|
7
|
+
|
8
|
+
ARK_HEADER = 'ark'
|
9
|
+
|
10
|
+
def initialize(manifest_file_path)
|
11
|
+
@manifest_file_path = manifest_file_path
|
12
|
+
end
|
13
|
+
|
14
|
+
def as_csv_table
|
15
|
+
@csv_table ||= CSV.read(manifest_file_path, headers: true)
|
16
|
+
end
|
17
|
+
|
18
|
+
def arks_missing?
|
19
|
+
arks.any? { |value| value.compact.empty? }
|
20
|
+
end
|
21
|
+
|
22
|
+
def headers
|
23
|
+
as_csv_table.headers
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def arks
|
29
|
+
as_csv_table.values_at(ARK_HEADER)
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'logger'
|
3
|
+
require 'tempfile'
|
4
|
+
|
5
|
+
module Ddr::IngestTools::ManifestArkMinter
|
6
|
+
class ManifestUpdater
|
7
|
+
|
8
|
+
attr_reader :config, :logger, :manifest, :output
|
9
|
+
|
10
|
+
DEFAULT_CONFIG_FILE = 'manifest_ark_minter_config.yml'
|
11
|
+
|
12
|
+
def initialize(config: DEFAULT_CONFIG_FILE, manifest:, output:, logger: nil)
|
13
|
+
@config = config
|
14
|
+
@manifest = manifest
|
15
|
+
@output = output
|
16
|
+
@logger = logger || Logger.new(STDOUT)
|
17
|
+
end
|
18
|
+
|
19
|
+
def call
|
20
|
+
configure
|
21
|
+
if needs_updating?
|
22
|
+
update
|
23
|
+
else
|
24
|
+
logger.info("Manifest file already has ARKs ... nothing to mint")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def configure
|
31
|
+
conf = YAML::load(IO.read(config))
|
32
|
+
Ddr::IngestTools::ManifestArkMinter.configure do |config|
|
33
|
+
config.ezid_default_shoulder = conf.fetch('ezid_default_shoulder')
|
34
|
+
config.ezid_password = conf.fetch('ezid_password')
|
35
|
+
config.ezid_user = conf.fetch('ezid_user')
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def update
|
40
|
+
update_manifest_table
|
41
|
+
write_updated_manifest
|
42
|
+
end
|
43
|
+
|
44
|
+
def update_manifest_table
|
45
|
+
logger.info("Minting ARKs for manifest file")
|
46
|
+
mint_counter = 0
|
47
|
+
manifest_as_csv_table.each do |row|
|
48
|
+
unless row['ark']
|
49
|
+
row['ark'] = minter.mint
|
50
|
+
mint_counter += 1
|
51
|
+
end
|
52
|
+
end
|
53
|
+
logger.info("Minted #{mint_counter} ARK(s)")
|
54
|
+
end
|
55
|
+
|
56
|
+
def manifest_as_csv_table
|
57
|
+
@manifest_as_csv_table ||= parser.as_csv_table
|
58
|
+
end
|
59
|
+
|
60
|
+
def write_updated_manifest
|
61
|
+
File.open(output, 'w') do |f|
|
62
|
+
f.write(manifest_as_csv_table.to_csv)
|
63
|
+
end
|
64
|
+
logger.info("Updated manifest file is at #{output}")
|
65
|
+
end
|
66
|
+
|
67
|
+
def needs_updating?
|
68
|
+
parser.arks_missing?
|
69
|
+
end
|
70
|
+
|
71
|
+
def minter
|
72
|
+
@minter ||= Minter.new
|
73
|
+
end
|
74
|
+
|
75
|
+
def parser
|
76
|
+
@parser ||= ManifestParser.new(manifest)
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'ezid-client'
|
2
|
+
|
3
|
+
module Ddr::IngestTools::ManifestArkMinter
|
4
|
+
class Minter
|
5
|
+
|
6
|
+
DEFAULT_EXPORT = 'no'.freeze
|
7
|
+
DEFAULT_PROFILE = 'dc'.freeze
|
8
|
+
DEFAULT_STATUS = Ezid::Status::RESERVED
|
9
|
+
|
10
|
+
def initialize
|
11
|
+
configure_ark
|
12
|
+
configure_client
|
13
|
+
end
|
14
|
+
|
15
|
+
def mint
|
16
|
+
Ezid::Identifier.mint
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
def configure_ark
|
22
|
+
Ezid::Identifier.defaults = {
|
23
|
+
export: DEFAULT_EXPORT,
|
24
|
+
profile: DEFAULT_PROFILE,
|
25
|
+
status: DEFAULT_STATUS
|
26
|
+
}
|
27
|
+
end
|
28
|
+
|
29
|
+
def configure_client
|
30
|
+
Ezid::Client.configure do |config|
|
31
|
+
config.default_shoulder = module_configuration.ezid_default_shoulder
|
32
|
+
config.password = module_configuration.ezid_password
|
33
|
+
config.user = module_configuration.ezid_user
|
34
|
+
config.logger = Logger.new(File::NULL)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def module_configuration
|
39
|
+
Ddr::IngestTools::ManifestArkMinter.configuration
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,4 @@
|
|
1
|
+
ark,visibility,title,contributor,resource_type,license,file,file,file
|
2
|
+
ark:/99999/fk4s76kg89,open,Test 1,"Smith, Sue",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv,data/data2.csv,docs/doc1.txt
|
3
|
+
ark:/99999/fk4ng5vp6m,,Test 2,"Jones, Bill",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data3.csv,docs/doc2.txt
|
4
|
+
ark:/99999/fk4hq54w3t,authenticated,Test 3,"Allen, Jane",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv
|
@@ -0,0 +1,4 @@
|
|
1
|
+
visibility,title,contributor,resource_type,license,file,file,file
|
2
|
+
open,Test 1,"Smith, Sue",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv,data/data2.csv,docs/doc1.txt
|
3
|
+
,Test 2,"Jones, Bill",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data3.csv,docs/doc2.txt
|
4
|
+
authenticated,Test 3,"Allen, Jane",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv
|
@@ -0,0 +1,4 @@
|
|
1
|
+
ark,visibility,title,contributor,resource_type,license,file,file,file
|
2
|
+
,open,Test 1,"Smith, Sue",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv,data/data2.csv,docs/doc1.txt
|
3
|
+
ark:/99999/fk4ng5vp6m,,Test 2,"Jones, Bill",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data3.csv,docs/doc2.txt
|
4
|
+
,authenticated,Test 3,"Allen, Jane",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Ddr::IngestTools::ManifestArkMinter
|
4
|
+
|
5
|
+
RSpec.describe ManifestParser do
|
6
|
+
|
7
|
+
subject { described_class.new(manifest_file) }
|
8
|
+
|
9
|
+
describe '#as_csv_table' do
|
10
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_some_arks.csv') }
|
11
|
+
specify { expect(subject.as_csv_table).to be_a CSV::Table }
|
12
|
+
end
|
13
|
+
|
14
|
+
describe '#headers' do
|
15
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_some_arks.csv') }
|
16
|
+
let(:expected_headers) { %w(ark visibility title contributor resource_type license file) }
|
17
|
+
it 'parses out the list of headers' do
|
18
|
+
expect(subject.headers).to include(*expected_headers)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe '#arks_missing?' do
|
23
|
+
describe 'no arks assigned' do
|
24
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_no_arks.csv') }
|
25
|
+
specify { expect(subject.arks_missing?).to be true }
|
26
|
+
end
|
27
|
+
describe 'some arks assigned' do
|
28
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_some_arks.csv') }
|
29
|
+
specify { expect(subject.arks_missing?).to be true }
|
30
|
+
end
|
31
|
+
describe 'all arks assigned' do
|
32
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_all_arks.csv') }
|
33
|
+
specify { expect(subject.arks_missing?).to be false }
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
module Ddr::IngestTools::ManifestArkMinter
|
5
|
+
|
6
|
+
RSpec.describe ManifestUpdater do
|
7
|
+
|
8
|
+
subject { described_class.new(config: config_file, manifest: manifest_file, output: output_file) }
|
9
|
+
|
10
|
+
let(:config_file) { File.join('spec', 'fixtures', 'rdr_importer', 'configs', 'default.yml') }
|
11
|
+
let(:output_dir) { Dir.mktmpdir }
|
12
|
+
let(:output_file) { File.join(output_dir, 'output.csv') }
|
13
|
+
|
14
|
+
after { FileUtils.remove_dir output_dir }
|
15
|
+
|
16
|
+
describe 'manifest has ARKs for all rows' do
|
17
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_all_arks.csv') }
|
18
|
+
it 'does not produce an output file' do
|
19
|
+
expect{ subject.call }.not_to change{ File.exist?(output_file) }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
describe 'manifest has ARKs for some rows' do
|
24
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_some_arks.csv') }
|
25
|
+
it 'mints ARKs for rows without them' do
|
26
|
+
expect_any_instance_of(Minter).to receive(:mint).exactly(2).times.and_call_original
|
27
|
+
subject.call
|
28
|
+
table = CSV.read(output_file, headers: true)
|
29
|
+
# expect(table['ark']).to all(match(/ark:\/99999\/fk4/))
|
30
|
+
expect(table['ark']).to match([ /ark:\/99999\/fk4/, 'ark:/99999/fk4ng5vp6m', /ark:\/99999\/fk4/ ])
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe 'manifest has ARKs for no rows' do
|
35
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_no_arks.csv') }
|
36
|
+
it 'mints ARKs for every row' do
|
37
|
+
expect_any_instance_of(Minter).to receive(:mint).exactly(3).times.and_call_original
|
38
|
+
subject.call
|
39
|
+
table = CSV.read(output_file, headers: true)
|
40
|
+
expect(table['ark']).to all(match(/ark:\/99999\/fk4/))
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Ddr::IngestTools::ManifestArkMinter
|
4
|
+
|
5
|
+
RSpec.describe Minter do
|
6
|
+
|
7
|
+
describe '#initialize' do
|
8
|
+
describe 'Ezid::Identifier defaults' do
|
9
|
+
before { described_class.new }
|
10
|
+
let(:ark_defaults) { { export: described_class::DEFAULT_EXPORT,
|
11
|
+
profile: described_class::DEFAULT_PROFILE,
|
12
|
+
status: described_class::DEFAULT_STATUS } }
|
13
|
+
it 'configures Ezid::Identifer defaults' do
|
14
|
+
expect(Ezid::Identifier.defaults).to match(ark_defaults)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
describe 'Ezid::Client configuration' do
|
18
|
+
let(:configuration) { Configuration.new }
|
19
|
+
before do
|
20
|
+
configuration.ezid_default_shoulder = 'ark:/99999/fk4'
|
21
|
+
configuration.ezid_password = 'apitest'
|
22
|
+
configuration.ezid_user = 'apitest'
|
23
|
+
allow(Ddr::IngestTools::ManifestArkMinter).to receive(:configuration) { configuration }
|
24
|
+
described_class.new
|
25
|
+
end
|
26
|
+
it 'configures the Ezid::Client' do
|
27
|
+
expect(Ezid::Client.config.default_shoulder).to eq('ark:/99999/fk4')
|
28
|
+
expect(Ezid::Client.config.password).to eq('apitest')
|
29
|
+
expect(Ezid::Client.config.user).to eq('apitest')
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe '#mint' do
|
35
|
+
let(:configuration) { Configuration.new }
|
36
|
+
before do
|
37
|
+
configuration.ezid_default_shoulder = 'ark:/99999/fk4'
|
38
|
+
configuration.ezid_password = 'apitest'
|
39
|
+
configuration.ezid_user = 'apitest'
|
40
|
+
allow(Ddr::IngestTools::ManifestArkMinter).to receive(:configuration) { configuration }
|
41
|
+
end
|
42
|
+
it 'calls Ezid::Identifier to mint an ark' do
|
43
|
+
expect(Ezid::Identifier).to receive(:mint)
|
44
|
+
subject.mint
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ddr-ingesttools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Coble
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-03-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: ezid-client
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '1.7'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '1.7'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: i18n
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '0
|
33
|
+
version: '1.0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '0
|
40
|
+
version: '1.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '1.14'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: byebug
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: rake
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -84,7 +98,7 @@ description: A collection of Ruby tools supporting ingest into the Duke Digital
|
|
84
98
|
email:
|
85
99
|
- jim.coble@duke.edu
|
86
100
|
executables:
|
87
|
-
-
|
101
|
+
- mint_manifest_arks.rb
|
88
102
|
extensions: []
|
89
103
|
extra_rdoc_files: []
|
90
104
|
files:
|
@@ -96,21 +110,24 @@ files:
|
|
96
110
|
- LICENSE.txt
|
97
111
|
- README.md
|
98
112
|
- Rakefile
|
99
|
-
- bin/
|
113
|
+
- bin/mint_manifest_arks.rb
|
100
114
|
- config/locales/en.yml
|
101
115
|
- ddr-ingesttools.gemspec
|
102
116
|
- lib/ddr/ingesttools.rb
|
103
|
-
- lib/ddr/ingesttools/
|
104
|
-
- lib/ddr/ingesttools/
|
105
|
-
- lib/ddr/ingesttools/
|
117
|
+
- lib/ddr/ingesttools/manifest_ark_minter.rb
|
118
|
+
- lib/ddr/ingesttools/manifest_ark_minter/configuration.rb
|
119
|
+
- lib/ddr/ingesttools/manifest_ark_minter/manifest_parser.rb
|
120
|
+
- lib/ddr/ingesttools/manifest_ark_minter/manifest_updater.rb
|
121
|
+
- lib/ddr/ingesttools/manifest_ark_minter/minter.rb
|
106
122
|
- lib/ddr/ingesttools/version.rb
|
107
|
-
- spec/fixtures/
|
108
|
-
- spec/fixtures/
|
109
|
-
- spec/fixtures/
|
110
|
-
- spec/fixtures/
|
111
|
-
- spec/
|
123
|
+
- spec/fixtures/rdr_importer/configs/default.yml
|
124
|
+
- spec/fixtures/rdr_importer/manifests/manifest_with_all_arks.csv
|
125
|
+
- spec/fixtures/rdr_importer/manifests/manifest_with_no_arks.csv
|
126
|
+
- spec/fixtures/rdr_importer/manifests/manifest_with_some_arks.csv
|
127
|
+
- spec/manifest_ark_minter/manifest_parser_spec.rb
|
128
|
+
- spec/manifest_ark_minter/manifest_updater_spec.rb
|
129
|
+
- spec/manifest_ark_minter/minter_spec.rb
|
112
130
|
- spec/spec_helper.rb
|
113
|
-
- spec/unit/checksum_file_spec.rb
|
114
131
|
homepage: https://github.com/duke-libraries/ddr-ingesttools
|
115
132
|
licenses:
|
116
133
|
- BSD-3-Clause
|
@@ -131,15 +148,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
131
148
|
version: '0'
|
132
149
|
requirements: []
|
133
150
|
rubyforge_project:
|
134
|
-
rubygems_version: 2.6.
|
151
|
+
rubygems_version: 2.6.14
|
135
152
|
signing_key:
|
136
153
|
specification_version: 4
|
137
154
|
summary: Ruby tools supporting ingest into the Duke Digital Repository.
|
138
155
|
test_files:
|
139
|
-
- spec/fixtures/
|
140
|
-
- spec/fixtures/
|
141
|
-
- spec/fixtures/
|
142
|
-
- spec/fixtures/
|
143
|
-
- spec/
|
156
|
+
- spec/fixtures/rdr_importer/configs/default.yml
|
157
|
+
- spec/fixtures/rdr_importer/manifests/manifest_with_all_arks.csv
|
158
|
+
- spec/fixtures/rdr_importer/manifests/manifest_with_no_arks.csv
|
159
|
+
- spec/fixtures/rdr_importer/manifests/manifest_with_some_arks.csv
|
160
|
+
- spec/manifest_ark_minter/manifest_parser_spec.rb
|
161
|
+
- spec/manifest_ark_minter/manifest_updater_spec.rb
|
162
|
+
- spec/manifest_ark_minter/minter_spec.rb
|
144
163
|
- spec/spec_helper.rb
|
145
|
-
- spec/unit/checksum_file_spec.rb
|
data/bin/convert_dpc_folder.rb
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'ddr/ingesttools'
|
4
|
-
require 'optparse'
|
5
|
-
|
6
|
-
options = {}
|
7
|
-
|
8
|
-
# Parse command line arguments
|
9
|
-
parser = OptionParser.new do |opts|
|
10
|
-
opts.banner = 'Usage: convert_dpc_folder.rb [options]'
|
11
|
-
|
12
|
-
opts.on('-s', '--source SOURCE', 'Path to DPC Folder to be converted') do |v|
|
13
|
-
options[:source] = v
|
14
|
-
end
|
15
|
-
|
16
|
-
opts.on('-t', '--target TARGET', 'Path to folder where Standard Ingest Format is to be built') do |v|
|
17
|
-
options[:target] = v
|
18
|
-
end
|
19
|
-
|
20
|
-
opts.on('-i', '--item_id_length LENGTH', Integer, 'Number of characters to copy from the beginning of each file name',
|
21
|
-
'to use as the local ID of the item of which that file is a component') do |v|
|
22
|
-
options[:item_id_length] = v
|
23
|
-
end
|
24
|
-
|
25
|
-
opts.on('-c', '--checksums [CHECKSUM_FILE]', 'External checksum file') do |v|
|
26
|
-
options[:checksums] = v
|
27
|
-
end
|
28
|
-
|
29
|
-
opts.on('--[no-]copy_files', 'Copy files to target location instead of using a symlink') do |v|
|
30
|
-
options[:copy_files] = v
|
31
|
-
end
|
32
|
-
|
33
|
-
opts.on('--collection_title [TITLE]', 'Title for collection',
|
34
|
-
'required if intending to create a collection-creating Standard Ingest') do |v|
|
35
|
-
options[:collection_title] = v
|
36
|
-
end
|
37
|
-
|
38
|
-
opts.on('--admin_set [ADMIN_SET]', 'Admin set for collection',
|
39
|
-
'required if intending to create a collection-creating Standard Ingest') do |v|
|
40
|
-
options[:admin_set] = v
|
41
|
-
end
|
42
|
-
|
43
|
-
end
|
44
|
-
|
45
|
-
begin
|
46
|
-
parser.parse!
|
47
|
-
mandatory = [ :source, :target, :item_id_length ]
|
48
|
-
missing = mandatory.select{ |param| options[param].nil? }
|
49
|
-
unless missing.empty?
|
50
|
-
raise OptionParser::MissingArgument.new(missing.join(', '))
|
51
|
-
end
|
52
|
-
rescue OptionParser::InvalidOption, OptionParser::MissingArgument
|
53
|
-
puts $!.to_s
|
54
|
-
puts parser
|
55
|
-
exit(false)
|
56
|
-
end
|
57
|
-
|
58
|
-
converter = Ddr::IngestTools::DpcFolderConverter::Converter.new(options)
|
59
|
-
results = converter.call
|
60
|
-
puts I18n.translate('errors.count', { count: results.errors.size })
|
61
|
-
results.errors.each { |e| puts e }
|
@@ -1,28 +0,0 @@
|
|
1
|
-
module Ddr::IngestTools
|
2
|
-
class ChecksumFile
|
3
|
-
|
4
|
-
attr_reader :digests
|
5
|
-
|
6
|
-
def initialize(checksum_filepath)
|
7
|
-
@digests = digest_hash(checksum_filepath)
|
8
|
-
end
|
9
|
-
|
10
|
-
def digest(filepath)
|
11
|
-
digests[filepath]
|
12
|
-
end
|
13
|
-
|
14
|
-
private
|
15
|
-
|
16
|
-
def digest_hash(checksum_filepath)
|
17
|
-
h = {}
|
18
|
-
File.open(checksum_filepath, 'r') do |file|
|
19
|
-
file.each_line do |line|
|
20
|
-
digest, path = line.chomp.split
|
21
|
-
h[path] = digest
|
22
|
-
end
|
23
|
-
end
|
24
|
-
h
|
25
|
-
end
|
26
|
-
|
27
|
-
end
|
28
|
-
end
|
@@ -1,35 +0,0 @@
|
|
1
|
-
require_relative 'dpc_folder_converter/converter'
|
2
|
-
|
3
|
-
module Ddr::IngestTools
|
4
|
-
module DpcFolderConverter
|
5
|
-
#Configuration defaults
|
6
|
-
@config = {
|
7
|
-
included_extensions: [ '.jpg', '.mov', '.mp3', '.mp4', '.pdf', '.tif', '.tiff', '.wav' ],
|
8
|
-
csv_options: { :encoding=>"UTF-8", :col_sep=>"\t", :headers=>true, :write_headers=>true,
|
9
|
-
:header_converters=>:symbol }
|
10
|
-
}
|
11
|
-
|
12
|
-
@valid_config_keys = @config.keys
|
13
|
-
|
14
|
-
# Configure through hash
|
15
|
-
def self.configure(opts = {})
|
16
|
-
opts.each {|k,v| @config[k.to_sym] = v if @valid_config_keys.include?(k.to_sym)}
|
17
|
-
end
|
18
|
-
|
19
|
-
# Configure through yaml file
|
20
|
-
def self.configure_with(path_to_yaml_file)
|
21
|
-
begin
|
22
|
-
config = YAML::load(IO.read(path_to_yaml_file))
|
23
|
-
rescue Errno::ENOENT
|
24
|
-
log(:warning, "YAML configuration file couldn't be found. Using defaults."); return
|
25
|
-
rescue Psych::SyntaxError
|
26
|
-
log(:warning, "YAML configuration file contains invalid syntax. Using defaults."); return
|
27
|
-
end
|
28
|
-
configure(config)
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.config
|
32
|
-
@config
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
@@ -1,151 +0,0 @@
|
|
1
|
-
require 'bagit'
|
2
|
-
require 'csv'
|
3
|
-
require 'fileutils'
|
4
|
-
require 'find'
|
5
|
-
|
6
|
-
module Ddr::IngestTools::DpcFolderConverter
|
7
|
-
class Converter
|
8
|
-
|
9
|
-
INTERMEDIATE_FILES_DIRNAME = 'intermediate_files'
|
10
|
-
DPC_TARGETS_DIRNAME = 'targets'
|
11
|
-
SIF_TARGETS_DIRNAME = 'dpc_targets'
|
12
|
-
SIF_METADATA_FILENAME = 'metadata.txt'
|
13
|
-
SIF_MANIFEST_SHA1_FILENAME = 'manifest-sha1.txt'
|
14
|
-
|
15
|
-
Results = Struct.new(:file_map, :errors)
|
16
|
-
|
17
|
-
attr_reader :source, :target, :data_dir, :item_id_length, :checksums, :copy_files, :collection_title,
|
18
|
-
:admin_set, :metadata_headers
|
19
|
-
attr_accessor :errors, :file_map, :local_id_metadata, :results
|
20
|
-
|
21
|
-
def initialize(source:, target:, item_id_length:, checksums: nil, copy_files: false, collection_title: nil,
|
22
|
-
admin_set: nil)
|
23
|
-
@source = source
|
24
|
-
@target = target
|
25
|
-
@item_id_length = item_id_length
|
26
|
-
@checksums = checksums
|
27
|
-
@copy_files = copy_files
|
28
|
-
@collection_title = collection_title
|
29
|
-
@admin_set = admin_set
|
30
|
-
@metadata_headers = [ 'path', 'local_id' ]
|
31
|
-
@metadata_headers << 'title' unless collection_title.nil?
|
32
|
-
@metadata_headers << 'admin_set' unless admin_set.nil?
|
33
|
-
end
|
34
|
-
|
35
|
-
def call
|
36
|
-
setup
|
37
|
-
scan_files(source)
|
38
|
-
output_metadata
|
39
|
-
bagitup
|
40
|
-
validate_checksums if checksums
|
41
|
-
Results.new(file_map, errors)
|
42
|
-
end
|
43
|
-
|
44
|
-
private
|
45
|
-
|
46
|
-
def setup
|
47
|
-
@data_dir = File.join(target, 'data')
|
48
|
-
@errors = []
|
49
|
-
@file_map = {}
|
50
|
-
@local_id_metadata = {}
|
51
|
-
FileUtils.mkdir_p data_dir
|
52
|
-
end
|
53
|
-
|
54
|
-
def included_extensions
|
55
|
-
Ddr::IngestTools::DpcFolderConverter.config[:included_extensions]
|
56
|
-
end
|
57
|
-
|
58
|
-
def scan_files(dirpath, file_handler='handle_component'.to_sym)
|
59
|
-
Dir.foreach(dirpath).each do |entry|
|
60
|
-
next if [ '.', '..' ].include?(entry)
|
61
|
-
path = File.join(dirpath, entry)
|
62
|
-
if File.directory?(path)
|
63
|
-
if entry == DPC_TARGETS_DIRNAME
|
64
|
-
scan_files(path, :handle_target)
|
65
|
-
elsif entry == INTERMEDIATE_FILES_DIRNAME
|
66
|
-
scan_files(path, :handle_intermediate_file)
|
67
|
-
else
|
68
|
-
scan_files(path, file_handler)
|
69
|
-
end
|
70
|
-
else
|
71
|
-
if included_extensions.include?(File.extname(entry))
|
72
|
-
self.send(file_handler, path)
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def handle_component(file)
|
79
|
-
base = File.basename(file, File.extname(file))
|
80
|
-
item_id = item_id_length == 0 ? base : base[0, item_id_length]
|
81
|
-
FileUtils.mkdir_p(File.join(data_dir, item_id))
|
82
|
-
local_id_metadata[item_id] = item_id
|
83
|
-
handle_file(file, item_id)
|
84
|
-
local_id_metadata[File.join(item_id, File.basename(file))] = base
|
85
|
-
end
|
86
|
-
|
87
|
-
def handle_intermediate_file(file)
|
88
|
-
FileUtils.mkdir_p(File.join(data_dir, INTERMEDIATE_FILES_DIRNAME))
|
89
|
-
handle_file(file, INTERMEDIATE_FILES_DIRNAME)
|
90
|
-
end
|
91
|
-
|
92
|
-
def handle_target(file)
|
93
|
-
base = File.basename(file, File.extname(file))
|
94
|
-
FileUtils.mkdir_p(File.join(data_dir, SIF_TARGETS_DIRNAME))
|
95
|
-
handle_file(file, SIF_TARGETS_DIRNAME)
|
96
|
-
local_id_metadata[File.join(SIF_TARGETS_DIRNAME, File.basename(file))] = base
|
97
|
-
end
|
98
|
-
|
99
|
-
def handle_file(file, folder_name)
|
100
|
-
if copy_files
|
101
|
-
FileUtils.cp file, File.join(data_dir, folder_name)
|
102
|
-
else
|
103
|
-
FileUtils.ln_s file, File.join(data_dir, folder_name)
|
104
|
-
end
|
105
|
-
file_map[file] = File.join(data_dir, folder_name, File.basename(file))
|
106
|
-
end
|
107
|
-
|
108
|
-
def output_metadata
|
109
|
-
metadata_rows = []
|
110
|
-
case
|
111
|
-
when collection_title && admin_set
|
112
|
-
metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, collection_title, admin_set ])
|
113
|
-
when collection_title
|
114
|
-
metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, collection_title ])
|
115
|
-
when admin_set
|
116
|
-
metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, admin_set ])
|
117
|
-
end
|
118
|
-
local_id_metadata.each_pair do |k,v|
|
119
|
-
row_elements = [ k, v ]
|
120
|
-
row_elements << nil if collection_title
|
121
|
-
row_elements << nil if admin_set
|
122
|
-
metadata_rows << CSV::Row.new(metadata_headers, row_elements)
|
123
|
-
end
|
124
|
-
File.open(File.join(data_dir, SIF_METADATA_FILENAME), 'w') do |file|
|
125
|
-
file.puts(metadata_headers.join(Ddr::IngestTools::DpcFolderConverter.config[:csv_options][:col_sep]))
|
126
|
-
metadata_rows.each do |row|
|
127
|
-
file.puts(row.to_csv(Ddr::IngestTools::DpcFolderConverter.config[:csv_options]))
|
128
|
-
end
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
def bagitup
|
133
|
-
bag = BagIt::Bag.new(target)
|
134
|
-
bag.manifest!
|
135
|
-
end
|
136
|
-
|
137
|
-
def validate_checksums
|
138
|
-
external_checksums = Ddr::IngestTools::ChecksumFile.new(checksums)
|
139
|
-
sif_manifest = Ddr::IngestTools::ChecksumFile.new(File.join(target, SIF_MANIFEST_SHA1_FILENAME))
|
140
|
-
file_map.each do |source_path, target_path|
|
141
|
-
external_checksum = external_checksums.digest(source_path)
|
142
|
-
manifest_path = target_path.sub("#{target}/", '')
|
143
|
-
sif_checksum = sif_manifest.digest(manifest_path)
|
144
|
-
unless external_checksum == sif_checksum
|
145
|
-
errors << I18n.translate('errors.checksum_mismatch', { c1: external_checksum, f1: source_path,
|
146
|
-
c2: sif_checksum, f2: target_path })
|
147
|
-
end
|
148
|
-
end
|
149
|
-
end
|
150
|
-
end
|
151
|
-
end
|
@@ -1,11 +0,0 @@
|
|
1
|
-
3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
|
2
|
-
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
|
3
|
-
d0a2f2482783ae3c83d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
|
4
|
-
38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
|
5
|
-
1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
|
6
|
-
c227abc095d3b758ab1c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
|
7
|
-
541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
|
8
|
-
49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
|
9
|
-
260b3c2d20a1726de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
|
10
|
-
a08c4d5a76d1b8735587be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
|
11
|
-
40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
|
@@ -1,11 +0,0 @@
|
|
1
|
-
3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
|
2
|
-
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
|
3
|
-
d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
|
4
|
-
38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
|
5
|
-
1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
|
6
|
-
c227abc095d3b758051c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
|
7
|
-
541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
|
8
|
-
49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
|
9
|
-
260b3c2d20a7126de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
|
10
|
-
a08c4d5a76d1b8734487be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
|
11
|
-
40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
|
@@ -1,10 +0,0 @@
|
|
1
|
-
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f data/abc001/abc001001.tif
|
2
|
-
d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad data/abc001/abc001002.tif
|
3
|
-
38ee72ab417192589f3a54ef1016131c7d7e9e4e data/abc002/abc002001.tif
|
4
|
-
c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
|
5
|
-
541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
|
6
|
-
a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
|
7
|
-
40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
|
8
|
-
49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
|
9
|
-
260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
|
10
|
-
21f041cdd694f7755ed84b8cd2668214a43bad6c data/metadata.txt
|
@@ -1,10 +0,0 @@
|
|
1
|
-
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f data/abc001/abc001001.tif
|
2
|
-
d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad data/abc001/abc001002.tif
|
3
|
-
38ee72ab417192589f3a54ef1016131c7d7e9e4e data/abc002/abc002001.tif
|
4
|
-
c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
|
5
|
-
541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
|
6
|
-
a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
|
7
|
-
40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
|
8
|
-
49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
|
9
|
-
260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
|
10
|
-
913699468893882d1dec463f3df1a405c7f32784 data/metadata.txt
|
@@ -1,201 +0,0 @@
|
|
1
|
-
module Ddr::IngestTools::DpcFolderConverter
|
2
|
-
|
3
|
-
RSpec.describe Converter do
|
4
|
-
|
5
|
-
shared_examples 'a conversion to standard ingest format' do
|
6
|
-
subject { described_class.new(converter_args) }
|
7
|
-
it 'produces the correct standard ingest format directory' do
|
8
|
-
results = subject.call
|
9
|
-
# Target directory contains all the expected files and only the expected files
|
10
|
-
expect(Array(Find.find(target_directory))).to match_array(expected_files)
|
11
|
-
# Target content files are same as source content files
|
12
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001001.tif'),
|
13
|
-
File.join(source_directory, 'abc001001.tif'))).to be true
|
14
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001002.tif'),
|
15
|
-
File.join(source_directory, 'abc001002.tif'))).to be true
|
16
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'abc002', 'abc002001.tif'),
|
17
|
-
File.join(source_directory, 'abc002001.tif'))).to be true
|
18
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003001.wav'),
|
19
|
-
File.join(source_directory, 'g', 'abc003001.wav'))).to be true
|
20
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003002.wav'),
|
21
|
-
File.join(source_directory, 'g', 'abc003002.wav'))).to be true
|
22
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T001.tif'),
|
23
|
-
File.join(source_directory, 'targets', 'T001.tif'))).to be true
|
24
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T002.tif'),
|
25
|
-
File.join(source_directory, 'targets', 'T002.tif'))).to be true
|
26
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
|
27
|
-
File.join(source_directory, 'intermediate_files', 'abc001001.jpg'))).to be true
|
28
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
|
29
|
-
File.join(source_directory, 'intermediate_files', 'abc002001.jpg'))).to be true
|
30
|
-
# Generated metadata file contains the expected contents
|
31
|
-
metadata_lines = File.readlines(File.join(data_directory, 'metadata.txt')).map(&:chomp)
|
32
|
-
expect(metadata_lines).to match_array(expected_metadata)
|
33
|
-
# Generated manifest contains the expected contents (ignoring line order)
|
34
|
-
generated_manifest = File.readlines(File.join(File.join(target_directory, 'manifest-sha1.txt'))).sort
|
35
|
-
expect(generated_manifest).to match_array(expected_manifest)
|
36
|
-
# Conversion process produces the expected errors
|
37
|
-
expect(results.errors).to match_array(checksum_errors)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
let(:source_directory) { Dir.mktmpdir('dpc') }
|
42
|
-
let(:target_directory) { Dir.mktmpdir('sif') }
|
43
|
-
let(:data_directory) { File.join(target_directory, 'data') }
|
44
|
-
let(:item_id_length) { 6 }
|
45
|
-
let(:checksums_directory) { Dir.mktmpdir('checksums') }
|
46
|
-
let(:checksums) { File.join(checksums_directory, 'checksums-sha1.txt') }
|
47
|
-
let(:converter_args) { { source: source_directory, target: target_directory, item_id_length: item_id_length } }
|
48
|
-
let(:expected_files) { [
|
49
|
-
target_directory,
|
50
|
-
File.join(target_directory, 'bag-info.txt'),
|
51
|
-
File.join(target_directory, 'bagit.txt'),
|
52
|
-
data_directory,
|
53
|
-
File.join(data_directory, 'abc001'),
|
54
|
-
File.join(data_directory, 'abc001', 'abc001001.tif'),
|
55
|
-
File.join(data_directory, 'abc001', 'abc001002.tif'),
|
56
|
-
File.join(data_directory, 'abc002'),
|
57
|
-
File.join(data_directory, 'abc002', 'abc002001.tif'),
|
58
|
-
File.join(data_directory, 'abc003', 'abc003001.wav'),
|
59
|
-
File.join(data_directory, 'abc003'),
|
60
|
-
File.join(data_directory, 'abc003', 'abc003002.wav'),
|
61
|
-
File.join(data_directory, 'dpc_targets'),
|
62
|
-
File.join(data_directory, 'dpc_targets', 'T001.tif'),
|
63
|
-
File.join(data_directory, 'dpc_targets', 'T002.tif'),
|
64
|
-
File.join(data_directory, 'intermediate_files'),
|
65
|
-
File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
|
66
|
-
File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
|
67
|
-
File.join(data_directory, 'metadata.txt'),
|
68
|
-
File.join(target_directory, 'manifest-md5.txt'),
|
69
|
-
File.join(target_directory, 'manifest-sha1.txt'),
|
70
|
-
File.join(target_directory, 'tagmanifest-md5.txt'),
|
71
|
-
File.join(target_directory, 'tagmanifest-sha1.txt')
|
72
|
-
] }
|
73
|
-
let(:expected_metadata) { [
|
74
|
-
"path\tlocal_id",
|
75
|
-
"abc001\tabc001",
|
76
|
-
"abc002\tabc002",
|
77
|
-
"abc003\tabc003",
|
78
|
-
"abc001/abc001001.tif\tabc001001",
|
79
|
-
"abc001/abc001002.tif\tabc001002",
|
80
|
-
"abc002/abc002001.tif\tabc002001",
|
81
|
-
"abc003/abc003001.wav\tabc003001",
|
82
|
-
"abc003/abc003002.wav\tabc003002",
|
83
|
-
"dpc_targets/T001.tif\tT001",
|
84
|
-
"dpc_targets/T002.tif\tT002"
|
85
|
-
] }
|
86
|
-
let(:expected_manifest) do
|
87
|
-
File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt')).sort
|
88
|
-
end
|
89
|
-
|
90
|
-
before do
|
91
|
-
File.open(File.join(source_directory, 'Thumbs.db'), 'w') { |f| f.write('Thumbs') }
|
92
|
-
File.open(File.join(source_directory, 'abc001001.tif'), 'w') { |f| f.write('abc001001') }
|
93
|
-
File.open(File.join(source_directory, 'abc001002.tif'), 'w') { |f| f.write('abc001002') }
|
94
|
-
File.open(File.join(source_directory, 'abc002001.tif'), 'w') { |f| f.write('abc002001') }
|
95
|
-
File.open(File.join(source_directory, 'checksums.txt'), 'w') { |f| f.write('checksums') }
|
96
|
-
Dir.mkdir(File.join(source_directory,'g'))
|
97
|
-
File.open(File.join(source_directory, 'g', 'abc003001.wav'), 'w') { |f| f.write('abc003001') }
|
98
|
-
File.open(File.join(source_directory, 'g', 'abc003002.wav'), 'w') { |f| f.write('abc003002') }
|
99
|
-
Dir.mkdir(File.join(source_directory, 'intermediate_files'))
|
100
|
-
File.open(File.join(source_directory, 'intermediate_files', 'abc001001.jpg'), 'w') { |f| f.write('abc001001 jpg')}
|
101
|
-
File.open(File.join(source_directory, 'intermediate_files', 'abc002001.jpg'), 'w') { |f| f.write('abc002001 jpg')}
|
102
|
-
Dir.mkdir(File.join(source_directory, 'targets'))
|
103
|
-
File.open(File.join(source_directory, 'targets', 'T001.tif'), 'w') { |f| f.write('T001') }
|
104
|
-
File.open(File.join(source_directory, 'targets', 'T002.tif'), 'w') { |f| f.write('T002') }
|
105
|
-
end
|
106
|
-
|
107
|
-
describe 'external checksum files' do
|
108
|
-
describe 'external checksum file' do
|
109
|
-
before do
|
110
|
-
File.open(checksums, 'w') do |f|
|
111
|
-
f << File.open(checksum_file_template).read.gsub('SOURCE_DIRECTORY', source_directory)
|
112
|
-
end
|
113
|
-
converter_args[:checksums] = checksums
|
114
|
-
end
|
115
|
-
describe 'mismatch' do
|
116
|
-
let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'bad-checksums-sha1.txt') }
|
117
|
-
let(:checksum_errors) {
|
118
|
-
[ I18n.translate('errors.checksum_mismatch', { c1: 'd0a2f2482783ae3c83d06f3cdeaa1a306cc043ad',
|
119
|
-
f1: File.join(source_directory, 'abc001002.tif'),
|
120
|
-
c2: 'd0a2f2482783ae3c38d06f3cdeaa1a306cc043ad',
|
121
|
-
f2: File.join(target_directory, 'data/abc001/abc001002.tif') }),
|
122
|
-
I18n.translate('errors.checksum_mismatch', { c1: 'c227abc095d3b758ab1c1c1c9e922494b6b6e0b0',
|
123
|
-
f1: File.join(source_directory, 'g/abc003001.wav'),
|
124
|
-
c2: 'c227abc095d3b758051c1c1c9e922494b6b6e0b0',
|
125
|
-
f2: File.join(target_directory, 'data/abc003/abc003001.wav') }),
|
126
|
-
I18n.translate('errors.checksum_mismatch', { c1: '260b3c2d20a1726de96671d29f73ba09d13b61ba',
|
127
|
-
f1: File.join(source_directory, 'intermediate_files/abc002001.jpg'),
|
128
|
-
c2: '260b3c2d20a7126de96671d29f73ba09d13b61ba',
|
129
|
-
f2: File.join(target_directory, 'data/intermediate_files/abc002001.jpg') }),
|
130
|
-
I18n.translate('errors.checksum_mismatch', { c1: 'a08c4d5a76d1b8735587be6ffcba66a9baf475c4',
|
131
|
-
f1: File.join(source_directory, 'targets/T001.tif'),
|
132
|
-
c2: 'a08c4d5a76d1b8734487be6ffcba66a9baf475c4',
|
133
|
-
f2: File.join(target_directory, 'data/dpc_targets/T001.tif') })
|
134
|
-
]
|
135
|
-
}
|
136
|
-
describe 'files are copied' do
|
137
|
-
before { converter_args[:copy_files] = true }
|
138
|
-
it_behaves_like 'a conversion to standard ingest format'
|
139
|
-
end
|
140
|
-
describe 'files are not copied' do
|
141
|
-
before { converter_args[:copy_files] = false }
|
142
|
-
it_behaves_like 'a conversion to standard ingest format'
|
143
|
-
end
|
144
|
-
end
|
145
|
-
describe 'no mismatch' do
|
146
|
-
let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'good-checksums-sha1.txt') }
|
147
|
-
let(:checksum_errors) { [] }
|
148
|
-
describe 'files are copied' do
|
149
|
-
before { converter_args[:copy_files] = true }
|
150
|
-
it_behaves_like 'a conversion to standard ingest format'
|
151
|
-
end
|
152
|
-
describe 'files are not copied' do
|
153
|
-
before { converter_args[:copy_files] = false }
|
154
|
-
it_behaves_like 'a conversion to standard ingest format'
|
155
|
-
end
|
156
|
-
end
|
157
|
-
end
|
158
|
-
|
159
|
-
describe 'no external checksum file' do
|
160
|
-
let(:checksum_errors) { [] }
|
161
|
-
describe 'files are copied' do
|
162
|
-
before { converter_args[:copy_files] = true }
|
163
|
-
it_behaves_like 'a conversion to standard ingest format'
|
164
|
-
end
|
165
|
-
describe 'files are not copied' do
|
166
|
-
before { converter_args[:copy_files] = false }
|
167
|
-
it_behaves_like 'a conversion to standard ingest format'
|
168
|
-
end
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
describe 'collection titles and admin sets' do
|
173
|
-
let(:checksum_errors) { [] }
|
174
|
-
describe 'collection title and admin set provided' do
|
175
|
-
let(:expected_metadata) { [
|
176
|
-
"path\tlocal_id\ttitle\tadmin_set",
|
177
|
-
"\t\tTest Collection\tfoo",
|
178
|
-
"abc001\tabc001\t\t",
|
179
|
-
"abc002\tabc002\t\t",
|
180
|
-
"abc003\tabc003\t\t",
|
181
|
-
"abc001/abc001001.tif\tabc001001\t\t",
|
182
|
-
"abc001/abc001002.tif\tabc001002\t\t",
|
183
|
-
"abc002/abc002001.tif\tabc002001\t\t",
|
184
|
-
"abc003/abc003001.wav\tabc003001\t\t",
|
185
|
-
"abc003/abc003002.wav\tabc003002\t\t",
|
186
|
-
"dpc_targets/T001.tif\tT001\t\t",
|
187
|
-
"dpc_targets/T002.tif\tT002\t\t"
|
188
|
-
] }
|
189
|
-
let(:expected_manifest) do
|
190
|
-
File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1-collection-title_admin_set.txt')).sort
|
191
|
-
end
|
192
|
-
before do
|
193
|
-
converter_args[:collection_title] = 'Test Collection'
|
194
|
-
converter_args[:admin_set] = 'foo'
|
195
|
-
end
|
196
|
-
it_behaves_like 'a conversion to standard ingest format'
|
197
|
-
end
|
198
|
-
end
|
199
|
-
|
200
|
-
end
|
201
|
-
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
module Ddr::IngestTools
|
2
|
-
|
3
|
-
RSpec.describe ChecksumFile do
|
4
|
-
|
5
|
-
subject { described_class.new(checksum_filepath) }
|
6
|
-
|
7
|
-
let(:checksum_filepath) { File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt') }
|
8
|
-
|
9
|
-
describe 'digest' do
|
10
|
-
it 'provides the requested digest' do
|
11
|
-
expect(subject.digest('data/abc001/abc001002.tif')).to eq('d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad')
|
12
|
-
expect(subject.digest('not/in/checksum/file.txt')).to be nil
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|