ddr-ingesttools 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/bin/mint_manifest_arks.rb +49 -0
- data/config/locales/en.yml +6 -6
- data/ddr-ingesttools.gemspec +3 -2
- data/lib/ddr/ingesttools.rb +1 -2
- data/lib/ddr/ingesttools/manifest_ark_minter.rb +22 -0
- data/lib/ddr/ingesttools/manifest_ark_minter/configuration.rb +13 -0
- data/lib/ddr/ingesttools/manifest_ark_minter/manifest_parser.rb +33 -0
- data/lib/ddr/ingesttools/manifest_ark_minter/manifest_updater.rb +80 -0
- data/lib/ddr/ingesttools/manifest_ark_minter/minter.rb +43 -0
- data/lib/ddr/ingesttools/version.rb +1 -1
- data/spec/fixtures/rdr_importer/configs/default.yml +3 -0
- data/spec/fixtures/rdr_importer/manifests/manifest_with_all_arks.csv +4 -0
- data/spec/fixtures/rdr_importer/manifests/manifest_with_no_arks.csv +4 -0
- data/spec/fixtures/rdr_importer/manifests/manifest_with_some_arks.csv +4 -0
- data/spec/manifest_ark_minter/manifest_parser_spec.rb +38 -0
- data/spec/manifest_ark_minter/manifest_updater_spec.rb +46 -0
- data/spec/manifest_ark_minter/minter_spec.rb +50 -0
- data/spec/spec_helper.rb +1 -0
- metadata +43 -25
- data/bin/convert_dpc_folder.rb +0 -61
- data/lib/ddr/ingesttools/checksum_file.rb +0 -28
- data/lib/ddr/ingesttools/dpc_folder_converter.rb +0 -35
- data/lib/ddr/ingesttools/dpc_folder_converter/converter.rb +0 -151
- data/spec/fixtures/files/bad-checksums-sha1.txt +0 -11
- data/spec/fixtures/files/good-checksums-sha1.txt +0 -11
- data/spec/fixtures/files/manifest-sha1-collection-title_admin_set.txt +0 -10
- data/spec/fixtures/files/manifest-sha1.txt +0 -10
- data/spec/integration/dpc_folder_converter_spec.rb +0 -201
- data/spec/unit/checksum_file_spec.rb +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef121cd2d5211caf9ba9b1beebbbc316d2b7214e
|
4
|
+
data.tar.gz: 922b0ca3eba98b5dc5d1bc07c126c842e1a093cc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 89d1fc5adacf2986a47019e0cca3da6ba2a5cf8054d72a4c1a21b82805ef0a0711abc2569f5367878aa6e8b452d8e79482d63d4ac5fdb28deef459bb3175d820
|
7
|
+
data.tar.gz: 24ebbe3f71fce16d96e032ba6c283e0431ba7f87a3495144ab218908052837699ff0687430eceeffcac7c31a5ab75fda7a677759b685f0583c9257c290482493
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'i18n'
|
4
|
+
require 'ddr/ingesttools'
|
5
|
+
require 'optparse'
|
6
|
+
|
7
|
+
options = {}
|
8
|
+
|
9
|
+
puts I18n.t('marquee')
|
10
|
+
puts I18n.t('suite.name')
|
11
|
+
puts I18n.t('manifest_ark_minter.name')
|
12
|
+
puts I18n.t('marquee')
|
13
|
+
|
14
|
+
# Parse command line arguments
|
15
|
+
parser = OptionParser.new do |opts|
|
16
|
+
opts.banner = 'Usage: mint_manifest_arks.rb [options]'
|
17
|
+
|
18
|
+
opts.on('-c', '--config CONFIG_FILE', 'Path to configuration file') do |v|
|
19
|
+
options[:config] = v
|
20
|
+
end
|
21
|
+
|
22
|
+
opts.on('-m', '--manifest MANIFEST_FILE', 'Path to manifest file for which ARKs are to be minted') do |v|
|
23
|
+
options[:manifest] = v
|
24
|
+
end
|
25
|
+
|
26
|
+
opts.on('-o', '--output OUTPUT_FILE', 'Path to which updated manifest file should be written') do |v|
|
27
|
+
options[:output] = v
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
begin
|
32
|
+
parser.parse!
|
33
|
+
if options['config'].nil?
|
34
|
+
puts I18n.t('manifest_ark_minter.use_default_config_file',
|
35
|
+
default_config_file: Ddr::IngestTools::ManifestArkMinter::ManifestUpdater::DEFAULT_CONFIG_FILE)
|
36
|
+
end
|
37
|
+
mandatory = [ :manifest, :output ]
|
38
|
+
missing = mandatory.select{ |param| options[param].nil? }
|
39
|
+
unless missing.empty?
|
40
|
+
raise OptionParser::MissingArgument.new(missing.join(', '))
|
41
|
+
end
|
42
|
+
rescue OptionParser::InvalidOption, OptionParser::MissingArgument
|
43
|
+
puts $!.to_s
|
44
|
+
puts parser
|
45
|
+
exit(false)
|
46
|
+
end
|
47
|
+
|
48
|
+
updater = Ddr::IngestTools::ManifestArkMinter::ManifestUpdater.new(options)
|
49
|
+
updater.call
|
data/config/locales/en.yml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
en:
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
2
|
+
marquee: ==================================================
|
3
|
+
suite:
|
4
|
+
name: DDR Ingest Tools
|
5
|
+
manifest_ark_minter:
|
6
|
+
name: Manifest ARK Minter
|
7
|
+
use_default_config_file: 'Will use default configuration file: %{default_config_file}'
|
data/ddr-ingesttools.gemspec
CHANGED
@@ -18,10 +18,11 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_dependency "
|
22
|
-
spec.add_dependency "i18n", "~> 0
|
21
|
+
spec.add_dependency "ezid-client", "~> 1.7"
|
22
|
+
spec.add_dependency "i18n", "~> 1.0"
|
23
23
|
|
24
24
|
spec.add_development_dependency "bundler", "~> 1.14"
|
25
|
+
spec.add_development_dependency "byebug"
|
25
26
|
spec.add_development_dependency "rake", "~> 12.0"
|
26
27
|
spec.add_development_dependency "rspec", "~> 3.0"
|
27
28
|
end
|
data/lib/ddr/ingesttools.rb
CHANGED
@@ -0,0 +1,22 @@
|
|
1
|
+
require_relative 'manifest_ark_minter/configuration'
|
2
|
+
require_relative 'manifest_ark_minter/manifest_updater'
|
3
|
+
require_relative 'manifest_ark_minter/manifest_parser'
|
4
|
+
require_relative 'manifest_ark_minter/minter'
|
5
|
+
|
6
|
+
module Ddr::IngestTools
|
7
|
+
module ManifestArkMinter
|
8
|
+
|
9
|
+
class << self
|
10
|
+
attr_writer :configuration
|
11
|
+
|
12
|
+
def configuration
|
13
|
+
@configuration ||= Configuration.new
|
14
|
+
end
|
15
|
+
|
16
|
+
def configure
|
17
|
+
yield(configuration)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module Ddr::IngestTools::ManifestArkMinter
|
4
|
+
class ManifestParser
|
5
|
+
|
6
|
+
attr_reader :manifest_file_path
|
7
|
+
|
8
|
+
ARK_HEADER = 'ark'
|
9
|
+
|
10
|
+
def initialize(manifest_file_path)
|
11
|
+
@manifest_file_path = manifest_file_path
|
12
|
+
end
|
13
|
+
|
14
|
+
def as_csv_table
|
15
|
+
@csv_table ||= CSV.read(manifest_file_path, headers: true)
|
16
|
+
end
|
17
|
+
|
18
|
+
def arks_missing?
|
19
|
+
arks.any? { |value| value.compact.empty? }
|
20
|
+
end
|
21
|
+
|
22
|
+
def headers
|
23
|
+
as_csv_table.headers
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def arks
|
29
|
+
as_csv_table.values_at(ARK_HEADER)
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'logger'
|
3
|
+
require 'tempfile'
|
4
|
+
|
5
|
+
module Ddr::IngestTools::ManifestArkMinter
|
6
|
+
class ManifestUpdater
|
7
|
+
|
8
|
+
attr_reader :config, :logger, :manifest, :output
|
9
|
+
|
10
|
+
DEFAULT_CONFIG_FILE = 'manifest_ark_minter_config.yml'
|
11
|
+
|
12
|
+
def initialize(config: DEFAULT_CONFIG_FILE, manifest:, output:, logger: nil)
|
13
|
+
@config = config
|
14
|
+
@manifest = manifest
|
15
|
+
@output = output
|
16
|
+
@logger = logger || Logger.new(STDOUT)
|
17
|
+
end
|
18
|
+
|
19
|
+
def call
|
20
|
+
configure
|
21
|
+
if needs_updating?
|
22
|
+
update
|
23
|
+
else
|
24
|
+
logger.info("Manifest file already has ARKs ... nothing to mint")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def configure
|
31
|
+
conf = YAML::load(IO.read(config))
|
32
|
+
Ddr::IngestTools::ManifestArkMinter.configure do |config|
|
33
|
+
config.ezid_default_shoulder = conf.fetch('ezid_default_shoulder')
|
34
|
+
config.ezid_password = conf.fetch('ezid_password')
|
35
|
+
config.ezid_user = conf.fetch('ezid_user')
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def update
|
40
|
+
update_manifest_table
|
41
|
+
write_updated_manifest
|
42
|
+
end
|
43
|
+
|
44
|
+
def update_manifest_table
|
45
|
+
logger.info("Minting ARKs for manifest file")
|
46
|
+
mint_counter = 0
|
47
|
+
manifest_as_csv_table.each do |row|
|
48
|
+
unless row['ark']
|
49
|
+
row['ark'] = minter.mint
|
50
|
+
mint_counter += 1
|
51
|
+
end
|
52
|
+
end
|
53
|
+
logger.info("Minted #{mint_counter} ARK(s)")
|
54
|
+
end
|
55
|
+
|
56
|
+
def manifest_as_csv_table
|
57
|
+
@manifest_as_csv_table ||= parser.as_csv_table
|
58
|
+
end
|
59
|
+
|
60
|
+
def write_updated_manifest
|
61
|
+
File.open(output, 'w') do |f|
|
62
|
+
f.write(manifest_as_csv_table.to_csv)
|
63
|
+
end
|
64
|
+
logger.info("Updated manifest file is at #{output}")
|
65
|
+
end
|
66
|
+
|
67
|
+
def needs_updating?
|
68
|
+
parser.arks_missing?
|
69
|
+
end
|
70
|
+
|
71
|
+
def minter
|
72
|
+
@minter ||= Minter.new
|
73
|
+
end
|
74
|
+
|
75
|
+
def parser
|
76
|
+
@parser ||= ManifestParser.new(manifest)
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'ezid-client'
|
2
|
+
|
3
|
+
module Ddr::IngestTools::ManifestArkMinter
|
4
|
+
class Minter
|
5
|
+
|
6
|
+
DEFAULT_EXPORT = 'no'.freeze
|
7
|
+
DEFAULT_PROFILE = 'dc'.freeze
|
8
|
+
DEFAULT_STATUS = Ezid::Status::RESERVED
|
9
|
+
|
10
|
+
def initialize
|
11
|
+
configure_ark
|
12
|
+
configure_client
|
13
|
+
end
|
14
|
+
|
15
|
+
def mint
|
16
|
+
Ezid::Identifier.mint
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
def configure_ark
|
22
|
+
Ezid::Identifier.defaults = {
|
23
|
+
export: DEFAULT_EXPORT,
|
24
|
+
profile: DEFAULT_PROFILE,
|
25
|
+
status: DEFAULT_STATUS
|
26
|
+
}
|
27
|
+
end
|
28
|
+
|
29
|
+
def configure_client
|
30
|
+
Ezid::Client.configure do |config|
|
31
|
+
config.default_shoulder = module_configuration.ezid_default_shoulder
|
32
|
+
config.password = module_configuration.ezid_password
|
33
|
+
config.user = module_configuration.ezid_user
|
34
|
+
config.logger = Logger.new(File::NULL)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def module_configuration
|
39
|
+
Ddr::IngestTools::ManifestArkMinter.configuration
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,4 @@
|
|
1
|
+
ark,visibility,title,contributor,resource_type,license,file,file,file
|
2
|
+
ark:/99999/fk4s76kg89,open,Test 1,"Smith, Sue",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv,data/data2.csv,docs/doc1.txt
|
3
|
+
ark:/99999/fk4ng5vp6m,,Test 2,"Jones, Bill",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data3.csv,docs/doc2.txt
|
4
|
+
ark:/99999/fk4hq54w3t,authenticated,Test 3,"Allen, Jane",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv
|
@@ -0,0 +1,4 @@
|
|
1
|
+
visibility,title,contributor,resource_type,license,file,file,file
|
2
|
+
open,Test 1,"Smith, Sue",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv,data/data2.csv,docs/doc1.txt
|
3
|
+
,Test 2,"Jones, Bill",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data3.csv,docs/doc2.txt
|
4
|
+
authenticated,Test 3,"Allen, Jane",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv
|
@@ -0,0 +1,4 @@
|
|
1
|
+
ark,visibility,title,contributor,resource_type,license,file,file,file
|
2
|
+
,open,Test 1,"Smith, Sue",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv,data/data2.csv,docs/doc1.txt
|
3
|
+
ark:/99999/fk4ng5vp6m,,Test 2,"Jones, Bill",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data3.csv,docs/doc2.txt
|
4
|
+
,authenticated,Test 3,"Allen, Jane",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Ddr::IngestTools::ManifestArkMinter
|
4
|
+
|
5
|
+
RSpec.describe ManifestParser do
|
6
|
+
|
7
|
+
subject { described_class.new(manifest_file) }
|
8
|
+
|
9
|
+
describe '#as_csv_table' do
|
10
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_some_arks.csv') }
|
11
|
+
specify { expect(subject.as_csv_table).to be_a CSV::Table }
|
12
|
+
end
|
13
|
+
|
14
|
+
describe '#headers' do
|
15
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_some_arks.csv') }
|
16
|
+
let(:expected_headers) { %w(ark visibility title contributor resource_type license file) }
|
17
|
+
it 'parses out the list of headers' do
|
18
|
+
expect(subject.headers).to include(*expected_headers)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe '#arks_missing?' do
|
23
|
+
describe 'no arks assigned' do
|
24
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_no_arks.csv') }
|
25
|
+
specify { expect(subject.arks_missing?).to be true }
|
26
|
+
end
|
27
|
+
describe 'some arks assigned' do
|
28
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_some_arks.csv') }
|
29
|
+
specify { expect(subject.arks_missing?).to be true }
|
30
|
+
end
|
31
|
+
describe 'all arks assigned' do
|
32
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_all_arks.csv') }
|
33
|
+
specify { expect(subject.arks_missing?).to be false }
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
module Ddr::IngestTools::ManifestArkMinter
|
5
|
+
|
6
|
+
RSpec.describe ManifestUpdater do
|
7
|
+
|
8
|
+
subject { described_class.new(config: config_file, manifest: manifest_file, output: output_file) }
|
9
|
+
|
10
|
+
let(:config_file) { File.join('spec', 'fixtures', 'rdr_importer', 'configs', 'default.yml') }
|
11
|
+
let(:output_dir) { Dir.mktmpdir }
|
12
|
+
let(:output_file) { File.join(output_dir, 'output.csv') }
|
13
|
+
|
14
|
+
after { FileUtils.remove_dir output_dir }
|
15
|
+
|
16
|
+
describe 'manifest has ARKs for all rows' do
|
17
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_all_arks.csv') }
|
18
|
+
it 'does not produce an output file' do
|
19
|
+
expect{ subject.call }.not_to change{ File.exist?(output_file) }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
describe 'manifest has ARKs for some rows' do
|
24
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_some_arks.csv') }
|
25
|
+
it 'mints ARKs for rows without them' do
|
26
|
+
expect_any_instance_of(Minter).to receive(:mint).exactly(2).times.and_call_original
|
27
|
+
subject.call
|
28
|
+
table = CSV.read(output_file, headers: true)
|
29
|
+
# expect(table['ark']).to all(match(/ark:\/99999\/fk4/))
|
30
|
+
expect(table['ark']).to match([ /ark:\/99999\/fk4/, 'ark:/99999/fk4ng5vp6m', /ark:\/99999\/fk4/ ])
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe 'manifest has ARKs for no rows' do
|
35
|
+
let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_no_arks.csv') }
|
36
|
+
it 'mints ARKs for every row' do
|
37
|
+
expect_any_instance_of(Minter).to receive(:mint).exactly(3).times.and_call_original
|
38
|
+
subject.call
|
39
|
+
table = CSV.read(output_file, headers: true)
|
40
|
+
expect(table['ark']).to all(match(/ark:\/99999\/fk4/))
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Ddr::IngestTools::ManifestArkMinter
|
4
|
+
|
5
|
+
RSpec.describe Minter do
|
6
|
+
|
7
|
+
describe '#initialize' do
|
8
|
+
describe 'Ezid::Identifier defaults' do
|
9
|
+
before { described_class.new }
|
10
|
+
let(:ark_defaults) { { export: described_class::DEFAULT_EXPORT,
|
11
|
+
profile: described_class::DEFAULT_PROFILE,
|
12
|
+
status: described_class::DEFAULT_STATUS } }
|
13
|
+
it 'configures Ezid::Identifer defaults' do
|
14
|
+
expect(Ezid::Identifier.defaults).to match(ark_defaults)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
describe 'Ezid::Client configuration' do
|
18
|
+
let(:configuration) { Configuration.new }
|
19
|
+
before do
|
20
|
+
configuration.ezid_default_shoulder = 'ark:/99999/fk4'
|
21
|
+
configuration.ezid_password = 'apitest'
|
22
|
+
configuration.ezid_user = 'apitest'
|
23
|
+
allow(Ddr::IngestTools::ManifestArkMinter).to receive(:configuration) { configuration }
|
24
|
+
described_class.new
|
25
|
+
end
|
26
|
+
it 'configures the Ezid::Client' do
|
27
|
+
expect(Ezid::Client.config.default_shoulder).to eq('ark:/99999/fk4')
|
28
|
+
expect(Ezid::Client.config.password).to eq('apitest')
|
29
|
+
expect(Ezid::Client.config.user).to eq('apitest')
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe '#mint' do
|
35
|
+
let(:configuration) { Configuration.new }
|
36
|
+
before do
|
37
|
+
configuration.ezid_default_shoulder = 'ark:/99999/fk4'
|
38
|
+
configuration.ezid_password = 'apitest'
|
39
|
+
configuration.ezid_user = 'apitest'
|
40
|
+
allow(Ddr::IngestTools::ManifestArkMinter).to receive(:configuration) { configuration }
|
41
|
+
end
|
42
|
+
it 'calls Ezid::Identifier to mint an ark' do
|
43
|
+
expect(Ezid::Identifier).to receive(:mint)
|
44
|
+
subject.mint
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ddr-ingesttools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Coble
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-03-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: ezid-client
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '1.7'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '1.7'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: i18n
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '0
|
33
|
+
version: '1.0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '0
|
40
|
+
version: '1.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '1.14'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: byebug
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: rake
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -84,7 +98,7 @@ description: A collection of Ruby tools supporting ingest into the Duke Digital
|
|
84
98
|
email:
|
85
99
|
- jim.coble@duke.edu
|
86
100
|
executables:
|
87
|
-
-
|
101
|
+
- mint_manifest_arks.rb
|
88
102
|
extensions: []
|
89
103
|
extra_rdoc_files: []
|
90
104
|
files:
|
@@ -96,21 +110,24 @@ files:
|
|
96
110
|
- LICENSE.txt
|
97
111
|
- README.md
|
98
112
|
- Rakefile
|
99
|
-
- bin/
|
113
|
+
- bin/mint_manifest_arks.rb
|
100
114
|
- config/locales/en.yml
|
101
115
|
- ddr-ingesttools.gemspec
|
102
116
|
- lib/ddr/ingesttools.rb
|
103
|
-
- lib/ddr/ingesttools/
|
104
|
-
- lib/ddr/ingesttools/
|
105
|
-
- lib/ddr/ingesttools/
|
117
|
+
- lib/ddr/ingesttools/manifest_ark_minter.rb
|
118
|
+
- lib/ddr/ingesttools/manifest_ark_minter/configuration.rb
|
119
|
+
- lib/ddr/ingesttools/manifest_ark_minter/manifest_parser.rb
|
120
|
+
- lib/ddr/ingesttools/manifest_ark_minter/manifest_updater.rb
|
121
|
+
- lib/ddr/ingesttools/manifest_ark_minter/minter.rb
|
106
122
|
- lib/ddr/ingesttools/version.rb
|
107
|
-
- spec/fixtures/
|
108
|
-
- spec/fixtures/
|
109
|
-
- spec/fixtures/
|
110
|
-
- spec/fixtures/
|
111
|
-
- spec/
|
123
|
+
- spec/fixtures/rdr_importer/configs/default.yml
|
124
|
+
- spec/fixtures/rdr_importer/manifests/manifest_with_all_arks.csv
|
125
|
+
- spec/fixtures/rdr_importer/manifests/manifest_with_no_arks.csv
|
126
|
+
- spec/fixtures/rdr_importer/manifests/manifest_with_some_arks.csv
|
127
|
+
- spec/manifest_ark_minter/manifest_parser_spec.rb
|
128
|
+
- spec/manifest_ark_minter/manifest_updater_spec.rb
|
129
|
+
- spec/manifest_ark_minter/minter_spec.rb
|
112
130
|
- spec/spec_helper.rb
|
113
|
-
- spec/unit/checksum_file_spec.rb
|
114
131
|
homepage: https://github.com/duke-libraries/ddr-ingesttools
|
115
132
|
licenses:
|
116
133
|
- BSD-3-Clause
|
@@ -131,15 +148,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
131
148
|
version: '0'
|
132
149
|
requirements: []
|
133
150
|
rubyforge_project:
|
134
|
-
rubygems_version: 2.6.
|
151
|
+
rubygems_version: 2.6.14
|
135
152
|
signing_key:
|
136
153
|
specification_version: 4
|
137
154
|
summary: Ruby tools supporting ingest into the Duke Digital Repository.
|
138
155
|
test_files:
|
139
|
-
- spec/fixtures/
|
140
|
-
- spec/fixtures/
|
141
|
-
- spec/fixtures/
|
142
|
-
- spec/fixtures/
|
143
|
-
- spec/
|
156
|
+
- spec/fixtures/rdr_importer/configs/default.yml
|
157
|
+
- spec/fixtures/rdr_importer/manifests/manifest_with_all_arks.csv
|
158
|
+
- spec/fixtures/rdr_importer/manifests/manifest_with_no_arks.csv
|
159
|
+
- spec/fixtures/rdr_importer/manifests/manifest_with_some_arks.csv
|
160
|
+
- spec/manifest_ark_minter/manifest_parser_spec.rb
|
161
|
+
- spec/manifest_ark_minter/manifest_updater_spec.rb
|
162
|
+
- spec/manifest_ark_minter/minter_spec.rb
|
144
163
|
- spec/spec_helper.rb
|
145
|
-
- spec/unit/checksum_file_spec.rb
|
data/bin/convert_dpc_folder.rb
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'ddr/ingesttools'
|
4
|
-
require 'optparse'
|
5
|
-
|
6
|
-
options = {}
|
7
|
-
|
8
|
-
# Parse command line arguments
|
9
|
-
parser = OptionParser.new do |opts|
|
10
|
-
opts.banner = 'Usage: convert_dpc_folder.rb [options]'
|
11
|
-
|
12
|
-
opts.on('-s', '--source SOURCE', 'Path to DPC Folder to be converted') do |v|
|
13
|
-
options[:source] = v
|
14
|
-
end
|
15
|
-
|
16
|
-
opts.on('-t', '--target TARGET', 'Path to folder where Standard Ingest Format is to be built') do |v|
|
17
|
-
options[:target] = v
|
18
|
-
end
|
19
|
-
|
20
|
-
opts.on('-i', '--item_id_length LENGTH', Integer, 'Number of characters to copy from the beginning of each file name',
|
21
|
-
'to use as the local ID of the item of which that file is a component') do |v|
|
22
|
-
options[:item_id_length] = v
|
23
|
-
end
|
24
|
-
|
25
|
-
opts.on('-c', '--checksums [CHECKSUM_FILE]', 'External checksum file') do |v|
|
26
|
-
options[:checksums] = v
|
27
|
-
end
|
28
|
-
|
29
|
-
opts.on('--[no-]copy_files', 'Copy files to target location instead of using a symlink') do |v|
|
30
|
-
options[:copy_files] = v
|
31
|
-
end
|
32
|
-
|
33
|
-
opts.on('--collection_title [TITLE]', 'Title for collection',
|
34
|
-
'required if intending to create a collection-creating Standard Ingest') do |v|
|
35
|
-
options[:collection_title] = v
|
36
|
-
end
|
37
|
-
|
38
|
-
opts.on('--admin_set [ADMIN_SET]', 'Admin set for collection',
|
39
|
-
'required if intending to create a collection-creating Standard Ingest') do |v|
|
40
|
-
options[:admin_set] = v
|
41
|
-
end
|
42
|
-
|
43
|
-
end
|
44
|
-
|
45
|
-
begin
|
46
|
-
parser.parse!
|
47
|
-
mandatory = [ :source, :target, :item_id_length ]
|
48
|
-
missing = mandatory.select{ |param| options[param].nil? }
|
49
|
-
unless missing.empty?
|
50
|
-
raise OptionParser::MissingArgument.new(missing.join(', '))
|
51
|
-
end
|
52
|
-
rescue OptionParser::InvalidOption, OptionParser::MissingArgument
|
53
|
-
puts $!.to_s
|
54
|
-
puts parser
|
55
|
-
exit(false)
|
56
|
-
end
|
57
|
-
|
58
|
-
converter = Ddr::IngestTools::DpcFolderConverter::Converter.new(options)
|
59
|
-
results = converter.call
|
60
|
-
puts I18n.translate('errors.count', { count: results.errors.size })
|
61
|
-
results.errors.each { |e| puts e }
|
@@ -1,28 +0,0 @@
|
|
1
|
-
module Ddr::IngestTools
|
2
|
-
class ChecksumFile
|
3
|
-
|
4
|
-
attr_reader :digests
|
5
|
-
|
6
|
-
def initialize(checksum_filepath)
|
7
|
-
@digests = digest_hash(checksum_filepath)
|
8
|
-
end
|
9
|
-
|
10
|
-
def digest(filepath)
|
11
|
-
digests[filepath]
|
12
|
-
end
|
13
|
-
|
14
|
-
private
|
15
|
-
|
16
|
-
def digest_hash(checksum_filepath)
|
17
|
-
h = {}
|
18
|
-
File.open(checksum_filepath, 'r') do |file|
|
19
|
-
file.each_line do |line|
|
20
|
-
digest, path = line.chomp.split
|
21
|
-
h[path] = digest
|
22
|
-
end
|
23
|
-
end
|
24
|
-
h
|
25
|
-
end
|
26
|
-
|
27
|
-
end
|
28
|
-
end
|
@@ -1,35 +0,0 @@
|
|
1
|
-
require_relative 'dpc_folder_converter/converter'
|
2
|
-
|
3
|
-
module Ddr::IngestTools
|
4
|
-
module DpcFolderConverter
|
5
|
-
#Configuration defaults
|
6
|
-
@config = {
|
7
|
-
included_extensions: [ '.jpg', '.mov', '.mp3', '.mp4', '.pdf', '.tif', '.tiff', '.wav' ],
|
8
|
-
csv_options: { :encoding=>"UTF-8", :col_sep=>"\t", :headers=>true, :write_headers=>true,
|
9
|
-
:header_converters=>:symbol }
|
10
|
-
}
|
11
|
-
|
12
|
-
@valid_config_keys = @config.keys
|
13
|
-
|
14
|
-
# Configure through hash
|
15
|
-
def self.configure(opts = {})
|
16
|
-
opts.each {|k,v| @config[k.to_sym] = v if @valid_config_keys.include?(k.to_sym)}
|
17
|
-
end
|
18
|
-
|
19
|
-
# Configure through yaml file
|
20
|
-
def self.configure_with(path_to_yaml_file)
|
21
|
-
begin
|
22
|
-
config = YAML::load(IO.read(path_to_yaml_file))
|
23
|
-
rescue Errno::ENOENT
|
24
|
-
log(:warning, "YAML configuration file couldn't be found. Using defaults."); return
|
25
|
-
rescue Psych::SyntaxError
|
26
|
-
log(:warning, "YAML configuration file contains invalid syntax. Using defaults."); return
|
27
|
-
end
|
28
|
-
configure(config)
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.config
|
32
|
-
@config
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
@@ -1,151 +0,0 @@
|
|
1
|
-
require 'bagit'
|
2
|
-
require 'csv'
|
3
|
-
require 'fileutils'
|
4
|
-
require 'find'
|
5
|
-
|
6
|
-
module Ddr::IngestTools::DpcFolderConverter
|
7
|
-
class Converter
|
8
|
-
|
9
|
-
INTERMEDIATE_FILES_DIRNAME = 'intermediate_files'
|
10
|
-
DPC_TARGETS_DIRNAME = 'targets'
|
11
|
-
SIF_TARGETS_DIRNAME = 'dpc_targets'
|
12
|
-
SIF_METADATA_FILENAME = 'metadata.txt'
|
13
|
-
SIF_MANIFEST_SHA1_FILENAME = 'manifest-sha1.txt'
|
14
|
-
|
15
|
-
Results = Struct.new(:file_map, :errors)
|
16
|
-
|
17
|
-
attr_reader :source, :target, :data_dir, :item_id_length, :checksums, :copy_files, :collection_title,
|
18
|
-
:admin_set, :metadata_headers
|
19
|
-
attr_accessor :errors, :file_map, :local_id_metadata, :results
|
20
|
-
|
21
|
-
def initialize(source:, target:, item_id_length:, checksums: nil, copy_files: false, collection_title: nil,
|
22
|
-
admin_set: nil)
|
23
|
-
@source = source
|
24
|
-
@target = target
|
25
|
-
@item_id_length = item_id_length
|
26
|
-
@checksums = checksums
|
27
|
-
@copy_files = copy_files
|
28
|
-
@collection_title = collection_title
|
29
|
-
@admin_set = admin_set
|
30
|
-
@metadata_headers = [ 'path', 'local_id' ]
|
31
|
-
@metadata_headers << 'title' unless collection_title.nil?
|
32
|
-
@metadata_headers << 'admin_set' unless admin_set.nil?
|
33
|
-
end
|
34
|
-
|
35
|
-
def call
|
36
|
-
setup
|
37
|
-
scan_files(source)
|
38
|
-
output_metadata
|
39
|
-
bagitup
|
40
|
-
validate_checksums if checksums
|
41
|
-
Results.new(file_map, errors)
|
42
|
-
end
|
43
|
-
|
44
|
-
private
|
45
|
-
|
46
|
-
def setup
|
47
|
-
@data_dir = File.join(target, 'data')
|
48
|
-
@errors = []
|
49
|
-
@file_map = {}
|
50
|
-
@local_id_metadata = {}
|
51
|
-
FileUtils.mkdir_p data_dir
|
52
|
-
end
|
53
|
-
|
54
|
-
def included_extensions
|
55
|
-
Ddr::IngestTools::DpcFolderConverter.config[:included_extensions]
|
56
|
-
end
|
57
|
-
|
58
|
-
def scan_files(dirpath, file_handler='handle_component'.to_sym)
|
59
|
-
Dir.foreach(dirpath).each do |entry|
|
60
|
-
next if [ '.', '..' ].include?(entry)
|
61
|
-
path = File.join(dirpath, entry)
|
62
|
-
if File.directory?(path)
|
63
|
-
if entry == DPC_TARGETS_DIRNAME
|
64
|
-
scan_files(path, :handle_target)
|
65
|
-
elsif entry == INTERMEDIATE_FILES_DIRNAME
|
66
|
-
scan_files(path, :handle_intermediate_file)
|
67
|
-
else
|
68
|
-
scan_files(path, file_handler)
|
69
|
-
end
|
70
|
-
else
|
71
|
-
if included_extensions.include?(File.extname(entry))
|
72
|
-
self.send(file_handler, path)
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def handle_component(file)
|
79
|
-
base = File.basename(file, File.extname(file))
|
80
|
-
item_id = item_id_length == 0 ? base : base[0, item_id_length]
|
81
|
-
FileUtils.mkdir_p(File.join(data_dir, item_id))
|
82
|
-
local_id_metadata[item_id] = item_id
|
83
|
-
handle_file(file, item_id)
|
84
|
-
local_id_metadata[File.join(item_id, File.basename(file))] = base
|
85
|
-
end
|
86
|
-
|
87
|
-
def handle_intermediate_file(file)
|
88
|
-
FileUtils.mkdir_p(File.join(data_dir, INTERMEDIATE_FILES_DIRNAME))
|
89
|
-
handle_file(file, INTERMEDIATE_FILES_DIRNAME)
|
90
|
-
end
|
91
|
-
|
92
|
-
def handle_target(file)
|
93
|
-
base = File.basename(file, File.extname(file))
|
94
|
-
FileUtils.mkdir_p(File.join(data_dir, SIF_TARGETS_DIRNAME))
|
95
|
-
handle_file(file, SIF_TARGETS_DIRNAME)
|
96
|
-
local_id_metadata[File.join(SIF_TARGETS_DIRNAME, File.basename(file))] = base
|
97
|
-
end
|
98
|
-
|
99
|
-
def handle_file(file, folder_name)
|
100
|
-
if copy_files
|
101
|
-
FileUtils.cp file, File.join(data_dir, folder_name)
|
102
|
-
else
|
103
|
-
FileUtils.ln_s file, File.join(data_dir, folder_name)
|
104
|
-
end
|
105
|
-
file_map[file] = File.join(data_dir, folder_name, File.basename(file))
|
106
|
-
end
|
107
|
-
|
108
|
-
def output_metadata
|
109
|
-
metadata_rows = []
|
110
|
-
case
|
111
|
-
when collection_title && admin_set
|
112
|
-
metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, collection_title, admin_set ])
|
113
|
-
when collection_title
|
114
|
-
metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, collection_title ])
|
115
|
-
when admin_set
|
116
|
-
metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, admin_set ])
|
117
|
-
end
|
118
|
-
local_id_metadata.each_pair do |k,v|
|
119
|
-
row_elements = [ k, v ]
|
120
|
-
row_elements << nil if collection_title
|
121
|
-
row_elements << nil if admin_set
|
122
|
-
metadata_rows << CSV::Row.new(metadata_headers, row_elements)
|
123
|
-
end
|
124
|
-
File.open(File.join(data_dir, SIF_METADATA_FILENAME), 'w') do |file|
|
125
|
-
file.puts(metadata_headers.join(Ddr::IngestTools::DpcFolderConverter.config[:csv_options][:col_sep]))
|
126
|
-
metadata_rows.each do |row|
|
127
|
-
file.puts(row.to_csv(Ddr::IngestTools::DpcFolderConverter.config[:csv_options]))
|
128
|
-
end
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
def bagitup
|
133
|
-
bag = BagIt::Bag.new(target)
|
134
|
-
bag.manifest!
|
135
|
-
end
|
136
|
-
|
137
|
-
def validate_checksums
|
138
|
-
external_checksums = Ddr::IngestTools::ChecksumFile.new(checksums)
|
139
|
-
sif_manifest = Ddr::IngestTools::ChecksumFile.new(File.join(target, SIF_MANIFEST_SHA1_FILENAME))
|
140
|
-
file_map.each do |source_path, target_path|
|
141
|
-
external_checksum = external_checksums.digest(source_path)
|
142
|
-
manifest_path = target_path.sub("#{target}/", '')
|
143
|
-
sif_checksum = sif_manifest.digest(manifest_path)
|
144
|
-
unless external_checksum == sif_checksum
|
145
|
-
errors << I18n.translate('errors.checksum_mismatch', { c1: external_checksum, f1: source_path,
|
146
|
-
c2: sif_checksum, f2: target_path })
|
147
|
-
end
|
148
|
-
end
|
149
|
-
end
|
150
|
-
end
|
151
|
-
end
|
@@ -1,11 +0,0 @@
|
|
1
|
-
3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
|
2
|
-
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
|
3
|
-
d0a2f2482783ae3c83d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
|
4
|
-
38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
|
5
|
-
1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
|
6
|
-
c227abc095d3b758ab1c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
|
7
|
-
541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
|
8
|
-
49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
|
9
|
-
260b3c2d20a1726de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
|
10
|
-
a08c4d5a76d1b8735587be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
|
11
|
-
40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
|
@@ -1,11 +0,0 @@
|
|
1
|
-
3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
|
2
|
-
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
|
3
|
-
d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
|
4
|
-
38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
|
5
|
-
1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
|
6
|
-
c227abc095d3b758051c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
|
7
|
-
541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
|
8
|
-
49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
|
9
|
-
260b3c2d20a7126de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
|
10
|
-
a08c4d5a76d1b8734487be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
|
11
|
-
40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
|
@@ -1,10 +0,0 @@
|
|
1
|
-
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f data/abc001/abc001001.tif
|
2
|
-
d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad data/abc001/abc001002.tif
|
3
|
-
38ee72ab417192589f3a54ef1016131c7d7e9e4e data/abc002/abc002001.tif
|
4
|
-
c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
|
5
|
-
541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
|
6
|
-
a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
|
7
|
-
40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
|
8
|
-
49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
|
9
|
-
260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
|
10
|
-
21f041cdd694f7755ed84b8cd2668214a43bad6c data/metadata.txt
|
@@ -1,10 +0,0 @@
|
|
1
|
-
59ec01f979a76b968bc579e5cd0ceb3bcf3e629f data/abc001/abc001001.tif
|
2
|
-
d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad data/abc001/abc001002.tif
|
3
|
-
38ee72ab417192589f3a54ef1016131c7d7e9e4e data/abc002/abc002001.tif
|
4
|
-
c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
|
5
|
-
541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
|
6
|
-
a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
|
7
|
-
40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
|
8
|
-
49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
|
9
|
-
260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
|
10
|
-
913699468893882d1dec463f3df1a405c7f32784 data/metadata.txt
|
@@ -1,201 +0,0 @@
|
|
1
|
-
module Ddr::IngestTools::DpcFolderConverter
|
2
|
-
|
3
|
-
RSpec.describe Converter do
|
4
|
-
|
5
|
-
shared_examples 'a conversion to standard ingest format' do
|
6
|
-
subject { described_class.new(converter_args) }
|
7
|
-
it 'produces the correct standard ingest format directory' do
|
8
|
-
results = subject.call
|
9
|
-
# Target directory contains all the expected files and only the expected files
|
10
|
-
expect(Array(Find.find(target_directory))).to match_array(expected_files)
|
11
|
-
# Target content files are same as source content files
|
12
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001001.tif'),
|
13
|
-
File.join(source_directory, 'abc001001.tif'))).to be true
|
14
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001002.tif'),
|
15
|
-
File.join(source_directory, 'abc001002.tif'))).to be true
|
16
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'abc002', 'abc002001.tif'),
|
17
|
-
File.join(source_directory, 'abc002001.tif'))).to be true
|
18
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003001.wav'),
|
19
|
-
File.join(source_directory, 'g', 'abc003001.wav'))).to be true
|
20
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003002.wav'),
|
21
|
-
File.join(source_directory, 'g', 'abc003002.wav'))).to be true
|
22
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T001.tif'),
|
23
|
-
File.join(source_directory, 'targets', 'T001.tif'))).to be true
|
24
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T002.tif'),
|
25
|
-
File.join(source_directory, 'targets', 'T002.tif'))).to be true
|
26
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
|
27
|
-
File.join(source_directory, 'intermediate_files', 'abc001001.jpg'))).to be true
|
28
|
-
expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
|
29
|
-
File.join(source_directory, 'intermediate_files', 'abc002001.jpg'))).to be true
|
30
|
-
# Generated metadata file contains the expected contents
|
31
|
-
metadata_lines = File.readlines(File.join(data_directory, 'metadata.txt')).map(&:chomp)
|
32
|
-
expect(metadata_lines).to match_array(expected_metadata)
|
33
|
-
# Generated manifest contains the expected contents (ignoring line order)
|
34
|
-
generated_manifest = File.readlines(File.join(File.join(target_directory, 'manifest-sha1.txt'))).sort
|
35
|
-
expect(generated_manifest).to match_array(expected_manifest)
|
36
|
-
# Conversion process produces the expected errors
|
37
|
-
expect(results.errors).to match_array(checksum_errors)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
let(:source_directory) { Dir.mktmpdir('dpc') }
|
42
|
-
let(:target_directory) { Dir.mktmpdir('sif') }
|
43
|
-
let(:data_directory) { File.join(target_directory, 'data') }
|
44
|
-
let(:item_id_length) { 6 }
|
45
|
-
let(:checksums_directory) { Dir.mktmpdir('checksums') }
|
46
|
-
let(:checksums) { File.join(checksums_directory, 'checksums-sha1.txt') }
|
47
|
-
let(:converter_args) { { source: source_directory, target: target_directory, item_id_length: item_id_length } }
|
48
|
-
let(:expected_files) { [
|
49
|
-
target_directory,
|
50
|
-
File.join(target_directory, 'bag-info.txt'),
|
51
|
-
File.join(target_directory, 'bagit.txt'),
|
52
|
-
data_directory,
|
53
|
-
File.join(data_directory, 'abc001'),
|
54
|
-
File.join(data_directory, 'abc001', 'abc001001.tif'),
|
55
|
-
File.join(data_directory, 'abc001', 'abc001002.tif'),
|
56
|
-
File.join(data_directory, 'abc002'),
|
57
|
-
File.join(data_directory, 'abc002', 'abc002001.tif'),
|
58
|
-
File.join(data_directory, 'abc003', 'abc003001.wav'),
|
59
|
-
File.join(data_directory, 'abc003'),
|
60
|
-
File.join(data_directory, 'abc003', 'abc003002.wav'),
|
61
|
-
File.join(data_directory, 'dpc_targets'),
|
62
|
-
File.join(data_directory, 'dpc_targets', 'T001.tif'),
|
63
|
-
File.join(data_directory, 'dpc_targets', 'T002.tif'),
|
64
|
-
File.join(data_directory, 'intermediate_files'),
|
65
|
-
File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
|
66
|
-
File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
|
67
|
-
File.join(data_directory, 'metadata.txt'),
|
68
|
-
File.join(target_directory, 'manifest-md5.txt'),
|
69
|
-
File.join(target_directory, 'manifest-sha1.txt'),
|
70
|
-
File.join(target_directory, 'tagmanifest-md5.txt'),
|
71
|
-
File.join(target_directory, 'tagmanifest-sha1.txt')
|
72
|
-
] }
|
73
|
-
let(:expected_metadata) { [
|
74
|
-
"path\tlocal_id",
|
75
|
-
"abc001\tabc001",
|
76
|
-
"abc002\tabc002",
|
77
|
-
"abc003\tabc003",
|
78
|
-
"abc001/abc001001.tif\tabc001001",
|
79
|
-
"abc001/abc001002.tif\tabc001002",
|
80
|
-
"abc002/abc002001.tif\tabc002001",
|
81
|
-
"abc003/abc003001.wav\tabc003001",
|
82
|
-
"abc003/abc003002.wav\tabc003002",
|
83
|
-
"dpc_targets/T001.tif\tT001",
|
84
|
-
"dpc_targets/T002.tif\tT002"
|
85
|
-
] }
|
86
|
-
let(:expected_manifest) do
|
87
|
-
File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt')).sort
|
88
|
-
end
|
89
|
-
|
90
|
-
before do
|
91
|
-
File.open(File.join(source_directory, 'Thumbs.db'), 'w') { |f| f.write('Thumbs') }
|
92
|
-
File.open(File.join(source_directory, 'abc001001.tif'), 'w') { |f| f.write('abc001001') }
|
93
|
-
File.open(File.join(source_directory, 'abc001002.tif'), 'w') { |f| f.write('abc001002') }
|
94
|
-
File.open(File.join(source_directory, 'abc002001.tif'), 'w') { |f| f.write('abc002001') }
|
95
|
-
File.open(File.join(source_directory, 'checksums.txt'), 'w') { |f| f.write('checksums') }
|
96
|
-
Dir.mkdir(File.join(source_directory,'g'))
|
97
|
-
File.open(File.join(source_directory, 'g', 'abc003001.wav'), 'w') { |f| f.write('abc003001') }
|
98
|
-
File.open(File.join(source_directory, 'g', 'abc003002.wav'), 'w') { |f| f.write('abc003002') }
|
99
|
-
Dir.mkdir(File.join(source_directory, 'intermediate_files'))
|
100
|
-
File.open(File.join(source_directory, 'intermediate_files', 'abc001001.jpg'), 'w') { |f| f.write('abc001001 jpg')}
|
101
|
-
File.open(File.join(source_directory, 'intermediate_files', 'abc002001.jpg'), 'w') { |f| f.write('abc002001 jpg')}
|
102
|
-
Dir.mkdir(File.join(source_directory, 'targets'))
|
103
|
-
File.open(File.join(source_directory, 'targets', 'T001.tif'), 'w') { |f| f.write('T001') }
|
104
|
-
File.open(File.join(source_directory, 'targets', 'T002.tif'), 'w') { |f| f.write('T002') }
|
105
|
-
end
|
106
|
-
|
107
|
-
describe 'external checksum files' do
|
108
|
-
describe 'external checksum file' do
|
109
|
-
before do
|
110
|
-
File.open(checksums, 'w') do |f|
|
111
|
-
f << File.open(checksum_file_template).read.gsub('SOURCE_DIRECTORY', source_directory)
|
112
|
-
end
|
113
|
-
converter_args[:checksums] = checksums
|
114
|
-
end
|
115
|
-
describe 'mismatch' do
|
116
|
-
let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'bad-checksums-sha1.txt') }
|
117
|
-
let(:checksum_errors) {
|
118
|
-
[ I18n.translate('errors.checksum_mismatch', { c1: 'd0a2f2482783ae3c83d06f3cdeaa1a306cc043ad',
|
119
|
-
f1: File.join(source_directory, 'abc001002.tif'),
|
120
|
-
c2: 'd0a2f2482783ae3c38d06f3cdeaa1a306cc043ad',
|
121
|
-
f2: File.join(target_directory, 'data/abc001/abc001002.tif') }),
|
122
|
-
I18n.translate('errors.checksum_mismatch', { c1: 'c227abc095d3b758ab1c1c1c9e922494b6b6e0b0',
|
123
|
-
f1: File.join(source_directory, 'g/abc003001.wav'),
|
124
|
-
c2: 'c227abc095d3b758051c1c1c9e922494b6b6e0b0',
|
125
|
-
f2: File.join(target_directory, 'data/abc003/abc003001.wav') }),
|
126
|
-
I18n.translate('errors.checksum_mismatch', { c1: '260b3c2d20a1726de96671d29f73ba09d13b61ba',
|
127
|
-
f1: File.join(source_directory, 'intermediate_files/abc002001.jpg'),
|
128
|
-
c2: '260b3c2d20a7126de96671d29f73ba09d13b61ba',
|
129
|
-
f2: File.join(target_directory, 'data/intermediate_files/abc002001.jpg') }),
|
130
|
-
I18n.translate('errors.checksum_mismatch', { c1: 'a08c4d5a76d1b8735587be6ffcba66a9baf475c4',
|
131
|
-
f1: File.join(source_directory, 'targets/T001.tif'),
|
132
|
-
c2: 'a08c4d5a76d1b8734487be6ffcba66a9baf475c4',
|
133
|
-
f2: File.join(target_directory, 'data/dpc_targets/T001.tif') })
|
134
|
-
]
|
135
|
-
}
|
136
|
-
describe 'files are copied' do
|
137
|
-
before { converter_args[:copy_files] = true }
|
138
|
-
it_behaves_like 'a conversion to standard ingest format'
|
139
|
-
end
|
140
|
-
describe 'files are not copied' do
|
141
|
-
before { converter_args[:copy_files] = false }
|
142
|
-
it_behaves_like 'a conversion to standard ingest format'
|
143
|
-
end
|
144
|
-
end
|
145
|
-
describe 'no mismatch' do
|
146
|
-
let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'good-checksums-sha1.txt') }
|
147
|
-
let(:checksum_errors) { [] }
|
148
|
-
describe 'files are copied' do
|
149
|
-
before { converter_args[:copy_files] = true }
|
150
|
-
it_behaves_like 'a conversion to standard ingest format'
|
151
|
-
end
|
152
|
-
describe 'files are not copied' do
|
153
|
-
before { converter_args[:copy_files] = false }
|
154
|
-
it_behaves_like 'a conversion to standard ingest format'
|
155
|
-
end
|
156
|
-
end
|
157
|
-
end
|
158
|
-
|
159
|
-
describe 'no external checksum file' do
|
160
|
-
let(:checksum_errors) { [] }
|
161
|
-
describe 'files are copied' do
|
162
|
-
before { converter_args[:copy_files] = true }
|
163
|
-
it_behaves_like 'a conversion to standard ingest format'
|
164
|
-
end
|
165
|
-
describe 'files are not copied' do
|
166
|
-
before { converter_args[:copy_files] = false }
|
167
|
-
it_behaves_like 'a conversion to standard ingest format'
|
168
|
-
end
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
describe 'collection titles and admin sets' do
|
173
|
-
let(:checksum_errors) { [] }
|
174
|
-
describe 'collection title and admin set provided' do
|
175
|
-
let(:expected_metadata) { [
|
176
|
-
"path\tlocal_id\ttitle\tadmin_set",
|
177
|
-
"\t\tTest Collection\tfoo",
|
178
|
-
"abc001\tabc001\t\t",
|
179
|
-
"abc002\tabc002\t\t",
|
180
|
-
"abc003\tabc003\t\t",
|
181
|
-
"abc001/abc001001.tif\tabc001001\t\t",
|
182
|
-
"abc001/abc001002.tif\tabc001002\t\t",
|
183
|
-
"abc002/abc002001.tif\tabc002001\t\t",
|
184
|
-
"abc003/abc003001.wav\tabc003001\t\t",
|
185
|
-
"abc003/abc003002.wav\tabc003002\t\t",
|
186
|
-
"dpc_targets/T001.tif\tT001\t\t",
|
187
|
-
"dpc_targets/T002.tif\tT002\t\t"
|
188
|
-
] }
|
189
|
-
let(:expected_manifest) do
|
190
|
-
File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1-collection-title_admin_set.txt')).sort
|
191
|
-
end
|
192
|
-
before do
|
193
|
-
converter_args[:collection_title] = 'Test Collection'
|
194
|
-
converter_args[:admin_set] = 'foo'
|
195
|
-
end
|
196
|
-
it_behaves_like 'a conversion to standard ingest format'
|
197
|
-
end
|
198
|
-
end
|
199
|
-
|
200
|
-
end
|
201
|
-
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
module Ddr::IngestTools
|
2
|
-
|
3
|
-
RSpec.describe ChecksumFile do
|
4
|
-
|
5
|
-
subject { described_class.new(checksum_filepath) }
|
6
|
-
|
7
|
-
let(:checksum_filepath) { File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt') }
|
8
|
-
|
9
|
-
describe 'digest' do
|
10
|
-
it 'provides the requested digest' do
|
11
|
-
expect(subject.digest('data/abc001/abc001002.tif')).to eq('d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad')
|
12
|
-
expect(subject.digest('not/in/checksum/file.txt')).to be nil
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|