ddr-ingesttools 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/bin/mint_manifest_arks.rb +49 -0
  4. data/config/locales/en.yml +6 -6
  5. data/ddr-ingesttools.gemspec +3 -2
  6. data/lib/ddr/ingesttools.rb +1 -2
  7. data/lib/ddr/ingesttools/manifest_ark_minter.rb +22 -0
  8. data/lib/ddr/ingesttools/manifest_ark_minter/configuration.rb +13 -0
  9. data/lib/ddr/ingesttools/manifest_ark_minter/manifest_parser.rb +33 -0
  10. data/lib/ddr/ingesttools/manifest_ark_minter/manifest_updater.rb +80 -0
  11. data/lib/ddr/ingesttools/manifest_ark_minter/minter.rb +43 -0
  12. data/lib/ddr/ingesttools/version.rb +1 -1
  13. data/spec/fixtures/rdr_importer/configs/default.yml +3 -0
  14. data/spec/fixtures/rdr_importer/manifests/manifest_with_all_arks.csv +4 -0
  15. data/spec/fixtures/rdr_importer/manifests/manifest_with_no_arks.csv +4 -0
  16. data/spec/fixtures/rdr_importer/manifests/manifest_with_some_arks.csv +4 -0
  17. data/spec/manifest_ark_minter/manifest_parser_spec.rb +38 -0
  18. data/spec/manifest_ark_minter/manifest_updater_spec.rb +46 -0
  19. data/spec/manifest_ark_minter/minter_spec.rb +50 -0
  20. data/spec/spec_helper.rb +1 -0
  21. metadata +43 -25
  22. data/bin/convert_dpc_folder.rb +0 -61
  23. data/lib/ddr/ingesttools/checksum_file.rb +0 -28
  24. data/lib/ddr/ingesttools/dpc_folder_converter.rb +0 -35
  25. data/lib/ddr/ingesttools/dpc_folder_converter/converter.rb +0 -151
  26. data/spec/fixtures/files/bad-checksums-sha1.txt +0 -11
  27. data/spec/fixtures/files/good-checksums-sha1.txt +0 -11
  28. data/spec/fixtures/files/manifest-sha1-collection-title_admin_set.txt +0 -10
  29. data/spec/fixtures/files/manifest-sha1.txt +0 -10
  30. data/spec/integration/dpc_folder_converter_spec.rb +0 -201
  31. data/spec/unit/checksum_file_spec.rb +0 -17
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fb22fa65d3323b243dcafc3b999f55d42894b130
4
- data.tar.gz: f85324eb9f8efd80febb1de7c9d1a5f2f87898aa
3
+ metadata.gz: ef121cd2d5211caf9ba9b1beebbbc316d2b7214e
4
+ data.tar.gz: 922b0ca3eba98b5dc5d1bc07c126c842e1a093cc
5
5
  SHA512:
6
- metadata.gz: 88bbc9f8d76a0b11bc0dec289c9bba3fc54efc340b6bfe9a0d7855108b0d040e88c385facda2d5d892a8aad02a18e9f64dc4d243bc547cce4987e7e9dd71df4e
7
- data.tar.gz: 5e475ae95b4d2ca9c5c8dd4b1d9fa85efc34956f4aac5b9778f62f837b495dd55a5d6d683785526b002a23d131f8caefc6170e46dd485867d5b22e099bbedbae
6
+ metadata.gz: 89d1fc5adacf2986a47019e0cca3da6ba2a5cf8054d72a4c1a21b82805ef0a0711abc2569f5367878aa6e8b452d8e79482d63d4ac5fdb28deef459bb3175d820
7
+ data.tar.gz: 24ebbe3f71fce16d96e032ba6c283e0431ba7f87a3495144ab218908052837699ff0687430eceeffcac7c31a5ab75fda7a677759b685f0583c9257c290482493
data/.gitignore CHANGED
@@ -1,3 +1,5 @@
1
+ .byebug_history
1
2
  Gemfile.lock
2
3
  pkg
3
4
  tmp
5
+
@@ -0,0 +1,49 @@
1
#!/usr/bin/env ruby

require 'i18n'
require 'ddr/ingesttools'
require 'optparse'

# Command-line entry point for the Manifest ARK Minter.
#
# Prints a banner, parses the -c/-m/-o options, verifies that the mandatory
# ones (manifest and output) were supplied, and then hands the options to
# Ddr::IngestTools::ManifestArkMinter::ManifestUpdater.
# On an invalid or missing option the error and the usage text are printed
# and the process exits with a failure status.

options = {}

puts I18n.t('marquee')
puts I18n.t('suite.name')
puts I18n.t('manifest_ark_minter.name')
puts I18n.t('marquee')

# Parse command line arguments
parser = OptionParser.new do |opts|
  opts.banner = 'Usage: mint_manifest_arks.rb [options]'

  opts.on('-c', '--config CONFIG_FILE', 'Path to configuration file') do |v|
    options[:config] = v
  end

  opts.on('-m', '--manifest MANIFEST_FILE', 'Path to manifest file for which ARKs are to be minted') do |v|
    options[:manifest] = v
  end

  opts.on('-o', '--output OUTPUT_FILE', 'Path to which updated manifest file should be written') do |v|
    options[:output] = v
  end
end

begin
  parser.parse!

  # The options hash is keyed by symbols (see the opts.on blocks above), so
  # the lookup must use :config; a string key would always be nil and the
  # notice would be printed even when -c was given.
  if options[:config].nil?
    puts I18n.t('manifest_ark_minter.use_default_config_file',
                default_config_file: Ddr::IngestTools::ManifestArkMinter::ManifestUpdater::DEFAULT_CONFIG_FILE)
  end

  mandatory = %i[manifest output]
  missing = mandatory.select { |param| options[param].nil? }
  raise OptionParser::MissingArgument, missing.join(', ') unless missing.empty?
rescue OptionParser::InvalidOption, OptionParser::MissingArgument => e
  puts e.to_s
  puts parser
  exit(false)
end

# ManifestUpdater#initialize takes keyword arguments only. The double splat
# passes the hash as keywords explicitly, which Ruby 3 requires (it no longer
# converts a trailing positional Hash into keywords) and Ruby 2 accepts.
updater = Ddr::IngestTools::ManifestArkMinter::ManifestUpdater.new(**options)
updater.call
@@ -1,7 +1,7 @@
1
1
  en:
2
- errors:
3
- count: "%{count} error(s)"
4
- checksum_mismatch: |
5
- ***** Checksum mismatch:
6
- %{c1} %{f1}
7
- %{c2} %{f2}
2
+ marquee: ==================================================
3
+ suite:
4
+ name: DDR Ingest Tools
5
+ manifest_ark_minter:
6
+ name: Manifest ARK Minter
7
+ use_default_config_file: 'Will use default configuration file: %{default_config_file}'
@@ -18,10 +18,11 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_dependency "bagit", "~> 0.4"
22
- spec.add_dependency "i18n", "~> 0.8"
21
+ spec.add_dependency "ezid-client", "~> 1.7"
22
+ spec.add_dependency "i18n", "~> 1.0"
23
23
 
24
24
  spec.add_development_dependency "bundler", "~> 1.14"
25
+ spec.add_development_dependency "byebug"
25
26
  spec.add_development_dependency "rake", "~> 12.0"
26
27
  spec.add_development_dependency "rspec", "~> 3.0"
27
28
  end
@@ -1,6 +1,5 @@
1
1
  require_relative 'ingesttools/version'
2
- require_relative 'ingesttools/dpc_folder_converter'
3
- require_relative 'ingesttools/checksum_file'
2
+ require_relative 'ingesttools/manifest_ark_minter'
4
3
 
5
4
  require 'i18n'
6
5
 
@@ -0,0 +1,22 @@
1
+ require_relative 'manifest_ark_minter/configuration'
2
+ require_relative 'manifest_ark_minter/manifest_updater'
3
+ require_relative 'manifest_ark_minter/manifest_parser'
4
+ require_relative 'manifest_ark_minter/minter'
5
+
6
module Ddr::IngestTools
  # Namespace for the manifest ARK minting tool. The module itself holds a
  # single, lazily created Configuration object shared by its classes.
  module ManifestArkMinter

    # Replaces the module-wide configuration object.
    def self.configuration=(value)
      @configuration = value
    end

    # Returns the module-wide configuration, creating an empty
    # Configuration the first time it is requested.
    def self.configuration
      @configuration ||= Configuration.new
    end

    # Yields the module-wide configuration to the given block so that the
    # caller can assign its settings.
    def self.configure
      yield configuration
    end

  end
end
@@ -0,0 +1,13 @@
1
module Ddr::IngestTools::ManifestArkMinter
  # Settings holder for the three EZID values used by this module.
  # Every setting starts out as nil and is readable and writable.
  class Configuration

    attr_accessor :ezid_default_shoulder
    attr_accessor :ezid_password
    attr_accessor :ezid_user

    # Creates a configuration with all settings explicitly set to nil.
    def initialize
      @ezid_default_shoulder = @ezid_password = @ezid_user = nil
    end

  end
end
@@ -0,0 +1,33 @@
1
+ require 'csv'
2
+
3
module Ddr::IngestTools::ManifestArkMinter
  # Reads a CSV manifest (first line is the header row) and answers
  # questions about its 'ark' column.
  class ManifestParser

    ARK_HEADER = 'ark'

    attr_reader :manifest_file_path

    # @param manifest_file_path [String] path of the CSV manifest to read
    def initialize(manifest_file_path)
      @manifest_file_path = manifest_file_path
    end

    # Parses the manifest on first use and returns the same table object on
    # every later call.
    #
    # @return [CSV::Table]
    def as_csv_table
      @csv_table ||= CSV.read(manifest_file_path, headers: true)
    end

    # @return [Array] the header names of the manifest
    def headers
      as_csv_table.headers
    end

    # True when at least one row has no value under the 'ark' header; a
    # manifest without that header at all therefore counts as missing ARKs
    # whenever it has any rows.
    #
    # @return [Boolean]
    def arks_missing?
      as_csv_table.any? { |row| row[ARK_HEADER].nil? }
    end

  end
end
@@ -0,0 +1,80 @@
1
require 'fileutils'
require 'logger'
require 'tempfile'
require 'yaml'
4
+
5
module Ddr::IngestTools::ManifestArkMinter
  # Mints an ARK for every manifest row that lacks one and writes the
  # resulting manifest to a separate output file. When no row is missing an
  # ARK, nothing is minted and no output file is written.
  class ManifestUpdater

    attr_reader :config, :logger, :manifest, :output

    DEFAULT_CONFIG_FILE = 'manifest_ark_minter_config.yml'

    # @param config [String] path of the YAML configuration file; it must
    #   contain the keys ezid_default_shoulder, ezid_password and ezid_user
    # @param manifest [String] path of the CSV manifest to read
    # @param output [String] path to which the updated manifest is written
    # @param logger [Logger, nil] destination for progress messages; a logger
    #   writing to STDOUT is used when none is given
    def initialize(config: DEFAULT_CONFIG_FILE, manifest:, output:, logger: nil)
      @config = config
      @manifest = manifest
      @output = output
      @logger = logger || Logger.new(STDOUT)
    end

    # Loads the configuration, then either updates the manifest or logs that
    # there is nothing to do.
    def call
      configure
      if needs_updating?
        update
      else
        logger.info("Manifest file already has ARKs ... nothing to mint")
      end
    end

    private

    # Copies the EZID settings from the YAML file into the module-wide
    # configuration. Hash#fetch raises KeyError when a setting is absent.
    def configure
      # File.read is used rather than IO.read because the path comes from the
      # command line and IO.read treats a leading "|" as a command to run.
      # NOTE(review): YAML.load may instantiate arbitrary objects on older
      # Psych versions; consider YAML.safe_load if the configuration file can
      # ever come from an untrusted source.
      settings = YAML.load(File.read(config))
      # The block parameter is deliberately not called "config", which would
      # shadow the attribute reader of the same name.
      Ddr::IngestTools::ManifestArkMinter.configure do |module_config|
        module_config.ezid_default_shoulder = settings.fetch('ezid_default_shoulder')
        module_config.ezid_password = settings.fetch('ezid_password')
        module_config.ezid_user = settings.fetch('ezid_user')
      end
    end

    def update
      update_manifest_table
      write_updated_manifest
    end

    # Fills in the ARK column of every row that has no value there and logs
    # how many ARKs were minted.
    def update_manifest_table
      logger.info("Minting ARKs for manifest file")
      mint_counter = 0
      manifest_as_csv_table.each do |row|
        # Same header name the parser uses to decide whether ARKs are missing.
        next if row[ManifestParser::ARK_HEADER]

        row[ManifestParser::ARK_HEADER] = minter.mint
        mint_counter += 1
      end
      logger.info("Minted #{mint_counter} ARK(s)")
    end

    def manifest_as_csv_table
      @manifest_as_csv_table ||= parser.as_csv_table
    end

    # Serializes the (updated) table as CSV into the output file.
    def write_updated_manifest
      File.write(output, manifest_as_csv_table.to_csv)
      logger.info("Updated manifest file is at #{output}")
    end

    def needs_updating?
      parser.arks_missing?
    end

    def minter
      @minter ||= Minter.new
    end

    def parser
      @parser ||= ManifestParser.new(manifest)
    end

  end
end
@@ -0,0 +1,43 @@
1
+ require 'ezid-client'
2
+
3
module Ddr::IngestTools::ManifestArkMinter
  # Wrapper around the ezid-client gem: constructing a Minter sets the
  # Ezid::Identifier defaults and the Ezid::Client configuration, and #mint
  # asks Ezid::Identifier for a new identifier.
  class Minter

    DEFAULT_EXPORT = 'no'.freeze
    DEFAULT_PROFILE = 'dc'.freeze
    DEFAULT_STATUS = Ezid::Status::RESERVED

    # Applies the identifier defaults first, then the client settings.
    def initialize
      configure_ark
      configure_client
    end

    # @return whatever Ezid::Identifier.mint returns
    def mint
      Ezid::Identifier.mint
    end

    private

    # Sets the defaults applied to identifiers created through Ezid::Identifier.
    def configure_ark
      Ezid::Identifier.defaults = { export: DEFAULT_EXPORT, profile: DEFAULT_PROFILE, status: DEFAULT_STATUS }
    end

    # Copies the module-wide EZID settings into the Ezid::Client
    # configuration and points the client's logger at the null device.
    def configure_client
      settings = Ddr::IngestTools::ManifestArkMinter.configuration
      Ezid::Client.configure do |client|
        client.default_shoulder = settings.ezid_default_shoulder
        client.password = settings.ezid_password
        client.user = settings.ezid_user
        client.logger = Logger.new(File::NULL)
      end
    end

  end
end
@@ -1,5 +1,5 @@
1
1
  module Ddr
2
2
  module IngestTools
3
- VERSION = '0.3.0'
3
+ VERSION = '0.4.0'
4
4
  end
5
5
  end
@@ -0,0 +1,3 @@
1
+ ezid_default_shoulder: ark:/99999/fk4
2
+ ezid_password: apitest
3
+ ezid_user: apitest
@@ -0,0 +1,4 @@
1
+ ark,visibility,title,contributor,resource_type,license,file,file,file
2
+ ark:/99999/fk4s76kg89,open,Test 1,"Smith, Sue",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv,data/data2.csv,docs/doc1.txt
3
+ ark:/99999/fk4ng5vp6m,,Test 2,"Jones, Bill",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data3.csv,docs/doc2.txt
4
+ ark:/99999/fk4hq54w3t,authenticated,Test 3,"Allen, Jane",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv
@@ -0,0 +1,4 @@
1
+ visibility,title,contributor,resource_type,license,file,file,file
2
+ open,Test 1,"Smith, Sue",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv,data/data2.csv,docs/doc1.txt
3
+ ,Test 2,"Jones, Bill",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data3.csv,docs/doc2.txt
4
+ authenticated,Test 3,"Allen, Jane",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv
@@ -0,0 +1,4 @@
1
+ ark,visibility,title,contributor,resource_type,license,file,file,file
2
+ ,open,Test 1,"Smith, Sue",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv,data/data2.csv,docs/doc1.txt
3
+ ark:/99999/fk4ng5vp6m,,Test 2,"Jones, Bill",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data3.csv,docs/doc2.txt
4
+ ,authenticated,Test 3,"Allen, Jane",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv
@@ -0,0 +1,38 @@
1
+ require 'spec_helper'
2
+
3
module Ddr::IngestTools::ManifestArkMinter

  # Exercises ManifestParser against the three manifest fixtures (all, some
  # and no ARKs). Each group only names its fixture file; the path is built
  # once below.
  RSpec.describe ManifestParser do

    subject { described_class.new(manifest_file) }

    let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', manifest_name) }

    describe '#as_csv_table' do
      let(:manifest_name) { 'manifest_with_some_arks.csv' }

      specify { expect(subject.as_csv_table).to be_a CSV::Table }
    end

    describe '#headers' do
      let(:manifest_name) { 'manifest_with_some_arks.csv' }
      let(:expected_headers) do
        ['ark', 'visibility', 'title', 'contributor', 'resource_type', 'license', 'file']
      end

      it 'parses out the list of headers' do
        expect(subject.headers).to include(*expected_headers)
      end
    end

    describe '#arks_missing?' do
      describe 'no arks assigned' do
        let(:manifest_name) { 'manifest_with_no_arks.csv' }

        specify { expect(subject.arks_missing?).to be true }
      end

      describe 'some arks assigned' do
        let(:manifest_name) { 'manifest_with_some_arks.csv' }

        specify { expect(subject.arks_missing?).to be true }
      end

      describe 'all arks assigned' do
        let(:manifest_name) { 'manifest_with_all_arks.csv' }

        specify { expect(subject.arks_missing?).to be false }
      end
    end
  end

end
@@ -0,0 +1,46 @@
1
+ require 'spec_helper'
2
+ require 'tempfile'
3
+
4
module Ddr::IngestTools::ManifestArkMinter

  # Runs ManifestUpdater against each manifest fixture, writing into a
  # temporary directory that is removed after every example. Minter#mint is
  # allowed to call through to the real implementation.
  RSpec.describe ManifestUpdater do

    subject do
      described_class.new(config: config_file, manifest: manifest_file, output: output_file)
    end

    let(:fixtures_dir) { File.join('spec', 'fixtures', 'rdr_importer') }
    let(:config_file) { File.join(fixtures_dir, 'configs', 'default.yml') }
    let(:manifest_file) { File.join(fixtures_dir, 'manifests', manifest_name) }
    let(:output_dir) { Dir.mktmpdir }
    let(:output_file) { File.join(output_dir, 'output.csv') }
    let(:written_arks) { CSV.read(output_file, headers: true)['ark'] }

    after { FileUtils.remove_dir(output_dir) }

    describe 'manifest has ARKs for all rows' do
      let(:manifest_name) { 'manifest_with_all_arks.csv' }

      it 'does not produce an output file' do
        expect { subject.call }.not_to change { File.exist?(output_file) }
      end
    end

    describe 'manifest has ARKs for some rows' do
      let(:manifest_name) { 'manifest_with_some_arks.csv' }

      it 'mints ARKs for rows without them' do
        expect_any_instance_of(Minter).to receive(:mint).exactly(2).times.and_call_original
        subject.call
        # The second row already carried an ARK, which must be left as it was.
        expect(written_arks).to match([ /ark:\/99999\/fk4/, 'ark:/99999/fk4ng5vp6m', /ark:\/99999\/fk4/ ])
      end
    end

    describe 'manifest has ARKs for no rows' do
      let(:manifest_name) { 'manifest_with_no_arks.csv' }

      it 'mints ARKs for every row' do
        expect_any_instance_of(Minter).to receive(:mint).exactly(3).times.and_call_original
        subject.call
        expect(written_arks).to all(match(/ark:\/99999\/fk4/))
      end
    end

  end

end
@@ -0,0 +1,50 @@
1
+ require 'spec_helper'
2
+
3
module Ddr::IngestTools::ManifestArkMinter

  # Checks that constructing a Minter configures Ezid::Identifier and
  # Ezid::Client, and that #mint delegates to Ezid::Identifier.mint.
  RSpec.describe Minter do

    # Makes the module-wide configuration return a Configuration filled with
    # the EZID test shoulder and credentials.
    def stub_module_configuration
      settings = Configuration.new
      settings.ezid_default_shoulder = 'ark:/99999/fk4'
      settings.ezid_password = 'apitest'
      settings.ezid_user = 'apitest'
      allow(Ddr::IngestTools::ManifestArkMinter).to receive(:configuration).and_return(settings)
    end

    describe '#initialize' do
      describe 'Ezid::Identifier defaults' do
        let(:ark_defaults) do
          {
            export: described_class::DEFAULT_EXPORT,
            profile: described_class::DEFAULT_PROFILE,
            status: described_class::DEFAULT_STATUS
          }
        end

        before { described_class.new }

        it 'configures Ezid::Identifer defaults' do
          expect(Ezid::Identifier.defaults).to match(ark_defaults)
        end
      end

      describe 'Ezid::Client configuration' do
        before do
          stub_module_configuration
          described_class.new
        end

        it 'configures the Ezid::Client' do
          client_config = Ezid::Client.config
          expect(client_config.default_shoulder).to eq('ark:/99999/fk4')
          expect(client_config.password).to eq('apitest')
          expect(client_config.user).to eq('apitest')
        end
      end
    end

    describe '#mint' do
      before { stub_module_configuration }

      it 'calls Ezid::Identifier to mint an ark' do
        expect(Ezid::Identifier).to receive(:mint)
        subject.mint
      end
    end

  end

end
@@ -1,3 +1,4 @@
1
+ require 'byebug'
1
2
  require 'ddr/ingesttools'
2
3
  require 'i18n'
3
4
 
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ddr-ingesttools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jim Coble
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-05 00:00:00.000000000 Z
11
+ date: 2018-03-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bagit
14
+ name: ezid-client
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0.4'
19
+ version: '1.7'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0.4'
26
+ version: '1.7'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: i18n
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0.8'
33
+ version: '1.0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0.8'
40
+ version: '1.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: bundler
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '1.14'
55
+ - !ruby/object:Gem::Dependency
56
+ name: byebug
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: rake
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -84,7 +98,7 @@ description: A collection of Ruby tools supporting ingest into the Duke Digital
84
98
  email:
85
99
  - jim.coble@duke.edu
86
100
  executables:
87
- - convert_dpc_folder.rb
101
+ - mint_manifest_arks.rb
88
102
  extensions: []
89
103
  extra_rdoc_files: []
90
104
  files:
@@ -96,21 +110,24 @@ files:
96
110
  - LICENSE.txt
97
111
  - README.md
98
112
  - Rakefile
99
- - bin/convert_dpc_folder.rb
113
+ - bin/mint_manifest_arks.rb
100
114
  - config/locales/en.yml
101
115
  - ddr-ingesttools.gemspec
102
116
  - lib/ddr/ingesttools.rb
103
- - lib/ddr/ingesttools/checksum_file.rb
104
- - lib/ddr/ingesttools/dpc_folder_converter.rb
105
- - lib/ddr/ingesttools/dpc_folder_converter/converter.rb
117
+ - lib/ddr/ingesttools/manifest_ark_minter.rb
118
+ - lib/ddr/ingesttools/manifest_ark_minter/configuration.rb
119
+ - lib/ddr/ingesttools/manifest_ark_minter/manifest_parser.rb
120
+ - lib/ddr/ingesttools/manifest_ark_minter/manifest_updater.rb
121
+ - lib/ddr/ingesttools/manifest_ark_minter/minter.rb
106
122
  - lib/ddr/ingesttools/version.rb
107
- - spec/fixtures/files/bad-checksums-sha1.txt
108
- - spec/fixtures/files/good-checksums-sha1.txt
109
- - spec/fixtures/files/manifest-sha1-collection-title_admin_set.txt
110
- - spec/fixtures/files/manifest-sha1.txt
111
- - spec/integration/dpc_folder_converter_spec.rb
123
+ - spec/fixtures/rdr_importer/configs/default.yml
124
+ - spec/fixtures/rdr_importer/manifests/manifest_with_all_arks.csv
125
+ - spec/fixtures/rdr_importer/manifests/manifest_with_no_arks.csv
126
+ - spec/fixtures/rdr_importer/manifests/manifest_with_some_arks.csv
127
+ - spec/manifest_ark_minter/manifest_parser_spec.rb
128
+ - spec/manifest_ark_minter/manifest_updater_spec.rb
129
+ - spec/manifest_ark_minter/minter_spec.rb
112
130
  - spec/spec_helper.rb
113
- - spec/unit/checksum_file_spec.rb
114
131
  homepage: https://github.com/duke-libraries/ddr-ingesttools
115
132
  licenses:
116
133
  - BSD-3-Clause
@@ -131,15 +148,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
131
148
  version: '0'
132
149
  requirements: []
133
150
  rubyforge_project:
134
- rubygems_version: 2.6.11
151
+ rubygems_version: 2.6.14
135
152
  signing_key:
136
153
  specification_version: 4
137
154
  summary: Ruby tools supporting ingest into the Duke Digital Repository.
138
155
  test_files:
139
- - spec/fixtures/files/bad-checksums-sha1.txt
140
- - spec/fixtures/files/good-checksums-sha1.txt
141
- - spec/fixtures/files/manifest-sha1-collection-title_admin_set.txt
142
- - spec/fixtures/files/manifest-sha1.txt
143
- - spec/integration/dpc_folder_converter_spec.rb
156
+ - spec/fixtures/rdr_importer/configs/default.yml
157
+ - spec/fixtures/rdr_importer/manifests/manifest_with_all_arks.csv
158
+ - spec/fixtures/rdr_importer/manifests/manifest_with_no_arks.csv
159
+ - spec/fixtures/rdr_importer/manifests/manifest_with_some_arks.csv
160
+ - spec/manifest_ark_minter/manifest_parser_spec.rb
161
+ - spec/manifest_ark_minter/manifest_updater_spec.rb
162
+ - spec/manifest_ark_minter/minter_spec.rb
144
163
  - spec/spec_helper.rb
145
- - spec/unit/checksum_file_spec.rb
@@ -1,61 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'ddr/ingesttools'
4
- require 'optparse'
5
-
6
- options = {}
7
-
8
- # Parse command line arguments
9
- parser = OptionParser.new do |opts|
10
- opts.banner = 'Usage: convert_dpc_folder.rb [options]'
11
-
12
- opts.on('-s', '--source SOURCE', 'Path to DPC Folder to be converted') do |v|
13
- options[:source] = v
14
- end
15
-
16
- opts.on('-t', '--target TARGET', 'Path to folder where Standard Ingest Format is to be built') do |v|
17
- options[:target] = v
18
- end
19
-
20
- opts.on('-i', '--item_id_length LENGTH', Integer, 'Number of characters to copy from the beginning of each file name',
21
- 'to use as the local ID of the item of which that file is a component') do |v|
22
- options[:item_id_length] = v
23
- end
24
-
25
- opts.on('-c', '--checksums [CHECKSUM_FILE]', 'External checksum file') do |v|
26
- options[:checksums] = v
27
- end
28
-
29
- opts.on('--[no-]copy_files', 'Copy files to target location instead of using a symlink') do |v|
30
- options[:copy_files] = v
31
- end
32
-
33
- opts.on('--collection_title [TITLE]', 'Title for collection',
34
- 'required if intending to create a collection-creating Standard Ingest') do |v|
35
- options[:collection_title] = v
36
- end
37
-
38
- opts.on('--admin_set [ADMIN_SET]', 'Admin set for collection',
39
- 'required if intending to create a collection-creating Standard Ingest') do |v|
40
- options[:admin_set] = v
41
- end
42
-
43
- end
44
-
45
- begin
46
- parser.parse!
47
- mandatory = [ :source, :target, :item_id_length ]
48
- missing = mandatory.select{ |param| options[param].nil? }
49
- unless missing.empty?
50
- raise OptionParser::MissingArgument.new(missing.join(', '))
51
- end
52
- rescue OptionParser::InvalidOption, OptionParser::MissingArgument
53
- puts $!.to_s
54
- puts parser
55
- exit(false)
56
- end
57
-
58
- converter = Ddr::IngestTools::DpcFolderConverter::Converter.new(options)
59
- results = converter.call
60
- puts I18n.translate('errors.count', { count: results.errors.size })
61
- results.errors.each { |e| puts e }
@@ -1,28 +0,0 @@
1
- module Ddr::IngestTools
2
- class ChecksumFile
3
-
4
- attr_reader :digests
5
-
6
- def initialize(checksum_filepath)
7
- @digests = digest_hash(checksum_filepath)
8
- end
9
-
10
- def digest(filepath)
11
- digests[filepath]
12
- end
13
-
14
- private
15
-
16
- def digest_hash(checksum_filepath)
17
- h = {}
18
- File.open(checksum_filepath, 'r') do |file|
19
- file.each_line do |line|
20
- digest, path = line.chomp.split
21
- h[path] = digest
22
- end
23
- end
24
- h
25
- end
26
-
27
- end
28
- end
@@ -1,35 +0,0 @@
1
- require_relative 'dpc_folder_converter/converter'
2
-
3
- module Ddr::IngestTools
4
- module DpcFolderConverter
5
- #Configuration defaults
6
- @config = {
7
- included_extensions: [ '.jpg', '.mov', '.mp3', '.mp4', '.pdf', '.tif', '.tiff', '.wav' ],
8
- csv_options: { :encoding=>"UTF-8", :col_sep=>"\t", :headers=>true, :write_headers=>true,
9
- :header_converters=>:symbol }
10
- }
11
-
12
- @valid_config_keys = @config.keys
13
-
14
- # Configure through hash
15
- def self.configure(opts = {})
16
- opts.each {|k,v| @config[k.to_sym] = v if @valid_config_keys.include?(k.to_sym)}
17
- end
18
-
19
- # Configure through yaml file
20
- def self.configure_with(path_to_yaml_file)
21
- begin
22
- config = YAML::load(IO.read(path_to_yaml_file))
23
- rescue Errno::ENOENT
24
- log(:warning, "YAML configuration file couldn't be found. Using defaults."); return
25
- rescue Psych::SyntaxError
26
- log(:warning, "YAML configuration file contains invalid syntax. Using defaults."); return
27
- end
28
- configure(config)
29
- end
30
-
31
- def self.config
32
- @config
33
- end
34
- end
35
- end
@@ -1,151 +0,0 @@
1
- require 'bagit'
2
- require 'csv'
3
- require 'fileutils'
4
- require 'find'
5
-
6
- module Ddr::IngestTools::DpcFolderConverter
7
- class Converter
8
-
9
- INTERMEDIATE_FILES_DIRNAME = 'intermediate_files'
10
- DPC_TARGETS_DIRNAME = 'targets'
11
- SIF_TARGETS_DIRNAME = 'dpc_targets'
12
- SIF_METADATA_FILENAME = 'metadata.txt'
13
- SIF_MANIFEST_SHA1_FILENAME = 'manifest-sha1.txt'
14
-
15
- Results = Struct.new(:file_map, :errors)
16
-
17
- attr_reader :source, :target, :data_dir, :item_id_length, :checksums, :copy_files, :collection_title,
18
- :admin_set, :metadata_headers
19
- attr_accessor :errors, :file_map, :local_id_metadata, :results
20
-
21
- def initialize(source:, target:, item_id_length:, checksums: nil, copy_files: false, collection_title: nil,
22
- admin_set: nil)
23
- @source = source
24
- @target = target
25
- @item_id_length = item_id_length
26
- @checksums = checksums
27
- @copy_files = copy_files
28
- @collection_title = collection_title
29
- @admin_set = admin_set
30
- @metadata_headers = [ 'path', 'local_id' ]
31
- @metadata_headers << 'title' unless collection_title.nil?
32
- @metadata_headers << 'admin_set' unless admin_set.nil?
33
- end
34
-
35
- def call
36
- setup
37
- scan_files(source)
38
- output_metadata
39
- bagitup
40
- validate_checksums if checksums
41
- Results.new(file_map, errors)
42
- end
43
-
44
- private
45
-
46
- def setup
47
- @data_dir = File.join(target, 'data')
48
- @errors = []
49
- @file_map = {}
50
- @local_id_metadata = {}
51
- FileUtils.mkdir_p data_dir
52
- end
53
-
54
- def included_extensions
55
- Ddr::IngestTools::DpcFolderConverter.config[:included_extensions]
56
- end
57
-
58
- def scan_files(dirpath, file_handler='handle_component'.to_sym)
59
- Dir.foreach(dirpath).each do |entry|
60
- next if [ '.', '..' ].include?(entry)
61
- path = File.join(dirpath, entry)
62
- if File.directory?(path)
63
- if entry == DPC_TARGETS_DIRNAME
64
- scan_files(path, :handle_target)
65
- elsif entry == INTERMEDIATE_FILES_DIRNAME
66
- scan_files(path, :handle_intermediate_file)
67
- else
68
- scan_files(path, file_handler)
69
- end
70
- else
71
- if included_extensions.include?(File.extname(entry))
72
- self.send(file_handler, path)
73
- end
74
- end
75
- end
76
- end
77
-
78
- def handle_component(file)
79
- base = File.basename(file, File.extname(file))
80
- item_id = item_id_length == 0 ? base : base[0, item_id_length]
81
- FileUtils.mkdir_p(File.join(data_dir, item_id))
82
- local_id_metadata[item_id] = item_id
83
- handle_file(file, item_id)
84
- local_id_metadata[File.join(item_id, File.basename(file))] = base
85
- end
86
-
87
- def handle_intermediate_file(file)
88
- FileUtils.mkdir_p(File.join(data_dir, INTERMEDIATE_FILES_DIRNAME))
89
- handle_file(file, INTERMEDIATE_FILES_DIRNAME)
90
- end
91
-
92
- def handle_target(file)
93
- base = File.basename(file, File.extname(file))
94
- FileUtils.mkdir_p(File.join(data_dir, SIF_TARGETS_DIRNAME))
95
- handle_file(file, SIF_TARGETS_DIRNAME)
96
- local_id_metadata[File.join(SIF_TARGETS_DIRNAME, File.basename(file))] = base
97
- end
98
-
99
- def handle_file(file, folder_name)
100
- if copy_files
101
- FileUtils.cp file, File.join(data_dir, folder_name)
102
- else
103
- FileUtils.ln_s file, File.join(data_dir, folder_name)
104
- end
105
- file_map[file] = File.join(data_dir, folder_name, File.basename(file))
106
- end
107
-
108
- def output_metadata
109
- metadata_rows = []
110
- case
111
- when collection_title && admin_set
112
- metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, collection_title, admin_set ])
113
- when collection_title
114
- metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, collection_title ])
115
- when admin_set
116
- metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, admin_set ])
117
- end
118
- local_id_metadata.each_pair do |k,v|
119
- row_elements = [ k, v ]
120
- row_elements << nil if collection_title
121
- row_elements << nil if admin_set
122
- metadata_rows << CSV::Row.new(metadata_headers, row_elements)
123
- end
124
- File.open(File.join(data_dir, SIF_METADATA_FILENAME), 'w') do |file|
125
- file.puts(metadata_headers.join(Ddr::IngestTools::DpcFolderConverter.config[:csv_options][:col_sep]))
126
- metadata_rows.each do |row|
127
- file.puts(row.to_csv(Ddr::IngestTools::DpcFolderConverter.config[:csv_options]))
128
- end
129
- end
130
- end
131
-
132
- def bagitup
133
- bag = BagIt::Bag.new(target)
134
- bag.manifest!
135
- end
136
-
137
- def validate_checksums
138
- external_checksums = Ddr::IngestTools::ChecksumFile.new(checksums)
139
- sif_manifest = Ddr::IngestTools::ChecksumFile.new(File.join(target, SIF_MANIFEST_SHA1_FILENAME))
140
- file_map.each do |source_path, target_path|
141
- external_checksum = external_checksums.digest(source_path)
142
- manifest_path = target_path.sub("#{target}/", '')
143
- sif_checksum = sif_manifest.digest(manifest_path)
144
- unless external_checksum == sif_checksum
145
- errors << I18n.translate('errors.checksum_mismatch', { c1: external_checksum, f1: source_path,
146
- c2: sif_checksum, f2: target_path })
147
- end
148
- end
149
- end
150
- end
151
- end
@@ -1,11 +0,0 @@
1
- 3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
2
- 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
3
- d0a2f2482783ae3c83d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
4
- 38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
5
- 1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
6
- c227abc095d3b758ab1c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
7
- 541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
8
- 49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
9
- 260b3c2d20a1726de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
10
- a08c4d5a76d1b8735587be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
11
- 40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
@@ -1,11 +0,0 @@
1
- 3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
2
- 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
3
- d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
4
- 38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
5
- 1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
6
- c227abc095d3b758051c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
7
- 541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
8
- 49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
9
- 260b3c2d20a7126de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
10
- a08c4d5a76d1b8734487be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
11
- 40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
@@ -1,10 +0,0 @@
1
- 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f data/abc001/abc001001.tif
2
- d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad data/abc001/abc001002.tif
3
- 38ee72ab417192589f3a54ef1016131c7d7e9e4e data/abc002/abc002001.tif
4
- c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
5
- 541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
6
- a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
7
- 40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
8
- 49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
9
- 260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
10
- 21f041cdd694f7755ed84b8cd2668214a43bad6c data/metadata.txt
@@ -1,10 +0,0 @@
1
- 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f data/abc001/abc001001.tif
2
- d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad data/abc001/abc001002.tif
3
- 38ee72ab417192589f3a54ef1016131c7d7e9e4e data/abc002/abc002001.tif
4
- c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
5
- 541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
6
- a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
7
- 40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
8
- 49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
9
- 260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
10
- 913699468893882d1dec463f3df1a405c7f32784 data/metadata.txt
@@ -1,201 +0,0 @@
1
- module Ddr::IngestTools::DpcFolderConverter
2
-
3
- RSpec.describe Converter do
4
-
5
- shared_examples 'a conversion to standard ingest format' do
6
- subject { described_class.new(converter_args) }
7
- it 'produces the correct standard ingest format directory' do
8
- results = subject.call
9
- # Target directory contains all the expected files and only the expected files
10
- expect(Array(Find.find(target_directory))).to match_array(expected_files)
11
- # Target content files are same as source content files
12
- expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001001.tif'),
13
- File.join(source_directory, 'abc001001.tif'))).to be true
14
- expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001002.tif'),
15
- File.join(source_directory, 'abc001002.tif'))).to be true
16
- expect(FileUtils.compare_file(File.join(data_directory, 'abc002', 'abc002001.tif'),
17
- File.join(source_directory, 'abc002001.tif'))).to be true
18
- expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003001.wav'),
19
- File.join(source_directory, 'g', 'abc003001.wav'))).to be true
20
- expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003002.wav'),
21
- File.join(source_directory, 'g', 'abc003002.wav'))).to be true
22
- expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T001.tif'),
23
- File.join(source_directory, 'targets', 'T001.tif'))).to be true
24
- expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T002.tif'),
25
- File.join(source_directory, 'targets', 'T002.tif'))).to be true
26
- expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
27
- File.join(source_directory, 'intermediate_files', 'abc001001.jpg'))).to be true
28
- expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
29
- File.join(source_directory, 'intermediate_files', 'abc002001.jpg'))).to be true
30
- # Generated metadata file contains the expected contents
31
- metadata_lines = File.readlines(File.join(data_directory, 'metadata.txt')).map(&:chomp)
32
- expect(metadata_lines).to match_array(expected_metadata)
33
- # Generated manifest contains the expected contents (ignoring line order)
34
- generated_manifest = File.readlines(File.join(File.join(target_directory, 'manifest-sha1.txt'))).sort
35
- expect(generated_manifest).to match_array(expected_manifest)
36
- # Conversion process produces the expected errors
37
- expect(results.errors).to match_array(checksum_errors)
38
- end
39
- end
40
-
41
- let(:source_directory) { Dir.mktmpdir('dpc') }
42
- let(:target_directory) { Dir.mktmpdir('sif') }
43
- let(:data_directory) { File.join(target_directory, 'data') }
44
- let(:item_id_length) { 6 }
45
- let(:checksums_directory) { Dir.mktmpdir('checksums') }
46
- let(:checksums) { File.join(checksums_directory, 'checksums-sha1.txt') }
47
- let(:converter_args) { { source: source_directory, target: target_directory, item_id_length: item_id_length } }
48
- let(:expected_files) { [
49
- target_directory,
50
- File.join(target_directory, 'bag-info.txt'),
51
- File.join(target_directory, 'bagit.txt'),
52
- data_directory,
53
- File.join(data_directory, 'abc001'),
54
- File.join(data_directory, 'abc001', 'abc001001.tif'),
55
- File.join(data_directory, 'abc001', 'abc001002.tif'),
56
- File.join(data_directory, 'abc002'),
57
- File.join(data_directory, 'abc002', 'abc002001.tif'),
58
- File.join(data_directory, 'abc003', 'abc003001.wav'),
59
- File.join(data_directory, 'abc003'),
60
- File.join(data_directory, 'abc003', 'abc003002.wav'),
61
- File.join(data_directory, 'dpc_targets'),
62
- File.join(data_directory, 'dpc_targets', 'T001.tif'),
63
- File.join(data_directory, 'dpc_targets', 'T002.tif'),
64
- File.join(data_directory, 'intermediate_files'),
65
- File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
66
- File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
67
- File.join(data_directory, 'metadata.txt'),
68
- File.join(target_directory, 'manifest-md5.txt'),
69
- File.join(target_directory, 'manifest-sha1.txt'),
70
- File.join(target_directory, 'tagmanifest-md5.txt'),
71
- File.join(target_directory, 'tagmanifest-sha1.txt')
72
- ] }
73
- let(:expected_metadata) { [
74
- "path\tlocal_id",
75
- "abc001\tabc001",
76
- "abc002\tabc002",
77
- "abc003\tabc003",
78
- "abc001/abc001001.tif\tabc001001",
79
- "abc001/abc001002.tif\tabc001002",
80
- "abc002/abc002001.tif\tabc002001",
81
- "abc003/abc003001.wav\tabc003001",
82
- "abc003/abc003002.wav\tabc003002",
83
- "dpc_targets/T001.tif\tT001",
84
- "dpc_targets/T002.tif\tT002"
85
- ] }
86
- let(:expected_manifest) do
87
- File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt')).sort
88
- end
89
-
90
- before do
91
- File.open(File.join(source_directory, 'Thumbs.db'), 'w') { |f| f.write('Thumbs') }
92
- File.open(File.join(source_directory, 'abc001001.tif'), 'w') { |f| f.write('abc001001') }
93
- File.open(File.join(source_directory, 'abc001002.tif'), 'w') { |f| f.write('abc001002') }
94
- File.open(File.join(source_directory, 'abc002001.tif'), 'w') { |f| f.write('abc002001') }
95
- File.open(File.join(source_directory, 'checksums.txt'), 'w') { |f| f.write('checksums') }
96
- Dir.mkdir(File.join(source_directory,'g'))
97
- File.open(File.join(source_directory, 'g', 'abc003001.wav'), 'w') { |f| f.write('abc003001') }
98
- File.open(File.join(source_directory, 'g', 'abc003002.wav'), 'w') { |f| f.write('abc003002') }
99
- Dir.mkdir(File.join(source_directory, 'intermediate_files'))
100
- File.open(File.join(source_directory, 'intermediate_files', 'abc001001.jpg'), 'w') { |f| f.write('abc001001 jpg')}
101
- File.open(File.join(source_directory, 'intermediate_files', 'abc002001.jpg'), 'w') { |f| f.write('abc002001 jpg')}
102
- Dir.mkdir(File.join(source_directory, 'targets'))
103
- File.open(File.join(source_directory, 'targets', 'T001.tif'), 'w') { |f| f.write('T001') }
104
- File.open(File.join(source_directory, 'targets', 'T002.tif'), 'w') { |f| f.write('T002') }
105
- end
106
-
107
- describe 'external checksum files' do
108
- describe 'external checksum file' do
109
- before do
110
- File.open(checksums, 'w') do |f|
111
- f << File.open(checksum_file_template).read.gsub('SOURCE_DIRECTORY', source_directory)
112
- end
113
- converter_args[:checksums] = checksums
114
- end
115
- describe 'mismatch' do
116
- let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'bad-checksums-sha1.txt') }
117
- let(:checksum_errors) {
118
- [ I18n.translate('errors.checksum_mismatch', { c1: 'd0a2f2482783ae3c83d06f3cdeaa1a306cc043ad',
119
- f1: File.join(source_directory, 'abc001002.tif'),
120
- c2: 'd0a2f2482783ae3c38d06f3cdeaa1a306cc043ad',
121
- f2: File.join(target_directory, 'data/abc001/abc001002.tif') }),
122
- I18n.translate('errors.checksum_mismatch', { c1: 'c227abc095d3b758ab1c1c1c9e922494b6b6e0b0',
123
- f1: File.join(source_directory, 'g/abc003001.wav'),
124
- c2: 'c227abc095d3b758051c1c1c9e922494b6b6e0b0',
125
- f2: File.join(target_directory, 'data/abc003/abc003001.wav') }),
126
- I18n.translate('errors.checksum_mismatch', { c1: '260b3c2d20a1726de96671d29f73ba09d13b61ba',
127
- f1: File.join(source_directory, 'intermediate_files/abc002001.jpg'),
128
- c2: '260b3c2d20a7126de96671d29f73ba09d13b61ba',
129
- f2: File.join(target_directory, 'data/intermediate_files/abc002001.jpg') }),
130
- I18n.translate('errors.checksum_mismatch', { c1: 'a08c4d5a76d1b8735587be6ffcba66a9baf475c4',
131
- f1: File.join(source_directory, 'targets/T001.tif'),
132
- c2: 'a08c4d5a76d1b8734487be6ffcba66a9baf475c4',
133
- f2: File.join(target_directory, 'data/dpc_targets/T001.tif') })
134
- ]
135
- }
136
- describe 'files are copied' do
137
- before { converter_args[:copy_files] = true }
138
- it_behaves_like 'a conversion to standard ingest format'
139
- end
140
- describe 'files are not copied' do
141
- before { converter_args[:copy_files] = false }
142
- it_behaves_like 'a conversion to standard ingest format'
143
- end
144
- end
145
- describe 'no mismatch' do
146
- let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'good-checksums-sha1.txt') }
147
- let(:checksum_errors) { [] }
148
- describe 'files are copied' do
149
- before { converter_args[:copy_files] = true }
150
- it_behaves_like 'a conversion to standard ingest format'
151
- end
152
- describe 'files are not copied' do
153
- before { converter_args[:copy_files] = false }
154
- it_behaves_like 'a conversion to standard ingest format'
155
- end
156
- end
157
- end
158
-
159
- describe 'no external checksum file' do
160
- let(:checksum_errors) { [] }
161
- describe 'files are copied' do
162
- before { converter_args[:copy_files] = true }
163
- it_behaves_like 'a conversion to standard ingest format'
164
- end
165
- describe 'files are not copied' do
166
- before { converter_args[:copy_files] = false }
167
- it_behaves_like 'a conversion to standard ingest format'
168
- end
169
- end
170
- end
171
-
172
- describe 'collection titles and admin sets' do
173
- let(:checksum_errors) { [] }
174
- describe 'collection title and admin set provided' do
175
- let(:expected_metadata) { [
176
- "path\tlocal_id\ttitle\tadmin_set",
177
- "\t\tTest Collection\tfoo",
178
- "abc001\tabc001\t\t",
179
- "abc002\tabc002\t\t",
180
- "abc003\tabc003\t\t",
181
- "abc001/abc001001.tif\tabc001001\t\t",
182
- "abc001/abc001002.tif\tabc001002\t\t",
183
- "abc002/abc002001.tif\tabc002001\t\t",
184
- "abc003/abc003001.wav\tabc003001\t\t",
185
- "abc003/abc003002.wav\tabc003002\t\t",
186
- "dpc_targets/T001.tif\tT001\t\t",
187
- "dpc_targets/T002.tif\tT002\t\t"
188
- ] }
189
- let(:expected_manifest) do
190
- File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1-collection-title_admin_set.txt')).sort
191
- end
192
- before do
193
- converter_args[:collection_title] = 'Test Collection'
194
- converter_args[:admin_set] = 'foo'
195
- end
196
- it_behaves_like 'a conversion to standard ingest format'
197
- end
198
- end
199
-
200
- end
201
- end
@@ -1,17 +0,0 @@
1
- module Ddr::IngestTools
2
-
3
- RSpec.describe ChecksumFile do
4
-
5
- subject { described_class.new(checksum_filepath) }
6
-
7
- let(:checksum_filepath) { File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt') }
8
-
9
- describe 'digest' do
10
- it 'provides the requested digest' do
11
- expect(subject.digest('data/abc001/abc001002.tif')).to eq('d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad')
12
- expect(subject.digest('not/in/checksum/file.txt')).to be nil
13
- end
14
- end
15
- end
16
-
17
- end