ddr-ingesttools 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (31) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/bin/mint_manifest_arks.rb +49 -0
  4. data/config/locales/en.yml +6 -6
  5. data/ddr-ingesttools.gemspec +3 -2
  6. data/lib/ddr/ingesttools.rb +1 -2
  7. data/lib/ddr/ingesttools/manifest_ark_minter.rb +22 -0
  8. data/lib/ddr/ingesttools/manifest_ark_minter/configuration.rb +13 -0
  9. data/lib/ddr/ingesttools/manifest_ark_minter/manifest_parser.rb +33 -0
  10. data/lib/ddr/ingesttools/manifest_ark_minter/manifest_updater.rb +80 -0
  11. data/lib/ddr/ingesttools/manifest_ark_minter/minter.rb +43 -0
  12. data/lib/ddr/ingesttools/version.rb +1 -1
  13. data/spec/fixtures/rdr_importer/configs/default.yml +3 -0
  14. data/spec/fixtures/rdr_importer/manifests/manifest_with_all_arks.csv +4 -0
  15. data/spec/fixtures/rdr_importer/manifests/manifest_with_no_arks.csv +4 -0
  16. data/spec/fixtures/rdr_importer/manifests/manifest_with_some_arks.csv +4 -0
  17. data/spec/manifest_ark_minter/manifest_parser_spec.rb +38 -0
  18. data/spec/manifest_ark_minter/manifest_updater_spec.rb +46 -0
  19. data/spec/manifest_ark_minter/minter_spec.rb +50 -0
  20. data/spec/spec_helper.rb +1 -0
  21. metadata +43 -25
  22. data/bin/convert_dpc_folder.rb +0 -61
  23. data/lib/ddr/ingesttools/checksum_file.rb +0 -28
  24. data/lib/ddr/ingesttools/dpc_folder_converter.rb +0 -35
  25. data/lib/ddr/ingesttools/dpc_folder_converter/converter.rb +0 -151
  26. data/spec/fixtures/files/bad-checksums-sha1.txt +0 -11
  27. data/spec/fixtures/files/good-checksums-sha1.txt +0 -11
  28. data/spec/fixtures/files/manifest-sha1-collection-title_admin_set.txt +0 -10
  29. data/spec/fixtures/files/manifest-sha1.txt +0 -10
  30. data/spec/integration/dpc_folder_converter_spec.rb +0 -201
  31. data/spec/unit/checksum_file_spec.rb +0 -17
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fb22fa65d3323b243dcafc3b999f55d42894b130
4
- data.tar.gz: f85324eb9f8efd80febb1de7c9d1a5f2f87898aa
3
+ metadata.gz: ef121cd2d5211caf9ba9b1beebbbc316d2b7214e
4
+ data.tar.gz: 922b0ca3eba98b5dc5d1bc07c126c842e1a093cc
5
5
  SHA512:
6
- metadata.gz: 88bbc9f8d76a0b11bc0dec289c9bba3fc54efc340b6bfe9a0d7855108b0d040e88c385facda2d5d892a8aad02a18e9f64dc4d243bc547cce4987e7e9dd71df4e
7
- data.tar.gz: 5e475ae95b4d2ca9c5c8dd4b1d9fa85efc34956f4aac5b9778f62f837b495dd55a5d6d683785526b002a23d131f8caefc6170e46dd485867d5b22e099bbedbae
6
+ metadata.gz: 89d1fc5adacf2986a47019e0cca3da6ba2a5cf8054d72a4c1a21b82805ef0a0711abc2569f5367878aa6e8b452d8e79482d63d4ac5fdb28deef459bb3175d820
7
+ data.tar.gz: 24ebbe3f71fce16d96e032ba6c283e0431ba7f87a3495144ab218908052837699ff0687430eceeffcac7c31a5ab75fda7a677759b685f0583c9257c290482493
data/.gitignore CHANGED
@@ -1,3 +1,5 @@
1
+ .byebug_history
1
2
  Gemfile.lock
2
3
  pkg
3
4
  tmp
5
+
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'i18n'
4
+ require 'ddr/ingesttools'
5
+ require 'optparse'
6
+
7
+ options = {}
8
+
9
+ puts I18n.t('marquee')
10
+ puts I18n.t('suite.name')
11
+ puts I18n.t('manifest_ark_minter.name')
12
+ puts I18n.t('marquee')
13
+
14
+ # Parse command line arguments
15
+ parser = OptionParser.new do |opts|
16
+ opts.banner = 'Usage: mint_manifest_arks.rb [options]'
17
+
18
+ opts.on('-c', '--config CONFIG_FILE', 'Path to configuration file') do |v|
19
+ options[:config] = v
20
+ end
21
+
22
+ opts.on('-m', '--manifest MANIFEST_FILE', 'Path to manifest file for which ARKs are to be minted') do |v|
23
+ options[:manifest] = v
24
+ end
25
+
26
+ opts.on('-o', '--output OUTPUT_FILE', 'Path to which updated manifest file should be written') do |v|
27
+ options[:output] = v
28
+ end
29
+ end
30
+
31
+ begin
32
+ parser.parse!
33
+ if options['config'].nil?
34
+ puts I18n.t('manifest_ark_minter.use_default_config_file',
35
+ default_config_file: Ddr::IngestTools::ManifestArkMinter::ManifestUpdater::DEFAULT_CONFIG_FILE)
36
+ end
37
+ mandatory = [ :manifest, :output ]
38
+ missing = mandatory.select{ |param| options[param].nil? }
39
+ unless missing.empty?
40
+ raise OptionParser::MissingArgument.new(missing.join(', '))
41
+ end
42
+ rescue OptionParser::InvalidOption, OptionParser::MissingArgument
43
+ puts $!.to_s
44
+ puts parser
45
+ exit(false)
46
+ end
47
+
48
+ updater = Ddr::IngestTools::ManifestArkMinter::ManifestUpdater.new(options)
49
+ updater.call
@@ -1,7 +1,7 @@
1
1
  en:
2
- errors:
3
- count: "%{count} error(s)"
4
- checksum_mismatch: |
5
- ***** Checksum mismatch:
6
- %{c1} %{f1}
7
- %{c2} %{f2}
2
+ marquee: ==================================================
3
+ suite:
4
+ name: DDR Ingest Tools
5
+ manifest_ark_minter:
6
+ name: Manifest ARK Minter
7
+ use_default_config_file: 'Will use default configuration file: %{default_config_file}'
@@ -18,10 +18,11 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_dependency "bagit", "~> 0.4"
22
- spec.add_dependency "i18n", "~> 0.8"
21
+ spec.add_dependency "ezid-client", "~> 1.7"
22
+ spec.add_dependency "i18n", "~> 1.0"
23
23
 
24
24
  spec.add_development_dependency "bundler", "~> 1.14"
25
+ spec.add_development_dependency "byebug"
25
26
  spec.add_development_dependency "rake", "~> 12.0"
26
27
  spec.add_development_dependency "rspec", "~> 3.0"
27
28
  end
@@ -1,6 +1,5 @@
1
1
  require_relative 'ingesttools/version'
2
- require_relative 'ingesttools/dpc_folder_converter'
3
- require_relative 'ingesttools/checksum_file'
2
+ require_relative 'ingesttools/manifest_ark_minter'
4
3
 
5
4
  require 'i18n'
6
5
 
@@ -0,0 +1,22 @@
1
+ require_relative 'manifest_ark_minter/configuration'
2
+ require_relative 'manifest_ark_minter/manifest_updater'
3
+ require_relative 'manifest_ark_minter/manifest_parser'
4
+ require_relative 'manifest_ark_minter/minter'
5
+
6
+ module Ddr::IngestTools
7
+ module ManifestArkMinter
8
+
9
+ class << self
10
+ attr_writer :configuration
11
+
12
+ def configuration
13
+ @configuration ||= Configuration.new
14
+ end
15
+
16
+ def configure
17
+ yield(configuration)
18
+ end
19
+ end
20
+
21
+ end
22
+ end
@@ -0,0 +1,13 @@
1
+ module Ddr::IngestTools::ManifestArkMinter
2
+ class Configuration
3
+
4
+ attr_accessor :ezid_default_shoulder, :ezid_password, :ezid_user
5
+
6
+ def initialize
7
+ @ezid_default_shoulder = nil
8
+ @ezid_password = nil
9
+ @ezid_user = nil
10
+ end
11
+
12
+ end
13
+ end
@@ -0,0 +1,33 @@
1
+ require 'csv'
2
+
3
+ module Ddr::IngestTools::ManifestArkMinter
4
+ class ManifestParser
5
+
6
+ attr_reader :manifest_file_path
7
+
8
+ ARK_HEADER = 'ark'
9
+
10
+ def initialize(manifest_file_path)
11
+ @manifest_file_path = manifest_file_path
12
+ end
13
+
14
+ def as_csv_table
15
+ @csv_table ||= CSV.read(manifest_file_path, headers: true)
16
+ end
17
+
18
+ def arks_missing?
19
+ arks.any? { |value| value.compact.empty? }
20
+ end
21
+
22
+ def headers
23
+ as_csv_table.headers
24
+ end
25
+
26
+ private
27
+
28
+ def arks
29
+ as_csv_table.values_at(ARK_HEADER)
30
+ end
31
+
32
+ end
33
+ end
@@ -0,0 +1,80 @@
1
+ require 'fileutils'
2
+ require 'logger'
3
+ require 'tempfile'
4
+
5
+ module Ddr::IngestTools::ManifestArkMinter
6
+ class ManifestUpdater
7
+
8
+ attr_reader :config, :logger, :manifest, :output
9
+
10
+ DEFAULT_CONFIG_FILE = 'manifest_ark_minter_config.yml'
11
+
12
+ def initialize(config: DEFAULT_CONFIG_FILE, manifest:, output:, logger: nil)
13
+ @config = config
14
+ @manifest = manifest
15
+ @output = output
16
+ @logger = logger || Logger.new(STDOUT)
17
+ end
18
+
19
+ def call
20
+ configure
21
+ if needs_updating?
22
+ update
23
+ else
24
+ logger.info("Manifest file already has ARKs ... nothing to mint")
25
+ end
26
+ end
27
+
28
+ private
29
+
30
+ def configure
31
+ conf = YAML::load(IO.read(config))
32
+ Ddr::IngestTools::ManifestArkMinter.configure do |config|
33
+ config.ezid_default_shoulder = conf.fetch('ezid_default_shoulder')
34
+ config.ezid_password = conf.fetch('ezid_password')
35
+ config.ezid_user = conf.fetch('ezid_user')
36
+ end
37
+ end
38
+
39
+ def update
40
+ update_manifest_table
41
+ write_updated_manifest
42
+ end
43
+
44
+ def update_manifest_table
45
+ logger.info("Minting ARKs for manifest file")
46
+ mint_counter = 0
47
+ manifest_as_csv_table.each do |row|
48
+ unless row['ark']
49
+ row['ark'] = minter.mint
50
+ mint_counter += 1
51
+ end
52
+ end
53
+ logger.info("Minted #{mint_counter} ARK(s)")
54
+ end
55
+
56
+ def manifest_as_csv_table
57
+ @manifest_as_csv_table ||= parser.as_csv_table
58
+ end
59
+
60
+ def write_updated_manifest
61
+ File.open(output, 'w') do |f|
62
+ f.write(manifest_as_csv_table.to_csv)
63
+ end
64
+ logger.info("Updated manifest file is at #{output}")
65
+ end
66
+
67
+ def needs_updating?
68
+ parser.arks_missing?
69
+ end
70
+
71
+ def minter
72
+ @minter ||= Minter.new
73
+ end
74
+
75
+ def parser
76
+ @parser ||= ManifestParser.new(manifest)
77
+ end
78
+
79
+ end
80
+ end
@@ -0,0 +1,43 @@
1
+ require 'ezid-client'
2
+
3
+ module Ddr::IngestTools::ManifestArkMinter
4
+ class Minter
5
+
6
+ DEFAULT_EXPORT = 'no'.freeze
7
+ DEFAULT_PROFILE = 'dc'.freeze
8
+ DEFAULT_STATUS = Ezid::Status::RESERVED
9
+
10
+ def initialize
11
+ configure_ark
12
+ configure_client
13
+ end
14
+
15
+ def mint
16
+ Ezid::Identifier.mint
17
+ end
18
+
19
+ private
20
+
21
+ def configure_ark
22
+ Ezid::Identifier.defaults = {
23
+ export: DEFAULT_EXPORT,
24
+ profile: DEFAULT_PROFILE,
25
+ status: DEFAULT_STATUS
26
+ }
27
+ end
28
+
29
+ def configure_client
30
+ Ezid::Client.configure do |config|
31
+ config.default_shoulder = module_configuration.ezid_default_shoulder
32
+ config.password = module_configuration.ezid_password
33
+ config.user = module_configuration.ezid_user
34
+ config.logger = Logger.new(File::NULL)
35
+ end
36
+ end
37
+
38
+ def module_configuration
39
+ Ddr::IngestTools::ManifestArkMinter.configuration
40
+ end
41
+
42
+ end
43
+ end
@@ -1,5 +1,5 @@
1
1
  module Ddr
2
2
  module IngestTools
3
- VERSION = '0.3.0'
3
+ VERSION = '0.4.0'
4
4
  end
5
5
  end
@@ -0,0 +1,3 @@
1
+ ezid_default_shoulder: ark:/99999/fk4
2
+ ezid_password: apitest
3
+ ezid_user: apitest
@@ -0,0 +1,4 @@
1
+ ark,visibility,title,contributor,resource_type,license,file,file,file
2
+ ark:/99999/fk4s76kg89,open,Test 1,"Smith, Sue",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv,data/data2.csv,docs/doc1.txt
3
+ ark:/99999/fk4ng5vp6m,,Test 2,"Jones, Bill",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data3.csv,docs/doc2.txt
4
+ ark:/99999/fk4hq54w3t,authenticated,Test 3,"Allen, Jane",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv
@@ -0,0 +1,4 @@
1
+ visibility,title,contributor,resource_type,license,file,file,file
2
+ open,Test 1,"Smith, Sue",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv,data/data2.csv,docs/doc1.txt
3
+ ,Test 2,"Jones, Bill",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data3.csv,docs/doc2.txt
4
+ authenticated,Test 3,"Allen, Jane",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv
@@ -0,0 +1,4 @@
1
+ ark,visibility,title,contributor,resource_type,license,file,file,file
2
+ ,open,Test 1,"Smith, Sue",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv,data/data2.csv,docs/doc1.txt
3
+ ark:/99999/fk4ng5vp6m,,Test 2,"Jones, Bill",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data3.csv,docs/doc2.txt
4
+ ,authenticated,Test 3,"Allen, Jane",dataset,"http://creativecommons.org/publicdomain/zero/1.0/",data/data1.csv
@@ -0,0 +1,38 @@
1
+ require 'spec_helper'
2
+
3
+ module Ddr::IngestTools::ManifestArkMinter
4
+
5
+ RSpec.describe ManifestParser do
6
+
7
+ subject { described_class.new(manifest_file) }
8
+
9
+ describe '#as_csv_table' do
10
+ let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_some_arks.csv') }
11
+ specify { expect(subject.as_csv_table).to be_a CSV::Table }
12
+ end
13
+
14
+ describe '#headers' do
15
+ let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_some_arks.csv') }
16
+ let(:expected_headers) { %w(ark visibility title contributor resource_type license file) }
17
+ it 'parses out the list of headers' do
18
+ expect(subject.headers).to include(*expected_headers)
19
+ end
20
+ end
21
+
22
+ describe '#arks_missing?' do
23
+ describe 'no arks assigned' do
24
+ let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_no_arks.csv') }
25
+ specify { expect(subject.arks_missing?).to be true }
26
+ end
27
+ describe 'some arks assigned' do
28
+ let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_some_arks.csv') }
29
+ specify { expect(subject.arks_missing?).to be true }
30
+ end
31
+ describe 'all arks assigned' do
32
+ let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_all_arks.csv') }
33
+ specify { expect(subject.arks_missing?).to be false }
34
+ end
35
+ end
36
+ end
37
+
38
+ end
@@ -0,0 +1,46 @@
1
+ require 'spec_helper'
2
+ require 'tempfile'
3
+
4
+ module Ddr::IngestTools::ManifestArkMinter
5
+
6
+ RSpec.describe ManifestUpdater do
7
+
8
+ subject { described_class.new(config: config_file, manifest: manifest_file, output: output_file) }
9
+
10
+ let(:config_file) { File.join('spec', 'fixtures', 'rdr_importer', 'configs', 'default.yml') }
11
+ let(:output_dir) { Dir.mktmpdir }
12
+ let(:output_file) { File.join(output_dir, 'output.csv') }
13
+
14
+ after { FileUtils.remove_dir output_dir }
15
+
16
+ describe 'manifest has ARKs for all rows' do
17
+ let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_all_arks.csv') }
18
+ it 'does not produce an output file' do
19
+ expect{ subject.call }.not_to change{ File.exist?(output_file) }
20
+ end
21
+ end
22
+
23
+ describe 'manifest has ARKs for some rows' do
24
+ let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_some_arks.csv') }
25
+ it 'mints ARKs for rows without them' do
26
+ expect_any_instance_of(Minter).to receive(:mint).exactly(2).times.and_call_original
27
+ subject.call
28
+ table = CSV.read(output_file, headers: true)
29
+ # expect(table['ark']).to all(match(/ark:\/99999\/fk4/))
30
+ expect(table['ark']).to match([ /ark:\/99999\/fk4/, 'ark:/99999/fk4ng5vp6m', /ark:\/99999\/fk4/ ])
31
+ end
32
+ end
33
+
34
+ describe 'manifest has ARKs for no rows' do
35
+ let(:manifest_file) { File.join('spec', 'fixtures', 'rdr_importer', 'manifests', 'manifest_with_no_arks.csv') }
36
+ it 'mints ARKs for every row' do
37
+ expect_any_instance_of(Minter).to receive(:mint).exactly(3).times.and_call_original
38
+ subject.call
39
+ table = CSV.read(output_file, headers: true)
40
+ expect(table['ark']).to all(match(/ark:\/99999\/fk4/))
41
+ end
42
+ end
43
+
44
+ end
45
+
46
+ end
@@ -0,0 +1,50 @@
1
+ require 'spec_helper'
2
+
3
+ module Ddr::IngestTools::ManifestArkMinter
4
+
5
+ RSpec.describe Minter do
6
+
7
+ describe '#initialize' do
8
+ describe 'Ezid::Identifier defaults' do
9
+ before { described_class.new }
10
+ let(:ark_defaults) { { export: described_class::DEFAULT_EXPORT,
11
+ profile: described_class::DEFAULT_PROFILE,
12
+ status: described_class::DEFAULT_STATUS } }
13
+ it 'configures Ezid::Identifer defaults' do
14
+ expect(Ezid::Identifier.defaults).to match(ark_defaults)
15
+ end
16
+ end
17
+ describe 'Ezid::Client configuration' do
18
+ let(:configuration) { Configuration.new }
19
+ before do
20
+ configuration.ezid_default_shoulder = 'ark:/99999/fk4'
21
+ configuration.ezid_password = 'apitest'
22
+ configuration.ezid_user = 'apitest'
23
+ allow(Ddr::IngestTools::ManifestArkMinter).to receive(:configuration) { configuration }
24
+ described_class.new
25
+ end
26
+ it 'configures the Ezid::Client' do
27
+ expect(Ezid::Client.config.default_shoulder).to eq('ark:/99999/fk4')
28
+ expect(Ezid::Client.config.password).to eq('apitest')
29
+ expect(Ezid::Client.config.user).to eq('apitest')
30
+ end
31
+ end
32
+ end
33
+
34
+ describe '#mint' do
35
+ let(:configuration) { Configuration.new }
36
+ before do
37
+ configuration.ezid_default_shoulder = 'ark:/99999/fk4'
38
+ configuration.ezid_password = 'apitest'
39
+ configuration.ezid_user = 'apitest'
40
+ allow(Ddr::IngestTools::ManifestArkMinter).to receive(:configuration) { configuration }
41
+ end
42
+ it 'calls Ezid::Identifier to mint an ark' do
43
+ expect(Ezid::Identifier).to receive(:mint)
44
+ subject.mint
45
+ end
46
+ end
47
+
48
+ end
49
+
50
+ end
@@ -1,3 +1,4 @@
1
+ require 'byebug'
1
2
  require 'ddr/ingesttools'
2
3
  require 'i18n'
3
4
 
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ddr-ingesttools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jim Coble
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-05 00:00:00.000000000 Z
11
+ date: 2018-03-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bagit
14
+ name: ezid-client
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0.4'
19
+ version: '1.7'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0.4'
26
+ version: '1.7'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: i18n
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0.8'
33
+ version: '1.0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0.8'
40
+ version: '1.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: bundler
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '1.14'
55
+ - !ruby/object:Gem::Dependency
56
+ name: byebug
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: rake
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -84,7 +98,7 @@ description: A collection of Ruby tools supporting ingest into the Duke Digital
84
98
  email:
85
99
  - jim.coble@duke.edu
86
100
  executables:
87
- - convert_dpc_folder.rb
101
+ - mint_manifest_arks.rb
88
102
  extensions: []
89
103
  extra_rdoc_files: []
90
104
  files:
@@ -96,21 +110,24 @@ files:
96
110
  - LICENSE.txt
97
111
  - README.md
98
112
  - Rakefile
99
- - bin/convert_dpc_folder.rb
113
+ - bin/mint_manifest_arks.rb
100
114
  - config/locales/en.yml
101
115
  - ddr-ingesttools.gemspec
102
116
  - lib/ddr/ingesttools.rb
103
- - lib/ddr/ingesttools/checksum_file.rb
104
- - lib/ddr/ingesttools/dpc_folder_converter.rb
105
- - lib/ddr/ingesttools/dpc_folder_converter/converter.rb
117
+ - lib/ddr/ingesttools/manifest_ark_minter.rb
118
+ - lib/ddr/ingesttools/manifest_ark_minter/configuration.rb
119
+ - lib/ddr/ingesttools/manifest_ark_minter/manifest_parser.rb
120
+ - lib/ddr/ingesttools/manifest_ark_minter/manifest_updater.rb
121
+ - lib/ddr/ingesttools/manifest_ark_minter/minter.rb
106
122
  - lib/ddr/ingesttools/version.rb
107
- - spec/fixtures/files/bad-checksums-sha1.txt
108
- - spec/fixtures/files/good-checksums-sha1.txt
109
- - spec/fixtures/files/manifest-sha1-collection-title_admin_set.txt
110
- - spec/fixtures/files/manifest-sha1.txt
111
- - spec/integration/dpc_folder_converter_spec.rb
123
+ - spec/fixtures/rdr_importer/configs/default.yml
124
+ - spec/fixtures/rdr_importer/manifests/manifest_with_all_arks.csv
125
+ - spec/fixtures/rdr_importer/manifests/manifest_with_no_arks.csv
126
+ - spec/fixtures/rdr_importer/manifests/manifest_with_some_arks.csv
127
+ - spec/manifest_ark_minter/manifest_parser_spec.rb
128
+ - spec/manifest_ark_minter/manifest_updater_spec.rb
129
+ - spec/manifest_ark_minter/minter_spec.rb
112
130
  - spec/spec_helper.rb
113
- - spec/unit/checksum_file_spec.rb
114
131
  homepage: https://github.com/duke-libraries/ddr-ingesttools
115
132
  licenses:
116
133
  - BSD-3-Clause
@@ -131,15 +148,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
131
148
  version: '0'
132
149
  requirements: []
133
150
  rubyforge_project:
134
- rubygems_version: 2.6.11
151
+ rubygems_version: 2.6.14
135
152
  signing_key:
136
153
  specification_version: 4
137
154
  summary: Ruby tools supporting ingest into the Duke Digital Repository.
138
155
  test_files:
139
- - spec/fixtures/files/bad-checksums-sha1.txt
140
- - spec/fixtures/files/good-checksums-sha1.txt
141
- - spec/fixtures/files/manifest-sha1-collection-title_admin_set.txt
142
- - spec/fixtures/files/manifest-sha1.txt
143
- - spec/integration/dpc_folder_converter_spec.rb
156
+ - spec/fixtures/rdr_importer/configs/default.yml
157
+ - spec/fixtures/rdr_importer/manifests/manifest_with_all_arks.csv
158
+ - spec/fixtures/rdr_importer/manifests/manifest_with_no_arks.csv
159
+ - spec/fixtures/rdr_importer/manifests/manifest_with_some_arks.csv
160
+ - spec/manifest_ark_minter/manifest_parser_spec.rb
161
+ - spec/manifest_ark_minter/manifest_updater_spec.rb
162
+ - spec/manifest_ark_minter/minter_spec.rb
144
163
  - spec/spec_helper.rb
145
- - spec/unit/checksum_file_spec.rb
@@ -1,61 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'ddr/ingesttools'
4
- require 'optparse'
5
-
6
- options = {}
7
-
8
- # Parse command line arguments
9
- parser = OptionParser.new do |opts|
10
- opts.banner = 'Usage: convert_dpc_folder.rb [options]'
11
-
12
- opts.on('-s', '--source SOURCE', 'Path to DPC Folder to be converted') do |v|
13
- options[:source] = v
14
- end
15
-
16
- opts.on('-t', '--target TARGET', 'Path to folder where Standard Ingest Format is to be built') do |v|
17
- options[:target] = v
18
- end
19
-
20
- opts.on('-i', '--item_id_length LENGTH', Integer, 'Number of characters to copy from the beginning of each file name',
21
- 'to use as the local ID of the item of which that file is a component') do |v|
22
- options[:item_id_length] = v
23
- end
24
-
25
- opts.on('-c', '--checksums [CHECKSUM_FILE]', 'External checksum file') do |v|
26
- options[:checksums] = v
27
- end
28
-
29
- opts.on('--[no-]copy_files', 'Copy files to target location instead of using a symlink') do |v|
30
- options[:copy_files] = v
31
- end
32
-
33
- opts.on('--collection_title [TITLE]', 'Title for collection',
34
- 'required if intending to create a collection-creating Standard Ingest') do |v|
35
- options[:collection_title] = v
36
- end
37
-
38
- opts.on('--admin_set [ADMIN_SET]', 'Admin set for collection',
39
- 'required if intending to create a collection-creating Standard Ingest') do |v|
40
- options[:admin_set] = v
41
- end
42
-
43
- end
44
-
45
- begin
46
- parser.parse!
47
- mandatory = [ :source, :target, :item_id_length ]
48
- missing = mandatory.select{ |param| options[param].nil? }
49
- unless missing.empty?
50
- raise OptionParser::MissingArgument.new(missing.join(', '))
51
- end
52
- rescue OptionParser::InvalidOption, OptionParser::MissingArgument
53
- puts $!.to_s
54
- puts parser
55
- exit(false)
56
- end
57
-
58
- converter = Ddr::IngestTools::DpcFolderConverter::Converter.new(options)
59
- results = converter.call
60
- puts I18n.translate('errors.count', { count: results.errors.size })
61
- results.errors.each { |e| puts e }
@@ -1,28 +0,0 @@
1
- module Ddr::IngestTools
2
- class ChecksumFile
3
-
4
- attr_reader :digests
5
-
6
- def initialize(checksum_filepath)
7
- @digests = digest_hash(checksum_filepath)
8
- end
9
-
10
- def digest(filepath)
11
- digests[filepath]
12
- end
13
-
14
- private
15
-
16
- def digest_hash(checksum_filepath)
17
- h = {}
18
- File.open(checksum_filepath, 'r') do |file|
19
- file.each_line do |line|
20
- digest, path = line.chomp.split
21
- h[path] = digest
22
- end
23
- end
24
- h
25
- end
26
-
27
- end
28
- end
@@ -1,35 +0,0 @@
1
- require_relative 'dpc_folder_converter/converter'
2
-
3
- module Ddr::IngestTools
4
- module DpcFolderConverter
5
- #Configuration defaults
6
- @config = {
7
- included_extensions: [ '.jpg', '.mov', '.mp3', '.mp4', '.pdf', '.tif', '.tiff', '.wav' ],
8
- csv_options: { :encoding=>"UTF-8", :col_sep=>"\t", :headers=>true, :write_headers=>true,
9
- :header_converters=>:symbol }
10
- }
11
-
12
- @valid_config_keys = @config.keys
13
-
14
- # Configure through hash
15
- def self.configure(opts = {})
16
- opts.each {|k,v| @config[k.to_sym] = v if @valid_config_keys.include?(k.to_sym)}
17
- end
18
-
19
- # Configure through yaml file
20
- def self.configure_with(path_to_yaml_file)
21
- begin
22
- config = YAML::load(IO.read(path_to_yaml_file))
23
- rescue Errno::ENOENT
24
- log(:warning, "YAML configuration file couldn't be found. Using defaults."); return
25
- rescue Psych::SyntaxError
26
- log(:warning, "YAML configuration file contains invalid syntax. Using defaults."); return
27
- end
28
- configure(config)
29
- end
30
-
31
- def self.config
32
- @config
33
- end
34
- end
35
- end
@@ -1,151 +0,0 @@
1
- require 'bagit'
2
- require 'csv'
3
- require 'fileutils'
4
- require 'find'
5
-
6
- module Ddr::IngestTools::DpcFolderConverter
7
- class Converter
8
-
9
- INTERMEDIATE_FILES_DIRNAME = 'intermediate_files'
10
- DPC_TARGETS_DIRNAME = 'targets'
11
- SIF_TARGETS_DIRNAME = 'dpc_targets'
12
- SIF_METADATA_FILENAME = 'metadata.txt'
13
- SIF_MANIFEST_SHA1_FILENAME = 'manifest-sha1.txt'
14
-
15
- Results = Struct.new(:file_map, :errors)
16
-
17
- attr_reader :source, :target, :data_dir, :item_id_length, :checksums, :copy_files, :collection_title,
18
- :admin_set, :metadata_headers
19
- attr_accessor :errors, :file_map, :local_id_metadata, :results
20
-
21
- def initialize(source:, target:, item_id_length:, checksums: nil, copy_files: false, collection_title: nil,
22
- admin_set: nil)
23
- @source = source
24
- @target = target
25
- @item_id_length = item_id_length
26
- @checksums = checksums
27
- @copy_files = copy_files
28
- @collection_title = collection_title
29
- @admin_set = admin_set
30
- @metadata_headers = [ 'path', 'local_id' ]
31
- @metadata_headers << 'title' unless collection_title.nil?
32
- @metadata_headers << 'admin_set' unless admin_set.nil?
33
- end
34
-
35
- def call
36
- setup
37
- scan_files(source)
38
- output_metadata
39
- bagitup
40
- validate_checksums if checksums
41
- Results.new(file_map, errors)
42
- end
43
-
44
- private
45
-
46
- def setup
47
- @data_dir = File.join(target, 'data')
48
- @errors = []
49
- @file_map = {}
50
- @local_id_metadata = {}
51
- FileUtils.mkdir_p data_dir
52
- end
53
-
54
- def included_extensions
55
- Ddr::IngestTools::DpcFolderConverter.config[:included_extensions]
56
- end
57
-
58
- def scan_files(dirpath, file_handler='handle_component'.to_sym)
59
- Dir.foreach(dirpath).each do |entry|
60
- next if [ '.', '..' ].include?(entry)
61
- path = File.join(dirpath, entry)
62
- if File.directory?(path)
63
- if entry == DPC_TARGETS_DIRNAME
64
- scan_files(path, :handle_target)
65
- elsif entry == INTERMEDIATE_FILES_DIRNAME
66
- scan_files(path, :handle_intermediate_file)
67
- else
68
- scan_files(path, file_handler)
69
- end
70
- else
71
- if included_extensions.include?(File.extname(entry))
72
- self.send(file_handler, path)
73
- end
74
- end
75
- end
76
- end
77
-
78
- def handle_component(file)
79
- base = File.basename(file, File.extname(file))
80
- item_id = item_id_length == 0 ? base : base[0, item_id_length]
81
- FileUtils.mkdir_p(File.join(data_dir, item_id))
82
- local_id_metadata[item_id] = item_id
83
- handle_file(file, item_id)
84
- local_id_metadata[File.join(item_id, File.basename(file))] = base
85
- end
86
-
87
- def handle_intermediate_file(file)
88
- FileUtils.mkdir_p(File.join(data_dir, INTERMEDIATE_FILES_DIRNAME))
89
- handle_file(file, INTERMEDIATE_FILES_DIRNAME)
90
- end
91
-
92
- def handle_target(file)
93
- base = File.basename(file, File.extname(file))
94
- FileUtils.mkdir_p(File.join(data_dir, SIF_TARGETS_DIRNAME))
95
- handle_file(file, SIF_TARGETS_DIRNAME)
96
- local_id_metadata[File.join(SIF_TARGETS_DIRNAME, File.basename(file))] = base
97
- end
98
-
99
- def handle_file(file, folder_name)
100
- if copy_files
101
- FileUtils.cp file, File.join(data_dir, folder_name)
102
- else
103
- FileUtils.ln_s file, File.join(data_dir, folder_name)
104
- end
105
- file_map[file] = File.join(data_dir, folder_name, File.basename(file))
106
- end
107
-
108
- def output_metadata
109
- metadata_rows = []
110
- case
111
- when collection_title && admin_set
112
- metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, collection_title, admin_set ])
113
- when collection_title
114
- metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, collection_title ])
115
- when admin_set
116
- metadata_rows << CSV::Row.new(metadata_headers, [ nil, nil, admin_set ])
117
- end
118
- local_id_metadata.each_pair do |k,v|
119
- row_elements = [ k, v ]
120
- row_elements << nil if collection_title
121
- row_elements << nil if admin_set
122
- metadata_rows << CSV::Row.new(metadata_headers, row_elements)
123
- end
124
- File.open(File.join(data_dir, SIF_METADATA_FILENAME), 'w') do |file|
125
- file.puts(metadata_headers.join(Ddr::IngestTools::DpcFolderConverter.config[:csv_options][:col_sep]))
126
- metadata_rows.each do |row|
127
- file.puts(row.to_csv(Ddr::IngestTools::DpcFolderConverter.config[:csv_options]))
128
- end
129
- end
130
- end
131
-
132
- def bagitup
133
- bag = BagIt::Bag.new(target)
134
- bag.manifest!
135
- end
136
-
137
- def validate_checksums
138
- external_checksums = Ddr::IngestTools::ChecksumFile.new(checksums)
139
- sif_manifest = Ddr::IngestTools::ChecksumFile.new(File.join(target, SIF_MANIFEST_SHA1_FILENAME))
140
- file_map.each do |source_path, target_path|
141
- external_checksum = external_checksums.digest(source_path)
142
- manifest_path = target_path.sub("#{target}/", '')
143
- sif_checksum = sif_manifest.digest(manifest_path)
144
- unless external_checksum == sif_checksum
145
- errors << I18n.translate('errors.checksum_mismatch', { c1: external_checksum, f1: source_path,
146
- c2: sif_checksum, f2: target_path })
147
- end
148
- end
149
- end
150
- end
151
- end
@@ -1,11 +0,0 @@
1
- 3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
2
- 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
3
- d0a2f2482783ae3c83d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
4
- 38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
5
- 1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
6
- c227abc095d3b758ab1c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
7
- 541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
8
- 49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
9
- 260b3c2d20a1726de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
10
- a08c4d5a76d1b8735587be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
11
- 40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
@@ -1,11 +0,0 @@
1
- 3201454891f1d63a1493f393e061491e2d65ffcd SOURCE_DIRECTORY/Thumbs.db
2
- 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f SOURCE_DIRECTORY/abc001001.tif
3
- d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad SOURCE_DIRECTORY/abc001002.tif
4
- 38ee72ab417192589f3a54ef1016131c7d7e9e4e SOURCE_DIRECTORY/abc002001.tif
5
- 1da7765e3ca71ed76395bc56dae69d64732beff8 SOURCE_DIRECTORY/checksums.txt
6
- c227abc095d3b758051c1c1c9e922494b6b6e0b0 SOURCE_DIRECTORY/g/abc003001.wav
7
- 541af8df7d2c631382bd0ec189476894d0323df5 SOURCE_DIRECTORY/g/abc003002.wav
8
- 49d77d27bf82ec3dafa1967490594883f5e8b432 SOURCE_DIRECTORY/intermediate_files/abc001001.jpg
9
- 260b3c2d20a7126de96671d29f73ba09d13b61ba SOURCE_DIRECTORY/intermediate_files/abc002001.jpg
10
- a08c4d5a76d1b8734487be6ffcba66a9baf475c4 SOURCE_DIRECTORY/targets/T001.tif
11
- 40f9993d06945544fa57af88f7c3e9102ecc69e3 SOURCE_DIRECTORY/targets/T002.tif
@@ -1,10 +0,0 @@
1
- 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f data/abc001/abc001001.tif
2
- d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad data/abc001/abc001002.tif
3
- 38ee72ab417192589f3a54ef1016131c7d7e9e4e data/abc002/abc002001.tif
4
- c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
5
- 541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
6
- a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
7
- 40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
8
- 49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
9
- 260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
10
- 21f041cdd694f7755ed84b8cd2668214a43bad6c data/metadata.txt
@@ -1,10 +0,0 @@
1
- 59ec01f979a76b968bc579e5cd0ceb3bcf3e629f data/abc001/abc001001.tif
2
- d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad data/abc001/abc001002.tif
3
- 38ee72ab417192589f3a54ef1016131c7d7e9e4e data/abc002/abc002001.tif
4
- c227abc095d3b758051c1c1c9e922494b6b6e0b0 data/abc003/abc003001.wav
5
- 541af8df7d2c631382bd0ec189476894d0323df5 data/abc003/abc003002.wav
6
- a08c4d5a76d1b8734487be6ffcba66a9baf475c4 data/dpc_targets/T001.tif
7
- 40f9993d06945544fa57af88f7c3e9102ecc69e3 data/dpc_targets/T002.tif
8
- 49d77d27bf82ec3dafa1967490594883f5e8b432 data/intermediate_files/abc001001.jpg
9
- 260b3c2d20a7126de96671d29f73ba09d13b61ba data/intermediate_files/abc002001.jpg
10
- 913699468893882d1dec463f3df1a405c7f32784 data/metadata.txt
@@ -1,201 +0,0 @@
1
- module Ddr::IngestTools::DpcFolderConverter
2
-
3
- RSpec.describe Converter do
4
-
5
- shared_examples 'a conversion to standard ingest format' do
6
- subject { described_class.new(converter_args) }
7
- it 'produces the correct standard ingest format directory' do
8
- results = subject.call
9
- # Target directory contains all the expected files and only the expected files
10
- expect(Array(Find.find(target_directory))).to match_array(expected_files)
11
- # Target content files are same as source content files
12
- expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001001.tif'),
13
- File.join(source_directory, 'abc001001.tif'))).to be true
14
- expect(FileUtils.compare_file(File.join(data_directory, 'abc001', 'abc001002.tif'),
15
- File.join(source_directory, 'abc001002.tif'))).to be true
16
- expect(FileUtils.compare_file(File.join(data_directory, 'abc002', 'abc002001.tif'),
17
- File.join(source_directory, 'abc002001.tif'))).to be true
18
- expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003001.wav'),
19
- File.join(source_directory, 'g', 'abc003001.wav'))).to be true
20
- expect(FileUtils.compare_file(File.join(data_directory, 'abc003', 'abc003002.wav'),
21
- File.join(source_directory, 'g', 'abc003002.wav'))).to be true
22
- expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T001.tif'),
23
- File.join(source_directory, 'targets', 'T001.tif'))).to be true
24
- expect(FileUtils.compare_file(File.join(data_directory, 'dpc_targets', 'T002.tif'),
25
- File.join(source_directory, 'targets', 'T002.tif'))).to be true
26
- expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
27
- File.join(source_directory, 'intermediate_files', 'abc001001.jpg'))).to be true
28
- expect(FileUtils.compare_file(File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
29
- File.join(source_directory, 'intermediate_files', 'abc002001.jpg'))).to be true
30
- # Generated metadata file contains the expected contents
31
- metadata_lines = File.readlines(File.join(data_directory, 'metadata.txt')).map(&:chomp)
32
- expect(metadata_lines).to match_array(expected_metadata)
33
- # Generated manifest contains the expected contents (ignoring line order)
34
- generated_manifest = File.readlines(File.join(File.join(target_directory, 'manifest-sha1.txt'))).sort
35
- expect(generated_manifest).to match_array(expected_manifest)
36
- # Conversion process produces the expected errors
37
- expect(results.errors).to match_array(checksum_errors)
38
- end
39
- end
40
-
41
- let(:source_directory) { Dir.mktmpdir('dpc') }
42
- let(:target_directory) { Dir.mktmpdir('sif') }
43
- let(:data_directory) { File.join(target_directory, 'data') }
44
- let(:item_id_length) { 6 }
45
- let(:checksums_directory) { Dir.mktmpdir('checksums') }
46
- let(:checksums) { File.join(checksums_directory, 'checksums-sha1.txt') }
47
- let(:converter_args) { { source: source_directory, target: target_directory, item_id_length: item_id_length } }
48
- let(:expected_files) { [
49
- target_directory,
50
- File.join(target_directory, 'bag-info.txt'),
51
- File.join(target_directory, 'bagit.txt'),
52
- data_directory,
53
- File.join(data_directory, 'abc001'),
54
- File.join(data_directory, 'abc001', 'abc001001.tif'),
55
- File.join(data_directory, 'abc001', 'abc001002.tif'),
56
- File.join(data_directory, 'abc002'),
57
- File.join(data_directory, 'abc002', 'abc002001.tif'),
58
- File.join(data_directory, 'abc003', 'abc003001.wav'),
59
- File.join(data_directory, 'abc003'),
60
- File.join(data_directory, 'abc003', 'abc003002.wav'),
61
- File.join(data_directory, 'dpc_targets'),
62
- File.join(data_directory, 'dpc_targets', 'T001.tif'),
63
- File.join(data_directory, 'dpc_targets', 'T002.tif'),
64
- File.join(data_directory, 'intermediate_files'),
65
- File.join(data_directory, 'intermediate_files', 'abc001001.jpg'),
66
- File.join(data_directory, 'intermediate_files', 'abc002001.jpg'),
67
- File.join(data_directory, 'metadata.txt'),
68
- File.join(target_directory, 'manifest-md5.txt'),
69
- File.join(target_directory, 'manifest-sha1.txt'),
70
- File.join(target_directory, 'tagmanifest-md5.txt'),
71
- File.join(target_directory, 'tagmanifest-sha1.txt')
72
- ] }
73
- let(:expected_metadata) { [
74
- "path\tlocal_id",
75
- "abc001\tabc001",
76
- "abc002\tabc002",
77
- "abc003\tabc003",
78
- "abc001/abc001001.tif\tabc001001",
79
- "abc001/abc001002.tif\tabc001002",
80
- "abc002/abc002001.tif\tabc002001",
81
- "abc003/abc003001.wav\tabc003001",
82
- "abc003/abc003002.wav\tabc003002",
83
- "dpc_targets/T001.tif\tT001",
84
- "dpc_targets/T002.tif\tT002"
85
- ] }
86
- let(:expected_manifest) do
87
- File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt')).sort
88
- end
89
-
90
- before do
91
- File.open(File.join(source_directory, 'Thumbs.db'), 'w') { |f| f.write('Thumbs') }
92
- File.open(File.join(source_directory, 'abc001001.tif'), 'w') { |f| f.write('abc001001') }
93
- File.open(File.join(source_directory, 'abc001002.tif'), 'w') { |f| f.write('abc001002') }
94
- File.open(File.join(source_directory, 'abc002001.tif'), 'w') { |f| f.write('abc002001') }
95
- File.open(File.join(source_directory, 'checksums.txt'), 'w') { |f| f.write('checksums') }
96
- Dir.mkdir(File.join(source_directory,'g'))
97
- File.open(File.join(source_directory, 'g', 'abc003001.wav'), 'w') { |f| f.write('abc003001') }
98
- File.open(File.join(source_directory, 'g', 'abc003002.wav'), 'w') { |f| f.write('abc003002') }
99
- Dir.mkdir(File.join(source_directory, 'intermediate_files'))
100
- File.open(File.join(source_directory, 'intermediate_files', 'abc001001.jpg'), 'w') { |f| f.write('abc001001 jpg')}
101
- File.open(File.join(source_directory, 'intermediate_files', 'abc002001.jpg'), 'w') { |f| f.write('abc002001 jpg')}
102
- Dir.mkdir(File.join(source_directory, 'targets'))
103
- File.open(File.join(source_directory, 'targets', 'T001.tif'), 'w') { |f| f.write('T001') }
104
- File.open(File.join(source_directory, 'targets', 'T002.tif'), 'w') { |f| f.write('T002') }
105
- end
106
-
107
- describe 'external checksum files' do
108
- describe 'external checksum file' do
109
- before do
110
- File.open(checksums, 'w') do |f|
111
- f << File.open(checksum_file_template).read.gsub('SOURCE_DIRECTORY', source_directory)
112
- end
113
- converter_args[:checksums] = checksums
114
- end
115
- describe 'mismatch' do
116
- let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'bad-checksums-sha1.txt') }
117
- let(:checksum_errors) {
118
- [ I18n.translate('errors.checksum_mismatch', { c1: 'd0a2f2482783ae3c83d06f3cdeaa1a306cc043ad',
119
- f1: File.join(source_directory, 'abc001002.tif'),
120
- c2: 'd0a2f2482783ae3c38d06f3cdeaa1a306cc043ad',
121
- f2: File.join(target_directory, 'data/abc001/abc001002.tif') }),
122
- I18n.translate('errors.checksum_mismatch', { c1: 'c227abc095d3b758ab1c1c1c9e922494b6b6e0b0',
123
- f1: File.join(source_directory, 'g/abc003001.wav'),
124
- c2: 'c227abc095d3b758051c1c1c9e922494b6b6e0b0',
125
- f2: File.join(target_directory, 'data/abc003/abc003001.wav') }),
126
- I18n.translate('errors.checksum_mismatch', { c1: '260b3c2d20a1726de96671d29f73ba09d13b61ba',
127
- f1: File.join(source_directory, 'intermediate_files/abc002001.jpg'),
128
- c2: '260b3c2d20a7126de96671d29f73ba09d13b61ba',
129
- f2: File.join(target_directory, 'data/intermediate_files/abc002001.jpg') }),
130
- I18n.translate('errors.checksum_mismatch', { c1: 'a08c4d5a76d1b8735587be6ffcba66a9baf475c4',
131
- f1: File.join(source_directory, 'targets/T001.tif'),
132
- c2: 'a08c4d5a76d1b8734487be6ffcba66a9baf475c4',
133
- f2: File.join(target_directory, 'data/dpc_targets/T001.tif') })
134
- ]
135
- }
136
- describe 'files are copied' do
137
- before { converter_args[:copy_files] = true }
138
- it_behaves_like 'a conversion to standard ingest format'
139
- end
140
- describe 'files are not copied' do
141
- before { converter_args[:copy_files] = false }
142
- it_behaves_like 'a conversion to standard ingest format'
143
- end
144
- end
145
- describe 'no mismatch' do
146
- let(:checksum_file_template) { File.join('spec', 'fixtures', 'files', 'good-checksums-sha1.txt') }
147
- let(:checksum_errors) { [] }
148
- describe 'files are copied' do
149
- before { converter_args[:copy_files] = true }
150
- it_behaves_like 'a conversion to standard ingest format'
151
- end
152
- describe 'files are not copied' do
153
- before { converter_args[:copy_files] = false }
154
- it_behaves_like 'a conversion to standard ingest format'
155
- end
156
- end
157
- end
158
-
159
- describe 'no external checksum file' do
160
- let(:checksum_errors) { [] }
161
- describe 'files are copied' do
162
- before { converter_args[:copy_files] = true }
163
- it_behaves_like 'a conversion to standard ingest format'
164
- end
165
- describe 'files are not copied' do
166
- before { converter_args[:copy_files] = false }
167
- it_behaves_like 'a conversion to standard ingest format'
168
- end
169
- end
170
- end
171
-
172
- describe 'collection titles and admin sets' do
173
- let(:checksum_errors) { [] }
174
- describe 'collection title and admin set provided' do
175
- let(:expected_metadata) { [
176
- "path\tlocal_id\ttitle\tadmin_set",
177
- "\t\tTest Collection\tfoo",
178
- "abc001\tabc001\t\t",
179
- "abc002\tabc002\t\t",
180
- "abc003\tabc003\t\t",
181
- "abc001/abc001001.tif\tabc001001\t\t",
182
- "abc001/abc001002.tif\tabc001002\t\t",
183
- "abc002/abc002001.tif\tabc002001\t\t",
184
- "abc003/abc003001.wav\tabc003001\t\t",
185
- "abc003/abc003002.wav\tabc003002\t\t",
186
- "dpc_targets/T001.tif\tT001\t\t",
187
- "dpc_targets/T002.tif\tT002\t\t"
188
- ] }
189
- let(:expected_manifest) do
190
- File.readlines(File.join('spec', 'fixtures', 'files', 'manifest-sha1-collection-title_admin_set.txt')).sort
191
- end
192
- before do
193
- converter_args[:collection_title] = 'Test Collection'
194
- converter_args[:admin_set] = 'foo'
195
- end
196
- it_behaves_like 'a conversion to standard ingest format'
197
- end
198
- end
199
-
200
- end
201
- end
@@ -1,17 +0,0 @@
1
- module Ddr::IngestTools
2
-
3
- RSpec.describe ChecksumFile do
4
-
5
- subject { described_class.new(checksum_filepath) }
6
-
7
- let(:checksum_filepath) { File.join('spec', 'fixtures', 'files', 'manifest-sha1.txt') }
8
-
9
- describe 'digest' do
10
- it 'provides the requested digest' do
11
- expect(subject.digest('data/abc001/abc001002.tif')).to eq('d0a2f2482783ae3c38d06f3cdeaa1a306cc043ad')
12
- expect(subject.digest('not/in/checksum/file.txt')).to be nil
13
- end
14
- end
15
- end
16
-
17
- end