hyrax-ingest 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +52 -0
  3. data/config/routes.rb +2 -0
  4. data/lib/hyrax/ingest.rb +12 -0
  5. data/lib/hyrax/ingest/batch_runner.rb +130 -0
  6. data/lib/hyrax/ingest/configuration.rb +54 -0
  7. data/lib/hyrax/ingest/engine.rb +6 -0
  8. data/lib/hyrax/ingest/errors.rb +186 -0
  9. data/lib/hyrax/ingest/fetcher.rb +55 -0
  10. data/lib/hyrax/ingest/fetcher/base.rb +78 -0
  11. data/lib/hyrax/ingest/fetcher/csv_file.rb +89 -0
  12. data/lib/hyrax/ingest/fetcher/date_time.rb +15 -0
  13. data/lib/hyrax/ingest/fetcher/literal.rb +24 -0
  14. data/lib/hyrax/ingest/fetcher/premis_event_type.rb +28 -0
  15. data/lib/hyrax/ingest/fetcher/rdf_uri.rb +21 -0
  16. data/lib/hyrax/ingest/fetcher/xml_file.rb +54 -0
  17. data/lib/hyrax/ingest/fetcher/yaml_file.rb +36 -0
  18. data/lib/hyrax/ingest/has_depositor.rb +13 -0
  19. data/lib/hyrax/ingest/has_iteration.rb +18 -0
  20. data/lib/hyrax/ingest/has_logger.rb +29 -0
  21. data/lib/hyrax/ingest/has_report.rb +17 -0
  22. data/lib/hyrax/ingest/has_shared_sip.rb +20 -0
  23. data/lib/hyrax/ingest/has_sip.rb +20 -0
  24. data/lib/hyrax/ingest/ingester.rb +75 -0
  25. data/lib/hyrax/ingest/ingester/active_fedora_base_ingester.rb +136 -0
  26. data/lib/hyrax/ingest/ingester/active_fedora_file_ingester.rb +17 -0
  27. data/lib/hyrax/ingest/ingester/active_fedora_property_assigner.rb +67 -0
  28. data/lib/hyrax/ingest/ingester/base.rb +28 -0
  29. data/lib/hyrax/ingest/ingester/file_set_ingester.rb +68 -0
  30. data/lib/hyrax/ingest/ingester/preservation_event_ingester.rb +27 -0
  31. data/lib/hyrax/ingest/ingester/work_ingester.rb +55 -0
  32. data/lib/hyrax/ingest/reporting.rb +13 -0
  33. data/lib/hyrax/ingest/reporting/configuration.rb +22 -0
  34. data/lib/hyrax/ingest/reporting/report.rb +79 -0
  35. data/lib/hyrax/ingest/reporting/views/hyrax_ingest_report.html.erb +77 -0
  36. data/lib/hyrax/ingest/reporting/views/hyrax_ingest_report.txt.erb +21 -0
  37. data/lib/hyrax/ingest/runner.rb +103 -0
  38. data/lib/hyrax/ingest/sip.rb +92 -0
  39. data/lib/hyrax/ingest/transformer.rb +42 -0
  40. data/lib/hyrax/ingest/transformer/base.rb +12 -0
  41. data/lib/hyrax/ingest/transformer/to_date.rb +33 -0
  42. data/lib/hyrax/ingest/version.rb +5 -0
  43. data/lib/tasks/ingest_tasks.rake +22 -0
  44. metadata +330 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c0d5eeb006d3931a1a7668b71e9a0ce85d603bbff3025aba8552eb42958806d4
4
+ data.tar.gz: 97de1d90af9b90972ceb24df28ffebd9687d9ac63fc18fc4d59ec96e54359949
5
+ SHA512:
6
+ metadata.gz: 1175542851df962db08f1e1568ff5d07a53a9630995d9af0ff1b0c6a7d7cff08a871e4c03d28f479998198bd6e2f8b990874510f8424259440ae21e468429c0c
7
+ data.tar.gz: 12ffb343ec907eefe6c8ac367e8bb963239a32f2aea789189e9e11ed21c8eacb27983d8365adf42505531ff523995b79701d998425f907290d6b74467acb99c8
@@ -0,0 +1,52 @@
1
+ # Hyrax::Ingest
2
+
3
+ [![Build Status](https://travis-ci.org/IUBLibTech/hyrax-ingest.svg?branch=master)](https://travis-ci.org/IUBLibTech/hyrax-ingest)
4
+
5
+ Hyrax::Ingest is an extensible plugin for ingesting content and metadata into
6
+ a [Hyrax](https://github.com/samvera/hyrax) repository based on a declarative
7
+ configuration.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ ```ruby
14
+ gem 'hyrax-ingest'
15
+ ```
16
+
17
+ And then execute:
18
+
19
+ $ bundle
20
+
21
+ Or install it yourself as:
22
+
23
+ $ gem install hyrax-ingest
24
+
25
+ ## Overview
26
+
27
+ The Hyrax::Ingest plugin defines "ingest" as the process of mapping content
28
+ and data from one or more sources to one or more persistence layers to be used
29
+ by a Hyrax application.
30
+
31
+
32
+ ## Development
33
+
34
+ After checking out the repo, run `bin/setup` to install dependencies. Then,
35
+ run `rake spec` to run the tests. You can also run `bin/console` for an
36
+ interactive prompt that will allow you to experiment.
37
+
38
+ To install this gem onto your local machine, run `bundle exec rake install`.
39
+ To release a new version, update the version number in `version.rb`, and then
40
+ run `bundle exec rake release`, which will create a git tag for the version,
41
+ push git commits and tags, and push the `.gem` file to
42
+ [rubygems.org](https://rubygems.org).
43
+
44
+ ## Contributing
45
+
46
+ TODO: Add CONTRIBUTING.md per the Samvera standard, and refer to it here.
47
+
48
+ ## License
49
+
50
+ The gem is available as open source under the terms of the [MIT
51
+ License](http://opensource.org/licenses/MIT).
52
+
@@ -0,0 +1,2 @@
1
+ Rails.application.routes.draw do # No routes are defined inside this block.
2
+ end
@@ -0,0 +1,12 @@
1
+ require 'hyrax/ingest/engine'
2
+ require 'hyrax'
3
+
4
module Hyrax
  module Ingest
    class << self
      # Absolute path of the directory two levels above the directory that
      # contains this file (for a file at lib/hyrax/ingest.rb, that is the
      # directory that contains lib/).
      #
      # Uses `__dir__`, which is already absolute, instead of walking up from
      # `__FILE__`.
      #
      # @return [String] absolute directory path
      def root
        File.expand_path('../..', __dir__)
      end
    end
  end
end
@@ -0,0 +1,130 @@
1
+ require 'hyrax/ingest/runner'
2
+ require 'hyrax/ingest/has_report'
3
+ require 'hyrax/ingest/has_logger'
4
+ require 'hyrax/ingest/has_depositor'
5
+ require 'interloper'
6
+
7
module Hyrax
  module Ingest
    # Builds one Hyrax::Ingest::Runner per iteration and runs them in order,
    # recording batch statistics on the shared report.
    class BatchRunner
      include HasReport
      include HasLogger
      include HasDepositor
      include Interloper

      # Report before and after a batch is ingested.
      before(:run!) do
        logger.info "Starting batch ingest, batch size = #{iterations}."
        report.stat[:batch_size] = iterations
        report.stat[:datetime_started] = DateTime.now
      end

      after(:run!) do
        logger.info "Batch ingest complete."
        report.stat[:datetime_completed] = DateTime.now
        report.write_to_file
      end

      attr_reader :sip_paths

      # @param config_file_path [String] path to the ingest configuration file,
      #   handed to every Runner.
      # @param sip_paths [Array, nil] one SIP path per iteration; nil or a
      #   single path is coerced to an array.
      # @param shared_sip_path [String, nil] handed to every Runner.
      # @param iterations [Integer, String, nil] explicit number of runners to
      #   build; when nil or blank the number of SIP paths is used (minimum 1).
      # @param depositor [Object, nil] assigned through HasDepositor's writer.
      def initialize(config_file_path:, sip_paths: [], shared_sip_path: nil, iterations: nil, depositor: nil)
        # Array() keeps #iterations and #runners from failing on nil.
        @sip_paths = Array(sip_paths)
        @shared_sip_path = shared_sip_path
        @config_file_path = config_file_path
        @iterations = iterations
        self.depositor = depositor
      end

      # Runs every runner in order. On failure the error is recorded on the
      # report and re-raised; the report is written to file in all cases.
      #
      # @raise [StandardError] whatever a runner raised
      def run!
        runners.each(&:run!)
      rescue StandardError => error
        # TODO: move to callback, but :rescue hook doesn't exist yet in
        # Interloper gem.
        report.stat[:datetime_completed] = DateTime.now
        report.failed_with error
        raise
      ensure
        # TODO: move to callback, but :ensure hook doesn't exist yet in
        # Interloper gem.
        report.write_to_file
      end

      # Returns an array containing the IDs of new or updated records.
      # Currently only returns the IDs for ActiveFedora records (or
      # subclasses) that are specified at the top level (i.e. not nested) of
      # the ingest configuration.
      # @return [Array] list of IDs
      def ingested_ids
        # Hash#flatten would interleave the model-class keys with the ID
        # arrays; only the values hold IDs.
        ingested_ids_by_type.values.flatten
      end

      # Returns a hash containing the IDs of new or updated records, keyed by
      # the model class by which they were saved.
      # Example:
      #   { FileSet => ['123', '456'], Work => ['789'] }
      # Currently only returns the IDs for ActiveFedora records (or
      # subclasses) that are specified at the top level (i.e. not nested) of
      # the ingest configuration.
      # @return [Hash] IDs keyed by the model class by which they were saved.
      def ingested_ids_by_type
        runners.each_with_object({}) do |runner, ids_by_type|
          runner.ingested_ids_by_type.each do |type, ids|
            # Array union appends the new IDs and drops duplicates.
            ids_by_type[type] = (ids_by_type[type] || []) | ids
          end
        end
      end

      private

      # Number of runners to build.
      # @return [Integer] @iterations as an integer when it was given and is
      #   not blank; otherwise the number of SIP paths, but at least 1.
      def iterations
        return @iterations.to_i unless @iterations.nil? || @iterations.to_s.empty?

        [1, @sip_paths.count].max
      end

      # Lazily builds and memoizes one Runner per iteration. Each runner gets
      # the SIP path at its iteration index (nil when there are fewer paths
      # than iterations) and shares this batch's report when it accepts one.
      # @return [Array<Hyrax::Ingest::Runner>]
      def runners
        @runners ||= (0...iterations).map do |iteration|
          Hyrax::Ingest::Runner.new(
            config_file_path: @config_file_path,
            sip_path: @sip_paths[iteration],
            shared_sip_path: @shared_sip_path,
            iteration: iteration,
            depositor: depositor
          ).tap do |runner|
            runner.report = report if runner.respond_to? :report=
          end
        end
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ require 'hyrax/ingest/errors'
2
+ require 'active_support/core_ext/hash/keys'
3
+
4
module Hyrax
  module Ingest
    # Loads an ingest configuration from a YAML file and validates its
    # top-level structure on construction.
    class Configuration
      attr_reader :config_file_path

      # @param config_file_path [#to_s] path to the YAML configuration file
      # @raise [Hyrax::Ingest::Errors::NoConfigFileFound] when no file exists
      #   at the given path
      # @raise [Hyrax::Ingest::Errors::InvalidConfig] when the file's
      #   structure is invalid
      def initialize(config_file_path:)
        @config_file_path = config_file_path.to_s
        raise Hyrax::Ingest::Errors::NoConfigFileFound.new(@config_file_path) unless File.exist? @config_file_path
        validate!
      end

      # @return [Array] Array of hashes, where each hash is the configuration options
      #   for an Ingester instance
      def ingester_configs
        @ingester_configs ||= config[:ingest]
      end

      private

      # @return [Hash] The config hash parsed from yaml file, and with keys
      #   converted from strings to symbols. An empty or non-mapping document
      #   yields an empty hash so that validation reports it as InvalidConfig
      #   instead of failing with NoMethodError.
      def config
        @config ||= begin
          parsed = Psych.load_file(config_file_path)
          parsed.respond_to?(:deep_symbolize_keys) ? parsed.deep_symbolize_keys : {}
        end
      end

      # Validates the configuration.
      # @raise [Hyrax::Ingest::Errors::InvalidConfig] When the configuration is invalid.
      def validate!
        validate_top_level_key!
        validate_ingester_configs_array!
        validate_ingester_config_hashes!
      end

      # @raise [Hyrax::Ingest::Errors::InvalidConfig] When the top level
      #   'ingest' key is missing.
      def validate_top_level_key!
        raise Hyrax::Ingest::Errors::InvalidConfig.new(config_file_path, "Top-level key 'ingest' is missing.") unless config[:ingest]
      end

      # @raise [Hyrax::Ingest::Errors::InvalidConfig] When the value under
      #   'ingest' cannot be iterated.
      def validate_ingester_configs_array!
        raise Hyrax::Ingest::Errors::InvalidConfig.new(config_file_path, "Value under top-level 'ingest' key must be an array containing the configuration for each ingester you want to use.") unless config[:ingest].respond_to?(:each)
      end

      # @raise [Hyrax::Ingest::Errors::InvalidConfig] When an entry under
      #   'ingest' is not hash-like (does not respond to #keys).
      def validate_ingester_config_hashes!
        config[:ingest].each do |ingest_config|
          next if ingest_config.respond_to? :keys

          raise Hyrax::Ingest::Errors::InvalidConfig.new(config_file_path, "Each ingester configuration must be a single key-value pair, where the key is the type of ingester, and the value is a hash containing the configuration for the ingester. But a #{ingest_config.class} was found instead.")
        end
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ module Hyrax
2
+ module Ingest
3
+ class Engine < ::Rails::Engine # Rails engine subclass for Hyrax::Ingest; the class body is empty.
4
+ end
5
+ end
6
+ end
@@ -0,0 +1,186 @@
1
module Hyrax
  module Ingest
    # Base class that allows for rescuing from all Hyrax::Ingest errors.
    # Intentionally no-op.
    class Error < StandardError; end

    # Error classes raised by Hyrax::Ingest. Each one builds its own message
    # from the arguments given to its constructor.
    module Errors
      # @param invalid_path [#to_s] the rejected SIP path
      class InvalidSIPPath < Hyrax::Ingest::Error
        def initialize(invalid_path)
          super("#{invalid_path} is not a valid SIP path")
        end
      end

      # The argument is accepted for backward compatibility but is not used
      # in the message, so it is optional.
      class NoSIPSpecified < Hyrax::Ingest::Error
        def initialize(_obj = nil)
          super("No SIP was specified.")
        end
      end

      class NoSharedSIPSpecified < Hyrax::Ingest::Error
        def initialize
          super("No shared SIP was specified.")
        end
      end

      # @param invalid_sip_obj [Object] the object found in place of a SIP
      class InvalidSIP < Hyrax::Ingest::Error
        def initialize(invalid_sip_obj)
          super("Invalid SIP object. SIP must be an instance of Hyrax::Ingest::SIP (or an instance of a subclass) but an instance of #{invalid_sip_obj.class} was found.")
        end
      end

      # @param af_model_class_name [#to_s]
      class UnknownActiveFedoraModel < Hyrax::Ingest::Error
        def initialize(af_model_class_name)
          super("Unknown ActiveFedora model type '#{af_model_class_name}'")
        end
      end

      # @param sip_path [#to_s]
      # @param string_or_regexp [String, Regexp] the pattern that had no match
      class FileNotFoundInSIP < Hyrax::Ingest::Error
        def initialize(sip_path, string_or_regexp)
          super("No file matching #{string_or_regexp.inspect} was found in the SIP at path '#{sip_path}'")
        end
      end

      # @param class_name [#to_s]
      # @param available_classes [Array, Object, nil]
      class UnknownAssignerClass < Hyrax::Ingest::Error
        def initialize(class_name, available_classes)
          super("Assigner class '#{class_name}' not found. Available assigner classes are: #{Array(available_classes).join(', ')}")
        end
      end

      # @param class_name [#to_s]
      # @param matching_classes [Array, Object, nil]
      class AmbiguousAssignerClass < Hyrax::Ingest::Error
        def initialize(class_name, matching_classes)
          super("Assigner class name '#{class_name}' is ambiguous; could mean any one of the following classes: #{Array(matching_classes).join(',')}. Please use namespaces in the class name to be more specific.")
        end
      end

      # @param class_name [#to_s]
      # @param available_classes [Array, Object, nil]
      class UnknownFetcherClass < Hyrax::Ingest::Error
        def initialize(class_name, available_classes)
          # The list names fetcher classes (the message used to say "transformer").
          super("Fetcher class '#{class_name}' not found. Available fetcher classes are: #{Array(available_classes).join(', ')}")
        end
      end

      # @param class_name [#to_s]
      # @param matching_classes [Array, Object, nil]
      class AmbiguousFetcherClass < Hyrax::Ingest::Error
        def initialize(class_name, matching_classes)
          super("Fetcher class name '#{class_name}' is ambiguous; could mean any one of the following classes: #{Array(matching_classes).join(',')}. Please use namespaces in the class name to be more specific.")
        end
      end

      # @param class_name [#to_s]
      class InvalidFetcher < Hyrax::Ingest::Error
        def initialize(class_name)
          super("Invalid fetcher '#{class_name}'; Fetcher objects must extend Hyrax::Ingest::Fetcher::Base.")
        end
      end

      # @param config_options [Array, Object, nil] a single option is accepted
      #   as well as a list
      class MissingConfigOptions < Hyrax::Ingest::Error
        def initialize(config_options)
          super("Missing config options: '#{Array(config_options).join(', ')}'")
        end
      end

      # @param class_name [#to_s]
      # @param available_classes [Array, Object, nil]
      class UnknownIngesterClass < Hyrax::Ingest::Error
        def initialize(class_name, available_classes)
          super("Ingester class '#{class_name}' not found. Available ingester classes are: #{Array(available_classes).join(', ')}")
        end
      end

      # @param class_name [#to_s]
      # @param matching_classes [Array, Object, nil]
      class AmbiguousIngesterClass < Hyrax::Ingest::Error
        def initialize(class_name, matching_classes)
          super("Ingester class name '#{class_name}' is ambiguous; could mean any one of the following classes: #{Array(matching_classes).join(',')}. Please use namespaces in the class name to be more specific.")
        end
      end

      # @param model_class_name [#to_s]
      # @param where_clause [#to_s] description of the failed lookup
      class RecordNotFound < Hyrax::Ingest::Error
        def initialize(model_class_name, where_clause)
          super("Record of type '#{model_class_name}' could not be found where #{where_clause}.")
        end
      end

      # @param ambiguous_options [Array, Object, nil]
      class AmbiguousFetchOptions < Hyrax::Ingest::Error
        def initialize(ambiguous_options)
          # Fetch options select a fetcher class (the message used to say "transformer").
          super("Could not determine which fetcher class to use given the following options: #{Array(ambiguous_options).join(', ')}")
        end
      end

      # @param missing_options [Array, Object, nil]
      class MissingRequiredFetchOptions < Hyrax::Ingest::Error
        def initialize(missing_options)
          super("Missing required assignment options: #{Array(missing_options).join(', ')}")
        end
      end

      # @param invalid_options [Array, Object, nil]
      class InvalidFetchOptions < Hyrax::Ingest::Error
        def initialize(invalid_options)
          super("Invalid fetch options: #{Array(invalid_options).join(', ')}")
        end
      end

      # @param config_file_path [#to_s]
      # @param msg [#to_s, nil] optional detail, appended on its own line
      class InvalidConfig < Hyrax::Ingest::Error
        def initialize(config_file_path, msg = nil)
          lines = ["Invalid configuration in '#{config_file_path}'."]
          detail = msg.to_s
          # Skip a blank detail so the message has no trailing newline.
          lines << detail unless detail.empty?
          super(lines.join("\n"))
        end
      end

      # @param invalid_class [#to_s]
      class InvalidIngesterClass < Hyrax::Ingest::Error
        def initialize(invalid_class)
          super("Invalid ingester class #{invalid_class} does not extend Hyrax::Ingest::Ingester::Base")
        end
      end

      # @param class_name [#to_s]
      # @param available_classes [Array, Object, nil]
      class UnknownTransformerClass < Hyrax::Ingest::Error
        def initialize(class_name, available_classes)
          super("Transformer class '#{class_name}' not found. Available transformer classes are: #{Array(available_classes).join(', ')}")
        end
      end

      # @param class_name [#to_s]
      # @param matching_classes [Array, Object, nil]
      class AmbiguousTransformerClass < Hyrax::Ingest::Error
        def initialize(class_name, matching_classes)
          super("Transformer class name '#{class_name}' is ambiguous; could mean any one of the following classes: #{Array(matching_classes).join(',')}. Please use namespaces in the class name to be more specific.")
        end
      end

      # @param path [#to_s]
      class NoConfigFileFound < Hyrax::Ingest::Error
        def initialize(path)
          super("No ingest config file exists at '#{path}'.")
        end
      end

      # @param unknown_rdf_predicate [#to_s]
      # @param af_model_class_name [#to_s]
      class UnknownRdfPredicate < Hyrax::Ingest::Error
        def initialize(unknown_rdf_predicate, af_model_class_name)
          super("Unknown RDF Predicate '#{unknown_rdf_predicate}' for ActiveFedora model '#{af_model_class_name}'")
        end
      end

      # @param unrecognized_transform_option [#to_s]
      class UnrecognizedTransformOption < Hyrax::Ingest::Error
        def initialize(unrecognized_transform_option)
          super("Unknown transform option: '#{unrecognized_transform_option}'")
        end
      end

      # @param unable_to_read_from_this_thing [#to_s]
      class UnableToPrintIngestReport < Hyrax::Ingest::Error
        def initialize(unable_to_read_from_this_thing)
          super("Cannot print ingest report from #{unable_to_read_from_this_thing}. To print a report, it must be written to a file.")
        end
      end

      # @param invalid_options [Array, Object, nil] a single option is accepted
      #   as well as a list
      class InvalidReportConfigOptions < Hyrax::Ingest::Error
        def initialize(invalid_options)
          super("Invalid reporting config options: #{Array(invalid_options).join(', ')}")
        end
      end

      # @param invalid_column [#to_s]
      class MissingCsvColumn < Hyrax::Ingest::Error
        def initialize(invalid_column)
          super("Unknown column header for: #{invalid_column}.")
        end
      end

      class ConfigurationError < Hyrax::Ingest::Error; end

      # @param value [#to_s]
      # @param property_name [#to_s]
      # @param rdf_predicate [#to_s]
      class InvalidActiveFedoraPropertyValue < Hyrax::Ingest::Error
        def initialize(value, property_name, rdf_predicate)
          super("Could not assign '#{value}' to property #{property_name} (with RDF predicate '#{rdf_predicate}')")
        end
      end
    end
  end
end