hyrax-ingest 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +52 -0
  3. data/config/routes.rb +2 -0
  4. data/lib/hyrax/ingest.rb +12 -0
  5. data/lib/hyrax/ingest/batch_runner.rb +130 -0
  6. data/lib/hyrax/ingest/configuration.rb +54 -0
  7. data/lib/hyrax/ingest/engine.rb +6 -0
  8. data/lib/hyrax/ingest/errors.rb +186 -0
  9. data/lib/hyrax/ingest/fetcher.rb +55 -0
  10. data/lib/hyrax/ingest/fetcher/base.rb +78 -0
  11. data/lib/hyrax/ingest/fetcher/csv_file.rb +89 -0
  12. data/lib/hyrax/ingest/fetcher/date_time.rb +15 -0
  13. data/lib/hyrax/ingest/fetcher/literal.rb +24 -0
  14. data/lib/hyrax/ingest/fetcher/premis_event_type.rb +28 -0
  15. data/lib/hyrax/ingest/fetcher/rdf_uri.rb +21 -0
  16. data/lib/hyrax/ingest/fetcher/xml_file.rb +54 -0
  17. data/lib/hyrax/ingest/fetcher/yaml_file.rb +36 -0
  18. data/lib/hyrax/ingest/has_depositor.rb +13 -0
  19. data/lib/hyrax/ingest/has_iteration.rb +18 -0
  20. data/lib/hyrax/ingest/has_logger.rb +29 -0
  21. data/lib/hyrax/ingest/has_report.rb +17 -0
  22. data/lib/hyrax/ingest/has_shared_sip.rb +20 -0
  23. data/lib/hyrax/ingest/has_sip.rb +20 -0
  24. data/lib/hyrax/ingest/ingester.rb +75 -0
  25. data/lib/hyrax/ingest/ingester/active_fedora_base_ingester.rb +136 -0
  26. data/lib/hyrax/ingest/ingester/active_fedora_file_ingester.rb +17 -0
  27. data/lib/hyrax/ingest/ingester/active_fedora_property_assigner.rb +67 -0
  28. data/lib/hyrax/ingest/ingester/base.rb +28 -0
  29. data/lib/hyrax/ingest/ingester/file_set_ingester.rb +68 -0
  30. data/lib/hyrax/ingest/ingester/preservation_event_ingester.rb +27 -0
  31. data/lib/hyrax/ingest/ingester/work_ingester.rb +55 -0
  32. data/lib/hyrax/ingest/reporting.rb +13 -0
  33. data/lib/hyrax/ingest/reporting/configuration.rb +22 -0
  34. data/lib/hyrax/ingest/reporting/report.rb +79 -0
  35. data/lib/hyrax/ingest/reporting/views/hyrax_ingest_report.html.erb +77 -0
  36. data/lib/hyrax/ingest/reporting/views/hyrax_ingest_report.txt.erb +21 -0
  37. data/lib/hyrax/ingest/runner.rb +103 -0
  38. data/lib/hyrax/ingest/sip.rb +92 -0
  39. data/lib/hyrax/ingest/transformer.rb +42 -0
  40. data/lib/hyrax/ingest/transformer/base.rb +12 -0
  41. data/lib/hyrax/ingest/transformer/to_date.rb +33 -0
  42. data/lib/hyrax/ingest/version.rb +5 -0
  43. data/lib/tasks/ingest_tasks.rake +22 -0
  44. metadata +330 -0
@@ -0,0 +1,17 @@
1
+ require 'hyrax/ingest/reporting/report'
2
+ require 'hyrax/ingest/errors'
3
+
4
module Hyrax
  module Ingest
    # Mixin that gives the including object a replaceable ingest report which
    # is created on first access when none has been assigned.
    module HasReport
      # Replaces the report used by this object.
      #
      # @param report [Hyrax::Ingest::Reporting::Report] the report to use.
      # @raise [Hyrax::Ingest::Errors::InvalidIngestReport] when the argument
      #   is not a Hyrax::Ingest::Reporting::Report.
      def report=(report)
        if report.is_a?(Hyrax::Ingest::Reporting::Report)
          @report = report
        else
          raise Hyrax::Ingest::Errors::InvalidIngestReport
        end
      end

      # @return [Hyrax::Ingest::Reporting::Report] the assigned report, or a
      #   new one that is built and remembered on the first call.
      def report
        @report ||= Hyrax::Ingest::Reporting::Report.new
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # Simple interface for injecting a shared Hyrax::Ingest::SIP dependency.
2
+ require 'hyrax/ingest/sip'
3
+ require 'hyrax/ingest/errors'
4
+
5
module Hyrax
  module Ingest
    # Mixin for objects that can be handed a shared SIP.
    module HasSharedSIP
      # @return [Hyrax::Ingest::SIP, nil] the shared SIP last assigned, if any.
      attr_reader :shared_sip

      # Assigns the shared SIP. nil is accepted and clears the current value.
      #
      # @param shared_sip [Hyrax::Ingest::SIP, nil]
      # @raise [Hyrax::Ingest::Errors::InvalidSIP] when the argument is neither
      #   nil nor a Hyrax::Ingest::SIP.
      def shared_sip=(shared_sip)
        acceptable = shared_sip.nil? || shared_sip.is_a?(Hyrax::Ingest::SIP)
        raise Hyrax::Ingest::Errors::InvalidSIP.new(shared_sip) unless acceptable
        @shared_sip = shared_sip
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # Simple interface for injecting a Hyrax::Ingest::SIP dependency.
2
+
3
+ require 'hyrax/ingest/sip'
4
+ require 'hyrax/ingest/errors'
5
+
6
module Hyrax
  module Ingest
    # Mixin for objects that operate on a single SIP.
    module HasSIP
      # Assigns the SIP. nil is accepted and clears the current value.
      #
      # @param sip [Hyrax::Ingest::SIP, nil]
      # @raise [Hyrax::Ingest::Errors::InvalidSIP] when the argument is neither
      #   nil nor a Hyrax::Ingest::SIP.
      def sip=(sip)
        if !sip.nil? && !sip.is_a?(Hyrax::Ingest::SIP)
          raise Hyrax::Ingest::Errors::InvalidSIP.new(sip)
        end

        @sip = sip
      end

      # @return [Hyrax::Ingest::SIP, nil] the SIP last assigned, if any.
      def sip
        @sip
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ require 'hyrax/ingest/ingester/active_fedora_base_ingester'
2
+ require 'hyrax/ingest/ingester/file_set_ingester'
3
+ require 'hyrax/ingest/ingester/work_ingester'
4
+ require 'hyrax/ingest/ingester/preservation_event_ingester'
5
+ require 'hyrax/ingest/errors'
6
+
7
module Hyrax
  module Ingest
    # Registry of the Ingester classes that can be instantiated by name, plus
    # the factory that performs the name lookup.
    module Ingester
      class << self
        # @return [Set] the set of all Ingester classes available by default.
        def default_registered_classes
          Set.new(
            [
              Hyrax::Ingest::Ingester::ActiveFedoraBaseIngester,
              Hyrax::Ingest::Ingester::FileSetIngester,
              Hyrax::Ingest::Ingester::WorkIngester,
              Hyrax::Ingest::Ingester::PreservationEventIngester
            ]
          )
        end

        # @return [Set] The set of all registered Ingester classes. Starts out
        #   as the default set and is then changed by #register_class and
        #   #unregister_class.
        def registered_classes
          @registered_classes ||= default_registered_classes
        end

        # @param [Class] klass The Ingester class to add to the set of available
        #   Ingester classes.
        # @raise [Hyrax::Ingest::Errors::InvalidIngesterClass] When the
        #   parameter given does not inherit from the base Ingester class
        #   (including when it is not a class at all).
        # @return [Set] The set of available ingester classes, with the new
        #   one added.
        def register_class(klass)
          # The Module check keeps non-class arguments (e.g. a String) from
          # blowing up with NoMethodError on #ancestors.
          valid = klass.is_a?(Module) && klass.ancestors.include?(Hyrax::Ingest::Ingester::Base)
          raise Hyrax::Ingest::Errors::InvalidIngesterClass.new(klass) unless valid
          registered_classes.add(klass)
        end

        # @param [Class] klass The Ingester class to remove from the set of
        #   available Ingester classes.
        # @return [Set] The set of available ingester classes, with the specified
        #   class removed.
        def unregister_class(klass)
          registered_classes.delete(klass)
        end

        # @param [String, Symbol] ingester_name The name of the class constant,
        #   with or without namespaces. The string "Ingester" will be appended
        #   if it's not already on there. The argument itself is never modified.
        # @param [Hash] options The hash that will get passed to the
        #   constructor of the Ingester class.
        # @raise [Hyrax::Ingest::Errors::UnknownIngesterClass] When no
        #   registered class matches the name.
        # @raise [Hyrax::Ingest::Errors::AmbiguousIngesterClass] When more than
        #   one registered class matches the name.
        # @return An instance of the Ingester class.
        def factory(ingester_name, options={})
          find_class_by_name(ingester_name).new(options)
        end

        private

        # @param [String, Symbol] class_name The class name, with or without
        #   namespaces, with or without the "Ingester" suffix.
        # @raise [Hyrax::Ingest::Errors::UnknownIngesterClass] When there is no
        #   corresponding Ingester class for the given value of the `class_name`
        #   param.
        # @raise [Hyrax::Ingest::Errors::AmbiguousIngesterClass] When the value of
        #   `class_name` param is insufficient in determining a Ingester class.
        # @return [Class] The appropriate Ingester class.
        def find_class_by_name(class_name)
          # Non-destructive #sub: the caller's string may be frozen or reused,
          # and a Symbol has to be normalized as well.
          normalized_name = class_name.to_s.sub(/(Ingester)?\z/, 'Ingester')
          # A registered class matches when its full name IS the requested
          # name, or ends with it on a namespace boundary. Matching on the
          # suffix (not anywhere in the name) keeps "WorkIngester" from also
          # selecting e.g. "Foo::WorkIngesterV2".
          namespaced_suffix = "::#{normalized_name}"
          found_classes = registered_classes.select do |class_const|
            full_name = class_const.to_s
            full_name == normalized_name || full_name.end_with?(namespaced_suffix)
          end
          raise Hyrax::Ingest::Errors::UnknownIngesterClass.new(normalized_name, registered_classes) if found_classes.empty?
          raise Hyrax::Ingest::Errors::AmbiguousIngesterClass.new(normalized_name, found_classes) if found_classes.count > 1
          found_classes.first
        end
      end
    end
  end
end
@@ -0,0 +1,136 @@
1
+ require 'hyrax/ingest/ingester/base'
2
+ require 'active_support/inflector'
3
+ require 'active_fedora'
4
+ require 'hyrax/ingest/ingester/active_fedora_property_assigner'
5
+ require 'hyrax/ingest/fetcher'
6
+ require 'hyrax/ingest/transformer'
7
+
8
module Hyrax
  module Ingest
    module Ingester
      # Ingester that obtains an ActiveFedora model (a new instance, or an
      # existing record when :update is configured), assigns the configured
      # properties to it and saves it.
      #
      # Config keys read by #initialize (each one is removed from the hash that
      # was passed in before the remainder goes to the superclass):
      #   :af_model_class_name - required; name of the ActiveFedora model class.
      #   :properties          - optional list of property configs; each entry is
      #                          read for :rdf_predicate, :from and :transform.
      #   :update              - optional map of field => config whose :from
      #                          entry describes the fetcher supplying the value
      #                          used to look up an existing record.
      class ActiveFedoraBaseIngester < Base
        include Interloper

        # Values of a fetcher's :shared option that select the shared SIP.
        SHARED_SIP_TRUTHY_VALUES = ['1', 'true', 'TRUE', 'True', 'yes', true].freeze

        attr_reader :af_model_class_name, :properties_config, :update_params, :shared_sip

        # NOTE(review): before/after are provided by Interloper; presumably
        # they run these blocks around #save_model! on the instance - confirm
        # against the Interloper documentation.
        before(:save_model!) { logger.info "Saving #{af_model_class_name}" }

        after(:save_model!) do
          if af_model.persisted?
            logger.info "#{af_model_class_name} saved!"
            report.stat[:models_saved] << af_model
          else
            validation_errors = af_model.errors.map { |field, msg| "'#{field}' #{msg}" }.join('; ')
            logger.error "Validation Error(s): #{validation_errors}"
            report.stat[:models_failed] << af_model
          end
        end

        # @param config [Hash] ingester options, see the class comment. The
        #   keys handled here are deleted from the given hash.
        # @raise [ArgumentError] when :af_model_class_name is missing.
        def initialize(config={})
          raise ArgumentError, "Option :af_model_class_name is required" unless config.key?(:af_model_class_name)
          @af_model_class_name = config.delete(:af_model_class_name).to_s
          @properties_config = config.delete(:properties) || []
          @update_params = config.delete(:update)
          super(config)
        end

        # Assigns all configured properties, then saves the model.
        #
        # @return the result of #save_model!: the model, or false when it was
        #   invalid.
        def run!
          assign_properties!
          save_model!
        end

        # @return the model being ingested; looked up or built on first call
        #   and remembered afterwards.
        def af_model
          @af_model ||= new_or_existing_af_model
        end

        protected

        # Saves the model.
        #
        # @param continue_if_invalid [Boolean] when true (the default) a
        #   validation failure is swallowed and false is returned; when false
        #   the ActiveFedora::RecordInvalid error is re-raised.
        # @return the saved model, or false when it was invalid.
        def save_model!(continue_if_invalid: true)
          af_model.save!
          af_model
        rescue ActiveFedora::RecordInvalid
          raise unless continue_if_invalid
          false
        end

        # Runs every property assigner against the model.
        def assign_properties!
          property_assigners.each(&:assign!)
        end

        private

        # @return [Class] the constant named by #af_model_class_name.
        # @raise [Hyrax::Ingest::Errors::UnknownActiveFedoraModel] when no such
        #   constant exists.
        def af_model_class
          Object.const_get(af_model_class_name.to_s)
        rescue NameError
          raise Hyrax::Ingest::Errors::UnknownActiveFedoraModel.new(af_model_class_name)
        end

        # Finds the record described by the :update config, or builds a new
        # model instance when no :update config was given.
        #
        # @raise [Hyrax::Ingest::Errors::RecordNotFound] when :update was
        #   configured but no record matched.
        def new_or_existing_af_model
          # Build the lookup conditions exactly once: every call to
          # #where_clause creates fresh fetchers and fetches their values again.
          conditions = where_clause
          return af_model_class.new unless conditions

          found_record = af_model_class.where(conditions).first
          raise Hyrax::Ingest::Errors::RecordNotFound.new(af_model_class, conditions) unless found_record
          found_record
        end

        # @return [Hash, nil] field => fetched value for each :update entry, or
        #   nil when no :update config was given. Values that do not respond to
        #   #each are converted to strings.
        def where_clause
          return unless update_params
          update_params.each_with_object({}) do |(field, from_params), conditions|
            value = create_fetcher_from_config(from_params[:from]).fetch
            conditions[field] = value.respond_to?(:each) ? value : value.to_s
          end
        end

        # @return [Array<ActiveFedoraPropertyAssigner>] one assigner per entry
        #   of #properties_config, built on first call.
        def property_assigners
          @property_assigners ||= properties_config.map do |property_config|
            property_assigner_options = {
              rdf_predicate: property_config[:rdf_predicate],
              fetcher: create_fetcher_from_config(property_config[:from]),
              af_model: af_model
            }

            if property_config.key?(:transform)
              # The transform config is a single-entry map: class name => options.
              transformer_class_name, transformer_class_options = property_config[:transform].first
              property_assigner_options[:transformer] = Hyrax::Ingest::Transformer.factory(transformer_class_name, transformer_class_options)
            end

            ActiveFedoraPropertyAssigner.new(property_assigner_options)
          end
        end

        # Builds a fetcher from a single-entry map (class name => options) and
        # passes on whichever of sip, iteration, logger and report it accepts.
        #
        # @raise [Hyrax::Ingest::Errors::NoSharedSIPSpecified] when the fetcher
        #   asks for the shared SIP but this ingester has none.
        def create_fetcher_from_config(fetcher_config)
          fetcher_class_name = fetcher_config.keys.first
          fetcher_class_options = fetcher_config.values.first
          Hyrax::Ingest::Fetcher.factory(fetcher_class_name, fetcher_class_options).tap do |fetcher|
            fetcher.sip = sip_for_fetcher(fetcher_class_options) if fetcher.respond_to?(:sip=)
            fetcher.iteration = iteration if fetcher.respond_to? :iteration=
            fetcher.logger = logger if fetcher.respond_to? :logger=
            fetcher.report = report if fetcher.respond_to? :report=
          end
        end

        # Picks the SIP a fetcher should read from: the shared SIP when its
        # options ask for it, otherwise this ingester's own SIP.
        def sip_for_fetcher(fetcher_options)
          return sip unless use_shared_sip?(fetcher_options)
          raise Hyrax::Ingest::Errors::NoSharedSIPSpecified unless shared_sip
          shared_sip
        end

        # @param config fetcher options; anything that is not a Hash (nil, a
        #   bare String, ...) cannot carry a :shared flag and yields false.
        # @return [Boolean] whether the options ask for the shared SIP.
        def use_shared_sip?(config)
          config.is_a?(Hash) && SHARED_SIP_TRUTHY_VALUES.include?(config[:shared])
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Hyrax
  module Ingest
    module Ingester
      # Read-only holder for the description of one file: its type plus an
      # optional original filename, external URL and content.
      class ActiveFedoraFileIngester
        # @return the value given as `type:`.
        attr_reader :type
        # @return the value given as `original_filename:`, nil by default.
        attr_reader :original_filename
        # @return the value given as `external_url:`, nil by default.
        attr_reader :external_url
        # @return the value given as `content:`, nil by default.
        attr_reader :content

        # @param type required.
        # @param original_filename optional.
        # @param external_url optional.
        # @param content optional.
        def initialize(type:, original_filename: nil, external_url: nil, content: nil)
          @type, @original_filename = type, original_filename
          @external_url, @content = external_url, content
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ require 'hyrax/ingest/fetcher/base'
2
+ require 'hyrax/ingest/errors'
3
+ require 'active_fedora'
4
+ require 'hyrax/ingest/has_report'
5
+ require 'hyrax/ingest/has_logger'
6
+
7
module Hyrax
  module Ingest
    module Ingester
      # Sets one property of an ActiveFedora model. The property is identified
      # by its RDF predicate; the value comes from a fetcher and may be passed
      # through a transformer before it is assigned.
      class ActiveFedoraPropertyAssigner
        include Interloper
        include HasReport
        include HasLogger

        attr_reader :rdf_predicate, :af_model, :fetcher, :transformer

        # NOTE(review): `after` is provided by Interloper; presumably the block
        # runs on the instance once #assign! has returned - confirm.
        after(:assign!) do
          logger.info "#{@fetched_and_transformed_value} assigned to property '#{property_name}' with rdf predicate '#{rdf_predicate}'"
        end

        # @param options [Hash] read for :rdf_predicate, :fetcher, :af_model
        #   and :transformer.
        # @raise [Hyrax::Ingest::Errors::UnknownActiveFedoraModel] when
        #   :af_model is not an ActiveFedora::Base.
        # @raise [Hyrax::Ingest::Errors::InvalidFetcher] when :fetcher is not a
        #   Hyrax::Ingest::Fetcher::Base.
        def initialize(options={})
          @rdf_predicate, @fetcher, @af_model, @transformer =
            options.values_at(:rdf_predicate, :fetcher, :af_model, :transformer)

          unless @af_model.is_a?(ActiveFedora::Base)
            raise Hyrax::Ingest::Errors::UnknownActiveFedoraModel.new(@af_model.class)
          end
          unless @fetcher.is_a?(Hyrax::Ingest::Fetcher::Base)
            raise Hyrax::Ingest::Errors::InvalidFetcher.new(@fetcher.class)
          end
        end

        # Writes the fetched (and, if configured, transformed) value to the
        # model property that matches #rdf_predicate.
        #
        # @raise [Hyrax::Ingest::Errors::InvalidActiveFedoraPropertyValue] when
        #   the model rejects the value with ActiveTriples::Relation::ValueError.
        def assign!
          begin
            af_model.set_value(property_name, fetched_and_transformed_value)
          rescue ::ActiveTriples::Relation::ValueError
            # Re-raise as an error that is specific to ingest.
            raise Hyrax::Ingest::Errors::InvalidActiveFedoraPropertyValue.new(fetched_value, property_name, rdf_predicate)
          end
        end

        private

        # @return the fetched value, run through the transformer when one was
        #   given. Remembered after the first call.
        def fetched_and_transformed_value
          @fetched_and_transformed_value ||=
            transformer ? transformer.transform(fetched_value) : fetched_value
        end

        # @return the value produced by the fetcher. Remembered after the
        #   first call.
        def fetched_value
          @fetched_value ||= fetcher.fetch
        end

        # Performs a lookup of property name by RDF predicate.
        # @return [Symbol] The symbol representing the accessor for the
        #   property that matches the RDF predicate stored in the
        #   @rdf_predicate attribute.
        # @raise [Hyrax::Ingest::Errors::UnknownRdfPredicate] when the model
        #   has no property with that predicate.
        def property_name
          return @property if @property

          matching = af_model.send(:properties).select do |_name, property_config|
            property_config.predicate == rdf_predicate
          end
          raise Hyrax::Ingest::Errors::UnknownRdfPredicate.new(rdf_predicate, af_model.class) if matching.keys.empty?

          @property = matching.keys.first.to_sym
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'hyrax/ingest/has_sip'
2
+ require 'hyrax/ingest/has_shared_sip'
3
+ require 'hyrax/ingest/has_iteration'
4
+ require 'hyrax/ingest/has_logger'
5
+ require 'hyrax/ingest/has_report'
6
+ require 'hyrax/ingest/has_depositor'
7
+
8
module Hyrax
  module Ingest
    module Ingester
      # Common superclass of the ingesters. It mixes in the accessors for the
      # SIP, shared SIP, iteration, report, logger and depositor.
      class Base
        include HasSIP
        include HasSharedSIP
        include HasIteration
        include HasReport
        include HasLogger
        include HasDepositor

        # @param config [Hash] ingester options. The :depositor entry is
        #   removed from the given hash and handed to #depositor=.
        def initialize(config = {})
          configured_depositor = config.delete(:depositor)
          self.depositor = configured_depositor
        end

        # Does nothing here; subclasses are meant to override it.
        def run!
        end
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require 'hyrax/ingest/ingester/active_fedora_base_ingester'
2
+ require 'hyrax/ingest/ingester/active_fedora_file_ingester'
3
+
4
module Hyrax
  module Ingest
    module Ingester
      # Ingester for file sets: on top of what ActiveFedoraBaseIngester does it
      # applies depositor metadata, ingests the configured preservation events
      # and attaches the configured files.
      #
      # Additional config keys (removed from the hash passed to #initialize):
      #   :Files              - optional list of keyword hashes, one per file,
      #                         as accepted by ActiveFedoraFileIngester.new.
      #   :PreservationEvents - optional list of PreservationEventIngester
      #                         configs.
      # :af_model_class_name defaults to 'FileSet' when not given.
      class FileSetIngester < ActiveFedoraBaseIngester
        attr_reader :files_config, :preservation_events_config

        # @param config [Hash] ingester options, see the class comment.
        def initialize(config={})
          @files_config = config.delete(:Files) || []
          @preservation_events_config = config.delete(:PreservationEvents) || []
          config[:af_model_class_name] ||= 'FileSet'
          super(config)
        end

        # Assigns properties and depositor metadata, saves the model, ingests
        # the preservation events and finally attaches the files.
        #
        # @return the ActiveFedora model that was ingested.
        def run!
          assign_properties!
          apply_depositor_metadata!
          save_model!
          ingest_preservation_events!
          add_files_to_file_set!
          # return the new instance of the ActiveFedora model
          af_model
        end

        private

        # Applies depositor metadata to the model when a depositor is set.
        def apply_depositor_metadata!
          af_model.apply_depositor_metadata(depositor) if depositor
        end

        # Attaches each configured file to the model: by reference when the
        # file config has an external URL, otherwise by content.
        def add_files_to_file_set!
          file_ingesters.each do |file_ingester|
            if file_ingester.external_url
              Hydra::Works::AddExternalFileToFileSet.call(af_model, file_ingester.external_url, file_ingester.type)
            else
              Hydra::Works::AddFileToFileSet.call(af_model, file_ingester.content, file_ingester.type)
            end
          end
        end

        # @return [Array<ActiveFedoraFileIngester>] one per entry of
        #   #files_config, built on first call.
        def file_ingesters
          @file_ingesters ||= files_config.map do |file_config|
            # ActiveFedoraFileIngester#initialize takes keyword arguments only;
            # the explicit double splat is required on Ruby 3, where a Hash is
            # no longer converted to keywords implicitly.
            Hyrax::Ingest::Ingester::ActiveFedoraFileIngester.new(**file_config)
          end
        end

        # Runs every preservation event ingester.
        def ingest_preservation_events!
          preservation_event_ingesters.each(&:run!)
        end

        # @return [Array<PreservationEventIngester>] one per entry of
        #   #preservation_events_config, each tied to this ingester's model and
        #   given the same sip, shared sip and iteration. Built on first call
        #   and remembered in an instance variable, so repeated calls return
        #   the same ingesters.
        def preservation_event_ingesters
          @preservation_event_ingesters ||= preservation_events_config.map do |preservation_event_config|
            # Work on a copy so the configuration handed to this ingester is
            # left untouched.
            event_config = preservation_event_config.merge(premis_event_related_object: af_model)
            Hyrax::Ingest::Ingester::PreservationEventIngester.new(event_config).tap do |preservation_event_ingester|
              preservation_event_ingester.sip = sip if preservation_event_ingester.respond_to?(:sip=)
              preservation_event_ingester.shared_sip = shared_sip if preservation_event_ingester.respond_to?(:shared_sip=)
              preservation_event_ingester.iteration = iteration if preservation_event_ingester.respond_to?(:iteration=)
            end
          end
        end
      end
    end
  end
end