hyrax-ingest 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +52 -0
  3. data/config/routes.rb +2 -0
  4. data/lib/hyrax/ingest.rb +12 -0
  5. data/lib/hyrax/ingest/batch_runner.rb +130 -0
  6. data/lib/hyrax/ingest/configuration.rb +54 -0
  7. data/lib/hyrax/ingest/engine.rb +6 -0
  8. data/lib/hyrax/ingest/errors.rb +186 -0
  9. data/lib/hyrax/ingest/fetcher.rb +55 -0
  10. data/lib/hyrax/ingest/fetcher/base.rb +78 -0
  11. data/lib/hyrax/ingest/fetcher/csv_file.rb +89 -0
  12. data/lib/hyrax/ingest/fetcher/date_time.rb +15 -0
  13. data/lib/hyrax/ingest/fetcher/literal.rb +24 -0
  14. data/lib/hyrax/ingest/fetcher/premis_event_type.rb +28 -0
  15. data/lib/hyrax/ingest/fetcher/rdf_uri.rb +21 -0
  16. data/lib/hyrax/ingest/fetcher/xml_file.rb +54 -0
  17. data/lib/hyrax/ingest/fetcher/yaml_file.rb +36 -0
  18. data/lib/hyrax/ingest/has_depositor.rb +13 -0
  19. data/lib/hyrax/ingest/has_iteration.rb +18 -0
  20. data/lib/hyrax/ingest/has_logger.rb +29 -0
  21. data/lib/hyrax/ingest/has_report.rb +17 -0
  22. data/lib/hyrax/ingest/has_shared_sip.rb +20 -0
  23. data/lib/hyrax/ingest/has_sip.rb +20 -0
  24. data/lib/hyrax/ingest/ingester.rb +75 -0
  25. data/lib/hyrax/ingest/ingester/active_fedora_base_ingester.rb +136 -0
  26. data/lib/hyrax/ingest/ingester/active_fedora_file_ingester.rb +17 -0
  27. data/lib/hyrax/ingest/ingester/active_fedora_property_assigner.rb +67 -0
  28. data/lib/hyrax/ingest/ingester/base.rb +28 -0
  29. data/lib/hyrax/ingest/ingester/file_set_ingester.rb +68 -0
  30. data/lib/hyrax/ingest/ingester/preservation_event_ingester.rb +27 -0
  31. data/lib/hyrax/ingest/ingester/work_ingester.rb +55 -0
  32. data/lib/hyrax/ingest/reporting.rb +13 -0
  33. data/lib/hyrax/ingest/reporting/configuration.rb +22 -0
  34. data/lib/hyrax/ingest/reporting/report.rb +79 -0
  35. data/lib/hyrax/ingest/reporting/views/hyrax_ingest_report.html.erb +77 -0
  36. data/lib/hyrax/ingest/reporting/views/hyrax_ingest_report.txt.erb +21 -0
  37. data/lib/hyrax/ingest/runner.rb +103 -0
  38. data/lib/hyrax/ingest/sip.rb +92 -0
  39. data/lib/hyrax/ingest/transformer.rb +42 -0
  40. data/lib/hyrax/ingest/transformer/base.rb +12 -0
  41. data/lib/hyrax/ingest/transformer/to_date.rb +33 -0
  42. data/lib/hyrax/ingest/version.rb +5 -0
  43. data/lib/tasks/ingest_tasks.rake +22 -0
  44. metadata +330 -0
@@ -0,0 +1,17 @@
1
+ require 'hyrax/ingest/reporting/report'
2
+ require 'hyrax/ingest/errors'
3
+
4
module Hyrax
  module Ingest
    # Mixin giving the including object a lazily created ingest report plus a
    # type-checked writer for injecting an existing one.
    module HasReport
      # @return [Hyrax::Ingest::Reporting::Report] the assigned report; a new
      #   Report is created and remembered on first access when none is set.
      def report
        return @report if @report
        @report = Hyrax::Ingest::Reporting::Report.new
      end

      # @param report [Hyrax::Ingest::Reporting::Report] the report to use.
      # @raise [Hyrax::Ingest::Errors::InvalidIngestReport] when the argument
      #   is not a Hyrax::Ingest::Reporting::Report.
      def report=(report)
        acceptable = report.is_a?(Hyrax::Ingest::Reporting::Report)
        raise Hyrax::Ingest::Errors::InvalidIngestReport unless acceptable
        @report = report
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # Simple interface for injecting a shared Hyrax::Ingest::SIP dependency.
2
+ require 'hyrax/ingest/sip'
3
+ require 'hyrax/ingest/errors'
4
+
5
module Hyrax
  module Ingest
    # Mixin providing a reader and a type-checked writer for a shared SIP.
    module HasSharedSIP
      # @return [Hyrax::Ingest::SIP, nil] the value of the @shared_sip attribute.
      attr_reader :shared_sip

      # @param shared_sip [Hyrax::Ingest::SIP, nil] the shared SIP to store;
      #   nil is accepted and clears the attribute.
      # @raise [Hyrax::Ingest::Errors::InvalidSIP] when a non-nil argument is
      #   not a Hyrax::Ingest::SIP.
      def shared_sip=(shared_sip)
        acceptable = shared_sip.nil? || shared_sip.is_a?(Hyrax::Ingest::SIP)
        raise Hyrax::Ingest::Errors::InvalidSIP.new(shared_sip) unless acceptable
        @shared_sip = shared_sip
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # Simple interface for injecting a Hyrax::Ingest::SIP dependency.
2
+
3
+ require 'hyrax/ingest/sip'
4
+ require 'hyrax/ingest/errors'
5
+
6
module Hyrax
  module Ingest
    # Mixin providing a reader and a type-checked writer for a SIP.
    module HasSIP
      # @return [Hyrax::Ingest::SIP, nil] the value of the @sip attribute.
      attr_reader :sip

      # @param sip [Hyrax::Ingest::SIP, nil] the SIP to store; nil is accepted
      #   and clears the attribute.
      # @raise [Hyrax::Ingest::Errors::InvalidSIP] when a non-nil argument is
      #   not a Hyrax::Ingest::SIP.
      def sip=(sip)
        acceptable = sip.nil? || sip.is_a?(Hyrax::Ingest::SIP)
        raise Hyrax::Ingest::Errors::InvalidSIP.new(sip) unless acceptable
        @sip = sip
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ require 'hyrax/ingest/ingester/active_fedora_base_ingester'
2
+ require 'hyrax/ingest/ingester/file_set_ingester'
3
+ require 'hyrax/ingest/ingester/work_ingester'
4
+ require 'hyrax/ingest/ingester/preservation_event_ingester'
5
+ require 'hyrax/ingest/errors'
6
+
7
module Hyrax
  module Ingest
    # Registry of Ingester classes plus a factory that instantiates one of
    # them from a (possibly abbreviated) class name.
    module Ingester
      class << self
        # @return [Set] the set of all Ingester classes available by default.
        def default_registered_classes
          Set[
            Hyrax::Ingest::Ingester::ActiveFedoraBaseIngester,
            Hyrax::Ingest::Ingester::FileSetIngester,
            Hyrax::Ingest::Ingester::WorkIngester,
            Hyrax::Ingest::Ingester::PreservationEventIngester
          ]
        end

        # @return [Set] The set of all registered Ingester classes
        def registered_classes
          @registered_classes ||= default_registered_classes
        end

        # @param [Class] klass The Ingester class to add to the set of available
        #   Ingester classes.
        # @raise [Hyrax::Ingest::Errors::InvalidIngesterClass] When the
        #   parameter given is not a class, or does not inherit from the base
        #   Ingester class.
        # @return [Set] The set of available ingester classes, with the new
        #   one added.
        def register_class(klass)
          # Check for Class first so that non-class arguments get the
          # documented error instead of a NoMethodError on #ancestors.
          valid = klass.is_a?(Class) && klass.ancestors.include?(Hyrax::Ingest::Ingester::Base)
          raise Hyrax::Ingest::Errors::InvalidIngesterClass.new(klass) unless valid
          registered_classes.add(klass)
        end

        # @param [Class] klass The Ingester class to remove from the set of
        #   available Ingester classes.
        # @return [Set] The set of available ingester classes, with the specified
        #   class removed.
        def unregister_class(klass)
          registered_classes.delete(klass)
        end

        # @param [String, Symbol] ingester_name The stringified name of the class
        #   constant. The string "Ingester" will be appended if it's not already on there.
        # @param [Hash] options The hash that will get passed to the
        #   constructor of the Ingester class.
        # @return An instance of the Ingester class.
        def factory(ingester_name, options={})
          find_class_by_name(ingester_name).new(options)
        end

        private

        # @param [String, Symbol] class_name The stringified class name, with
        #   or without namespaces, with or without the "Ingester" suffix. The
        #   argument itself is never modified.
        # @raise [Hyrax::Ingest::Errors::UnknownIngesterClass] When there is no
        #   corresponding Ingester class for the given value of the `class_name`
        #   param.
        # @raise [Hyrax::Ingest::Errors::AmbiguousIngesterClass] When the value of
        #   `class_name` param is insufficient in determining a Ingester class.
        # @return [Class] The appropriate Ingester class.
        def find_class_by_name(class_name)
          # Build a new string rather than mutating in place: String#to_s
          # returns the receiver itself, so an in-place edit would alter (or,
          # for frozen strings, fail on) the caller's object, and for Symbols
          # the edit would be applied to a throwaway copy.
          normalized_name = class_name.to_s
          normalized_name = "#{normalized_name}Ingester" unless normalized_name.end_with?('Ingester')
          namespaced_suffix = "::#{normalized_name}"

          # Match the full constant name, or the trailing namespace segments,
          # so that e.g. "WorkIngester" cannot also match "WorkIngesterFoo".
          found_classes = registered_classes.select do |class_const|
            const_name = class_const.to_s
            const_name == normalized_name || const_name.end_with?(namespaced_suffix)
          end

          raise Hyrax::Ingest::Errors::UnknownIngesterClass.new(normalized_name, registered_classes) if found_classes.empty?
          raise Hyrax::Ingest::Errors::AmbiguousIngesterClass.new(normalized_name, found_classes) if found_classes.count > 1
          found_classes.first
        end
      end
    end
  end
end
@@ -0,0 +1,136 @@
1
+ require 'hyrax/ingest/ingester/base'
2
+ require 'active_support/inflector'
3
+ require 'active_fedora'
4
+ require 'hyrax/ingest/ingester/active_fedora_property_assigner'
5
+ require 'hyrax/ingest/fetcher'
6
+ require 'hyrax/ingest/transformer'
7
+
8
module Hyrax
  module Ingest
    module Ingester
      # Ingester that instantiates (or looks up) an ActiveFedora model,
      # assigns property values obtained from configured fetchers, and saves
      # the model.
      class ActiveFedoraBaseIngester < Base
        include Interloper

        # Values of a fetcher's :shared option that select the shared SIP.
        SHARED_SIP_TRUTHY_VALUES = ['1', 'true', 'TRUE', 'True', 'yes', true].freeze
        private_constant :SHARED_SIP_TRUTHY_VALUES

        attr_reader :af_model_class_name, :properties_config, :update_params, :shared_sip

        # Hook registered through Interloper's `before` for save_model!.
        before(:save_model!) { logger.info "Saving #{af_model_class_name}" }

        # Hook registered through Interloper's `after` for save_model!:
        # records the model in the report as saved or failed depending on
        # whether it is persisted, and logs validation errors on failure.
        after(:save_model!) do
          if af_model.persisted?
            logger.info "#{af_model_class_name} saved!"
            report.stat[:models_saved] << af_model
          else
            validation_errors = af_model.errors.map { |field, msg| "'#{field}' #{msg}" }.join('; ')
            logger.error "Validation Error(s): #{validation_errors}"
            report.stat[:models_failed] << af_model
          end
        end

        # @param config [Hash] ingester options. The keys
        #   :af_model_class_name (required), :properties and :update are
        #   removed from the hash here; the remainder is passed to the
        #   superclass constructor.
        # @raise [ArgumentError] when :af_model_class_name is missing.
        def initialize(config={})
          raise ArgumentError, "Option :af_model_class_name is required" unless config.key?(:af_model_class_name)
          @af_model_class_name = config.delete(:af_model_class_name).to_s
          @properties_config = config.delete(:properties) || []
          @update_params = config.delete(:update)
          super(config)
        end

        # Assigns all configured properties, then saves the model.
        def run!
          assign_properties!
          save_model!
        end

        # @return the ActiveFedora model being ingested: a new instance, or
        #   an existing record when :update params were configured.
        def af_model
          @af_model ||= new_or_existing_af_model
        end

        protected

        # Saves the model.
        # @param continue_if_invalid [Boolean] when true (the default) an
        #   ActiveFedora::RecordInvalid error is swallowed and false returned.
        # @return the model on success, false when invalid and
        #   continue_if_invalid is true.
        # @raise [ActiveFedora::RecordInvalid] when the model is invalid and
        #   continue_if_invalid is false.
        def save_model!(continue_if_invalid: true)
          af_model.save!
          af_model
        rescue ActiveFedora::RecordInvalid
          raise unless continue_if_invalid
          false
        end

        # Runs every property assigner against the model.
        def assign_properties!
          property_assigners.each(&:assign!)
        end

        private

        # @return [Class] the constant named by af_model_class_name.
        # @raise [Hyrax::Ingest::Errors::UnknownActiveFedoraModel] when no
        #   such constant exists.
        def af_model_class
          Object.const_get(af_model_class_name.to_s)
        rescue NameError
          raise Hyrax::Ingest::Errors::UnknownActiveFedoraModel.new(af_model_class_name)
        end

        # @return a new model instance, or the first record matching the
        #   where clause when :update params were configured.
        # @raise [Hyrax::Ingest::Errors::RecordNotFound] when :update params
        #   were configured but no record matches.
        def new_or_existing_af_model
          # Build the where clause exactly once: each call to #where_clause
          # creates new fetchers and runs them again.
          conditions = where_clause
          return af_model_class.new unless conditions

          found_record = af_model_class.where(conditions).first
          raise Hyrax::Ingest::Errors::RecordNotFound.new(af_model_class, conditions) unless found_record
          found_record
        end

        # @return [Hash, nil] field => fetched value for every entry of
        #   update_params, or nil when no update params were given.
        def where_clause
          return unless update_params
          update_params.each_with_object({}) do |(field, from_params), conditions|
            value = create_fetcher_from_config(from_params[:from]).fetch
            # Cast to string unless value is an array
            value = value.to_s unless value.respond_to?(:each)
            conditions[field] = value
          end
        end

        # @return [Array<ActiveFedoraPropertyAssigner>] one assigner per
        #   entry of properties_config, built once and then reused.
        def property_assigners
          @property_assigners ||= properties_config.map do |property_config|
            property_assigner_options = {
              rdf_predicate: property_config[:rdf_predicate],
              fetcher: create_fetcher_from_config(property_config[:from]),
              af_model: af_model
            }

            if property_config.key?(:transform)
              transformer_class_name = property_config[:transform].keys.first
              transformer_class_options = property_config[:transform].values.first
              property_assigner_options[:transformer] = Hyrax::Ingest::Transformer.factory(transformer_class_name, transformer_class_options)
            end

            ActiveFedoraPropertyAssigner.new(property_assigner_options)
          end
        end

        # Builds a fetcher from a single-entry hash of the form
        # { fetcher_class_name => fetcher_options } and hands it this
        # ingester's SIP, iteration, logger and report where it accepts them.
        # @raise [Hyrax::Ingest::Errors::NoSharedSIPSpecified] when the
        #   fetcher asks for the shared SIP but none is set.
        def create_fetcher_from_config(fetcher_config)
          fetcher_class_name = fetcher_config.keys.first
          fetcher_class_options = fetcher_config.values.first
          Hyrax::Ingest::Fetcher.factory(fetcher_class_name, fetcher_class_options).tap do |fetcher|
            fetcher.sip = sip_for_fetcher(fetcher_class_options) if fetcher.respond_to?(:sip=)
            fetcher.iteration = iteration if fetcher.respond_to?(:iteration=)
            fetcher.logger = logger if fetcher.respond_to?(:logger=)
            fetcher.report = report if fetcher.respond_to?(:report=)
          end
        end

        # Picks the SIP to hand to a fetcher: the shared SIP when the fetcher
        # options request it, this ingester's own SIP otherwise.
        def sip_for_fetcher(fetcher_options)
          return sip unless use_shared_sip?(fetcher_options)
          raise Hyrax::Ingest::Errors::NoSharedSIPSpecified unless shared_sip
          shared_sip
        end

        # @param config the fetcher options; anything that is not a Hash
        #   (nil, a bare string, ...) never selects the shared SIP.
        # @return [Boolean] whether the :shared option holds a truthy value.
        def use_shared_sip?(config)
          config.is_a?(Hash) && SHARED_SIP_TRUTHY_VALUES.include?(config[:shared])
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Hyrax
  module Ingest
    module Ingester
      # Read-only holder for the settings of a single file: its type and,
      # optionally, an original filename, an external URL and the content.
      class ActiveFedoraFileIngester
        attr_reader :type
        attr_reader :original_filename
        attr_reader :external_url
        attr_reader :content

        # @param type the file type (required).
        # @param original_filename the original filename, nil by default.
        # @param external_url the external URL, nil by default.
        # @param content the file content, nil by default.
        def initialize(type:, original_filename: nil, external_url: nil, content: nil)
          @type, @original_filename = type, original_filename
          @external_url, @content = external_url, content
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ require 'hyrax/ingest/fetcher/base'
2
+ require 'hyrax/ingest/errors'
3
+ require 'active_fedora'
4
+ require 'hyrax/ingest/has_report'
5
+ require 'hyrax/ingest/has_logger'
6
+
7
module Hyrax
  module Ingest
    module Ingester
      # Fetches a value, optionally transforms it, and sets it on the
      # property of an ActiveFedora model that matches a given RDF predicate.
      class ActiveFedoraPropertyAssigner
        include Interloper
        include HasReport
        include HasLogger

        # Hook registered through Interloper's `after` for assign!: logs the
        # value and the property it was assigned to.
        after(:assign!) do
          logger.info "#{@fetched_and_transformed_value} assigned to property '#{property_name}' with rdf predicate '#{rdf_predicate}'"
        end

        attr_reader :rdf_predicate, :af_model, :fetcher, :transformer

        # @param options [Hash] reads :rdf_predicate, :fetcher, :af_model and
        #   :transformer.
        # @raise [Hyrax::Ingest::Errors::UnknownActiveFedoraModel] when
        #   :af_model is not an ActiveFedora::Base.
        # @raise [Hyrax::Ingest::Errors::InvalidFetcher] when :fetcher is not
        #   a Hyrax::Ingest::Fetcher::Base.
        def initialize(options={})
          @rdf_predicate, @fetcher, @af_model, @transformer =
            options.values_at(:rdf_predicate, :fetcher, :af_model, :transformer)
          raise Hyrax::Ingest::Errors::UnknownActiveFedoraModel.new(@af_model.class) unless @af_model.is_a?(ActiveFedora::Base)
          raise Hyrax::Ingest::Errors::InvalidFetcher.new(@fetcher.class) unless @fetcher.is_a?(Hyrax::Ingest::Fetcher::Base)
        end

        # Sets the fetched (and, when a transformer is present, transformed)
        # value on the model property.
        # @raise [Hyrax::Ingest::Errors::InvalidActiveFedoraPropertyValue]
        #   in place of ActiveTriples::Relation::ValueError.
        def assign!
          af_model.set_value(property_name, fetched_and_transformed_value)
        rescue ::ActiveTriples::Relation::ValueError
          # Rethrow ActiveTriples::Relation::ValueError as something more specific to ingest.
          raise Hyrax::Ingest::Errors::InvalidActiveFedoraPropertyValue.new(fetched_value, property_name, rdf_predicate)
        end

        private

        # @return the fetched value, passed through the transformer when one
        #   was given.
        def fetched_and_transformed_value
          @fetched_and_transformed_value ||= transformer ? transformer.transform(fetched_value) : fetched_value
        end

        # @return the value returned by the fetcher.
        def fetched_value
          @fetched_value ||= fetcher.fetch
        end

        # Performs a lookup of property name by RDF predicate.
        # @return [Symbol] The symbol representing the accessor for the
        #   property that matches the RDF predicate stored in the
        #   @rdf_predicate attribute.
        # @raise [Hyrax::Ingest::Errors::UnknownRdfPredicate] when no property
        #   of the model has that predicate.
        def property_name
          @property ||= begin
            matching = af_model.send(:properties).select { |_att, config| config.predicate == rdf_predicate }
            matching_names = matching.keys
            raise Hyrax::Ingest::Errors::UnknownRdfPredicate.new(rdf_predicate, af_model.class) if matching_names.empty?
            matching_names.first.to_sym
          end
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'hyrax/ingest/has_sip'
2
+ require 'hyrax/ingest/has_shared_sip'
3
+ require 'hyrax/ingest/has_iteration'
4
+ require 'hyrax/ingest/has_logger'
5
+ require 'hyrax/ingest/has_report'
6
+ require 'hyrax/ingest/has_depositor'
7
+
8
module Hyrax
  module Ingest
    module Ingester
      # Common parent of the ingester classes: mixes in the SIP, shared SIP,
      # iteration, report, logger and depositor interfaces.
      class Base
        include HasSIP
        include HasSharedSIP
        include HasIteration
        include HasReport
        include HasLogger
        include HasDepositor

        # @param config [Hash] ingester options; the :depositor entry is
        #   removed from the hash and assigned through #depositor=.
        def initialize(config={})
          depositor_from_config = config.delete(:depositor)
          self.depositor = depositor_from_config
        end

        # No-op, meant to be overridden.
        def run!
        end
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require 'hyrax/ingest/ingester/active_fedora_base_ingester'
2
+ require 'hyrax/ingest/ingester/active_fedora_file_ingester'
3
+
4
module Hyrax
  module Ingest
    module Ingester
      # Ingests a file set: assigns properties and depositor metadata, saves
      # the model, ingests its preservation events and attaches its files.
      class FileSetIngester < ActiveFedoraBaseIngester
        attr_reader :files_config, :preservation_events_config

        # @param config [Hash] ingester options. The :Files and
        #   :PreservationEvents entries are removed from the hash here, and
        #   :af_model_class_name defaults to 'FileSet'; the remainder is
        #   passed to the superclass constructor.
        def initialize(config={})
          @files_config = config.delete(:Files) || []
          @preservation_events_config = config.delete(:PreservationEvents) || []
          config[:af_model_class_name] ||= 'FileSet'
          super(config)
        end

        # Runs the whole file set ingest.
        # @return the ActiveFedora model for the file set.
        def run!
          assign_properties!
          apply_depositor_metadata!
          save_model!
          ingest_preservation_events!
          add_files_to_file_set!
          # return the new instance of the ActiveFedora model
          af_model
        end

        private

        # Applies depositor metadata to the model when a depositor is set.
        def apply_depositor_metadata!
          af_model.apply_depositor_metadata(depositor) if depositor
        end

        # Attaches each configured file to the file set, by external URL when
        # one is given and by content otherwise.
        def add_files_to_file_set!
          file_ingesters.each do |file_ingester|
            if file_ingester.external_url
              Hydra::Works::AddExternalFileToFileSet.call(af_model, file_ingester.external_url, file_ingester.type)
            else
              Hydra::Works::AddFileToFileSet.call(af_model, file_ingester.content, file_ingester.type)
            end
          end
        end

        # @return [Array<ActiveFedoraFileIngester>] one per entry of
        #   files_config, built once and then reused.
        def file_ingesters
          @file_ingesters ||= files_config.map do |file_config|
            # ActiveFedoraFileIngester#initialize takes keyword arguments
            # only, so the config hash has to be splatted; passing it
            # positionally raises ArgumentError on Ruby 3+.
            Hyrax::Ingest::Ingester::ActiveFedoraFileIngester.new(**file_config)
          end
        end

        # Runs every preservation event ingester.
        def ingest_preservation_events!
          preservation_event_ingesters.each(&:run!)
        end

        # @return [Array<PreservationEventIngester>] one per entry of
        #   preservation_events_config, each pointed at this file set and
        #   given this ingester's SIP, shared SIP and iteration. Memoized in
        #   an instance variable so repeated calls reuse the same ingesters.
        def preservation_event_ingesters
          @preservation_event_ingesters ||= preservation_events_config.map do |preservation_event_config|
            preservation_event_config[:premis_event_related_object] = af_model
            Hyrax::Ingest::Ingester::PreservationEventIngester.new(preservation_event_config).tap do |preservation_event_ingester|
              preservation_event_ingester.sip = sip if preservation_event_ingester.respond_to?(:sip=)
              preservation_event_ingester.shared_sip = shared_sip if preservation_event_ingester.respond_to?(:shared_sip=)
              preservation_event_ingester.iteration = iteration if preservation_event_ingester.respond_to?(:iteration=)
            end
          end
        end
      end
    end
  end
end