hyrax-ingest 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +52 -0
  3. data/config/routes.rb +2 -0
  4. data/lib/hyrax/ingest.rb +12 -0
  5. data/lib/hyrax/ingest/batch_runner.rb +130 -0
  6. data/lib/hyrax/ingest/configuration.rb +54 -0
  7. data/lib/hyrax/ingest/engine.rb +6 -0
  8. data/lib/hyrax/ingest/errors.rb +186 -0
  9. data/lib/hyrax/ingest/fetcher.rb +55 -0
  10. data/lib/hyrax/ingest/fetcher/base.rb +78 -0
  11. data/lib/hyrax/ingest/fetcher/csv_file.rb +89 -0
  12. data/lib/hyrax/ingest/fetcher/date_time.rb +15 -0
  13. data/lib/hyrax/ingest/fetcher/literal.rb +24 -0
  14. data/lib/hyrax/ingest/fetcher/premis_event_type.rb +28 -0
  15. data/lib/hyrax/ingest/fetcher/rdf_uri.rb +21 -0
  16. data/lib/hyrax/ingest/fetcher/xml_file.rb +54 -0
  17. data/lib/hyrax/ingest/fetcher/yaml_file.rb +36 -0
  18. data/lib/hyrax/ingest/has_depositor.rb +13 -0
  19. data/lib/hyrax/ingest/has_iteration.rb +18 -0
  20. data/lib/hyrax/ingest/has_logger.rb +29 -0
  21. data/lib/hyrax/ingest/has_report.rb +17 -0
  22. data/lib/hyrax/ingest/has_shared_sip.rb +20 -0
  23. data/lib/hyrax/ingest/has_sip.rb +20 -0
  24. data/lib/hyrax/ingest/ingester.rb +75 -0
  25. data/lib/hyrax/ingest/ingester/active_fedora_base_ingester.rb +136 -0
  26. data/lib/hyrax/ingest/ingester/active_fedora_file_ingester.rb +17 -0
  27. data/lib/hyrax/ingest/ingester/active_fedora_property_assigner.rb +67 -0
  28. data/lib/hyrax/ingest/ingester/base.rb +28 -0
  29. data/lib/hyrax/ingest/ingester/file_set_ingester.rb +68 -0
  30. data/lib/hyrax/ingest/ingester/preservation_event_ingester.rb +27 -0
  31. data/lib/hyrax/ingest/ingester/work_ingester.rb +55 -0
  32. data/lib/hyrax/ingest/reporting.rb +13 -0
  33. data/lib/hyrax/ingest/reporting/configuration.rb +22 -0
  34. data/lib/hyrax/ingest/reporting/report.rb +79 -0
  35. data/lib/hyrax/ingest/reporting/views/hyrax_ingest_report.html.erb +77 -0
  36. data/lib/hyrax/ingest/reporting/views/hyrax_ingest_report.txt.erb +21 -0
  37. data/lib/hyrax/ingest/runner.rb +103 -0
  38. data/lib/hyrax/ingest/sip.rb +92 -0
  39. data/lib/hyrax/ingest/transformer.rb +42 -0
  40. data/lib/hyrax/ingest/transformer/base.rb +12 -0
  41. data/lib/hyrax/ingest/transformer/to_date.rb +33 -0
  42. data/lib/hyrax/ingest/version.rb +5 -0
  43. data/lib/tasks/ingest_tasks.rake +22 -0
  44. metadata +330 -0
@@ -0,0 +1,55 @@
1
+ require 'hyrax/ingest/fetcher/xml_file'
2
+ require 'hyrax/ingest/fetcher/yaml_file'
3
+ require 'hyrax/ingest/fetcher/csv_file'
4
+ require 'hyrax/ingest/fetcher/literal'
5
+ require 'hyrax/ingest/fetcher/premis_event_type'
6
+ require 'hyrax/ingest/fetcher/rdf_uri'
7
+ require 'hyrax/ingest/fetcher/date_time'
8
+ require 'hyrax/ingest/errors'
9
+
10
module Hyrax
  module Ingest
    # Registry of the available fetcher classes, plus helpers to look one up
    # by name and instantiate it.
    module Fetcher
      class << self
        # @return [Set<Class>] The set of all fetcher classes (memoized).
        def all_classes
          @all_classes ||= Set.new([
            Hyrax::Ingest::Fetcher::XMLFile,
            Hyrax::Ingest::Fetcher::YAMLFile,
            Hyrax::Ingest::Fetcher::CSVFile,
            Hyrax::Ingest::Fetcher::Literal,
            Hyrax::Ingest::Fetcher::PremisEventType,
            Hyrax::Ingest::Fetcher::RdfUri,
            Hyrax::Ingest::Fetcher::DateTime
          ])
        end

        # Builds a fetcher instance from a class name and constructor options.
        # @param [String] class_name The fetcher class name, with or without
        #   namespaces (see #find_class_by_name).
        # @param options The value to pass to the constructor of the fetcher
        #   class. nil is replaced with an empty hash.
        # @raise [Hyrax::Ingest::Errors::UnknownFetcherClass] see #find_class_by_name
        # @raise [Hyrax::Ingest::Errors::AmbiguousFetcherClass] see #find_class_by_name
        # @return An instance of the fetcher class.
        def factory(class_name, options={})
          options ||= {}
          find_class_by_name(class_name).new(options)
        end

        # @param [String] class_name The stringified class name, with or
        #   without namespaces, e.g. "XMLFile", "Fetcher::XMLFile" or
        #   "Hyrax::Ingest::Fetcher::XMLFile".
        # @raise [Hyrax::Ingest::Errors::UnknownFetcherClass] When there is no
        #   corresponding fetcher class for the given value of the `class_name`
        #   param.
        # @raise [Hyrax::Ingest::Errors::AmbiguousFetcherClass] When the value of
        #   `class_name` param is insufficient in determining a fetcher class.
        # @return [Class] The appropriate fetcher class.
        def find_class_by_name(class_name)
          name = class_name.to_s
          found_classes = all_classes.select do |class_const|
            full_name = class_const.to_s
            # Match the whole name, or a trailing portion that starts at a
            # namespace boundary. A plain suffix test (rather than an
            # interpolated regex) avoids partial-name hits such as "XML" and
            # keeps regex metacharacters in the name from being interpreted.
            full_name == name || full_name.end_with?("::#{name}")
          end
          raise Hyrax::Ingest::Errors::UnknownFetcherClass.new(class_name, all_classes) if found_classes.empty?
          raise Hyrax::Ingest::Errors::AmbiguousFetcherClass.new(class_name, found_classes) if found_classes.count > 1
          found_classes.first
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require 'hyrax/ingest/errors'
2
+ require 'hyrax/ingest/sip'
3
+ require 'interloper'
4
+ require 'hyrax/ingest/has_logger'
5
+ require 'hyrax/ingest/has_report'
6
+
7
+
8
+
9
module Hyrax
  module Ingest
    module Fetcher
      # Abstract superclass for fetchers. It handles the :required option and,
      # through an after(:fetch) callback, reports required values that come
      # back empty.
      class Base
        include Interloper
        include HasReport
        include HasLogger

        # Callback to log missing values that have been marked as required.
        after(:fetch) do
          # Use a flag to only report the missing value once.
          unless @after_fetch_run_once
            report_missing_required_value if required? && fetched_value_is_empty?
            @after_fetch_run_once = true
          end
        end

        # @param [Hash] options
        # @option options :required When truthy, an empty fetched value is
        #   reported as a missing required value.
        def initialize(options={})
          # Read the flag instead of deleting it so that the hash handed in by
          # the caller is not modified.
          @required = options[:required]
        end

        # Boolean reader for @required.
        # @return [Boolean]
        def required?; !!@required; end

        # Subclasses should override this method with the logic required to
        # fetch values from a SIP. The overridden method should set
        # @fetched_value, and return it.
        # @see Hyrax::Ingest::Fetcher::XMLFile#fetch
        # @abstract
        def fetch
          @fetched_value
        end

        protected

          # Determines whether the value that was fetched by #fetch is empty
          # or not. By default, the fetched value is empty if it is nil, an
          # empty string, an empty array, an empty hash, or an array
          # containing any combination of those. Override this method in
          # subclasses to change the definition of empty in those contexts.
          # The return value is used for reporting which values are missing,
          # but required.
          # @return [Boolean] True if @fetched_value is considered to be empty; false otherwise.
          def fetched_value_is_empty?
            # Array() turns nil into [] and wraps a scalar, so a single
            # emptiness test per element covers every case listed above.
            Array(@fetched_value).all? do |val|
              val.nil? || (val.respond_to?(:empty?) && val.empty?)
            end
          end

          # Reports occurrences of missing required values: logs a warning and
          # appends `params` to the report's :missing_required_values stat
          # for this class.
          # Subclasses should override this method to provide further detail
          # by passing an options hash that will be available for reporting.
          # @param [Hash] params Extra detail to record with the occurrence.
          # @see Hyrax::Ingest::Fetcher::XMLFile#report_missing_required_value
          # @example
          #   # Provide additional info to the report.
          #   def report_missing_required_value
          #     super(foo: "bar")
          #   end
          def report_missing_required_value(params={})
            # Strip the namespaces, leaving only the last segment of the class name.
            short_class_name = self.class.to_s.gsub(/.*\:\:/, '')
            logger.warn "Missing required value from #{short_class_name} with params = #{params}"
            report.stat[:missing_required_values][self.class] ||= []
            report.stat[:missing_required_values][self.class] << params
          end
      end
    end
  end
end
@@ -0,0 +1,89 @@
1
+ require 'hyrax/ingest/fetcher/base'
2
+ require 'hyrax/ingest/has_sip'
3
+ require 'hyrax/ingest/has_iteration'
4
+ require 'roo'
5
+
6
module Hyrax
  module Ingest
    module Fetcher
      # Fetches the value of a single cell from a CSV file found in the SIP.
      # The column is given as a header name or a number; the row is given as
      # a number or the keyword 'next' (which follows the current iteration).
      class CSVFile < Base
        attr_reader :filename, :column, :row, :header_row_number

        include HasSIP
        include HasIteration

        # Same declaration the Literal, YAMLFile and PremisEventType fetchers
        # make when they override #fetch.
        # NOTE(review): presumably this is what lets Base's after(:fetch)
        # required-value check run for this class's #fetch -- confirm against
        # the Interloper documentation.
        inherit_callbacks_for :fetch

        # @param [Hash] options
        # @option options :filename (required) Name of the CSV file, resolved through the SIP.
        # @option options :column (required) A column header or a column number.
        # @option options :row (required) A row number, or 'next'.
        # @option options :header_row_number Row holding the headers; defaults to 1.
        # @option options :row_offset Number added to the resolved row;
        #   defaults to the header row number.
        # @raise [ArgumentError] When :filename, :column or :row is missing.
        def initialize(options={})
          raise ArgumentError, "Required option :filename is missing" unless options.key?(:filename)
          raise ArgumentError, "Required option :column is missing" unless options.key?(:column)
          raise ArgumentError, "Required option :row is missing" unless options.key?(:row)

          @filename = options.fetch(:filename)
          # Column and row are normalized so that lookups are case-insensitive.
          @column = options.fetch(:column, '').to_s.strip.downcase
          @row = options.fetch(:row, '').to_s.strip.downcase
          @header_row_number = options.fetch(:header_row_number, 1)
          @row_offset = options[:row_offset]
          super
        end

        # Overrides Hyrax::Ingest::Fetcher::Base#fetch
        # @return The value of the addressed cell (memoized when truthy).
        def fetch
          @fetched_value ||= cell_value
        end

        private

          # @return [Roo::CSV] The spreadsheet reader for the CSV file (memoized).
          def roo
            @roo ||= Roo::CSV.new(sip.find_file_path(filename))
          end

          def cell_value
            roo.cell(row_number, column_number)
          end

          # @return The column number from which to retrieve the cell. A
          #   matching header takes precedence over a numeric column.
          # @raise [ArgumentError] When :column is neither a header nor a number.
          def column_number
            @column_number ||= column_number_from_header || specific_column_number
            # TODO: custom error
            raise ArgumentError, "Value for column: option must be a number or a column header; '#{column}' was given." if @column_number.nil?
            @column_number
          end

          # @return [Integer, nil] 1-based position of the header matching
          #   `column`, or nil when no header matches.
          def column_number_from_header
            index = headers.index(column)
            index + 1 if index
          end

          def specific_column_number
            column.to_i if string_is_integer?(column)
          end

          # @return [Integer] The row number from which to retrieve the cell,
          #   i.e. the resolved row plus the row offset.
          # @raise [ArgumentError] When :row is neither a number nor 'next'.
          def row_number
            @row_number ||= next_row || specific_row_number
            # TODO: custom error
            raise ArgumentError, "Value for row: option must be a number or the keyword 'next'; #{@row} was given." if @row_number.nil?
            @row_number + row_offset
          end

          def next_row
            iteration + 1 if @row == 'next'
          end

          def specific_row_number
            row.to_i if string_is_integer?(row)
          end

          # @return The explicit :row_offset option, falling back to the
          #   header row number.
          def row_offset
            @row_offset || header_row_number
          end

          # @return [Array<String>] Downcased values of the header row (memoized).
          def headers
            @headers ||= roo.row(header_row_number).map(&:to_s).map(&:downcase)
          end

          # @param [String] str
          # @return [Boolean] True when, after dropping leading zeros, the
          #   string round-trips through #to_i unchanged (e.g. '7', '007',
          #   '-3'); false for '0', '' and non-numeric text.
          def string_is_integer?(str)
            # Work on a copy: stripping the zeros in place would alter the
            # caller's string, and with it the :column/:row values quoted in
            # the error messages above.
            normalized = str.to_s.sub(/\A0+/, '')
            normalized.to_i.to_s == normalized
          end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'hyrax/ingest/errors'
2
+ require 'hyrax/ingest/sip'
3
+ require 'date'
4
+
5
module Hyrax
  module Ingest
    module Fetcher
      # Fetcher whose value is the current date and time rather than anything
      # read from a SIP.
      class DateTime < Base
        # Overrides Hyrax::Ingest::Fetcher::Base#fetch
        # @return [::DateTime] The timestamp taken on the first call; later
        #   calls return that same memoized object.
        def fetch
          return @fetched_value if @fetched_value
          @fetched_value = ::DateTime.now
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'hyrax/ingest/errors'
2
+ require 'hyrax/ingest/sip'
3
+
4
module Hyrax
  module Ingest
    module Fetcher
      # Fetcher that returns a fixed value supplied at construction time.
      class Literal < Base
        attr_reader :literal_value

        inherit_callbacks_for :fetch

        # @param options Either a Hash whose :value entry is the literal, or
        #   any non-Hash object, which is itself taken as the literal.
        def initialize(options={})
          # Work on a copy so that removing :value (and anything the
          # superclass removes) never alters the hash owned by the caller.
          options = options.is_a?(Hash) ? options.dup : { value: options }
          @literal_value = options.delete(:value)
          super
        end

        # Overrides Hyrax::Ingest::Fetcher::Base#fetch
        # @return The literal value given to the constructor.
        def fetch
          @fetched_value = literal_value
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # This class adheres to the Hyrax::Ingest::Fetcher::Base interface to work with other classes within the Hyrax::Ingest
2
+ # gem. It effectively maps PREMIS Event Type abbreviations to the URI that is then associated with a
3
+ # Hyrax::Preservation::Event model.
4
+
5
+ require 'hyrax/ingest/errors'
6
+ require 'hyrax/ingest/sip'
7
+
8
module Hyrax
  module Ingest
    module Fetcher
      # Fetcher that looks up a PREMIS event type by its abbreviation and
      # returns that event type's URI.
      class PremisEventType < Base
        inherit_callbacks_for :fetch

        attr_reader :abbr

        # @param options Either a Hash whose :abbr entry is the abbreviation,
        #   or any non-Hash object, which is itself taken as the abbreviation.
        def initialize(options={})
          # Work on a copy so that removing :abbr (and anything the
          # superclass removes) never alters the hash owned by the caller.
          options = options.is_a?(Hash) ? options.dup : { abbr: options }
          @abbr = options.delete(:abbr)
          super
        end

        # Overrides Hyrax::Ingest::Fetcher::Base#fetch
        # @return The `uri` of the object returned by
        #   Hyrax::Preservation::PremisEventType.find_by_abbr (memoized when truthy).
        def fetch
          @fetched_value ||= Hyrax::Preservation::PremisEventType.find_by_abbr(@abbr).uri
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'rdf'
2
+
3
module Hyrax
  module Ingest
    module Fetcher
      # Fetcher that wraps a configured URI string in an RDF::URI.
      class RdfUri < Base
        attr_reader :uri_str

        # @param options Either a Hash whose :uri_str entry is the URI string,
        #   or any non-Hash object, which is itself taken as the URI string.
        def initialize(options={})
          # Work on a copy so that removing :uri_str (and anything the
          # superclass removes) never alters the hash owned by the caller.
          options = options.is_a?(Hash) ? options.dup : { uri_str: options }
          @uri_str = options.delete(:uri_str)
          super
        end

        # Overrides Hyrax::Ingest::Fetcher::Base#fetch
        # @return [RDF::URI] The URI built from `uri_str` (memoized).
        def fetch
          @fetched_value ||= ::RDF::URI.new(uri_str)
        end
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ require 'hyrax/ingest/fetcher/base'
2
+ require 'hyrax/ingest/has_sip'
3
+ require 'nokogiri'
4
+ require 'hyrax/ingest/reporting'
5
+ require 'interloper'
6
+
7
module Hyrax
  module Ingest
    module Fetcher
      # Fetches the text of every node matching an XPath expression in an XML
      # file read from the SIP.
      class XMLFile < Base
        include HasSIP

        attr_reader :filename, :xpath, :default, :fetched_value

        inherit_callbacks_after :fetch

        # @param [Hash] options
        # @option options :filename (required) Name of the XML file, read through the SIP.
        # @option options :xpath (required) XPath expression selecting the nodes.
        # @option options :default Value returned when nothing matches;
        #   an empty array unless given.
        # @raise [ArgumentError] When :filename or :xpath is missing.
        def initialize(options={})
          [:filename, :xpath].each do |required_key|
            raise ArgumentError, "Required option :#{required_key} is missing" unless options.key?(required_key)
          end
          @filename, @xpath = options.values_at(:filename, :xpath)
          @default = options[:default] || []
          super
        end

        # Overrides Hyrax::Ingest::Fetcher::Base#fetch
        # @return [Array<String>, Object] The text of each matching node, or
        #   the configured default when no node matches. Memoized.
        def fetch
          return @fetched_value if @fetched_value
          node_texts = noko.xpath(xpath).map(&:text)
          @fetched_value = node_texts.empty? ? default : node_texts
        end

        protected

          # Overrides
          # Hyrax::Ingest::Fetcher::Base#report_missing_required_value,
          # passing filename and xpath information. The report summary has
          # specific logic to look for these.
          def report_missing_required_value
            super(filename: filename, xpath: xpath)
          end

          # @return The parsed XML document with its namespaces removed (memoized).
          def noko
            return @noko if @noko
            document = Nokogiri::XML(sip.read_file(filename))
            # TODO: allow using namespaces instead of blindly removing them.
            document.remove_namespaces!
            @noko = document
          end
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ require 'hyrax/ingest/fetcher/base'
2
+ require 'hyrax/ingest/has_sip'
3
+ require 'yaml'
4
+
5
module Hyrax
  module Ingest
    module Fetcher
      # Fetches a value from a YAML file read from the SIP by digging into
      # the parsed document along a path of keys.
      class YAMLFile < Base
        include HasSIP

        inherit_callbacks_for :fetch

        attr_reader :filename, :yaml_path

        # @param [Hash] options
        # @option options :filename (required) Name of the YAML file, read through the SIP.
        # @option options :yaml_path (required) The key, or list of keys,
        #   passed to #dig on the parsed document.
        # @raise [ArgumentError] When :filename or :yaml_path is missing.
        def initialize(options={})
          raise ArgumentError, "Required option :filename is missing" unless options.key?(:filename)
          raise ArgumentError, "Required option :yaml_path is missing" unless options.key?(:yaml_path)
          @filename = options[:filename]
          @yaml_path = options[:yaml_path]
          super
        end

        # Overrides Hyrax::Ingest::Fetcher::Base#fetch
        # @return The value found at `yaml_path`, or nil when the path is
        #   absent or the document is not a Hash/Array (for example an empty
        #   file). Memoized when truthy.
        def fetch
          @fetched_value ||= begin
            document = yaml
            # An empty or scalar document has nothing to dig into; treat it
            # as "no value" instead of failing with NoMethodError.
            document.dig(*yaml_path) if document.respond_to?(:dig)
          end
        end

        private

          # @return [Hash, Array, String] The YAML parsed into Ruby objects.
          def yaml
            # NOTE(review): depending on the Psych version, YAML.load can
            # build arbitrary Ruby objects. If SIP files can come from an
            # untrusted source, consider YAML.safe_load here.
            @yaml ||= YAML.load sip.read_file(filename)
          end
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # Simple interface for injecting a depositor dependency.
2
+ require 'hyrax/ingest/sip'
3
+ require 'hyrax/ingest/errors'
4
+
5
module Hyrax
  module Ingest
    # Mixin that gives objects a readable and writable `depositor` attribute.
    module HasDepositor
      # Declared on the module itself, which is where the former `included`
      # hook was already defining it (the hook ignored its argument and ran
      # attr_accessor against this module on every include).
      attr_accessor :depositor
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # Simple interface for getting and setting the current iteration number.
2
+
3
+ require 'hyrax/ingest/sip'
4
+ require 'hyrax/ingest/errors'
5
+
6
module Hyrax
  module Ingest
    # Mixin that stores an integer iteration number on the including object.
    module HasIteration
      # Stores the iteration number, coerced with #to_i.
      # @param iteration Any object responding to #to_i.
      def iteration=(iteration)
        coerced = iteration.to_i
        @iteration = coerced
      end

      # @return [Integer] The stored iteration number; 0 until one is set.
      def iteration
        @iteration || (@iteration = 0)
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'logger'
2
+
3
module Hyrax
  module Ingest
    # Mixin providing a lazily created, replaceable logger.
    module HasLogger
      # Module-wide settings for HasLogger.
      class Configuration
        # The log device handed to Logger.new for default loggers.
        attr_accessor :default_log_to

        def initialize
          @default_log_to = STDOUT
        end
      end

      # @return [Configuration] The shared configuration object (memoized).
      def self.config
        @config ||= Configuration.new
      end

      # @return [Logger] The assigned logger, or a new one writing to the
      #   configured default device on first use.
      def logger
        @logger || (@logger = Logger.new(HasLogger.config.default_log_to))
      end

      # @param [Logger] logger The logger to use from now on.
      # @raise [Hyrax::Ingest::Errors::InvalidLogger] When the given object
      #   is not a Logger.
      def logger=(logger)
        if logger.is_a?(Logger)
          @logger = logger
        else
          raise Hyrax::Ingest::Errors::InvalidLogger
        end
      end
    end
  end
end