bulkrax 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +205 -0
  3. data/README.md +202 -0
  4. data/Rakefile +42 -0
  5. data/app/assets/config/bulkrax_manifest.js +2 -0
  6. data/app/assets/javascripts/bulkrax/application.js +14 -0
  7. data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
  8. data/app/assets/javascripts/bulkrax/entries.js +15 -0
  9. data/app/assets/javascripts/bulkrax/exporters.js +60 -0
  10. data/app/assets/javascripts/bulkrax/importers.js.erb +166 -0
  11. data/app/assets/stylesheets/bulkrax/accordion.scss +40 -0
  12. data/app/assets/stylesheets/bulkrax/application.css +15 -0
  13. data/app/assets/stylesheets/bulkrax/coderay.scss +264 -0
  14. data/app/assets/stylesheets/bulkrax/import_export.scss +37 -0
  15. data/app/controllers/bulkrax/application_controller.rb +8 -0
  16. data/app/controllers/bulkrax/entries_controller.rb +44 -0
  17. data/app/controllers/bulkrax/exporters_controller.rb +125 -0
  18. data/app/controllers/bulkrax/importers_controller.rb +315 -0
  19. data/app/controllers/concerns/bulkrax/api.rb +29 -0
  20. data/app/factories/bulkrax/object_factory.rb +230 -0
  21. data/app/helpers/bulkrax/application_helper.rb +15 -0
  22. data/app/helpers/bulkrax/exporters_helper.rb +6 -0
  23. data/app/helpers/bulkrax/importers_helper.rb +13 -0
  24. data/app/helpers/bulkrax/validation_helper.rb +153 -0
  25. data/app/jobs/bulkrax/application_job.rb +6 -0
  26. data/app/jobs/bulkrax/child_relationships_job.rb +128 -0
  27. data/app/jobs/bulkrax/delete_work_job.rb +16 -0
  28. data/app/jobs/bulkrax/download_cloud_file_job.rb +18 -0
  29. data/app/jobs/bulkrax/export_work_job.rb +37 -0
  30. data/app/jobs/bulkrax/exporter_job.rb +14 -0
  31. data/app/jobs/bulkrax/import_work_collection_job.rb +41 -0
  32. data/app/jobs/bulkrax/import_work_job.rb +32 -0
  33. data/app/jobs/bulkrax/importer_job.rb +26 -0
  34. data/app/mailers/bulkrax/application_mailer.rb +8 -0
  35. data/app/matchers/bulkrax/application_matcher.rb +113 -0
  36. data/app/matchers/bulkrax/bagit_matcher.rb +6 -0
  37. data/app/matchers/bulkrax/csv_matcher.rb +6 -0
  38. data/app/matchers/bulkrax/oai_matcher.rb +6 -0
  39. data/app/models/bulkrax/application_record.rb +7 -0
  40. data/app/models/bulkrax/csv_collection_entry.rb +19 -0
  41. data/app/models/bulkrax/csv_entry.rb +163 -0
  42. data/app/models/bulkrax/entry.rb +104 -0
  43. data/app/models/bulkrax/exporter.rb +122 -0
  44. data/app/models/bulkrax/exporter_run.rb +7 -0
  45. data/app/models/bulkrax/import_failed.rb +13 -0
  46. data/app/models/bulkrax/importer.rb +155 -0
  47. data/app/models/bulkrax/importer_run.rb +8 -0
  48. data/app/models/bulkrax/oai_dc_entry.rb +6 -0
  49. data/app/models/bulkrax/oai_entry.rb +74 -0
  50. data/app/models/bulkrax/oai_qualified_dc_entry.rb +6 -0
  51. data/app/models/bulkrax/oai_set_entry.rb +19 -0
  52. data/app/models/bulkrax/rdf_collection_entry.rb +19 -0
  53. data/app/models/bulkrax/rdf_entry.rb +90 -0
  54. data/app/models/bulkrax/status.rb +25 -0
  55. data/app/models/bulkrax/xml_entry.rb +73 -0
  56. data/app/models/concerns/bulkrax/download_behavior.rb +61 -0
  57. data/app/models/concerns/bulkrax/errored_entries.rb +45 -0
  58. data/app/models/concerns/bulkrax/export_behavior.rb +58 -0
  59. data/app/models/concerns/bulkrax/file_factory.rb +140 -0
  60. data/app/models/concerns/bulkrax/has_local_processing.rb +7 -0
  61. data/app/models/concerns/bulkrax/has_matchers.rb +155 -0
  62. data/app/models/concerns/bulkrax/import_behavior.rb +90 -0
  63. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +34 -0
  64. data/app/models/concerns/bulkrax/status_info.rb +56 -0
  65. data/app/parsers/bulkrax/application_parser.rb +299 -0
  66. data/app/parsers/bulkrax/bagit_parser.rb +157 -0
  67. data/app/parsers/bulkrax/csv_parser.rb +266 -0
  68. data/app/parsers/bulkrax/oai_dc_parser.rb +130 -0
  69. data/app/parsers/bulkrax/oai_qualified_dc_parser.rb +9 -0
  70. data/app/parsers/bulkrax/xml_parser.rb +103 -0
  71. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +19 -0
  72. data/app/views/bulkrax/entries/_raw_metadata.html.erb +19 -0
  73. data/app/views/bulkrax/entries/show.html.erb +63 -0
  74. data/app/views/bulkrax/exporters/_form.html.erb +120 -0
  75. data/app/views/bulkrax/exporters/edit.html.erb +23 -0
  76. data/app/views/bulkrax/exporters/index.html.erb +67 -0
  77. data/app/views/bulkrax/exporters/new.html.erb +23 -0
  78. data/app/views/bulkrax/exporters/show.html.erb +124 -0
  79. data/app/views/bulkrax/importers/_bagit_fields.html.erb +54 -0
  80. data/app/views/bulkrax/importers/_browse_everything.html.erb +12 -0
  81. data/app/views/bulkrax/importers/_csv_fields.html.erb +39 -0
  82. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +16 -0
  83. data/app/views/bulkrax/importers/_form.html.erb +35 -0
  84. data/app/views/bulkrax/importers/_oai_fields.html.erb +42 -0
  85. data/app/views/bulkrax/importers/_xml_fields.html.erb +60 -0
  86. data/app/views/bulkrax/importers/edit.html.erb +20 -0
  87. data/app/views/bulkrax/importers/index.html.erb +77 -0
  88. data/app/views/bulkrax/importers/new.html.erb +25 -0
  89. data/app/views/bulkrax/importers/show.html.erb +175 -0
  90. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +37 -0
  91. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +52 -0
  92. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +39 -0
  93. data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +6 -0
  94. data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +19 -0
  95. data/app/views/layouts/bulkrax/application.html.erb +14 -0
  96. data/config/locales/bulkrax.en.yml +36 -0
  97. data/config/routes.rb +18 -0
  98. data/db/migrate/20181011230201_create_bulkrax_importers.rb +18 -0
  99. data/db/migrate/20181011230228_create_bulkrax_importer_runs.rb +16 -0
  100. data/db/migrate/20190325183136_create_bulkrax_entries.rb +16 -0
  101. data/db/migrate/20190601221109_add_status_to_entry.rb +9 -0
  102. data/db/migrate/20190715161939_add_collections_to_importer_runs.rb +6 -0
  103. data/db/migrate/20190715162044_change_collection_ids_on_entries.rb +5 -0
  104. data/db/migrate/20190729124607_create_bulkrax_exporters.rb +19 -0
  105. data/db/migrate/20190729134158_create_bulkrax_exporter_runs.rb +14 -0
  106. data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +12 -0
  107. data/db/migrate/20191203225129_add_total_collection_records_to_importer_runs.rb +5 -0
  108. data/db/migrate/20191204191623_add_children_to_importer_runs.rb +6 -0
  109. data/db/migrate/20191204223857_change_total_records_to_total_work_entries.rb +6 -0
  110. data/db/migrate/20191212155530_change_entry_last_error.rb +19 -0
  111. data/db/migrate/20200108194557_add_validate_only_to_bulkrax_importers.rb +5 -0
  112. data/db/migrate/20200301232856_add_status_to_importers.rb +9 -0
  113. data/db/migrate/20200312190638_remove_foreign_key_from_bulkrax_entries.rb +5 -0
  114. data/db/migrate/20200326235838_add_status_to_exporters.rb +7 -0
  115. data/db/migrate/20200601204556_add_invalid_record_to_importer_run.rb +5 -0
  116. data/db/migrate/20200818055819_create_bulkrax_statuses.rb +18 -0
  117. data/db/migrate/20200819054016_move_to_statuses.rb +30 -0
  118. data/db/migrate/20201106014204_add_date_filter_and_status_to_bulkrax_exporters.rb +7 -0
  119. data/db/migrate/20201117220007_add_workflow_status_to_bulkrax_exporter.rb +5 -0
  120. data/db/migrate/20210806044408_remove_unused_last_error.rb +7 -0
  121. data/db/migrate/20210806065737_increase_text_sizes.rb +12 -0
  122. data/lib/bulkrax.rb +161 -0
  123. data/lib/bulkrax/engine.rb +37 -0
  124. data/lib/bulkrax/version.rb +5 -0
  125. data/lib/generators/bulkrax/install_generator.rb +80 -0
  126. data/lib/generators/bulkrax/templates/README +3 -0
  127. data/lib/generators/bulkrax/templates/app/assets/images/bulkrax/removed.png +0 -0
  128. data/lib/generators/bulkrax/templates/app/models/concerns/bulkrax/has_local_processing.rb +8 -0
  129. data/lib/generators/bulkrax/templates/bin/importer +140 -0
  130. data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +84 -0
  131. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +72 -0
  132. data/lib/tasks/bulkrax_tasks.rake +6 -0
  133. metadata +388 -0
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ # this is a PORO to help pass errors around
4
module Bulkrax
  # Plain Ruby value holder used to pass error details around. It is neither
  # an ActiveRecord model nor an Exception subclass; it only exposes the two
  # values it was built with.
  class ImportFailed
    # Description of what went wrong.
    attr_accessor :message
    # Detail lines supplied by the caller (stored untouched).
    attr_accessor :backtrace

    # @param message [Object] value returned by #message
    # @param backtrace [Object] value returned by #backtrace
    def initialize(message, backtrace)
      @message, @backtrace = message, backtrace
    end
  end
end
@@ -0,0 +1,155 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'iso8601'
4
+
5
module Bulkrax
  # A configured import: which parser class to use, the parser settings
  # (parser_fields), the field mapping and the run frequency. Executions are
  # recorded as ImporterRun rows and imported records as Entry rows.
  #
  # `parser` and `status_info` are not defined in this class; they are
  # presumably supplied by the included ImporterExporterBehavior / StatusInfo
  # concerns -- confirm there.
  class Importer < ApplicationRecord
    include Bulkrax::ImporterExporterBehavior
    include Bulkrax::StatusInfo

    serialize :parser_fields, JSON
    serialize :field_mapping, JSON

    belongs_to :user
    has_many :importer_runs, dependent: :destroy
    has_many :entries, as: :importerexporter, dependent: :destroy

    validates :name, presence: true
    validates :admin_set_id, presence: true
    validates :parser_klass, presence: true

    delegate :valid_import?, :create_parent_child_relationships,
             :write_errored_entries_file, :visibility, to: :parser

    attr_accessor :only_updates, :file_style, :file
    attr_writer :current_run

    # @return [String, Object] 'Validated' for validate-only importers,
    #   otherwise whatever the inherited #status returns
    def status
      return 'Validated' if validate_only

      super
    end

    # Records a final status on the importer once the current run has no
    # enqueued records left. Does nothing while records are still enqueued.
    def record_status
      importer_run = ImporterRun.find(current_run.id) # re-read so the counters are fresh
      return if importer_run.enqueued_records.positive? # still processing

      if importer_run.failed_records.positive?
        if importer_run.invalid_records.present?
          e = Bulkrax::ImportFailed.new('Failed with Invalid Records', importer_run.invalid_records.split("\n"))
          importer_run.importer.status_info(e)
        else
          importer_run.importer.status_info('Complete (with failures)')
        end
      else
        importer_run.importer.status_info('Complete')
      end
    end

    # Field mapping used for this import. When field_mapping is empty (blank
    # or `[{}]`), a default is built by merging
    # Bulkrax.default_field_mapping.call(field) for every non-nil entry of
    # parser.import_fields.
    #
    # @return [Hash, nil] the stored mapping, the generated default, or nil
    #   when the stored mapping is blank (but not `[{}]`) and the parser
    #   reports no import fields
    def mapping
      @mapping ||= if field_mapping.blank? || field_mapping == [{}]
                     if parser.import_fields.present? || field_mapping == [{}]
                       # import_fields may be nil when we only got here through
                       # the `[{}]` clause; treat that as "no fields" instead
                       # of raising NoMethodError on nil.
                       fields = (parser.import_fields || []).reject(&:nil?)
                       ActiveSupport::HashWithIndifferentAccess.new(
                         fields.map { |m| Bulkrax.default_field_mapping.call(m) }.inject(:merge)
                       )
                     end
                   else
                     field_mapping
                   end
    end

    # @return [Hash] the stored parser_fields, or an empty Hash when unset
    def parser_fields
      self[:parser_fields] || {}
    end

    # Label/value pairs for the supported frequencies.
    def self.frequency_enums
      # these duration values use ISO 8601 Durations (https://en.wikipedia.org/wiki/ISO_8601#Durations)
      # TLDR; all durations are prefixed with 'P' and the parts are a number with the type of duration.
      # i.e. P1Y2M3W4DT5H6M7S == 1 Year, 2 Months, 3 Weeks, 4 Days, 5 Hours, 6 Minutes, 7 Seconds
      [['Daily', 'P1D'], ['Monthly', 'P1M'], ['Yearly', 'P1Y'], ['Once (on save)', 'PT0S']]
    end

    # Stores the frequency as the string form of an ISO8601::Duration.
    def frequency=(frequency)
      self[:frequency] = ISO8601::Duration.new(frequency).to_s
    end

    # @return [ISO8601::Duration] the stored frequency; "PT0S" when unset
    def frequency
      ISO8601::Duration.new(self[:frequency] || "PT0S")
    end

    # @return [Boolean] true unless the frequency is a zero-length duration
    def schedulable?
      frequency.to_seconds != 0
    end

    # The run for this invocation; created (and memoized) on first access.
    # An explicit `limit` takes precedence over parser.total.
    def current_run
      @current_run ||= importer_runs.create!(total_work_entries: limit || parser.total,
                                             total_collection_entries: parser.collections_total)
    end

    # @return [ImporterRun, nil] the last associated run, memoized
    def last_run
      @last_run ||= importer_runs.last
    end

    # @return [Hash] memoized, initially empty Hash held on this instance
    def seen
      @seen ||= {}
    end

    def replace_files
      parser_fields['replace_files']
    end

    def update_files
      parser_fields['update_files']
    end

    # Asks the parser to create works. Any StandardError is recorded via
    # status_info instead of propagating.
    def import_works
      save if new_record? # Object needs to be saved for statuses
      self.only_updates ||= false
      parser.create_works
    rescue StandardError => e
      status_info(e)
    end

    # Asks the parser to create collections. Any StandardError is recorded via
    # status_info instead of propagating.
    def import_collections
      save if new_record? # Object needs to be saved for statuses
      parser.create_collections
    rescue StandardError => e
      status_info(e)
    end

    # Prepend the base_url to ensure unique set identifiers. Uses the third
    # '/'-separated segment of parser_fields['base_url'] (the host part of a
    # "scheme://host/..." URL).
    # @todo - move to parser, as this is OAI specific
    def unique_collection_identifier(id)
      "#{parser_fields['base_url'].split('/')[2]}_#{id}"
    end

    # The format for metadata for the incoming import; corresponds to an Entry class
    def import_metadata_format
      [['CSV', 'Bulkrax::CsvEntry'], ['RDF (N-Triples)', 'Bulkrax::RdfEntry']]
    end

    # The type of metadata for the incoming import, either one file for all works, or one file per work
    # def import_metadata_type
    #   [['Single Metadata File for all works', 'single'], ['Multiple Files, one per Work', 'multi']]
    # end

    # If the import data is zipped, unzip it to this path
    def importer_unzip_path
      @importer_unzip_path ||= File.join(Bulkrax.import_path, "import_#{path_string}")
    end

    def errored_entries_csv_path
      @errored_entries_csv_path ||= File.join(Bulkrax.import_path, "import_#{path_string}_errored_entries.csv")
    end

    # "<id>_<created_at timestamp>_<last run id>"; falls back to the form
    # without the run id when building the full form raises (for example when
    # there is no run yet).
    def path_string
      "#{id}_#{created_at.strftime('%Y%m%d%H%M%S')}_#{importer_runs.last.id}"
    rescue StandardError
      "#{id}_#{created_at.strftime('%Y%m%d%H%M%S')}"
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # A single run of an Importer. Status rows attached to the run through the
  # polymorphic `runnable` association are destroyed together with it.
  class ImporterRun < ApplicationRecord
    belongs_to :importer
    has_many :statuses, dependent: :destroy, as: :runnable
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # OaiEntry subclass that adds no behavior of its own; it exists as a
  # distinct entry class name.
  class OaiDcEntry < OaiEntry; end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'erb'
4
+ require 'ostruct'
5
+
6
module Bulkrax
  # Entry backed by a record fetched through `client.get_record`.
  # `client`, `parser`, `identifier`, `work_identifier`, `add_metadata`,
  # `find_collection` and the other `add_*` helpers are not defined here;
  # they are presumably inherited from Entry -- confirm there.
  class OaiEntry < Entry
    serialize :raw_metadata, JSON

    delegate :record, to: :raw_record

    # Response of client.get_record for this entry's identifier, using the
    # parser's configured 'metadata_prefix'. Memoized.
    def raw_record
      @raw_record ||= client.get_record(identifier: identifier, metadata_prefix: parser.parser_fields['metadata_prefix'])
    end

    # setSpec value(s) from the record header.
    def sets
      record.header.set_spec
    end

    # Object exposing `record` and `identifier`; its binding is what the
    # thumbnail_url ERB template is evaluated against.
    def context
      @context ||= OpenStruct.new(record: record, identifier: record.header.identifier)
    end

    # Renders the 'thumbnail_url' parser field as an ERB template.
    #
    # NOTE(review): the template is executed as ERB, i.e. as Ruby code; make
    # sure only trusted users can edit parser_fields.
    #
    # @return [String, nil] rendered URL, or nil when no template is configured
    def thumbnail_url
      template = parser.parser_fields['thumbnail_url']
      # ERB.new(nil) raises, which used to abort build_metadata for importers
      # that have no thumbnail template configured.
      return if template.blank?

      ERB.new(template).result(context.instance_eval { binding })
    end

    # Builds parsed_metadata from the record: the header identifier, every
    # grandchild node of record.metadata (name => content), the thumbnail URL
    # when a template is configured, then the shared add_* steps.
    #
    # @return [Hash] the parsed metadata
    def build_metadata
      self.parsed_metadata = {}
      self.parsed_metadata[work_identifier] = [record.header.identifier]

      record.metadata.children.each do |child|
        child.children.each do |node|
          add_metadata(node.name, node.content)
        end
      end
      thumbnail = thumbnail_url
      add_metadata('thumbnail_url', thumbnail) unless thumbnail.nil?

      add_visibility
      add_rights_statement
      add_admin_set_id
      add_collections
      add_local

      parsed_metadata
    end

    # With collection_name 'all': true when the record has no sets or when
    # collection_ids has one id per set. Otherwise: true when exactly one
    # collection id is recorded.
    def collections_created?
      return collection_ids.size == 1 unless parser.collection_name == 'all'

      sets.blank? || sets.size == collection_ids.size
    end

    # Retrieve list of collections for the entry; add to collection_ids
    # If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
    # in this case, if 'All' is selected, records will not be added to a collection.
    def find_or_create_collection_ids
      return collection_ids if collections_created?

      if sets.blank? || parser.collection_name != 'all'
        collection = find_collection(importerexporter.unique_collection_identifier(parser.collection_name))
        collection_ids << collection.id if collection.present? && !collection_ids.include?(collection.id)
      else # All - collections should exist for all sets
        sets.each do |set|
          # NOTE(review): this branch looks collections up with
          # Collection.find_by while the branch above uses find_collection;
          # confirm whether both are meant to behave the same.
          c = Collection.find_by(work_identifier => importerexporter.unique_collection_identifier(set.content))
          collection_ids << c.id if c.present? && !collection_ids.include?(c.id)
        end
      end
      collection_ids
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # OaiEntry subclass that adds no behavior of its own; it exists as a
  # distinct entry class name.
  class OaiQualifiedDcEntry < OaiEntry; end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # OaiEntry variant whose factory class is Collection.
  class OaiSetEntry < OaiEntry
    # @return [Class] always Collection
    def factory_class
      Collection
    end

    # Uses raw_metadata directly as parsed_metadata, then runs add_local.
    #
    # @return [Object] parsed_metadata as it stands after add_local
    def build_metadata
      self.parsed_metadata = raw_metadata
      add_local
      parsed_metadata
    end

    # Always true for this entry type.
    def collections_created?
      true
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # RdfEntry variant whose factory class is Collection and whose record is
  # the raw metadata itself (no RDF reader is built).
  class RdfCollectionEntry < RdfEntry
    # @return [Object] raw_metadata, memoized
    def record
      @record ||= raw_metadata
    end

    # Uses raw_metadata directly as parsed_metadata, then runs add_local.
    #
    # @return [Object] parsed_metadata as it stands after add_local
    def build_metadata
      self.parsed_metadata = raw_metadata
      add_local
      parsed_metadata
    end

    # @return [Class] always Collection
    def factory_class
      Collection
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rdf'
4
module Bulkrax
  # Entry whose raw metadata holds a serialized RDF document together with
  # its format name. `source_identifier`, `work_identifier`, `add_metadata`,
  # `find_collection` and the `add_*` helpers are defined elsewhere
  # (presumably on Entry).
  class RdfEntry < Entry
    serialize :raw_metadata, JSON

    # Opens an RDF reader for the file at +path+.
    def self.read_data(path)
      RDF::Reader.open(path)
    end

    # @return [Array<String>] string form of every predicate in +data+
    def self.fields_from_data(data)
      data.predicates.map(&:to_s)
    end

    # Builds the raw-metadata Hash for one entry from an RDF reader.
    #
    # Every statement is re-serialized in the reader's own format. Along the
    # way, objects of statements whose predicate equals the configured
    # collection / children field are collected, and the object of a statement
    # whose predicate contains "deleted" is kept (last one wins).
    #
    # @param data [RDF::Reader] reader positioned on the source document
    # @param source_id [Object] key under which the first subject is stored
    # @return [Hash] source_id, :delete, :format, :data, :collection, :children
    def self.data_for_entry(data, source_id)
      rdf_format = data.class.format.to_sym
      collection_values = []
      child_values = []
      deleted_value = nil

      serialized = RDF::Writer.for(rdf_format).buffer do |out|
        data.each_statement do |stmt|
          predicate = stmt.predicate.to_s
          collection_values << stmt.object.to_s if collection_field.present? && collection_field == predicate
          child_values << stmt.object.to_s if children_field.present? && children_field == predicate
          deleted_value = stmt.object.to_s if predicate.include?('deleted')
          out << stmt
        end
      end

      {
        source_id => data.subjects.first.to_s,
        delete: deleted_value,
        format: rdf_format,
        data: serialized,
        collection: collection_values,
        children: child_values
      }
    end

    # Predicate configured as the collection field for this entry class.
    def self.collection_field
      Bulkrax.collection_field_mapping[to_s]
    end

    # Predicate configured as the parent/child field for this entry class.
    def self.children_field
      Bulkrax.parent_child_field_mapping[to_s]
    end

    # Reader over the stored 'data', using the reader class for the stored
    # 'format'. Memoized.
    def record
      @record ||= begin
        reader_class = RDF::Reader.for(raw_metadata['format'].to_sym)
        reader_class.new(raw_metadata['data'])
      end
    end

    # Builds parsed_metadata from the statements whose subject is this
    # entry's source identifier, then runs the shared add_* steps and copies
    # the 'file' value across.
    #
    # @raise [StandardError] when the record is nil or the source identifier
    #   is blank
    # @return [Hash] the parsed metadata
    def build_metadata
      raise StandardError, 'Record not found' if record.nil?
      if raw_metadata[source_identifier].blank?
        raise StandardError, "Missing source identifier (#{source_identifier})"
      end

      self.parsed_metadata = {}
      parsed_metadata[work_identifier] = [raw_metadata[source_identifier]]

      record.each_statement do |stmt|
        # Only process the subject for our record (in case other data is in the file)
        if stmt.subject.to_s == raw_metadata[source_identifier]
          add_metadata(stmt.predicate.to_s, stmt.object.to_s)
        end
      end

      add_visibility
      add_rights_statement
      add_admin_set_id
      add_collections
      add_local
      parsed_metadata['file'] = raw_metadata['file']

      parsed_metadata
    end

    # True when no collections are listed in the raw metadata, or when as
    # many collection ids have been recorded as collections are listed.
    def collections_created?
      listed = raw_metadata['collection']
      listed.blank? || listed.length == collection_ids.length
    end

    # Looks up each listed collection and records its id (once) in
    # collection_ids. Collections that cannot be found are skipped.
    #
    # @return [Array] collection_ids
    def find_or_create_collection_ids
      return collection_ids if collections_created?
      return collection_ids unless raw_metadata['collection'].present?

      raw_metadata['collection'].each do |name|
        found = find_collection(name)
        next if found.blank? || collection_ids.include?(found.id)

        collection_ids << found.id
      end
      collection_ids
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # A status row attached to a `statusable` record and to a `runnable`
  # record (both polymorphic). error_backtrace is stored as a serialized
  # Array.
  class Status < ApplicationRecord
    belongs_to :statusable, polymorphic: true
    belongs_to :runnable, polymorphic: true
    serialize :error_backtrace, Array

    scope :for_importers, -> { where(statusable_type: 'Bulkrax::Importer') }
    scope :for_exporters, -> { where(statusable_type: 'Bulkrax::Exporter') }

    # Restricts to the row with the highest id per (statusable_id,
    # statusable_type) pair by inner-joining the subquery below.
    scope :latest_by_statusable, -> { joins(latest_by_statusable_subtable.join_sources) }

    # Arel join of the statuses table against a grouped subquery, aliased
    # "latest_status", that selects MAX(id) AS latest_status_id per
    # (statusable_id, statusable_type).
    #
    # @return [Arel::SelectManager] whose join_sources feed the scope above
    def self.latest_by_statusable_subtable
      statuses = arel_table

      newest_per_statusable = statuses
                              .project(statuses[:statusable_id],
                                       statuses[:statusable_type],
                                       statuses[:id].maximum.as("latest_status_id"))
                              .group(statuses[:statusable_id], statuses[:statusable_type])

      # Only used for its alias name and for building the ON attribute.
      latest = Arel::Table.new(newest_per_statusable).alias(:latest_status)

      statuses
        .join(newest_per_statusable.as(latest.name.to_s), Arel::Nodes::InnerJoin)
        .on(statuses[:id].eq(latest[:latest_status_id]))
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
module Bulkrax
  # Generic XML Entry. `source_identifier`, `work_identifier`,
  # `add_metadata`, `importerexporter` and the `add_*` helpers are defined
  # elsewhere (presumably on Entry).
  class XmlEntry < Entry
    serialize :raw_metadata, JSON

    # Intentionally returns nil: no field list is derived from XML data.
    def self.fields_from_data(data); end

    # Parses the XML file at +path+ and strips all namespaces.
    #
    # File.open with a block is used instead of Kernel#open so the handle is
    # closed once parsing finishes and so a path beginning with "|" is never
    # run as a command.
    #
    # @param path [String] path of a local XML file
    # @return [Nokogiri::XML::Document]
    def self.read_data(path)
      # This doesn't cope with BOM sequences:
      # Nokogiri::XML(open(path), nil, 'UTF-8').remove_namespaces!
      File.open(path) { |io| Nokogiri::XML(io).remove_namespaces! }
    end

    # Builds the raw-metadata Hash for one entry from a parsed XML node.
    #
    # @param data [Nokogiri::XML::Node] node holding the record
    # @param source_id [Object] element name to read the identifier from; also
    #   used as the key the identifier is stored under
    # @return [Hash] source_id, :delete, :data, :collection, :children
    def self.data_for_entry(data, source_id)
      xpath_for_source_id = ".//*[name()='#{source_id}']"
      {
        # Safe navigation: a record without the identifier element yields nil
        # here, so build_metadata can raise its "Missing source identifier"
        # error instead of this method failing with NoMethodError.
        source_id => data.xpath(xpath_for_source_id).first&.text,
        delete: data.xpath(".//*[name()='delete']").first&.text,
        data:
          data.to_xml(
            encoding: 'UTF-8',
            save_with:
              Nokogiri::XML::Node::SaveOptions::NO_DECLARATION | Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
          ).delete("\n\t").squeeze(' '), # Remove newlines, tabs, and extra whitespace
        collection: [],
        children: []
      }
    end

    # def self.matcher_class; end

    # The stored 'data' string parsed as a UTF-8 XML document. Memoized.
    def record
      @record ||= Nokogiri::XML(raw_metadata['data'], nil, 'UTF-8')
    end

    # Builds parsed_metadata by reading, for every element name returned by
    # xml_elements, the content of each child of each matching element.
    #
    # @raise [StandardError] when the record is nil, the source identifier is
    #   blank, or no title was produced
    # @return [Hash] the parsed metadata
    def build_metadata
      raise StandardError, 'Record not found' if record.nil?
      raise StandardError, "Missing source identifier (#{source_identifier})" if raw_metadata[source_identifier].blank?

      self.parsed_metadata = {}
      parsed_metadata[work_identifier] = [raw_metadata[source_identifier]]
      xml_elements.each do |element_name|
        elements = record.xpath("//*[name()='#{element_name}']")
        next if elements.blank?

        elements.each do |el|
          el.children.map(&:content).each do |content|
            add_metadata(element_name, content) if content.present?
          end
        end
      end
      add_visibility
      add_rights_statement
      add_admin_set_id
      add_collections
      parsed_metadata['file'] = raw_metadata['file']

      add_local
      raise StandardError, "title is required" if parsed_metadata['title'].blank?

      parsed_metadata
    end

    # Element names to read: every :from value of the field mappings
    # registered for this importer/exporter's parser class, flattened, without
    # nils or duplicates.
    def xml_elements
      Bulkrax.field_mappings[importerexporter.parser_klass].map do |_k, v|
        v[:from]
      end.flatten.compact.uniq
    end
  end
end