bulkrax 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +205 -0
- data/README.md +202 -0
- data/Rakefile +42 -0
- data/app/assets/config/bulkrax_manifest.js +2 -0
- data/app/assets/javascripts/bulkrax/application.js +14 -0
- data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
- data/app/assets/javascripts/bulkrax/entries.js +15 -0
- data/app/assets/javascripts/bulkrax/exporters.js +60 -0
- data/app/assets/javascripts/bulkrax/importers.js.erb +166 -0
- data/app/assets/stylesheets/bulkrax/accordion.scss +40 -0
- data/app/assets/stylesheets/bulkrax/application.css +15 -0
- data/app/assets/stylesheets/bulkrax/coderay.scss +264 -0
- data/app/assets/stylesheets/bulkrax/import_export.scss +37 -0
- data/app/controllers/bulkrax/application_controller.rb +8 -0
- data/app/controllers/bulkrax/entries_controller.rb +44 -0
- data/app/controllers/bulkrax/exporters_controller.rb +125 -0
- data/app/controllers/bulkrax/importers_controller.rb +315 -0
- data/app/controllers/concerns/bulkrax/api.rb +29 -0
- data/app/factories/bulkrax/object_factory.rb +230 -0
- data/app/helpers/bulkrax/application_helper.rb +15 -0
- data/app/helpers/bulkrax/exporters_helper.rb +6 -0
- data/app/helpers/bulkrax/importers_helper.rb +13 -0
- data/app/helpers/bulkrax/validation_helper.rb +153 -0
- data/app/jobs/bulkrax/application_job.rb +6 -0
- data/app/jobs/bulkrax/child_relationships_job.rb +128 -0
- data/app/jobs/bulkrax/delete_work_job.rb +16 -0
- data/app/jobs/bulkrax/download_cloud_file_job.rb +18 -0
- data/app/jobs/bulkrax/export_work_job.rb +37 -0
- data/app/jobs/bulkrax/exporter_job.rb +14 -0
- data/app/jobs/bulkrax/import_work_collection_job.rb +41 -0
- data/app/jobs/bulkrax/import_work_job.rb +32 -0
- data/app/jobs/bulkrax/importer_job.rb +26 -0
- data/app/mailers/bulkrax/application_mailer.rb +8 -0
- data/app/matchers/bulkrax/application_matcher.rb +113 -0
- data/app/matchers/bulkrax/bagit_matcher.rb +6 -0
- data/app/matchers/bulkrax/csv_matcher.rb +6 -0
- data/app/matchers/bulkrax/oai_matcher.rb +6 -0
- data/app/models/bulkrax/application_record.rb +7 -0
- data/app/models/bulkrax/csv_collection_entry.rb +19 -0
- data/app/models/bulkrax/csv_entry.rb +163 -0
- data/app/models/bulkrax/entry.rb +104 -0
- data/app/models/bulkrax/exporter.rb +122 -0
- data/app/models/bulkrax/exporter_run.rb +7 -0
- data/app/models/bulkrax/import_failed.rb +13 -0
- data/app/models/bulkrax/importer.rb +155 -0
- data/app/models/bulkrax/importer_run.rb +8 -0
- data/app/models/bulkrax/oai_dc_entry.rb +6 -0
- data/app/models/bulkrax/oai_entry.rb +74 -0
- data/app/models/bulkrax/oai_qualified_dc_entry.rb +6 -0
- data/app/models/bulkrax/oai_set_entry.rb +19 -0
- data/app/models/bulkrax/rdf_collection_entry.rb +19 -0
- data/app/models/bulkrax/rdf_entry.rb +90 -0
- data/app/models/bulkrax/status.rb +25 -0
- data/app/models/bulkrax/xml_entry.rb +73 -0
- data/app/models/concerns/bulkrax/download_behavior.rb +61 -0
- data/app/models/concerns/bulkrax/errored_entries.rb +45 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +58 -0
- data/app/models/concerns/bulkrax/file_factory.rb +140 -0
- data/app/models/concerns/bulkrax/has_local_processing.rb +7 -0
- data/app/models/concerns/bulkrax/has_matchers.rb +155 -0
- data/app/models/concerns/bulkrax/import_behavior.rb +90 -0
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +34 -0
- data/app/models/concerns/bulkrax/status_info.rb +56 -0
- data/app/parsers/bulkrax/application_parser.rb +299 -0
- data/app/parsers/bulkrax/bagit_parser.rb +157 -0
- data/app/parsers/bulkrax/csv_parser.rb +266 -0
- data/app/parsers/bulkrax/oai_dc_parser.rb +130 -0
- data/app/parsers/bulkrax/oai_qualified_dc_parser.rb +9 -0
- data/app/parsers/bulkrax/xml_parser.rb +103 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +19 -0
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +19 -0
- data/app/views/bulkrax/entries/show.html.erb +63 -0
- data/app/views/bulkrax/exporters/_form.html.erb +120 -0
- data/app/views/bulkrax/exporters/edit.html.erb +23 -0
- data/app/views/bulkrax/exporters/index.html.erb +67 -0
- data/app/views/bulkrax/exporters/new.html.erb +23 -0
- data/app/views/bulkrax/exporters/show.html.erb +124 -0
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +54 -0
- data/app/views/bulkrax/importers/_browse_everything.html.erb +12 -0
- data/app/views/bulkrax/importers/_csv_fields.html.erb +39 -0
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +16 -0
- data/app/views/bulkrax/importers/_form.html.erb +35 -0
- data/app/views/bulkrax/importers/_oai_fields.html.erb +42 -0
- data/app/views/bulkrax/importers/_xml_fields.html.erb +60 -0
- data/app/views/bulkrax/importers/edit.html.erb +20 -0
- data/app/views/bulkrax/importers/index.html.erb +77 -0
- data/app/views/bulkrax/importers/new.html.erb +25 -0
- data/app/views/bulkrax/importers/show.html.erb +175 -0
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +37 -0
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +52 -0
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +39 -0
- data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +6 -0
- data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +19 -0
- data/app/views/layouts/bulkrax/application.html.erb +14 -0
- data/config/locales/bulkrax.en.yml +36 -0
- data/config/routes.rb +18 -0
- data/db/migrate/20181011230201_create_bulkrax_importers.rb +18 -0
- data/db/migrate/20181011230228_create_bulkrax_importer_runs.rb +16 -0
- data/db/migrate/20190325183136_create_bulkrax_entries.rb +16 -0
- data/db/migrate/20190601221109_add_status_to_entry.rb +9 -0
- data/db/migrate/20190715161939_add_collections_to_importer_runs.rb +6 -0
- data/db/migrate/20190715162044_change_collection_ids_on_entries.rb +5 -0
- data/db/migrate/20190729124607_create_bulkrax_exporters.rb +19 -0
- data/db/migrate/20190729134158_create_bulkrax_exporter_runs.rb +14 -0
- data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +12 -0
- data/db/migrate/20191203225129_add_total_collection_records_to_importer_runs.rb +5 -0
- data/db/migrate/20191204191623_add_children_to_importer_runs.rb +6 -0
- data/db/migrate/20191204223857_change_total_records_to_total_work_entries.rb +6 -0
- data/db/migrate/20191212155530_change_entry_last_error.rb +19 -0
- data/db/migrate/20200108194557_add_validate_only_to_bulkrax_importers.rb +5 -0
- data/db/migrate/20200301232856_add_status_to_importers.rb +9 -0
- data/db/migrate/20200312190638_remove_foreign_key_from_bulkrax_entries.rb +5 -0
- data/db/migrate/20200326235838_add_status_to_exporters.rb +7 -0
- data/db/migrate/20200601204556_add_invalid_record_to_importer_run.rb +5 -0
- data/db/migrate/20200818055819_create_bulkrax_statuses.rb +18 -0
- data/db/migrate/20200819054016_move_to_statuses.rb +30 -0
- data/db/migrate/20201106014204_add_date_filter_and_status_to_bulkrax_exporters.rb +7 -0
- data/db/migrate/20201117220007_add_workflow_status_to_bulkrax_exporter.rb +5 -0
- data/db/migrate/20210806044408_remove_unused_last_error.rb +7 -0
- data/db/migrate/20210806065737_increase_text_sizes.rb +12 -0
- data/lib/bulkrax.rb +161 -0
- data/lib/bulkrax/engine.rb +37 -0
- data/lib/bulkrax/version.rb +5 -0
- data/lib/generators/bulkrax/install_generator.rb +80 -0
- data/lib/generators/bulkrax/templates/README +3 -0
- data/lib/generators/bulkrax/templates/app/assets/images/bulkrax/removed.png +0 -0
- data/lib/generators/bulkrax/templates/app/models/concerns/bulkrax/has_local_processing.rb +8 -0
- data/lib/generators/bulkrax/templates/bin/importer +140 -0
- data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +84 -0
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +72 -0
- data/lib/tasks/bulkrax_tasks.rake +6 -0
- metadata +388 -0
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# this is a PORO to help pass errors around
|
4
|
+
module Bulkrax
  # Minimal error-like value object: it carries a message and a backtrace so a
  # failure can be handed around without raising a real exception.
  class ImportFailed
    attr_accessor :message, :backtrace

    # @param message [Object] description of the failure
    # @param backtrace [Object] supporting detail lines for the failure
    def initialize(message, backtrace)
      self.message = message
      self.backtrace = backtrace
    end
  end
end
|
@@ -0,0 +1,155 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'iso8601'
|
4
|
+
|
5
|
+
module Bulkrax
|
6
|
+
class Importer < ApplicationRecord
|
7
|
+
include Bulkrax::ImporterExporterBehavior
|
8
|
+
include Bulkrax::StatusInfo
|
9
|
+
|
10
|
+
serialize :parser_fields, JSON
|
11
|
+
serialize :field_mapping, JSON
|
12
|
+
|
13
|
+
belongs_to :user
|
14
|
+
has_many :importer_runs, dependent: :destroy
|
15
|
+
has_many :entries, as: :importerexporter, dependent: :destroy
|
16
|
+
|
17
|
+
validates :name, presence: true
|
18
|
+
validates :admin_set_id, presence: true
|
19
|
+
validates :parser_klass, presence: true
|
20
|
+
|
21
|
+
delegate :valid_import?, :create_parent_child_relationships,
|
22
|
+
:write_errored_entries_file, :visibility, to: :parser
|
23
|
+
|
24
|
+
attr_accessor :only_updates, :file_style, :file
|
25
|
+
attr_writer :current_run
|
26
|
+
|
27
|
+
# Status of this importer.
#
# A validate-only importer always reports 'Validated'; any other importer
# falls through to the inherited implementation.
def status
  return 'Validated' if validate_only

  super
end
|
34
|
+
|
35
|
+
# Records the final status of the current run on its importer.
#
# Does nothing while the run still has enqueued records. Otherwise the status is:
# * 'Complete' when no records failed,
# * an ImportFailed carrying the invalid-record lines when records failed and
#   invalid records were captured,
# * 'Complete (with failures)' when records failed without invalid-record detail.
def record_status
  run = ImporterRun.find(current_run.id) # reload so the counters are fresh
  return if run.enqueued_records.positive? # still processing

  outcome =
    if !run.failed_records.positive?
      'Complete'
    elsif run.invalid_records.present?
      Bulkrax::ImportFailed.new('Failed with Invalid Records', run.invalid_records.split("\n"))
    else
      'Complete (with failures)'
    end
  run.importer.status_info(outcome)
end
|
49
|
+
|
50
|
+
# If field_mapping is empty, setup a default based on the export_properties
|
51
|
+
# Field mapping for this importer, memoized in @mapping.
#
# A non-blank field_mapping (other than the placeholder [{}]) is returned as is.
# Otherwise a default is built by passing each non-nil parser import field to
# Bulkrax.default_field_mapping and merging the results; when the parser has no
# import fields and field_mapping is not the placeholder, the result is nil.
def mapping
  return @mapping if @mapping

  configured = self.field_mapping
  placeholder = configured == [{}]
  @mapping =
    if configured.blank? || placeholder
      if parser.import_fields.present? || placeholder
        per_field = parser.import_fields.reject(&:nil?).map do |field|
          Bulkrax.default_field_mapping.call(field)
        end
        ActiveSupport::HashWithIndifferentAccess.new(per_field.inject(:merge))
      end
    else
      configured
    end
end
|
64
|
+
|
65
|
+
def parser_fields
|
66
|
+
self[:parser_fields] || {}
|
67
|
+
end
|
68
|
+
|
69
|
+
# Label/value pairs for the selectable import frequencies.
#
# Values are ISO 8601 durations (https://en.wikipedia.org/wiki/ISO_8601#Durations):
# each starts with 'P' and is followed by number/unit parts,
# e.g. P1Y2M3W4DT5H6M7S == 1 Year, 2 Months, 3 Weeks, 4 Days, 5 Hours, 6 Minutes, 7 Seconds.
#
# @return [Array<Array(String, String)>] [label, duration] pairs
def self.frequency_enums
  {
    'Daily' => 'P1D',
    'Monthly' => 'P1M',
    'Yearly' => 'P1Y',
    'Once (on save)' => 'PT0S'
  }.to_a
end
|
75
|
+
|
76
|
+
def frequency=(frequency)
|
77
|
+
self[:frequency] = ISO8601::Duration.new(frequency).to_s
|
78
|
+
end
|
79
|
+
|
80
|
+
def frequency
|
81
|
+
f = self[:frequency] || "PT0S"
|
82
|
+
ISO8601::Duration.new(f)
|
83
|
+
end
|
84
|
+
|
85
|
+
def schedulable?
|
86
|
+
frequency.to_seconds != 0
|
87
|
+
end
|
88
|
+
|
89
|
+
# The run this importer is currently working in, memoized in @current_run.
#
# On first use a new record is created through the importer_runs association.
# Its work total is the importer's limit (falling back to the parser's total)
# and its collection total comes from the parser.
def current_run
  return @current_run if @current_run

  totals = {
    total_work_entries: limit || parser.total,
    total_collection_entries: parser.collections_total
  }
  @current_run = importer_runs.create!(totals)
end
|
92
|
+
|
93
|
+
def last_run
|
94
|
+
@last_run ||= self.importer_runs.last
|
95
|
+
end
|
96
|
+
|
97
|
+
def seen
|
98
|
+
@seen ||= {}
|
99
|
+
end
|
100
|
+
|
101
|
+
def replace_files
|
102
|
+
self.parser_fields['replace_files']
|
103
|
+
end
|
104
|
+
|
105
|
+
def update_files
|
106
|
+
self.parser_fields['update_files']
|
107
|
+
end
|
108
|
+
|
109
|
+
# Asks the parser to create works for this importer.
#
# The importer is saved first when it is a new record (statuses need a saved
# object), and only_updates is set to false when it has no truthy value yet.
# Any StandardError raised along the way is recorded through status_info
# instead of propagating.
def import_works
  begin
    save if new_record?
    self.only_updates = false unless only_updates
    parser.create_works
  rescue StandardError => error
    status_info(error)
  end
end
|
116
|
+
|
117
|
+
# Asks the parser to create collections for this importer.
#
# The importer is saved first when it is a new record (statuses need a saved
# object). Any StandardError raised along the way is recorded through
# status_info instead of propagating.
def import_collections
  begin
    save if new_record?
    parser.create_collections
  rescue StandardError => error
    status_info(error)
  end
end
|
123
|
+
|
124
|
+
# Prepend the base_url to ensure unique set identifiers
|
125
|
+
# @todo - move to parser, as this is OAI specific
|
126
|
+
# Builds a set identifier that is unique across sources by prefixing +id+
# with the third '/'-separated segment of the configured base_url
# (the host part of a URL such as "http://host/path").
#
# @param id [#to_s] identifier to prefix
# @return [String] "<segment>_<id>"
def unique_collection_identifier(id)
  url_segments = parser_fields['base_url'].split('/')
  [url_segments[2], id].join('_')
end
|
129
|
+
|
130
|
+
# The format for metadata for the incoming import; corresponds to an Entry class
|
131
|
+
def import_metadata_format
|
132
|
+
[['CSV', 'Bulkrax::CsvEntry'], ['RDF (N-Triples)', 'Bulkrax::RdfEntry']]
|
133
|
+
end
|
134
|
+
|
135
|
+
# The type of metadata for the incoming import, either one file for all works, or one file per work
|
136
|
+
# def import_metadata_type
|
137
|
+
# [['Single Metadata File for all works', 'single'], ['Multiple Files, one per Work', 'multi']]
|
138
|
+
# end
|
139
|
+
|
140
|
+
# If the import data is zipped, unzip it to this path
|
141
|
+
def importer_unzip_path
|
142
|
+
@importer_unzip_path ||= File.join(Bulkrax.import_path, "import_#{path_string}")
|
143
|
+
end
|
144
|
+
|
145
|
+
def errored_entries_csv_path
|
146
|
+
@errored_entries_csv_path ||= File.join(Bulkrax.import_path, "import_#{path_string}_errored_entries.csv")
|
147
|
+
end
|
148
|
+
|
149
|
+
# Builds the suffix used in this importer's file-system paths.
#
# The suffix is "<id>_<created_at as YYYYmmddHHMMSS>", followed by
# "_<id of the last run>" when the importer has at least one run.
#
# @return [String]
def path_string
  base = "#{id}_#{created_at.strftime('%Y%m%d%H%M%S')}"
  # Having no runs yet is an expected state, so branch on it explicitly instead
  # of rescuing the NoMethodError from calling #id on nil. A blanket rescue here
  # would also hide unrelated failures raised while looking up the runs.
  latest_run = importer_runs.last
  latest_run ? "#{base}_#{latest_run.id}" : base
end
|
154
|
+
end
|
155
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'erb'
|
4
|
+
require 'ostruct'
|
5
|
+
|
6
|
+
module Bulkrax
|
7
|
+
class OaiEntry < Entry
|
8
|
+
serialize :raw_metadata, JSON
|
9
|
+
|
10
|
+
delegate :record, to: :raw_record
|
11
|
+
|
12
|
+
def raw_record
|
13
|
+
@raw_record ||= client.get_record(identifier: identifier, metadata_prefix: parser.parser_fields['metadata_prefix'])
|
14
|
+
end
|
15
|
+
|
16
|
+
def sets
|
17
|
+
record.header.set_spec
|
18
|
+
end
|
19
|
+
|
20
|
+
def context
|
21
|
+
@context ||= OpenStruct.new(record: record, identifier: record.header.identifier)
|
22
|
+
end
|
23
|
+
|
24
|
+
def thumbnail_url
|
25
|
+
ERB.new(parser.parser_fields['thumbnail_url']).result(context.instance_eval { binding })
|
26
|
+
end
|
27
|
+
|
28
|
+
# Rebuilds parsed_metadata from the OAI record.
#
# Starts from an empty hash, stores the record's header identifier under
# work_identifier, passes every grandchild node of the record's metadata
# (name and content) to add_metadata, adds the thumbnail URL, and then runs the
# visibility / rights statement / admin set / collections / local steps.
#
# @return the resulting parsed_metadata
def build_metadata
  self.parsed_metadata = {}
  parsed_metadata[work_identifier] = [record.header.identifier]

  record.metadata.children.each do |container|
    container.children.each { |node| add_metadata(node.name, node.content) }
  end
  add_metadata('thumbnail_url', thumbnail_url)

  add_visibility
  add_rights_statement
  add_admin_set_id
  add_collections
  add_local

  parsed_metadata
end
|
47
|
+
|
48
|
+
# Whether collection_ids already holds everything this entry expects.
#
# When the parser's collection name is 'all', that means either the record has
# no sets or there is one collection id per set. For any other collection name
# exactly one collection id is expected.
def collections_created?
  return collection_ids.size == 1 unless parser.collection_name == 'all'

  sets.blank? || sets.size == collection_ids.size
end
|
55
|
+
|
56
|
+
# Retrieve list of collections for the entry; add to collection_ids
|
57
|
+
# If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
|
58
|
+
# in this case, if 'All' is selected, records will not be added to a collection.
|
59
|
+
# Collects the ids of the collections this entry belongs to into collection_ids.
#
# Returns collection_ids untouched when collections_created? is already true.
# When the record has sets and the parser's collection name is 'all', each set
# is looked up with Collection.find_by on work_identifier. In every other case
# a single collection is looked up with find_collection using the parser's
# collection name. Lookups go through the importer's
# unique_collection_identifier; ids that are missing or already present are
# skipped.
#
# @return collection_ids
def find_or_create_collection_ids
  return collection_ids if collections_created?

  if sets.present? && parser.collection_name == 'all'
    # All - collections should exist for all sets
    sets.each do |set|
      found = Collection.find_by(work_identifier => importerexporter.unique_collection_identifier(set.content))
      next if found.blank? || collection_ids.include?(found.id)

      collection_ids << found.id
    end
  else
    found = find_collection(importerexporter.unique_collection_identifier(parser.collection_name))
    collection_ids << found.id unless found.blank? || collection_ids.include?(found.id)
  end
  collection_ids
end
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # OAI entry whose factory class is Collection.
  class OaiSetEntry < OaiEntry
    # Nothing further needs to be created for this kind of entry.
    def collections_created?
      true
    end

    def factory_class
      Collection
    end

    # Uses the raw metadata as the parsed metadata, then applies add_local.
    #
    # @return the resulting parsed_metadata
    def build_metadata
      self.parsed_metadata = raw_metadata
      add_local
      parsed_metadata
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # RDF entry whose factory class is Collection.
  class RdfCollectionEntry < RdfEntry
    def factory_class
      Collection
    end

    # The record is the raw metadata itself, memoized in @record.
    def record
      return @record if @record

      @record = raw_metadata
    end

    # Uses the raw metadata as the parsed metadata, then applies add_local.
    #
    # @return the resulting parsed_metadata
    def build_metadata
      self.parsed_metadata = raw_metadata
      add_local
      parsed_metadata
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rdf'
|
4
|
+
module Bulkrax
|
5
|
+
class RdfEntry < Entry
|
6
|
+
serialize :raw_metadata, JSON
|
7
|
+
|
8
|
+
def self.read_data(path)
|
9
|
+
RDF::Reader.open(path)
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.fields_from_data(data)
|
13
|
+
data.predicates.map(&:to_s)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Builds the raw-metadata hash for one entry from an RDF reader.
#
# Every statement is re-serialized with the writer matching the reader's
# format. Along the way, objects of statements whose predicate equals
# collection_field / children_field are collected, and the object of a
# statement whose predicate contains "deleted" is kept as the delete marker
# (the last such statement wins).
#
# @param data an RDF reader (responds to each_statement and subjects)
# @param source_id key under which the first subject is stored
# @return [Hash] source_id, :delete, :format, :data, :collection, :children
def self.data_for_entry(data, source_id)
  reader = data
  format = reader.class.format.to_sym
  collections = []
  children = []
  delete = nil
  serialized = RDF::Writer.for(format).buffer do |writer|
    reader.each_statement do |statement|
      predicate = statement.predicate.to_s
      collections << statement.object.to_s if collection_field.present? && collection_field == predicate
      children << statement.object.to_s if children_field.present? && children_field == predicate
      delete = statement.object.to_s if predicate.include?('deleted')
      writer << statement
    end
  end

  {
    source_id => reader.subjects.first.to_s,
    delete: delete,
    format: format,
    data: serialized,
    collection: collections,
    children: children
  }
end
|
39
|
+
|
40
|
+
def self.collection_field
|
41
|
+
Bulkrax.collection_field_mapping[self.to_s]
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.children_field
|
45
|
+
Bulkrax.parent_child_field_mapping[self.to_s]
|
46
|
+
end
|
47
|
+
|
48
|
+
def record
|
49
|
+
@record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
|
50
|
+
end
|
51
|
+
|
52
|
+
# Rebuilds parsed_metadata from the RDF record.
#
# The source identifier value is stored under work_identifier, every statement
# whose subject matches that value is passed to add_metadata (statements about
# other subjects in the same data are ignored), the visibility / rights
# statement / admin set / collections / local steps run, and the raw 'file'
# value is copied over.
#
# @raise [StandardError] when the record is nil or the source identifier is blank
# @return the resulting parsed_metadata
def build_metadata
  raise StandardError, 'Record not found' if record.nil?

  source_value = raw_metadata[source_identifier]
  raise StandardError, "Missing source identifier (#{source_identifier})" if source_value.blank?

  self.parsed_metadata = {}
  parsed_metadata[work_identifier] = [source_value]

  record.each_statement do |statement|
    if statement.subject.to_s == source_value
      add_metadata(statement.predicate.to_s, statement.object.to_s)
    end
  end

  add_visibility
  add_rights_statement
  add_admin_set_id
  add_collections
  add_local
  parsed_metadata['file'] = raw_metadata['file']

  parsed_metadata
end
|
73
|
+
|
74
|
+
# Whether collection_ids already covers every collection named in the raw
# metadata: true when no collections are listed, otherwise true only when the
# two lists have the same length.
def collections_created?
  wanted = raw_metadata['collection']
  wanted.blank? || wanted.length == collection_ids.length
end
|
78
|
+
|
79
|
+
# Collects into collection_ids the id of every collection named in the raw
# metadata that find_collection can resolve, skipping unresolved names and ids
# already present. Returns collection_ids untouched when collections_created?
# is already true.
#
# @return collection_ids
def find_or_create_collection_ids
  return collection_ids if collections_created?

  names = raw_metadata['collection']
  if names.present?
    names.each do |name|
      found = find_collection(name)
      next if found.blank? || collection_ids.include?(found.id)

      collection_ids << found.id
    end
  end
  collection_ids
end
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
|
4
|
+
class Status < ApplicationRecord
|
5
|
+
belongs_to :statusable, polymorphic: true
|
6
|
+
belongs_to :runnable, polymorphic: true
|
7
|
+
serialize :error_backtrace, Array
|
8
|
+
|
9
|
+
scope :for_importers, -> { where(statusable_type: 'Bulkrax::Importer') }
|
10
|
+
scope :for_exporters, -> { where(statusable_type: 'Bulkrax::Exporter') }
|
11
|
+
|
12
|
+
scope :latest_by_statusable, -> { joins(latest_by_statusable_subtable.join_sources) }
|
13
|
+
|
14
|
+
# Builds the Arel join used by the latest_by_statusable scope.
#
# A subquery groups this table by (statusable_id, statusable_type) and selects
# the maximum id per group as "latest_status_id"; this table is then
# inner-joined to that subquery (aliased latest_status) on id = latest_status_id.
def self.latest_by_statusable_subtable
  statuses = arel_table
  owner_columns = [statuses[:statusable_id], statuses[:statusable_type]]
  newest_per_owner = statuses
                     .project(*owner_columns, statuses[:id].maximum.as("latest_status_id"))
                     .group(*owner_columns)

  latest = Arel::Table.new(newest_per_owner).alias(:latest_status)
  statuses
    .join(newest_per_owner.as(latest.name.to_s), Arel::Nodes::InnerJoin)
    .on(statuses[:id].eq(latest[:latest_status_id]))
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
module Bulkrax
|
5
|
+
# Generic XML Entry
|
6
|
+
class XmlEntry < Entry
|
7
|
+
serialize :raw_metadata, JSON
|
8
|
+
|
9
|
+
def self.fields_from_data(data); end
|
10
|
+
|
11
|
+
# Parses the XML file at +path+ and removes namespaces from the document.
#
# The file is opened with the block form of File.open so the handle is closed
# as soon as parsing finishes. File.open is used instead of Kernel#open, which
# leaves the handle open and treats a path starting with "|" as a command to run.
# NOTE(review): this assumes +path+ is always a local file; confirm no caller
# relies on open-uri to pass a URL here.
#
# @param path [String] location of the XML file
# @return the parsed document with namespaces removed
def self.read_data(path)
  # This doesn't cope with BOM sequences:
  # Nokogiri::XML(open(path), nil, 'UTF-8').remove_namespaces!
  File.open(path) { |file| Nokogiri::XML(file).remove_namespaces! }
end
|
16
|
+
|
17
|
+
# Builds the raw-metadata hash for one entry from a parsed XML node.
#
# The source identifier and delete marker are the text of the first elements
# named +source_id+ and "delete". Either is nil when no such element exists,
# so a record without a source identifier yields a nil value rather than a
# NoMethodError. The XML is serialized as UTF-8 without a declaration or empty
# tags, with newlines and tabs removed and runs of repeated characters squeezed.
#
# @param data XML node or document (responds to xpath and to_xml)
# @param source_id [String] element name holding the source identifier
# @return [Hash] source_id, :delete, :data, :collection, :children
def self.data_for_entry(data, source_id)
  source_node = data.xpath(".//*[name()='#{source_id}']").first
  delete_node = data.xpath(".//*[name()='delete']").first
  serialized = data.to_xml(
    encoding: 'UTF-8',
    save_with:
      Nokogiri::XML::Node::SaveOptions::NO_DECLARATION | Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
  )

  {
    source_id => source_node&.text,
    delete: delete_node&.text,
    data: serialized.delete("\n").delete("\t").squeeze(' '), # Remove newlines, tabs, and extra whitespace
    collection: [],
    children: []
  }
end
|
34
|
+
|
35
|
+
# def self.matcher_class; end
|
36
|
+
|
37
|
+
def record
|
38
|
+
@record ||= Nokogiri::XML(self.raw_metadata['data'], nil, 'UTF-8')
|
39
|
+
end
|
40
|
+
|
41
|
+
# Rebuilds parsed_metadata from the XML record.
#
# The source identifier value is stored under work_identifier. For each name
# in xml_elements, the non-blank content of every child of every matching
# element is passed to add_metadata. The visibility / rights statement /
# admin set / collections steps then run, the raw 'file' value is copied over,
# and add_local is applied.
#
# @raise [StandardError] when the record is nil, the source identifier is
#   blank, or no title ended up in the parsed metadata
# @return the resulting parsed_metadata
def build_metadata
  raise StandardError, 'Record not found' if record.nil?

  source_value = raw_metadata[source_identifier]
  raise StandardError, "Missing source identifier (#{source_identifier})" if source_value.blank?

  self.parsed_metadata = {}
  parsed_metadata[work_identifier] = [source_value]

  xml_elements.each do |element_name|
    matches = record.xpath("//*[name()='#{element_name}']")
    next if matches.blank?

    matches.each do |element|
      element.children.map(&:content).each do |text|
        add_metadata(element_name, text) if text.present?
      end
    end
  end

  add_visibility
  add_rights_statement
  add_admin_set_id
  add_collections
  parsed_metadata['file'] = raw_metadata['file']

  add_local
  raise StandardError, "title is required" if parsed_metadata['title'].blank?

  parsed_metadata
end
|
65
|
+
|
66
|
+
# Grab the class from the real parser
|
67
|
+
# Names of the XML elements to read: the distinct, non-nil :from values of
# the field mappings registered for this entry's parser class, flattened into
# one list.
def xml_elements
  sources = []
  Bulkrax.field_mappings[importerexporter.parser_klass].each do |_field, settings|
    sources << settings[:from]
  end
  sources.flatten.compact.uniq
end
|
72
|
+
end
|
73
|
+
end
|