bulkrax 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +205 -0
- data/README.md +202 -0
- data/Rakefile +42 -0
- data/app/assets/config/bulkrax_manifest.js +2 -0
- data/app/assets/javascripts/bulkrax/application.js +14 -0
- data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
- data/app/assets/javascripts/bulkrax/entries.js +15 -0
- data/app/assets/javascripts/bulkrax/exporters.js +60 -0
- data/app/assets/javascripts/bulkrax/importers.js.erb +166 -0
- data/app/assets/stylesheets/bulkrax/accordion.scss +40 -0
- data/app/assets/stylesheets/bulkrax/application.css +15 -0
- data/app/assets/stylesheets/bulkrax/coderay.scss +264 -0
- data/app/assets/stylesheets/bulkrax/import_export.scss +37 -0
- data/app/controllers/bulkrax/application_controller.rb +8 -0
- data/app/controllers/bulkrax/entries_controller.rb +44 -0
- data/app/controllers/bulkrax/exporters_controller.rb +125 -0
- data/app/controllers/bulkrax/importers_controller.rb +315 -0
- data/app/controllers/concerns/bulkrax/api.rb +29 -0
- data/app/factories/bulkrax/object_factory.rb +230 -0
- data/app/helpers/bulkrax/application_helper.rb +15 -0
- data/app/helpers/bulkrax/exporters_helper.rb +6 -0
- data/app/helpers/bulkrax/importers_helper.rb +13 -0
- data/app/helpers/bulkrax/validation_helper.rb +153 -0
- data/app/jobs/bulkrax/application_job.rb +6 -0
- data/app/jobs/bulkrax/child_relationships_job.rb +128 -0
- data/app/jobs/bulkrax/delete_work_job.rb +16 -0
- data/app/jobs/bulkrax/download_cloud_file_job.rb +18 -0
- data/app/jobs/bulkrax/export_work_job.rb +37 -0
- data/app/jobs/bulkrax/exporter_job.rb +14 -0
- data/app/jobs/bulkrax/import_work_collection_job.rb +41 -0
- data/app/jobs/bulkrax/import_work_job.rb +32 -0
- data/app/jobs/bulkrax/importer_job.rb +26 -0
- data/app/mailers/bulkrax/application_mailer.rb +8 -0
- data/app/matchers/bulkrax/application_matcher.rb +113 -0
- data/app/matchers/bulkrax/bagit_matcher.rb +6 -0
- data/app/matchers/bulkrax/csv_matcher.rb +6 -0
- data/app/matchers/bulkrax/oai_matcher.rb +6 -0
- data/app/models/bulkrax/application_record.rb +7 -0
- data/app/models/bulkrax/csv_collection_entry.rb +19 -0
- data/app/models/bulkrax/csv_entry.rb +163 -0
- data/app/models/bulkrax/entry.rb +104 -0
- data/app/models/bulkrax/exporter.rb +122 -0
- data/app/models/bulkrax/exporter_run.rb +7 -0
- data/app/models/bulkrax/import_failed.rb +13 -0
- data/app/models/bulkrax/importer.rb +155 -0
- data/app/models/bulkrax/importer_run.rb +8 -0
- data/app/models/bulkrax/oai_dc_entry.rb +6 -0
- data/app/models/bulkrax/oai_entry.rb +74 -0
- data/app/models/bulkrax/oai_qualified_dc_entry.rb +6 -0
- data/app/models/bulkrax/oai_set_entry.rb +19 -0
- data/app/models/bulkrax/rdf_collection_entry.rb +19 -0
- data/app/models/bulkrax/rdf_entry.rb +90 -0
- data/app/models/bulkrax/status.rb +25 -0
- data/app/models/bulkrax/xml_entry.rb +73 -0
- data/app/models/concerns/bulkrax/download_behavior.rb +61 -0
- data/app/models/concerns/bulkrax/errored_entries.rb +45 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +58 -0
- data/app/models/concerns/bulkrax/file_factory.rb +140 -0
- data/app/models/concerns/bulkrax/has_local_processing.rb +7 -0
- data/app/models/concerns/bulkrax/has_matchers.rb +155 -0
- data/app/models/concerns/bulkrax/import_behavior.rb +90 -0
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +34 -0
- data/app/models/concerns/bulkrax/status_info.rb +56 -0
- data/app/parsers/bulkrax/application_parser.rb +299 -0
- data/app/parsers/bulkrax/bagit_parser.rb +157 -0
- data/app/parsers/bulkrax/csv_parser.rb +266 -0
- data/app/parsers/bulkrax/oai_dc_parser.rb +130 -0
- data/app/parsers/bulkrax/oai_qualified_dc_parser.rb +9 -0
- data/app/parsers/bulkrax/xml_parser.rb +103 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +19 -0
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +19 -0
- data/app/views/bulkrax/entries/show.html.erb +63 -0
- data/app/views/bulkrax/exporters/_form.html.erb +120 -0
- data/app/views/bulkrax/exporters/edit.html.erb +23 -0
- data/app/views/bulkrax/exporters/index.html.erb +67 -0
- data/app/views/bulkrax/exporters/new.html.erb +23 -0
- data/app/views/bulkrax/exporters/show.html.erb +124 -0
- data/app/views/bulkrax/importers/_bagit_fields.html.erb +54 -0
- data/app/views/bulkrax/importers/_browse_everything.html.erb +12 -0
- data/app/views/bulkrax/importers/_csv_fields.html.erb +39 -0
- data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +16 -0
- data/app/views/bulkrax/importers/_form.html.erb +35 -0
- data/app/views/bulkrax/importers/_oai_fields.html.erb +42 -0
- data/app/views/bulkrax/importers/_xml_fields.html.erb +60 -0
- data/app/views/bulkrax/importers/edit.html.erb +20 -0
- data/app/views/bulkrax/importers/index.html.erb +77 -0
- data/app/views/bulkrax/importers/new.html.erb +25 -0
- data/app/views/bulkrax/importers/show.html.erb +175 -0
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +37 -0
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +52 -0
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +39 -0
- data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +6 -0
- data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +19 -0
- data/app/views/layouts/bulkrax/application.html.erb +14 -0
- data/config/locales/bulkrax.en.yml +36 -0
- data/config/routes.rb +18 -0
- data/db/migrate/20181011230201_create_bulkrax_importers.rb +18 -0
- data/db/migrate/20181011230228_create_bulkrax_importer_runs.rb +16 -0
- data/db/migrate/20190325183136_create_bulkrax_entries.rb +16 -0
- data/db/migrate/20190601221109_add_status_to_entry.rb +9 -0
- data/db/migrate/20190715161939_add_collections_to_importer_runs.rb +6 -0
- data/db/migrate/20190715162044_change_collection_ids_on_entries.rb +5 -0
- data/db/migrate/20190729124607_create_bulkrax_exporters.rb +19 -0
- data/db/migrate/20190729134158_create_bulkrax_exporter_runs.rb +14 -0
- data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +12 -0
- data/db/migrate/20191203225129_add_total_collection_records_to_importer_runs.rb +5 -0
- data/db/migrate/20191204191623_add_children_to_importer_runs.rb +6 -0
- data/db/migrate/20191204223857_change_total_records_to_total_work_entries.rb +6 -0
- data/db/migrate/20191212155530_change_entry_last_error.rb +19 -0
- data/db/migrate/20200108194557_add_validate_only_to_bulkrax_importers.rb +5 -0
- data/db/migrate/20200301232856_add_status_to_importers.rb +9 -0
- data/db/migrate/20200312190638_remove_foreign_key_from_bulkrax_entries.rb +5 -0
- data/db/migrate/20200326235838_add_status_to_exporters.rb +7 -0
- data/db/migrate/20200601204556_add_invalid_record_to_importer_run.rb +5 -0
- data/db/migrate/20200818055819_create_bulkrax_statuses.rb +18 -0
- data/db/migrate/20200819054016_move_to_statuses.rb +30 -0
- data/db/migrate/20201106014204_add_date_filter_and_status_to_bulkrax_exporters.rb +7 -0
- data/db/migrate/20201117220007_add_workflow_status_to_bulkrax_exporter.rb +5 -0
- data/db/migrate/20210806044408_remove_unused_last_error.rb +7 -0
- data/db/migrate/20210806065737_increase_text_sizes.rb +12 -0
- data/lib/bulkrax.rb +161 -0
- data/lib/bulkrax/engine.rb +37 -0
- data/lib/bulkrax/version.rb +5 -0
- data/lib/generators/bulkrax/install_generator.rb +80 -0
- data/lib/generators/bulkrax/templates/README +3 -0
- data/lib/generators/bulkrax/templates/app/assets/images/bulkrax/removed.png +0 -0
- data/lib/generators/bulkrax/templates/app/models/concerns/bulkrax/has_local_processing.rb +8 -0
- data/lib/generators/bulkrax/templates/bin/importer +140 -0
- data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +84 -0
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +72 -0
- data/lib/tasks/bulkrax_tasks.rake +6 -0
- metadata +388 -0
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Plain Ruby object (not an ActiveRecord model) used to hand an error-like
# payload around: it carries a message and a backtrace and nothing else.
module Bulkrax
  class ImportFailed
    # Both values are readable and writable after construction.
    attr_reader :message, :backtrace
    attr_writer :message, :backtrace

    # @param message the description of the failure
    # @param backtrace the detail lines stored alongside the message
    def initialize(message, backtrace)
      @message, @backtrace = message, backtrace
    end
  end
end
|
@@ -0,0 +1,155 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'iso8601'
|
4
|
+
|
5
|
+
module Bulkrax
  # ActiveRecord model describing one configured import: its parser class,
  # parser options, field mapping, schedule and the runs/entries it produced.
  #
  # NOTE(review): `parser` and `status_info` are called here but not defined in
  # this class; presumably they come from ImporterExporterBehavior / StatusInfo.
  class Importer < ApplicationRecord
    include Bulkrax::ImporterExporterBehavior
    include Bulkrax::StatusInfo

    serialize :parser_fields, JSON
    serialize :field_mapping, JSON

    belongs_to :user
    has_many :importer_runs, dependent: :destroy
    has_many :entries, as: :importerexporter, dependent: :destroy

    validates :name, presence: true
    validates :admin_set_id, presence: true
    validates :parser_klass, presence: true

    delegate :valid_import?, :create_parent_child_relationships,
             :write_errored_entries_file, :visibility, to: :parser

    attr_accessor :only_updates, :file_style, :file
    attr_writer :current_run

    # @return ['Validated'] when the importer is flagged validate_only,
    #   otherwise whatever the inherited #status returns
    def status
      return 'Validated' if validate_only

      super
    end

    # Records the final status of the current run on the run's importer.
    # Does nothing while the run still has enqueued records.
    def record_status
      run = ImporterRun.find(current_run.id) # re-read so the counters are fresh
      return if run.enqueued_records.positive? # still processing

      return run.importer.status_info('Complete') unless run.failed_records.positive?
      return run.importer.status_info('Complete (with failures)') if run.invalid_records.blank?

      # invalid_records is a newline-separated string; each line becomes one
      # "backtrace" entry of the ImportFailed payload.
      failure = Bulkrax::ImportFailed.new('Failed with Invalid Records', run.invalid_records.split("\n"))
      run.importer.status_info(failure)
    end

    # The field mapping to use for this import (memoized once non-nil).
    #
    # Returns the stored field_mapping when one is set. When it is blank or
    # the placeholder [{}], a default is built by passing each non-nil entry of
    # parser.import_fields through Bulkrax.default_field_mapping and merging the
    # results. Returns nil when field_mapping is blank (but not [{}]) and the
    # parser reports no import fields.
    def mapping
      @mapping ||=
        if field_mapping.present? && field_mapping != [{}]
          field_mapping
        elsif parser.import_fields.present? || field_mapping == [{}]
          defaults = parser.import_fields.reject(&:nil?).map do |field|
            Bulkrax.default_field_mapping.call(field)
          end
          ActiveSupport::HashWithIndifferentAccess.new(defaults.inject(:merge))
        end
    end

    # @return the stored parser_fields, or an empty Hash when none are stored
    def parser_fields
      self[:parser_fields] || {}
    end

    # Label/value pairs for the supported frequencies.
    #
    # Values are ISO 8601 durations (https://en.wikipedia.org/wiki/ISO_8601#Durations):
    # prefixed with 'P', each part is a number followed by its unit, e.g.
    # P1Y2M3W4DT5H6M7S == 1 Year, 2 Months, 3 Weeks, 4 Days, 5 Hours, 6 Minutes, 7 Seconds
    def self.frequency_enums
      [['Daily', 'P1D'], ['Monthly', 'P1M'], ['Yearly', 'P1Y'], ['Once (on save)', 'PT0S']]
    end

    # Stores the frequency as the string form of an ISO8601::Duration.
    def frequency=(frequency)
      self[:frequency] = ISO8601::Duration.new(frequency).to_s
    end

    # @return [ISO8601::Duration] the stored frequency, 'PT0S' when none is stored
    def frequency
      ISO8601::Duration.new(self[:frequency] || 'PT0S')
    end

    # @return [Boolean] true when the frequency is a non-zero duration
    def schedulable?
      !frequency.to_seconds.zero?
    end

    # The run for the current import; created (and memoized) on first access.
    def current_run
      @current_run ||= importer_runs.create!(
        total_work_entries: limit || parser.total,
        total_collection_entries: parser.collections_total
      )
    end

    # The most recent run at the time of first access (memoized).
    def last_run
      @last_run ||= importer_runs.last
    end

    # Memoized Hash, empty on first access; nothing in this class writes to it.
    def seen
      @seen ||= {}
    end

    def replace_files
      parser_fields['replace_files']
    end

    def update_files
      parser_fields['update_files']
    end

    # Asks the parser to create works. Any StandardError is passed to
    # status_info instead of propagating.
    def import_works
      save if new_record? # Object needs to be saved for statuses
      self.only_updates ||= false
      parser.create_works
    rescue StandardError => e
      status_info(e)
    end

    # Asks the parser to create collections. Any StandardError is passed to
    # status_info instead of propagating.
    def import_collections
      save if new_record? # Object needs to be saved for statuses
      parser.create_collections
    rescue StandardError => e
      status_info(e)
    end

    # Prepend the base_url to ensure unique set identifiers
    # @todo - move to parser, as this is OAI specific
    #
    # The prefix is the third '/'-separated segment of parser_fields['base_url']
    # (the host[:port] part of a "scheme://host/path" URL).
    def unique_collection_identifier(id)
      host = parser_fields['base_url'].split('/')[2]
      "#{host}_#{id}"
    end

    # The format for metadata for the incoming import; corresponds to an Entry class
    def import_metadata_format
      [['CSV', 'Bulkrax::CsvEntry'], ['RDF (N-Triples)', 'Bulkrax::RdfEntry']]
    end

    # The type of metadata for the incoming import, either one file for all works, or one file per work
    # def import_metadata_type
    #   [['Single Metadata File for all works', 'single'], ['Multiple Files, one per Work', 'multi']]
    # end

    # If the import data is zipped, unzip it to this path
    def importer_unzip_path
      @importer_unzip_path ||= File.join(Bulkrax.import_path, "import_#{path_string}")
    end

    def errored_entries_csv_path
      @errored_entries_csv_path ||= File.join(Bulkrax.import_path, "import_#{path_string}_errored_entries.csv")
    end

    # Identifier used in on-disk paths: "<id>_<created_at as YYYYmmddHHMMSS>",
    # with "_<latest run id>" appended once the importer has at least one run.
    #
    # The "no run yet" case is handled with an explicit nil check rather than a
    # blanket rescue, so unrelated failures are not silently turned into the
    # short form of the path.
    def path_string
      base = "#{id}_#{created_at.strftime('%Y%m%d%H%M%S')}"
      latest_run = importer_runs.last
      latest_run ? "#{base}_#{latest_run.id}" : base
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'erb'
|
4
|
+
require 'ostruct'
|
5
|
+
|
6
|
+
module Bulkrax
  # Entry for a single record fetched through an OAI client.
  #
  # NOTE(review): `client`, `identifier`, `parser`, `importerexporter`,
  # `work_identifier`, `find_collection`, `add_metadata` and the other add_*
  # helpers are used here but defined elsewhere (presumably on Entry or its
  # concerns).
  class OaiEntry < Entry
    serialize :raw_metadata, JSON

    delegate :record, to: :raw_record

    # The response of client.get_record for this entry's identifier, using the
    # parser's 'metadata_prefix' field (memoized).
    def raw_record
      @raw_record ||= client.get_record(identifier: identifier, metadata_prefix: parser.parser_fields['metadata_prefix'])
    end

    # The set_spec value of the record header.
    def sets
      record.header.set_spec
    end

    # Object exposing `record` and `identifier`; its binding is what the
    # thumbnail_url template is evaluated against (memoized).
    def context
      @context ||= OpenStruct.new(record: record, identifier: record.header.identifier)
    end

    # Renders the parser's 'thumbnail_url' field as an ERB template against
    # #context.
    #
    # The field is coerced with to_s so that a parser without a 'thumbnail_url'
    # field renders an empty string instead of raising from ERB.new(nil).
    #
    # @return [String]
    def thumbnail_url
      template = parser.parser_fields['thumbnail_url'].to_s
      ERB.new(template).result(context.instance_eval { binding })
    end

    # Rebuilds parsed_metadata from the record: the header identifier is stored
    # under work_identifier, every grandchild node of record.metadata is passed
    # to add_metadata as (name, content), followed by the thumbnail URL and the
    # add_* helpers.
    #
    # @return the resulting parsed_metadata
    def build_metadata
      self.parsed_metadata = {}
      parsed_metadata[work_identifier] = [record.header.identifier]

      record.metadata.children.each do |child|
        child.children.each { |node| add_metadata(node.name, node.content) }
      end
      add_metadata('thumbnail_url', thumbnail_url)

      add_visibility
      add_rights_statement
      add_admin_set_id
      add_collections
      add_local

      parsed_metadata
    end

    # When the parser's collection_name is 'all': true if the record has no
    # sets or collection_ids already holds one id per set. Otherwise: true if
    # collection_ids holds exactly one id.
    def collections_created?
      return collection_ids.size == 1 unless parser.collection_name == 'all'

      sets.blank? || sets.size == collection_ids.size
    end

    # Retrieve list of collections for the entry; add to collection_ids
    # If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
    # in this case, if 'All' is selected, records will not be added to a collection.
    #
    # Both branches resolve collections through the same find_collection helper
    # so single-set and all-set imports are looked up the same way.
    #
    # @return collection_ids after any newly found ids have been appended
    def find_or_create_collection_ids
      return collection_ids if collections_created?

      set_names =
        if sets.blank? || parser.collection_name != 'all'
          [parser.collection_name]
        else # All - collections should exist for all sets
          sets.map(&:content)
        end

      set_names.each do |set_name|
        collection = find_collection(importerexporter.unique_collection_identifier(set_name))
        next unless collection.present?
        next if collection_ids.include?(collection.id)

        collection_ids << collection.id
      end

      collection_ids
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # OAI entry whose factory class is Collection rather than a work class.
  class OaiSetEntry < OaiEntry
    # Always true for this entry type.
    def collections_created?
      true
    end

    # The class this entry is built into.
    def factory_class
      Collection
    end

    # Uses raw_metadata as parsed_metadata as-is, then runs add_local.
    #
    # @return the resulting parsed_metadata
    def build_metadata
      self.parsed_metadata = raw_metadata
      add_local
      parsed_metadata
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # RDF entry whose factory class is Collection rather than a work class.
  class RdfCollectionEntry < RdfEntry
    # The class this entry is built into.
    def factory_class
      Collection
    end

    # The record is simply the stored raw_metadata (memoized).
    def record
      @record ||= raw_metadata
    end

    # Uses raw_metadata as parsed_metadata as-is, then runs add_local.
    #
    # @return the resulting parsed_metadata
    def build_metadata
      self.parsed_metadata = raw_metadata
      add_local
      parsed_metadata
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rdf'
|
4
|
+
module Bulkrax
  # Entry whose raw metadata is a serialized RDF document.
  #
  # NOTE(review): `source_identifier`, `work_identifier`, `find_collection`,
  # `add_metadata` and the other add_* helpers are used here but defined
  # elsewhere (presumably on Entry or its concerns).
  class RdfEntry < Entry
    serialize :raw_metadata, JSON

    class << self
      # Opens an RDF reader on the given path.
      def read_data(path)
        RDF::Reader.open(path)
      end

      # @return [Array<String>] the predicates of the data, as strings
      def fields_from_data(data)
        data.predicates.map(&:to_s)
      end

      # Builds the raw metadata Hash for one entry from an RDF reader.
      #
      # Every statement is re-serialized in the reader's own format. Along the
      # way, objects whose predicate equals the configured collection/children
      # field are collected, and the object of any predicate matching
      # /deleted/ is remembered (the last such statement wins).
      #
      # @param data the RDF reader to consume
      # @param source_id the key under which the first subject is stored
      # @return [Hash]
      def data_for_entry(data, source_id)
        rdf_format = data.class.format.to_sym
        collection_values = []
        child_values = []
        deleted = nil

        serialized = RDF::Writer.for(rdf_format).buffer do |writer|
          data.each_statement do |statement|
            predicate = statement.predicate.to_s
            collection_values << statement.object.to_s if collection_field.present? && collection_field == predicate
            child_values << statement.object.to_s if children_field.present? && children_field == predicate
            deleted = statement.object.to_s if /deleted/.match?(predicate)
            writer << statement
          end
        end

        {
          source_id => data.subjects.first.to_s,
          delete: deleted,
          format: rdf_format,
          data: serialized,
          collection: collection_values,
          children: child_values
        }
      end

      # The collection predicate configured for this class name.
      def collection_field
        Bulkrax.collection_field_mapping[to_s]
      end

      # The parent/child predicate configured for this class name.
      def children_field
        Bulkrax.parent_child_field_mapping[to_s]
      end
    end

    # An RDF reader over raw_metadata['data'], chosen by raw_metadata['format']
    # (memoized).
    def record
      @record ||= RDF::Reader.for(raw_metadata['format'].to_sym).new(raw_metadata['data'])
    end

    # Rebuilds parsed_metadata from the record's statements.
    #
    # @raise [StandardError] when there is no record or the raw metadata has no
    #   source identifier
    # @return the resulting parsed_metadata
    def build_metadata
      raise StandardError, 'Record not found' if record.nil?
      raise StandardError, "Missing source identifier (#{source_identifier})" if raw_metadata[source_identifier].blank?

      self.parsed_metadata = {}
      parsed_metadata[work_identifier] = [raw_metadata[source_identifier]]

      record.each_statement do |statement|
        # Only process the subject for our record (in case other data is in the file)
        if statement.subject.to_s == raw_metadata[source_identifier]
          add_metadata(statement.predicate.to_s, statement.object.to_s)
        end
      end

      add_visibility
      add_rights_statement
      add_admin_set_id
      add_collections
      add_local
      parsed_metadata['file'] = raw_metadata['file']

      parsed_metadata
    end

    # True when no collections are listed in the raw metadata, or when
    # collection_ids already holds as many ids as collections are listed.
    def collections_created?
      listed = raw_metadata['collection']
      listed.blank? || listed.length == collection_ids.length
    end

    # Looks up each listed collection with find_collection and appends the ids
    # that were found and are not yet present.
    #
    # @return collection_ids
    def find_or_create_collection_ids
      return collection_ids if collections_created?

      listed = raw_metadata['collection']
      if listed.present?
        listed.each do |collection|
          found = find_collection(collection)
          next if found.blank? || collection_ids.include?(found.id)

          collection_ids << found.id
        end
      end

      collection_ids
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Bulkrax
  # A status row attached polymorphically to a "statusable" and a "runnable".
  class Status < ApplicationRecord
    belongs_to :statusable, polymorphic: true
    belongs_to :runnable, polymorphic: true
    serialize :error_backtrace, Array

    # Statuses whose statusable is an importer / an exporter.
    scope :for_importers, -> { where(statusable_type: 'Bulkrax::Importer') }
    scope :for_exporters, -> { where(statusable_type: 'Bulkrax::Exporter') }

    # Keeps only the highest-id status for each (statusable_id, statusable_type).
    scope :latest_by_statusable, -> { joins(latest_by_statusable_subtable.join_sources) }

    # Arel join of the statuses table against a derived table that holds
    # MAX(id) AS latest_status_id per (statusable_id, statusable_type), matched
    # on statuses.id = latest_status.latest_status_id.
    def self.latest_by_statusable_subtable
      statuses = arel_table
      grouping = [statuses[:statusable_id], statuses[:statusable_type]]

      newest_per_statusable = statuses
                              .project(*grouping, statuses[:id].maximum.as('latest_status_id'))
                              .group(*grouping)

      # The alias supplies both the derived table's name and the column
      # reference used in the ON clause.
      derived = Arel::Table.new(newest_per_statusable).alias(:latest_status)

      statuses
        .join(newest_per_statusable.as(derived.name.to_s), Arel::Nodes::InnerJoin)
        .on(statuses[:id].eq(derived[:latest_status_id]))
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
module Bulkrax
  # Generic XML Entry
  #
  # NOTE(review): `source_identifier`, `work_identifier`, `importerexporter`,
  # `add_metadata` and the other add_* helpers are used here but defined
  # elsewhere (presumably on Entry or its concerns).
  class XmlEntry < Entry
    serialize :raw_metadata, JSON

    # Intentionally empty: returns nil.
    def self.fields_from_data(data); end

    # Parses the file at path and strips all namespaces.
    #
    # The file is opened with the block form of File.open so the handle is
    # closed as soon as parsing finishes, and so the path is always treated as
    # a file name (Kernel#open would run a command for a path starting with
    # '|').
    #
    # No encoding is forced on the parser; forcing one, as in
    #   Nokogiri::XML(file, nil, 'UTF-8')
    # doesn't cope with BOM sequences.
    #
    # @param path [String] path of the XML file to read
    # @return the parsed document with namespaces removed
    def self.read_data(path)
      File.open(path) { |file| Nokogiri::XML(file).remove_namespaces! }
    end

    # Builds the raw metadata Hash for one entry from a parsed XML node.
    #
    # The source id is the text of the first element named source_id below
    # `data`; this raises NoMethodError when no such element exists. The
    # optional `delete` element is read the same way but tolerates absence.
    # The node itself is serialized without XML declaration or self-closing
    # tags, with newlines and tabs removed and runs of spaces squeezed.
    #
    # @return [Hash]
    def self.data_for_entry(data, source_id)
      save_options = Nokogiri::XML::Node::SaveOptions::NO_DECLARATION | Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS

      {
        source_id => data.xpath(".//*[name()='#{source_id}']").first.text,
        delete: data.xpath(".//*[name()='delete']").first&.text,
        data: data.to_xml(encoding: 'UTF-8', save_with: save_options).delete("\n\t").squeeze(' '),
        collection: [],
        children: []
      }
    end

    # def self.matcher_class; end

    # raw_metadata['data'] parsed as a UTF-8 XML document (memoized).
    def record
      @record ||= Nokogiri::XML(raw_metadata['data'], nil, 'UTF-8')
    end

    # Rebuilds parsed_metadata: for every element name returned by
    # #xml_elements, the non-blank content of each child node of each matching
    # element is passed to add_metadata.
    #
    # @raise [StandardError] when there is no record, no source identifier, or
    #   no title in the resulting metadata
    # @return the resulting parsed_metadata
    def build_metadata
      raise StandardError, 'Record not found' if record.nil?
      raise StandardError, "Missing source identifier (#{source_identifier})" if raw_metadata[source_identifier].blank?

      self.parsed_metadata = {}
      parsed_metadata[work_identifier] = [raw_metadata[source_identifier]]

      xml_elements.each do |element_name|
        matches = record.xpath("//*[name()='#{element_name}']")
        next if matches.blank?

        matches.each do |element|
          element.children.map(&:content).each do |content|
            add_metadata(element_name, content) if content.present?
          end
        end
      end

      add_visibility
      add_rights_statement
      add_admin_set_id
      add_collections
      parsed_metadata['file'] = raw_metadata['file']

      add_local
      raise StandardError, "title is required" if parsed_metadata['title'].blank?

      parsed_metadata
    end

    # Grab the class from the real parser
    #
    # @return [Array] the distinct, non-nil :from values of the field mappings
    #   registered for the importer/exporter's parser class
    def xml_elements
      Bulkrax.field_mappings[importerexporter.parser_klass]
             .map { |_field, config| config[:from] }
             .flatten.compact.uniq
    end
  end
end
|