bulkrax 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +205 -0
  3. data/README.md +202 -0
  4. data/Rakefile +42 -0
  5. data/app/assets/config/bulkrax_manifest.js +2 -0
  6. data/app/assets/javascripts/bulkrax/application.js +14 -0
  7. data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
  8. data/app/assets/javascripts/bulkrax/entries.js +15 -0
  9. data/app/assets/javascripts/bulkrax/exporters.js +60 -0
  10. data/app/assets/javascripts/bulkrax/importers.js.erb +166 -0
  11. data/app/assets/stylesheets/bulkrax/accordion.scss +40 -0
  12. data/app/assets/stylesheets/bulkrax/application.css +15 -0
  13. data/app/assets/stylesheets/bulkrax/coderay.scss +264 -0
  14. data/app/assets/stylesheets/bulkrax/import_export.scss +37 -0
  15. data/app/controllers/bulkrax/application_controller.rb +8 -0
  16. data/app/controllers/bulkrax/entries_controller.rb +44 -0
  17. data/app/controllers/bulkrax/exporters_controller.rb +125 -0
  18. data/app/controllers/bulkrax/importers_controller.rb +315 -0
  19. data/app/controllers/concerns/bulkrax/api.rb +29 -0
  20. data/app/factories/bulkrax/object_factory.rb +230 -0
  21. data/app/helpers/bulkrax/application_helper.rb +15 -0
  22. data/app/helpers/bulkrax/exporters_helper.rb +6 -0
  23. data/app/helpers/bulkrax/importers_helper.rb +13 -0
  24. data/app/helpers/bulkrax/validation_helper.rb +153 -0
  25. data/app/jobs/bulkrax/application_job.rb +6 -0
  26. data/app/jobs/bulkrax/child_relationships_job.rb +128 -0
  27. data/app/jobs/bulkrax/delete_work_job.rb +16 -0
  28. data/app/jobs/bulkrax/download_cloud_file_job.rb +18 -0
  29. data/app/jobs/bulkrax/export_work_job.rb +37 -0
  30. data/app/jobs/bulkrax/exporter_job.rb +14 -0
  31. data/app/jobs/bulkrax/import_work_collection_job.rb +41 -0
  32. data/app/jobs/bulkrax/import_work_job.rb +32 -0
  33. data/app/jobs/bulkrax/importer_job.rb +26 -0
  34. data/app/mailers/bulkrax/application_mailer.rb +8 -0
  35. data/app/matchers/bulkrax/application_matcher.rb +113 -0
  36. data/app/matchers/bulkrax/bagit_matcher.rb +6 -0
  37. data/app/matchers/bulkrax/csv_matcher.rb +6 -0
  38. data/app/matchers/bulkrax/oai_matcher.rb +6 -0
  39. data/app/models/bulkrax/application_record.rb +7 -0
  40. data/app/models/bulkrax/csv_collection_entry.rb +19 -0
  41. data/app/models/bulkrax/csv_entry.rb +163 -0
  42. data/app/models/bulkrax/entry.rb +104 -0
  43. data/app/models/bulkrax/exporter.rb +122 -0
  44. data/app/models/bulkrax/exporter_run.rb +7 -0
  45. data/app/models/bulkrax/import_failed.rb +13 -0
  46. data/app/models/bulkrax/importer.rb +155 -0
  47. data/app/models/bulkrax/importer_run.rb +8 -0
  48. data/app/models/bulkrax/oai_dc_entry.rb +6 -0
  49. data/app/models/bulkrax/oai_entry.rb +74 -0
  50. data/app/models/bulkrax/oai_qualified_dc_entry.rb +6 -0
  51. data/app/models/bulkrax/oai_set_entry.rb +19 -0
  52. data/app/models/bulkrax/rdf_collection_entry.rb +19 -0
  53. data/app/models/bulkrax/rdf_entry.rb +90 -0
  54. data/app/models/bulkrax/status.rb +25 -0
  55. data/app/models/bulkrax/xml_entry.rb +73 -0
  56. data/app/models/concerns/bulkrax/download_behavior.rb +61 -0
  57. data/app/models/concerns/bulkrax/errored_entries.rb +45 -0
  58. data/app/models/concerns/bulkrax/export_behavior.rb +58 -0
  59. data/app/models/concerns/bulkrax/file_factory.rb +140 -0
  60. data/app/models/concerns/bulkrax/has_local_processing.rb +7 -0
  61. data/app/models/concerns/bulkrax/has_matchers.rb +155 -0
  62. data/app/models/concerns/bulkrax/import_behavior.rb +90 -0
  63. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +34 -0
  64. data/app/models/concerns/bulkrax/status_info.rb +56 -0
  65. data/app/parsers/bulkrax/application_parser.rb +299 -0
  66. data/app/parsers/bulkrax/bagit_parser.rb +157 -0
  67. data/app/parsers/bulkrax/csv_parser.rb +266 -0
  68. data/app/parsers/bulkrax/oai_dc_parser.rb +130 -0
  69. data/app/parsers/bulkrax/oai_qualified_dc_parser.rb +9 -0
  70. data/app/parsers/bulkrax/xml_parser.rb +103 -0
  71. data/app/views/bulkrax/entries/_parsed_metadata.html.erb +19 -0
  72. data/app/views/bulkrax/entries/_raw_metadata.html.erb +19 -0
  73. data/app/views/bulkrax/entries/show.html.erb +63 -0
  74. data/app/views/bulkrax/exporters/_form.html.erb +120 -0
  75. data/app/views/bulkrax/exporters/edit.html.erb +23 -0
  76. data/app/views/bulkrax/exporters/index.html.erb +67 -0
  77. data/app/views/bulkrax/exporters/new.html.erb +23 -0
  78. data/app/views/bulkrax/exporters/show.html.erb +124 -0
  79. data/app/views/bulkrax/importers/_bagit_fields.html.erb +54 -0
  80. data/app/views/bulkrax/importers/_browse_everything.html.erb +12 -0
  81. data/app/views/bulkrax/importers/_csv_fields.html.erb +39 -0
  82. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +16 -0
  83. data/app/views/bulkrax/importers/_form.html.erb +35 -0
  84. data/app/views/bulkrax/importers/_oai_fields.html.erb +42 -0
  85. data/app/views/bulkrax/importers/_xml_fields.html.erb +60 -0
  86. data/app/views/bulkrax/importers/edit.html.erb +20 -0
  87. data/app/views/bulkrax/importers/index.html.erb +77 -0
  88. data/app/views/bulkrax/importers/new.html.erb +25 -0
  89. data/app/views/bulkrax/importers/show.html.erb +175 -0
  90. data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +37 -0
  91. data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +52 -0
  92. data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +39 -0
  93. data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +6 -0
  94. data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +19 -0
  95. data/app/views/layouts/bulkrax/application.html.erb +14 -0
  96. data/config/locales/bulkrax.en.yml +36 -0
  97. data/config/routes.rb +18 -0
  98. data/db/migrate/20181011230201_create_bulkrax_importers.rb +18 -0
  99. data/db/migrate/20181011230228_create_bulkrax_importer_runs.rb +16 -0
  100. data/db/migrate/20190325183136_create_bulkrax_entries.rb +16 -0
  101. data/db/migrate/20190601221109_add_status_to_entry.rb +9 -0
  102. data/db/migrate/20190715161939_add_collections_to_importer_runs.rb +6 -0
  103. data/db/migrate/20190715162044_change_collection_ids_on_entries.rb +5 -0
  104. data/db/migrate/20190729124607_create_bulkrax_exporters.rb +19 -0
  105. data/db/migrate/20190729134158_create_bulkrax_exporter_runs.rb +14 -0
  106. data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +12 -0
  107. data/db/migrate/20191203225129_add_total_collection_records_to_importer_runs.rb +5 -0
  108. data/db/migrate/20191204191623_add_children_to_importer_runs.rb +6 -0
  109. data/db/migrate/20191204223857_change_total_records_to_total_work_entries.rb +6 -0
  110. data/db/migrate/20191212155530_change_entry_last_error.rb +19 -0
  111. data/db/migrate/20200108194557_add_validate_only_to_bulkrax_importers.rb +5 -0
  112. data/db/migrate/20200301232856_add_status_to_importers.rb +9 -0
  113. data/db/migrate/20200312190638_remove_foreign_key_from_bulkrax_entries.rb +5 -0
  114. data/db/migrate/20200326235838_add_status_to_exporters.rb +7 -0
  115. data/db/migrate/20200601204556_add_invalid_record_to_importer_run.rb +5 -0
  116. data/db/migrate/20200818055819_create_bulkrax_statuses.rb +18 -0
  117. data/db/migrate/20200819054016_move_to_statuses.rb +30 -0
  118. data/db/migrate/20201106014204_add_date_filter_and_status_to_bulkrax_exporters.rb +7 -0
  119. data/db/migrate/20201117220007_add_workflow_status_to_bulkrax_exporter.rb +5 -0
  120. data/db/migrate/20210806044408_remove_unused_last_error.rb +7 -0
  121. data/db/migrate/20210806065737_increase_text_sizes.rb +12 -0
  122. data/lib/bulkrax.rb +161 -0
  123. data/lib/bulkrax/engine.rb +37 -0
  124. data/lib/bulkrax/version.rb +5 -0
  125. data/lib/generators/bulkrax/install_generator.rb +80 -0
  126. data/lib/generators/bulkrax/templates/README +3 -0
  127. data/lib/generators/bulkrax/templates/app/assets/images/bulkrax/removed.png +0 -0
  128. data/lib/generators/bulkrax/templates/app/models/concerns/bulkrax/has_local_processing.rb +8 -0
  129. data/lib/generators/bulkrax/templates/bin/importer +140 -0
  130. data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +84 -0
  131. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +72 -0
  132. data/lib/tasks/bulkrax_tasks.rake +6 -0
  133. metadata +388 -0
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
# Plain Ruby object (not database backed) that carries an error message
# together with its backtrace so the pair can be handed around as one value.
module Bulkrax
  class ImportFailed
    # Both values are readable and writable after construction.
    attr_reader :message, :backtrace
    attr_writer :message, :backtrace

    # @param message the failure description supplied by the caller
    # @param backtrace the backtrace (or backtrace-like detail) supplied by the caller
    def initialize(message, backtrace)
      self.message = message
      self.backtrace = backtrace
    end
  end
end
@@ -0,0 +1,155 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'iso8601'
4
+
5
module Bulkrax
  # ActiveRecord model describing one configured import: which parser to use,
  # the parser options, the field mapping, and how often to run. Work is
  # delegated to the parser; each execution is tracked by an ImporterRun.
  class Importer < ApplicationRecord
    include Bulkrax::ImporterExporterBehavior
    include Bulkrax::StatusInfo

    serialize :parser_fields, JSON
    serialize :field_mapping, JSON

    belongs_to :user
    has_many :importer_runs, dependent: :destroy
    has_many :entries, as: :importerexporter, dependent: :destroy

    validates :name, presence: true
    validates :admin_set_id, presence: true
    validates :parser_klass, presence: true

    delegate :valid_import?, :create_parent_child_relationships,
             :write_errored_entries_file, :visibility, to: :parser

    attr_accessor :only_updates, :file_style, :file
    attr_writer :current_run

    # @return [String] 'Validated' for validate-only importers, otherwise
    #   whatever the inherited #status reports
    def status
      if self.validate_only
        'Validated'
      else
        super
      end
    end

    # Record the final status of the current run on the importer.
    # Does nothing while the run still has enqueued records.
    def record_status
      importer_run = ImporterRun.find(current_run.id) # make sure fresh
      return if importer_run.enqueued_records.positive? # still processing
      if importer_run.failed_records.positive?
        if importer_run.invalid_records.present?
          # One backtrace line per line of the stored invalid_records text
          e = Bulkrax::ImportFailed.new('Failed with Invalid Records', importer_run.invalid_records.split("\n"))
          importer_run.importer.status_info(e)
        else
          importer_run.importer.status_info('Complete (with failures)')
        end
      else
        importer_run.importer.status_info('Complete')
      end
    end

    # If field_mapping is empty, set up a default based on the parser's import_fields.
    # Returns nil when there is no stored mapping and the parser reports no fields
    # (unless the stored mapping is the [{}] placeholder, which yields an empty mapping).
    def mapping
      @mapping ||= if self.field_mapping.blank? || self.field_mapping == [{}]
                     import_fields = parser.import_fields
                     if import_fields.present? || self.field_mapping == [{}]
                       ActiveSupport::HashWithIndifferentAccess.new(
                         # Array() guards against a nil import_fields when only the
                         # [{}] placeholder condition brought us here
                         Array(import_fields).compact.map do |m|
                           Bulkrax.default_field_mapping.call(m)
                         end.inject(:merge)
                       )
                     end
                   else
                     self.field_mapping
                   end
    end

    # @return [Hash] the stored parser options, or an empty Hash when none are stored
    def parser_fields
      self[:parser_fields] || {}
    end

    def self.frequency_enums
      # these duration values use ISO 8601 Durations (https://en.wikipedia.org/wiki/ISO_8601#Durations)
      # TLDR; all durations are prefixed with 'P' and the parts are a number with the type of duration.
      # i.e. P1Y2M3W4DT5H6M7S == 1 Year, 2 Months, 3 Weeks, 4 Days, 5 Hours, 6 Minutes, 7 Seconds
      [['Daily', 'P1D'], ['Monthly', 'P1M'], ['Yearly', 'P1Y'], ['Once (on save)', 'PT0S']]
    end

    # Store the frequency as the string form of an ISO 8601 duration.
    def frequency=(frequency)
      self[:frequency] = ISO8601::Duration.new(frequency).to_s
    end

    # @return [ISO8601::Duration] the stored frequency; "PT0S" when none is stored
    def frequency
      f = self[:frequency] || "PT0S"
      ISO8601::Duration.new(f)
    end

    # @return [Boolean] true when the frequency is a non-zero duration
    def schedulable?
      frequency.to_seconds != 0
    end

    # The run for this execution; created (and persisted) on first access.
    def current_run
      @current_run ||= self.importer_runs.create!(total_work_entries: self.limit || parser.total, total_collection_entries: parser.collections_total)
    end

    # The most recent run, memoized on first access.
    def last_run
      @last_run ||= self.importer_runs.last
    end

    # Memoized Hash available to callers for tracking what has been seen.
    def seen
      @seen ||= {}
    end

    def replace_files
      self.parser_fields['replace_files']
    end

    def update_files
      self.parser_fields['update_files']
    end

    # Ask the parser to create works. Any StandardError is recorded through
    # status_info instead of being re-raised.
    def import_works
      self.save if self.new_record? # Object needs to be saved for statuses
      self.only_updates ||= false
      parser.create_works
    rescue StandardError => e
      status_info(e)
    end

    # Ask the parser to create collections. Any StandardError is recorded
    # through status_info instead of being re-raised.
    def import_collections
      self.save if self.new_record? # Object needs to be saved for statuses
      parser.create_collections
    rescue StandardError => e
      status_info(e)
    end

    # Prepend the base_url to ensure unique set identifiers
    # (uses the third '/'-separated segment of parser_fields['base_url'])
    # @todo - move to parser, as this is OAI specific
    def unique_collection_identifier(id)
      "#{self.parser_fields['base_url'].split('/')[2]}_#{id}"
    end

    # The format for metadata for the incoming import; corresponds to an Entry class
    def import_metadata_format
      [['CSV', 'Bulkrax::CsvEntry'], ['RDF (N-Triples)', 'Bulkrax::RdfEntry']]
    end

    # The type of metadata for the incoming import, either one file for all works, or one file per work
    # def import_metadata_type
    #   [['Single Metadata File for all works', 'single'], ['Multiple Files, one per Work', 'multi']]
    # end

    # If the import data is zipped, unzip it to this path
    def importer_unzip_path
      @importer_unzip_path ||= File.join(Bulkrax.import_path, "import_#{path_string}")
    end

    def errored_entries_csv_path
      @errored_entries_csv_path ||= File.join(Bulkrax.import_path, "import_#{path_string}_errored_entries.csv")
    end

    # Identifier used in import paths: "<id>_<created_at timestamp>", with
    # "_<latest run id>" appended once the importer has at least one run.
    def path_string
      base = "#{self.id}_#{self.created_at.strftime('%Y%m%d%H%M%S')}"
      # Explicit nil handling instead of a bare rescue, so genuine errors are not hidden
      latest_run_id = self.importer_runs.last&.id
      latest_run_id.nil? ? base : "#{base}_#{latest_run_id}"
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # One run of an Importer. Statuses attached to the run (through the
  # polymorphic :runnable association) are destroyed along with it.
  class ImporterRun < ApplicationRecord
    belongs_to(:importer)
    has_many(:statuses, as: :runnable, dependent: :destroy)
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # OAI entry subclass that adds no behaviour of its own; everything is
  # inherited from OaiEntry.
  class OaiDcEntry < OaiEntry; end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'erb'
4
+ require 'ostruct'
5
+
6
module Bulkrax
  # Entry whose record is fetched through `client.get_record`, using the
  # parser's 'metadata_prefix' option.
  class OaiEntry < Entry
    serialize :raw_metadata, JSON

    delegate :record, to: :raw_record

    # Memoized response of the get_record request for this entry's identifier.
    def raw_record
      @raw_record ||= client.get_record(identifier: identifier, metadata_prefix: parser.parser_fields['metadata_prefix'])
    end

    # The set_spec value(s) from the record header.
    def sets
      record.header.set_spec
    end

    # Object whose binding is handed to the thumbnail ERB template; it exposes
    # `record` and `identifier`.
    def context
      @context ||= OpenStruct.new(record: record, identifier: record.header.identifier)
    end

    # Render the parser's 'thumbnail_url' ERB template against #context.
    # @return [String, nil] the rendered URL, or nil when no template is configured
    def thumbnail_url
      template = parser.parser_fields['thumbnail_url']
      # ERB.new cannot take nil; a missing template means there is no thumbnail
      return if template.nil?
      ERB.new(template).result(context.instance_eval { binding })
    end

    # Build parsed_metadata from the record: the header identifier, every
    # grandchild node of the record metadata, the thumbnail URL (when one is
    # configured), then the shared add_* steps.
    # @return the populated parsed_metadata
    def build_metadata
      self.parsed_metadata = {}
      self.parsed_metadata[work_identifier] = [record.header.identifier]

      record.metadata.children.each do |child|
        child.children.each do |node|
          add_metadata(node.name, node.content)
        end
      end
      thumbnail = thumbnail_url
      add_metadata('thumbnail_url', thumbnail) unless thumbnail.nil?

      add_visibility
      add_rights_statement
      add_admin_set_id
      add_collections
      add_local

      self.parsed_metadata
    end

    # @return [Boolean] for the 'all' collection setting: true when the record has
    #   no sets or one collection id per set; otherwise true when exactly one
    #   collection id is present
    def collections_created?
      if parser.collection_name == 'all'
        sets.blank? || sets.size == self.collection_ids.size
      else
        self.collection_ids.size == 1
      end
    end

    # Retrieve list of collections for the entry; add to collection_ids
    # If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
    # in this case, if 'All' is selected, records will not be added to a collection.
    def find_or_create_collection_ids
      return self.collection_ids if collections_created?
      if sets.blank? || parser.collection_name != 'all'
        collection = find_collection(importerexporter.unique_collection_identifier(parser.collection_name))
        self.collection_ids << collection.id if collection.present? && !self.collection_ids.include?(collection.id)
      else # All - collections should exist for all sets
        sets.each do |set|
          # Same lookup helper as the single-collection branch above
          c = find_collection(importerexporter.unique_collection_identifier(set.content))
          self.collection_ids << c.id if c.present? && !self.collection_ids.include?(c.id)
        end
      end
      self.collection_ids
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # OAI entry subclass that adds no behaviour of its own; everything is
  # inherited from OaiEntry.
  class OaiQualifiedDcEntry < OaiEntry; end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # OAI entry whose factory class is Collection and whose parsed metadata
  # starts out as the raw metadata.
  class OaiSetEntry < OaiEntry
    # @return [Class] Collection
    def factory_class
      Collection
    end

    # Take the raw metadata as the parsed metadata, run add_local, and return
    # parsed_metadata as it stands afterwards.
    def build_metadata
      self.parsed_metadata = raw_metadata
      add_local
      parsed_metadata
    end

    # @return [true] always
    def collections_created?
      true
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # RDF entry whose factory class is Collection; its record is simply the
  # stored raw metadata.
  class RdfCollectionEntry < RdfEntry
    # @return the raw metadata, memoized
    def record
      @record ||= raw_metadata
    end

    # Take the raw metadata as the parsed metadata, run add_local, and return
    # parsed_metadata as it stands afterwards.
    def build_metadata
      self.parsed_metadata = raw_metadata
      add_local
      parsed_metadata
    end

    # @return [Class] Collection
    def factory_class
      Collection
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rdf'
4
module Bulkrax
  # Entry backed by RDF statements. raw_metadata keeps the serialized
  # statements under 'data' and the serialization format under 'format'.
  class RdfEntry < Entry
    serialize :raw_metadata, JSON

    # Open an RDF reader on the given path.
    def self.read_data(path)
      RDF::Reader.open(path)
    end

    # @return [Array<String>] the predicates found in the data, as strings
    def self.fields_from_data(data)
      data.predicates.map(&:to_s)
    end

    # Build the raw-metadata hash for one entry from an RDF reader.
    # Every statement is re-serialized in the reader's own format; along the
    # way the objects of the collection / children predicates are collected
    # and the object of any predicate matching /deleted/ is remembered.
    def self.data_for_entry(data, source_id)
      rdf_reader = data
      rdf_format = rdf_reader.class.format.to_sym
      collection_names = []
      child_names = []
      deleted = nil
      serialized = RDF::Writer.for(rdf_format).buffer do |out|
        rdf_reader.each_statement do |stmt|
          predicate = stmt.predicate.to_s
          collection_names << stmt.object.to_s if collection_field.present? && collection_field == predicate
          child_names << stmt.object.to_s if children_field.present? && children_field == predicate
          deleted = stmt.object.to_s if /deleted/.match?(predicate)
          out << stmt
        end
      end
      {
        source_id => rdf_reader.subjects.first.to_s,
        delete: deleted,
        format: rdf_format,
        data: serialized,
        collection: collection_names,
        children: child_names
      }
    end

    # Predicate configured as the collection field for this entry class.
    def self.collection_field
      Bulkrax.collection_field_mapping[to_s]
    end

    # Predicate configured as the parent/child field for this entry class.
    def self.children_field
      Bulkrax.parent_child_field_mapping[to_s]
    end

    # Reader over the stored 'data', in the stored 'format'; memoized.
    def record
      @record ||= begin
        reader_class = RDF::Reader.for(raw_metadata['format'].to_sym)
        reader_class.new(raw_metadata['data'])
      end
    end

    # Build parsed_metadata from the statements whose subject is this entry's
    # source identifier, then run the shared add_* steps.
    # @raise [StandardError] when the record is nil or the source identifier is blank
    def build_metadata
      raise StandardError, 'Record not found' if record.nil?
      raise StandardError, "Missing source identifier (#{source_identifier})" if raw_metadata[source_identifier].blank?

      self.parsed_metadata = {}
      parsed_metadata[work_identifier] = [raw_metadata[source_identifier]]

      record.each_statement do |stmt|
        # Only process the subject for our record (in case other data is in the file)
        next if stmt.subject.to_s != raw_metadata[source_identifier]
        add_metadata(stmt.predicate.to_s, stmt.object.to_s)
      end
      add_visibility
      add_rights_statement
      add_admin_set_id
      add_collections
      add_local
      parsed_metadata['file'] = raw_metadata['file']

      parsed_metadata
    end

    # @return [Boolean] true when no collections are listed, or when there are
    #   as many collection ids as listed collections
    def collections_created?
      listed = raw_metadata['collection']
      listed.blank? || listed.length == collection_ids.length
    end

    # Look up each listed collection and append its id to collection_ids,
    # skipping lookups that find nothing and ids already present.
    def find_or_create_collection_ids
      return collection_ids if collections_created?
      listed = raw_metadata['collection']
      if listed.present?
        listed.each do |name|
          found = find_collection(name)
          next if found.blank? || collection_ids.include?(found.id)
          collection_ids << found.id
        end
      end
      collection_ids
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # A status row attached polymorphically to a `statusable` and to a `runnable`.
  class Status < ApplicationRecord
    belongs_to :statusable, polymorphic: true
    belongs_to :runnable, polymorphic: true
    serialize :error_backtrace, Array

    scope :for_importers, -> { where(statusable_type: 'Bulkrax::Importer') }
    scope :for_exporters, -> { where(statusable_type: 'Bulkrax::Exporter') }

    # Restrict to the row with the highest id per (statusable_id, statusable_type).
    scope :latest_by_statusable, -> { joins(latest_by_statusable_subtable.join_sources) }

    # Arel join of the statuses table against a grouped sub-select that yields
    # the maximum status id for each (statusable_id, statusable_type) pair.
    def self.latest_by_statusable_subtable
      statuses = arel_table
      max_id_per_statusable = statuses.project(
        statuses[:statusable_id],
        statuses[:statusable_type],
        statuses[:id].maximum.as("latest_status_id")
      ).group(statuses[:statusable_id], statuses[:statusable_type])

      # Aliased table used to name the sub-select and to reference its column
      latest = Arel::Table.new(max_id_per_statusable).alias(:latest_status)
      aliased_subquery = max_id_per_statusable.as(latest.name.to_s)
      statuses.join(aliased_subquery, Arel::Nodes::InnerJoin)
              .on(statuses[:id].eq(latest[:latest_status_id]))
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
module Bulkrax
  # Generic XML Entry
  class XmlEntry < Entry
    serialize :raw_metadata, JSON

    # Intentionally empty: returns nil.
    def self.fields_from_data(data); end

    # Parse the XML file at +path+ and strip its namespaces.
    # The file is opened with File.open in block form so the handle is always
    # closed and the path can never be interpreted as a command.
    def self.read_data(path)
      # This doesn't cope with BOM sequences:
      # Nokogiri::XML(open(path), nil, 'UTF-8').remove_namespaces!
      File.open(path) { |file| Nokogiri::XML(file).remove_namespaces! }
    end

    # Build the raw-metadata hash for one entry from a parsed XML node.
    # The value under +source_id+ is nil when the element is absent, which lets
    # #build_metadata report the missing identifier.
    def self.data_for_entry(data, source_id)
      collections = []
      children = []
      xpath_for_source_id = ".//*[name()='#{source_id}']"
      {
        source_id => data.xpath(xpath_for_source_id).first&.text,
        delete: data.xpath(".//*[name()='delete']").first&.text,
        data:
          data.to_xml(
            encoding: 'UTF-8',
            save_with:
              Nokogiri::XML::Node::SaveOptions::NO_DECLARATION | Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS
          ).delete("\n\t").squeeze(' '), # Remove newlines, tabs, and extra whitespace
        collection: collections,
        children: children
      }
    end

    # def self.matcher_class; end

    # The stored 'data' string parsed as a UTF-8 XML document; memoized.
    def record
      @record ||= Nokogiri::XML(self.raw_metadata['data'], nil, 'UTF-8')
    end

    # Build parsed_metadata from every element named in #xml_elements, then
    # run the shared add_* steps.
    # @raise [StandardError] when the record is nil, the source identifier is
    #   blank, or no title ends up in the parsed metadata
    def build_metadata
      raise StandardError, 'Record not found' if record.nil?
      raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
      self.parsed_metadata = {}
      self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
      xml_elements.each do |element_name|
        elements = record.xpath("//*[name()='#{element_name}']")
        next if elements.blank?
        elements.each do |el|
          el.children.map(&:content).each do |content|
            add_metadata(element_name, content) if content.present?
          end
        end
      end
      add_visibility
      add_rights_statement
      add_admin_set_id
      add_collections
      self.parsed_metadata['file'] = self.raw_metadata['file']

      add_local
      raise StandardError, "title is required" if self.parsed_metadata['title'].blank?
      self.parsed_metadata
    end

    # Grab the class from the real parser
    # @return [Array] the unique, non-nil :from values of the field mappings
    #   registered for the importer's parser class
    def xml_elements
      Bulkrax.field_mappings[self.importerexporter.parser_klass].map do |_k, v|
        v[:from]
      end.flatten.compact.uniq
    end
  end
end