bulkrax 1.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +12 -4
  4. data/app/controllers/bulkrax/importers_controller.rb +23 -17
  5. data/app/factories/bulkrax/object_factory.rb +84 -63
  6. data/app/jobs/bulkrax/create_relationships_job.rb +156 -0
  7. data/app/jobs/bulkrax/delete_work_job.rb +6 -2
  8. data/app/jobs/bulkrax/export_work_job.rb +3 -1
  9. data/app/jobs/bulkrax/exporter_job.rb +1 -0
  10. data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +4 -2
  11. data/app/jobs/bulkrax/import_file_set_job.rb +69 -0
  12. data/app/jobs/bulkrax/import_work_job.rb +2 -0
  13. data/app/jobs/bulkrax/importer_job.rb +18 -1
  14. data/app/matchers/bulkrax/application_matcher.rb +5 -5
  15. data/app/models/bulkrax/csv_collection_entry.rb +8 -6
  16. data/app/models/bulkrax/csv_entry.rb +132 -65
  17. data/app/models/bulkrax/csv_file_set_entry.rb +26 -0
  18. data/app/models/bulkrax/entry.rb +19 -8
  19. data/app/models/bulkrax/exporter.rb +12 -5
  20. data/app/models/bulkrax/importer.rb +24 -5
  21. data/app/models/bulkrax/oai_entry.rb +5 -1
  22. data/app/models/bulkrax/rdf_entry.rb +16 -7
  23. data/app/models/bulkrax/xml_entry.rb +4 -0
  24. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +39 -0
  25. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  26. data/app/models/concerns/bulkrax/has_matchers.rb +44 -13
  27. data/app/models/concerns/bulkrax/import_behavior.rb +40 -5
  28. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +23 -2
  29. data/app/models/concerns/bulkrax/status_info.rb +4 -4
  30. data/app/parsers/bulkrax/application_parser.rb +67 -84
  31. data/app/parsers/bulkrax/bagit_parser.rb +13 -4
  32. data/app/parsers/bulkrax/csv_parser.rb +170 -64
  33. data/app/parsers/bulkrax/oai_dc_parser.rb +6 -3
  34. data/app/parsers/bulkrax/xml_parser.rb +5 -0
  35. data/app/views/bulkrax/exporters/_form.html.erb +1 -1
  36. data/app/views/bulkrax/exporters/show.html.erb +2 -1
  37. data/app/views/bulkrax/importers/index.html.erb +17 -17
  38. data/app/views/bulkrax/importers/show.html.erb +52 -6
  39. data/config/locales/bulkrax.en.yml +1 -0
  40. data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
  41. data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
  42. data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
  43. data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +7 -0
  44. data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +5 -0
  45. data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +6 -0
  46. data/lib/bulkrax/engine.rb +1 -1
  47. data/lib/bulkrax/version.rb +1 -1
  48. data/lib/bulkrax.rb +9 -17
  49. data/lib/generators/bulkrax/templates/bin/importer +17 -11
  50. data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
  51. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
  52. metadata +22 -10
  53. data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
data/lib/bulkrax.rb CHANGED
@@ -5,12 +5,14 @@ require 'active_support/all'
5
5
 
6
6
  module Bulkrax
7
7
  class << self
8
+ # TODO: remove collection_field_mapping when releasing v3
8
9
  mattr_accessor :parsers,
9
10
  :default_work_type,
10
11
  :default_field_mapping,
11
12
  :collection_field_mapping,
12
13
  :fill_in_blank_source_identifiers,
13
- :parent_child_field_mapping,
14
+ :related_children_field_mapping,
15
+ :related_parents_field_mapping,
14
16
  :reserved_properties,
15
17
  :field_mappings,
16
18
  :import_path,
@@ -33,26 +35,16 @@ module Bulkrax
33
35
  self.removed_image_path = Bulkrax::Engine.root.join('spec', 'fixtures', 'removed.png').to_s
34
36
  self.server_name = 'bulkrax@example.com'
35
37
 
36
- # @todo, merge parent_child_field_mapping and collection_field_mapping into field_mappings,
37
- # or make them settable per import some other way.
38
-
39
- # Field_mapping for establishing a parent-child relationship (FROM parent TO child)
40
- # This can be a Collection to Work, or Work to Work relationship
41
- # This value IS NOT used for OAI, so setting the OAI Entries here will have no effect
42
- # The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
43
- # Example:
44
- # {
45
- # 'Bulkrax::RdfEntry' => 'http://opaquenamespace.org/ns/contents',
46
- # 'Bulkrax::CsvEntry' => 'children'
47
- # }
48
- # By default no parent-child relationships are added
49
- self.parent_child_field_mapping = {}
50
-
38
+ # NOTE: Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.
39
+ # Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.
40
+ # TODO: remove collection_field_mapping when releasing v3
51
41
  # Field_mapping for establishing a collection relationship (FROM work TO collection)
52
42
  # This value IS NOT used for OAI, so setting the OAI Entries here will have no effect
53
43
  # The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
54
44
  # The default value for CSV is collection
55
- self.collection_field_mapping = {}
45
+ self.collection_field_mapping = {
46
+ 'Bulkrax::CsvEntry' => 'collection'
47
+ }
56
48
 
57
49
  # Hash of Generic field_mappings for use in the view
58
50
  # There must be one field_mappings hash per view partial
@@ -6,18 +6,11 @@ require_relative '../config/environment'
6
6
  require 'slop'
7
7
 
8
8
  def main(opts = {})
9
- if opts[:importer_id].blank? && invalid?(opts)
10
- puts 'Missing required parameters'
11
- help
12
- end
13
-
14
- if opts[:auth_token].blank?
15
- puts 'Missing Authentication Token --auth_token'
16
- exit
17
- end
9
+ check_required_params
18
10
 
19
11
  update = opts[:importer_id].present?
20
- url = build_url(opts.delete(:importer_id), opts.delete(:url))
12
+ port = opts[:port].presence
13
+ url = build_url(opts.delete(:importer_id), opts.delete(:url), port)
21
14
 
22
15
  headers = { 'Content-Type' => 'application/json' }
23
16
  headers['Authorization'] = "Token: #{opts.delete(:auth_token)}"
@@ -43,6 +36,18 @@ def main(opts = {})
43
36
  puts "#{response.status} - #{response.body.truncate(200)}"
44
37
  end
45
38
 
39
+ def check_required_params
40
+ if opts[:importer_id].blank? && invalid?(opts)
41
+ puts 'Missing required parameters'
42
+ help
43
+ end
44
+
45
+ if opts[:auth_token].blank? # rubocop:disable Style/GuardClause
46
+ puts 'Missing Authentication Token --auth_token'
47
+ exit
48
+ end
49
+ end
50
+
46
51
  def invalid?(opts)
47
52
  required_params.each do |p|
48
53
  return true if opts[p.to_sym].blank?
@@ -74,11 +79,12 @@ def build_params(opts = {})
74
79
  return params.compact
75
80
  end
76
81
 
77
- def build_url(importer_id, url)
82
+ def build_url(importer_id, url, port = nil)
78
83
  if url.nil?
79
84
  protocol = Rails.application.config.force_ssl ? 'https://' : 'http://'
80
85
  host = Rails.application.config.action_mailer.default_url_options[:host]
81
86
  url = "#{protocol}#{host}"
87
+ url = "#{url}:#{port}" if port
82
88
  end
83
89
  path = Bulkrax::Engine.routes.url_helpers.polymorphic_path(Bulkrax::Importer)
84
90
  url = File.join(url, path)
@@ -81,4 +81,6 @@ bulkrax:
81
81
  - "Update and Import (importer has not yet been run)"
82
82
  auth_token:
83
83
  definition: 'Authentication token. Required for JSON requests only.'
84
- required: true
84
+ required: true
85
+ port:
86
+ definition: 'Port to use in http request. Defaults to 80. May need to set to 3000 when running in the development environment'
@@ -19,18 +19,8 @@ Bulkrax.setup do |config|
19
19
  # Server name for oai request header
20
20
  # config.server_name = 'my_server@name.com'
21
21
 
22
- # Field_mapping for establishing a parent-child relationship (FROM parent TO child)
23
- # This can be a Collection to Work, or Work to Work relationship
24
- # This value IS NOT used for OAI, so setting the OAI Entries here will have no effect
25
- # The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
26
- # Example:
27
- # {
28
- # 'Bulkrax::RdfEntry' => 'http://opaquenamespace.org/ns/contents',
29
- # 'Bulkrax::CsvEntry' => 'children'
30
- # }
31
- # By default no parent-child relationships are added
32
- # config.parent_child_field_mapping = { }
33
-
22
+ # NOTE: Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.
23
+ # Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.
34
24
  # Field_mapping for establishing a collection relationship (FROM work TO collection)
35
25
  # This value IS NOT used for OAI, so setting the OAI parser here will have no effect
36
26
  # The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
@@ -48,6 +38,11 @@ Bulkrax.setup do |config|
48
38
  # e.g. to exclude date
49
39
  # config.field_mappings["Bulkrax::OaiDcParser"]["date"] = { from: ["date"], excluded: true }
50
40
  #
41
+ # e.g. to import parent-child relationships
42
+ # config.field_mappings['Bulkrax::CsvParser']['parents'] = { from: ['parents'], related_parents_field_mapping: true }
43
+ # config.field_mappings['Bulkrax::CsvParser']['children'] = { from: ['children'], related_children_field_mapping: true }
44
+ # (For more info on importing relationships, see Bulkrax Wiki: https://github.com/samvera-labs/bulkrax/wiki/Configuring-Bulkrax#parent-child-relationship-field-mappings)
45
+ #
51
46
  # # e.g. to add the required source_identifier field
52
47
  # # config.field_mappings["Bulkrax::CsvParser"]["source_id"] = { from: ["old_source_id"], source_identifier: true }
53
48
  # If you want Bulkrax to fill in source_identifiers for you, see below
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulkrax
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-28 00:00:00.000000000 Z
11
+ date: 2022-02-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -224,7 +224,11 @@ dependencies:
224
224
  - - ">="
225
225
  - !ruby/object:Gem::Version
226
226
  version: '0'
227
- description: Description of Bulkrax.
227
+ description: Bulkrax is a batteries included importer for Samvera applications. It
228
+ currently includes support for OAI-PMH (DC and Qualified DC) and CSV out of the
229
+ box. It is also designed to be extensible, allowing you to easily add new importers
230
+ in to your application or to include them with other gems. Bulkrax provides a full
231
+ admin interface including creating, editing, scheduling and reviewing imports.
228
232
  email:
229
233
  - rob@notch8.com
230
234
  executables: []
@@ -255,12 +259,13 @@ files:
255
259
  - app/helpers/bulkrax/importers_helper.rb
256
260
  - app/helpers/bulkrax/validation_helper.rb
257
261
  - app/jobs/bulkrax/application_job.rb
258
- - app/jobs/bulkrax/child_relationships_job.rb
262
+ - app/jobs/bulkrax/create_relationships_job.rb
259
263
  - app/jobs/bulkrax/delete_work_job.rb
260
264
  - app/jobs/bulkrax/download_cloud_file_job.rb
261
265
  - app/jobs/bulkrax/export_work_job.rb
262
266
  - app/jobs/bulkrax/exporter_job.rb
263
- - app/jobs/bulkrax/import_work_collection_job.rb
267
+ - app/jobs/bulkrax/import_collection_job.rb
268
+ - app/jobs/bulkrax/import_file_set_job.rb
264
269
  - app/jobs/bulkrax/import_work_job.rb
265
270
  - app/jobs/bulkrax/importer_job.rb
266
271
  - app/mailers/bulkrax/application_mailer.rb
@@ -271,6 +276,7 @@ files:
271
276
  - app/models/bulkrax/application_record.rb
272
277
  - app/models/bulkrax/csv_collection_entry.rb
273
278
  - app/models/bulkrax/csv_entry.rb
279
+ - app/models/bulkrax/csv_file_set_entry.rb
274
280
  - app/models/bulkrax/entry.rb
275
281
  - app/models/bulkrax/exporter.rb
276
282
  - app/models/bulkrax/exporter_run.rb
@@ -286,6 +292,7 @@ files:
286
292
  - app/models/bulkrax/status.rb
287
293
  - app/models/bulkrax/xml_entry.rb
288
294
  - app/models/concerns/bulkrax/download_behavior.rb
295
+ - app/models/concerns/bulkrax/dynamic_record_lookup.rb
289
296
  - app/models/concerns/bulkrax/errored_entries.rb
290
297
  - app/models/concerns/bulkrax/export_behavior.rb
291
298
  - app/models/concerns/bulkrax/file_factory.rb
@@ -351,6 +358,11 @@ files:
351
358
  - db/migrate/20201117220007_add_workflow_status_to_bulkrax_exporter.rb
352
359
  - db/migrate/20210806044408_remove_unused_last_error.rb
353
360
  - db/migrate/20210806065737_increase_text_sizes.rb
361
+ - db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb
362
+ - db/migrate/20211203195233_rename_children_counters_to_relationships.rb
363
+ - db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb
364
+ - db/migrate/20220118001339_add_import_attempts_to_entries.rb
365
+ - db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
354
366
  - lib/bulkrax.rb
355
367
  - lib/bulkrax/engine.rb
356
368
  - lib/bulkrax/version.rb
@@ -366,7 +378,7 @@ homepage: https://github.com/samvera-labs/bulkrax
366
378
  licenses:
367
379
  - Apache-2.0
368
380
  metadata: {}
369
- post_install_message:
381
+ post_install_message:
370
382
  rdoc_options: []
371
383
  require_paths:
372
384
  - lib
@@ -381,8 +393,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
381
393
  - !ruby/object:Gem::Version
382
394
  version: '0'
383
395
  requirements: []
384
- rubygems_version: 3.1.2
385
- signing_key:
396
+ rubygems_version: 3.1.4
397
+ signing_key:
386
398
  specification_version: 4
387
- summary: Summary of Bulkrax.
399
+ summary: Import and export tool for Hyrax and Hyku
388
400
  test_files: []
@@ -1,128 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Bulkrax
4
- class ChildWorksError < RuntimeError; end
5
- class ChildRelationshipsJob < ApplicationJob
6
- queue_as :import
7
-
8
- def perform(*args)
9
- @args = args
10
-
11
- if entry.factory_class == Collection
12
- collection_membership
13
- else
14
- work_membership
15
- end
16
- # Not all of the Works/Collections exist yet; reschedule
17
- rescue Bulkrax::ChildWorksError
18
- reschedule(args[0], args[1], args[2])
19
- end
20
-
21
- def collection_membership
22
- # add collection to works
23
- member_of_collection = []
24
- child_works_hash.each { |k, v| member_of_collection << k if v[:class_name] != 'Collection' }
25
- member_of_collection.each { |work| work_child_collection_parent(work) }
26
-
27
- # add collections to collection
28
- members_collections = []
29
- child_works_hash.each { |k, v| members_collections << k if v[:class_name] == 'Collection' }
30
- collection_parent_collection_child(members_collections) if members_collections.present?
31
- end
32
-
33
- def work_membership
34
- # add works to work
35
- # reject any Collections, they can't be children of Works
36
- members_works = []
37
- # reject any Collections, they can't be children of Works
38
- child_works_hash.each { |k, v| members_works << k if v[:class_name] != 'Collection' }
39
- if members_works.length < child_entries.length # rubocop:disable Style/IfUnlessModifier
40
- Rails.logger.warn("Cannot add collections as children of works: #{(@child_entries.length - members_works.length)} collections were discarded for parent entry #{@entry.id} (of #{@child_entries.length})")
41
- end
42
- work_parent_work_child(members_works) if members_works.present?
43
- end
44
-
45
- def entry
46
- @entry ||= Bulkrax::Entry.find(@args[0])
47
- end
48
-
49
- def child_entries
50
- @child_entries ||= @args[1].map { |e| Bulkrax::Entry.find(e) }
51
- end
52
-
53
- def child_works_hash
54
- @child_works_hash ||= child_entries.each_with_object({}) do |child_entry, hash|
55
- work = child_entry.factory.find
56
- # If we can't find the Work/Collection, raise a custom error
57
- raise ChildWorksError if work.blank?
58
- hash[work.id] = { class_name: work.class.to_s, entry.parser.source_identifier => child_entry.identifier }
59
- end
60
- end
61
-
62
- def importer_run_id
63
- @args[2]
64
- end
65
-
66
- def user
67
- @user ||= entry.importerexporter.user
68
- end
69
-
70
- private
71
-
72
- # rubocop:disable Rails/SkipsModelValidations
73
- # Work-Collection membership is added to the child as member_of_collection_ids
74
- # This is adding the reverse relationship, from the child to the parent
75
- def work_child_collection_parent(work_id)
76
- attrs = { id: work_id, collections: [{ id: entry&.factory&.find&.id }] }
77
- Bulkrax::ObjectFactory.new(attributes: attrs,
78
- source_identifier_value: child_works_hash[work_id][entry.parser.source_identifier],
79
- work_identifier: entry.parser.work_identifier,
80
- replace_files: false,
81
- user: user,
82
- klass: child_works_hash[work_id][:class_name].constantize).run
83
- ImporterRun.find(importer_run_id).increment!(:processed_children)
84
- rescue StandardError => e
85
- entry.status_info(e)
86
- ImporterRun.find(importer_run_id).increment!(:failed_children)
87
- end
88
-
89
- # Collection-Collection membership is added to the parent as member_ids
90
- def collection_parent_collection_child(member_ids)
91
- attrs = { id: entry&.factory&.find&.id, children: member_ids }
92
- Bulkrax::ObjectFactory.new(attributes: attrs,
93
- source_identifier_value: entry.identifier,
94
- work_identifier: entry.parser.work_identifier,
95
- replace_files: false,
96
- user: user,
97
- klass: entry.factory_class).run
98
- ImporterRun.find(importer_run_id).increment!(:processed_children)
99
- rescue StandardError => e
100
- entry.status_info(e)
101
- ImporterRun.find(importer_run_id).increment!(:failed_children)
102
- end
103
-
104
- # Work-Work membership is added to the parent as member_ids
105
- def work_parent_work_child(member_ids)
106
- # build work_members_attributes
107
- attrs = { id: entry&.factory&.find&.id,
108
- work_members_attributes: member_ids.each.with_index.each_with_object({}) do |(member, index), ids|
109
- ids[index] = { id: member }
110
- end }
111
- Bulkrax::ObjectFactory.new(attributes: attrs,
112
- source_identifier_value: entry.identifier,
113
- work_identifier: entry.parser.work_identifier,
114
- replace_files: false,
115
- user: user,
116
- klass: entry.factory_class).run
117
- ImporterRun.find(importer_run_id).increment!(:processed_children)
118
- rescue StandardError => e
119
- entry.status_info(e)
120
- ImporterRun.find(importer_run_id).increment!(:failed_children)
121
- end
122
- # rubocop:enable Rails/SkipsModelValidations
123
-
124
- def reschedule(entry_id, child_entry_ids, importer_run_id)
125
- ChildRelationshipsJob.set(wait: 10.minutes).perform_later(entry_id, child_entry_ids, importer_run_id)
126
- end
127
- end
128
- end