bulkrax 1.0.2 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +12 -4
  4. data/app/controllers/bulkrax/importers_controller.rb +23 -17
  5. data/app/factories/bulkrax/object_factory.rb +84 -63
  6. data/app/jobs/bulkrax/create_relationships_job.rb +156 -0
  7. data/app/jobs/bulkrax/delete_work_job.rb +6 -2
  8. data/app/jobs/bulkrax/export_work_job.rb +3 -1
  9. data/app/jobs/bulkrax/exporter_job.rb +1 -0
  10. data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +4 -2
  11. data/app/jobs/bulkrax/import_file_set_job.rb +69 -0
  12. data/app/jobs/bulkrax/import_work_job.rb +2 -0
  13. data/app/jobs/bulkrax/importer_job.rb +18 -1
  14. data/app/matchers/bulkrax/application_matcher.rb +5 -5
  15. data/app/models/bulkrax/csv_collection_entry.rb +8 -6
  16. data/app/models/bulkrax/csv_entry.rb +132 -65
  17. data/app/models/bulkrax/csv_file_set_entry.rb +26 -0
  18. data/app/models/bulkrax/entry.rb +19 -8
  19. data/app/models/bulkrax/exporter.rb +12 -5
  20. data/app/models/bulkrax/importer.rb +24 -5
  21. data/app/models/bulkrax/oai_entry.rb +5 -1
  22. data/app/models/bulkrax/rdf_entry.rb +16 -7
  23. data/app/models/bulkrax/xml_entry.rb +4 -0
  24. data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +39 -0
  25. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  26. data/app/models/concerns/bulkrax/has_matchers.rb +44 -13
  27. data/app/models/concerns/bulkrax/import_behavior.rb +40 -5
  28. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +23 -2
  29. data/app/models/concerns/bulkrax/status_info.rb +4 -4
  30. data/app/parsers/bulkrax/application_parser.rb +67 -84
  31. data/app/parsers/bulkrax/bagit_parser.rb +13 -4
  32. data/app/parsers/bulkrax/csv_parser.rb +170 -64
  33. data/app/parsers/bulkrax/oai_dc_parser.rb +6 -3
  34. data/app/parsers/bulkrax/xml_parser.rb +5 -0
  35. data/app/views/bulkrax/exporters/_form.html.erb +1 -1
  36. data/app/views/bulkrax/exporters/show.html.erb +2 -1
  37. data/app/views/bulkrax/importers/index.html.erb +17 -17
  38. data/app/views/bulkrax/importers/show.html.erb +52 -6
  39. data/config/locales/bulkrax.en.yml +1 -0
  40. data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
  41. data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
  42. data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
  43. data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +7 -0
  44. data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +5 -0
  45. data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +6 -0
  46. data/lib/bulkrax/engine.rb +1 -1
  47. data/lib/bulkrax/version.rb +1 -1
  48. data/lib/bulkrax.rb +9 -17
  49. data/lib/generators/bulkrax/templates/bin/importer +17 -11
  50. data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
  51. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
  52. metadata +22 -10
  53. data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
data/lib/bulkrax.rb CHANGED
@@ -5,12 +5,14 @@ require 'active_support/all'
5
5
 
6
6
  module Bulkrax
7
7
  class << self
8
+ # TODO: remove collection_field_mapping when releasing v2
8
9
  mattr_accessor :parsers,
9
10
  :default_work_type,
10
11
  :default_field_mapping,
11
12
  :collection_field_mapping,
12
13
  :fill_in_blank_source_identifiers,
13
- :parent_child_field_mapping,
14
+ :related_children_field_mapping,
15
+ :related_parents_field_mapping,
14
16
  :reserved_properties,
15
17
  :field_mappings,
16
18
  :import_path,
@@ -33,26 +35,16 @@ module Bulkrax
33
35
  self.removed_image_path = Bulkrax::Engine.root.join('spec', 'fixtures', 'removed.png').to_s
34
36
  self.server_name = 'bulkrax@example.com'
35
37
 
36
- # @todo, merge parent_child_field_mapping and collection_field_mapping into field_mappings,
37
- # or make them settable per import some other way.
38
-
39
- # Field_mapping for establishing a parent-child relationship (FROM parent TO child)
40
- # This can be a Collection to Work, or Work to Work relationship
41
- # This value IS NOT used for OAI, so setting the OAI Entries here will have no effect
42
- # The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
43
- # Example:
44
- # {
45
- # 'Bulkrax::RdfEntry' => 'http://opaquenamespace.org/ns/contents',
46
- # 'Bulkrax::CsvEntry' => 'children'
47
- # }
48
- # By default no parent-child relationships are added
49
- self.parent_child_field_mapping = {}
50
-
38
+ # NOTE: Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.
39
+ # Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.
40
+ # TODO: remove collection_field_mapping when releasing v2
51
41
  # Field_mapping for establishing a collection relationship (FROM work TO collection)
52
42
  # This value IS NOT used for OAI, so setting the OAI Entries here will have no effect
53
43
  # The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
54
44
  # The default value for CSV is collection
55
- self.collection_field_mapping = {}
45
+ self.collection_field_mapping = {
46
+ 'Bulkrax::CsvEntry' => 'collection'
47
+ }
56
48
 
57
49
  # Hash of Generic field_mappings for use in the view
58
50
  # There must be one field_mappings hash per view partial
@@ -6,18 +6,11 @@ require_relative '../config/environment'
6
6
  require 'slop'
7
7
 
8
8
  def main(opts = {})
9
- if opts[:importer_id].blank? && invalid?(opts)
10
- puts 'Missing required parameters'
11
- help
12
- end
13
-
14
- if opts[:auth_token].blank?
15
- puts 'Missing Authentication Token --auth_token'
16
- exit
17
- end
9
+ check_required_params
18
10
 
19
11
  update = opts[:importer_id].present?
20
- url = build_url(opts.delete(:importer_id), opts.delete(:url))
12
+ port = opts[:port].presence
13
+ url = build_url(opts.delete(:importer_id), opts.delete(:url), port)
21
14
 
22
15
  headers = { 'Content-Type' => 'application/json' }
23
16
  headers['Authorization'] = "Token: #{opts.delete(:auth_token)}"
@@ -43,6 +36,18 @@ def main(opts = {})
43
36
  puts "#{response.status} - #{response.body.truncate(200)}"
44
37
  end
45
38
 
39
+ def check_required_params
40
+ if opts[:importer_id].blank? && invalid?(opts)
41
+ puts 'Missing required parameters'
42
+ help
43
+ end
44
+
45
+ if opts[:auth_token].blank? # rubocop:disable Style/GuardClause
46
+ puts 'Missing Authentication Token --auth_token'
47
+ exit
48
+ end
49
+ end
50
+
46
51
  def invalid?(opts)
47
52
  required_params.each do |p|
48
53
  return true if opts[p.to_sym].blank?
@@ -74,11 +79,12 @@ def build_params(opts = {})
74
79
  return params.compact
75
80
  end
76
81
 
77
- def build_url(importer_id, url)
82
+ def build_url(importer_id, url, port = nil)
78
83
  if url.nil?
79
84
  protocol = Rails.application.config.force_ssl ? 'https://' : 'http://'
80
85
  host = Rails.application.config.action_mailer.default_url_options[:host]
81
86
  url = "#{protocol}#{host}"
87
+ url = "#{url}:#{port}" if port
82
88
  end
83
89
  path = Bulkrax::Engine.routes.url_helpers.polymorphic_path(Bulkrax::Importer)
84
90
  url = File.join(url, path)
@@ -81,4 +81,6 @@ bulkrax:
81
81
  - "Update and Import (importer has not yet been run)"
82
82
  auth_token:
83
83
  definition: 'Authentication token. Required for JSON requests only.'
84
- required: true
84
+ required: true
85
+ port:
86
+ definition: 'Port to use in http request. Defaults to 80. May need to set to 3000 when running in the development environment'
@@ -19,18 +19,8 @@ Bulkrax.setup do |config|
19
19
  # Server name for oai request header
20
20
  # config.server_name = 'my_server@name.com'
21
21
 
22
- # Field_mapping for establishing a parent-child relationship (FROM parent TO child)
23
- # This can be a Collection to Work, or Work to Work relationship
24
- # This value IS NOT used for OAI, so setting the OAI Entries here will have no effect
25
- # The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
26
- # Example:
27
- # {
28
- # 'Bulkrax::RdfEntry' => 'http://opaquenamespace.org/ns/contents',
29
- # 'Bulkrax::CsvEntry' => 'children'
30
- # }
31
- # By default no parent-child relationships are added
32
- # config.parent_child_field_mapping = { }
33
-
22
+ # NOTE: Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.
23
+ # Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.
34
24
  # Field_mapping for establishing a collection relationship (FROM work TO collection)
35
25
  # This value IS NOT used for OAI, so setting the OAI parser here will have no effect
36
26
  # The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry'
@@ -48,6 +38,11 @@ Bulkrax.setup do |config|
48
38
  # e.g. to exclude date
49
39
  # config.field_mappings["Bulkrax::OaiDcParser"]["date"] = { from: ["date"], excluded: true }
50
40
  #
41
+ # e.g. to import parent-child relationships
42
+ # config.field_mappings['Bulkrax::CsvParser']['parents'] = { from: ['parents'], related_parents_field_mapping: true }
43
+ # config.field_mappings['Bulkrax::CsvParser']['children'] = { from: ['children'], related_children_field_mapping: true }
44
+ # (For more info on importing relationships, see Bulkrax Wiki: https://github.com/samvera-labs/bulkrax/wiki/Configuring-Bulkrax#parent-child-relationship-field-mappings)
45
+ #
51
46
  # # e.g. to add the required source_identifier field
52
47
  # # config.field_mappings["Bulkrax::CsvParser"]["source_id"] = { from: ["old_source_id"], source_identifier: true }
53
48
  # If you want Bulkrax to fill in source_identifiers for you, see below
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulkrax
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-28 00:00:00.000000000 Z
11
+ date: 2022-02-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -224,7 +224,11 @@ dependencies:
224
224
  - - ">="
225
225
  - !ruby/object:Gem::Version
226
226
  version: '0'
227
- description: Description of Bulkrax.
227
+ description: Bulkrax is a batteries included importer for Samvera applications. It
228
+ currently includes support for OAI-PMH (DC and Qualified DC) and CSV out of the
229
+ box. It is also designed to be extensible, allowing you to easily add new importers
230
+ in to your application or to include them with other gems. Bulkrax provides a full
231
+ admin interface including creating, editing, scheduling and reviewing imports.
228
232
  email:
229
233
  - rob@notch8.com
230
234
  executables: []
@@ -255,12 +259,13 @@ files:
255
259
  - app/helpers/bulkrax/importers_helper.rb
256
260
  - app/helpers/bulkrax/validation_helper.rb
257
261
  - app/jobs/bulkrax/application_job.rb
258
- - app/jobs/bulkrax/child_relationships_job.rb
262
+ - app/jobs/bulkrax/create_relationships_job.rb
259
263
  - app/jobs/bulkrax/delete_work_job.rb
260
264
  - app/jobs/bulkrax/download_cloud_file_job.rb
261
265
  - app/jobs/bulkrax/export_work_job.rb
262
266
  - app/jobs/bulkrax/exporter_job.rb
263
- - app/jobs/bulkrax/import_work_collection_job.rb
267
+ - app/jobs/bulkrax/import_collection_job.rb
268
+ - app/jobs/bulkrax/import_file_set_job.rb
264
269
  - app/jobs/bulkrax/import_work_job.rb
265
270
  - app/jobs/bulkrax/importer_job.rb
266
271
  - app/mailers/bulkrax/application_mailer.rb
@@ -271,6 +276,7 @@ files:
271
276
  - app/models/bulkrax/application_record.rb
272
277
  - app/models/bulkrax/csv_collection_entry.rb
273
278
  - app/models/bulkrax/csv_entry.rb
279
+ - app/models/bulkrax/csv_file_set_entry.rb
274
280
  - app/models/bulkrax/entry.rb
275
281
  - app/models/bulkrax/exporter.rb
276
282
  - app/models/bulkrax/exporter_run.rb
@@ -286,6 +292,7 @@ files:
286
292
  - app/models/bulkrax/status.rb
287
293
  - app/models/bulkrax/xml_entry.rb
288
294
  - app/models/concerns/bulkrax/download_behavior.rb
295
+ - app/models/concerns/bulkrax/dynamic_record_lookup.rb
289
296
  - app/models/concerns/bulkrax/errored_entries.rb
290
297
  - app/models/concerns/bulkrax/export_behavior.rb
291
298
  - app/models/concerns/bulkrax/file_factory.rb
@@ -351,6 +358,11 @@ files:
351
358
  - db/migrate/20201117220007_add_workflow_status_to_bulkrax_exporter.rb
352
359
  - db/migrate/20210806044408_remove_unused_last_error.rb
353
360
  - db/migrate/20210806065737_increase_text_sizes.rb
361
+ - db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb
362
+ - db/migrate/20211203195233_rename_children_counters_to_relationships.rb
363
+ - db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb
364
+ - db/migrate/20220118001339_add_import_attempts_to_entries.rb
365
+ - db/migrate/20220119213325_add_work_counters_to_importer_runs.rb
354
366
  - lib/bulkrax.rb
355
367
  - lib/bulkrax/engine.rb
356
368
  - lib/bulkrax/version.rb
@@ -366,7 +378,7 @@ homepage: https://github.com/samvera-labs/bulkrax
366
378
  licenses:
367
379
  - Apache-2.0
368
380
  metadata: {}
369
- post_install_message:
381
+ post_install_message:
370
382
  rdoc_options: []
371
383
  require_paths:
372
384
  - lib
@@ -381,8 +393,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
381
393
  - !ruby/object:Gem::Version
382
394
  version: '0'
383
395
  requirements: []
384
- rubygems_version: 3.1.2
385
- signing_key:
396
+ rubygems_version: 3.1.4
397
+ signing_key:
386
398
  specification_version: 4
387
- summary: Summary of Bulkrax.
399
+ summary: Import and export tool for Hyrax and Hyku
388
400
  test_files: []
@@ -1,128 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Bulkrax
4
- class ChildWorksError < RuntimeError; end
5
- class ChildRelationshipsJob < ApplicationJob
6
- queue_as :import
7
-
8
- def perform(*args)
9
- @args = args
10
-
11
- if entry.factory_class == Collection
12
- collection_membership
13
- else
14
- work_membership
15
- end
16
- # Not all of the Works/Collections exist yet; reschedule
17
- rescue Bulkrax::ChildWorksError
18
- reschedule(args[0], args[1], args[2])
19
- end
20
-
21
- def collection_membership
22
- # add collection to works
23
- member_of_collection = []
24
- child_works_hash.each { |k, v| member_of_collection << k if v[:class_name] != 'Collection' }
25
- member_of_collection.each { |work| work_child_collection_parent(work) }
26
-
27
- # add collections to collection
28
- members_collections = []
29
- child_works_hash.each { |k, v| members_collections << k if v[:class_name] == 'Collection' }
30
- collection_parent_collection_child(members_collections) if members_collections.present?
31
- end
32
-
33
- def work_membership
34
- # add works to work
35
- # reject any Collections, they can't be children of Works
36
- members_works = []
37
- # reject any Collections, they can't be children of Works
38
- child_works_hash.each { |k, v| members_works << k if v[:class_name] != 'Collection' }
39
- if members_works.length < child_entries.length # rubocop:disable Style/IfUnlessModifier
40
- Rails.logger.warn("Cannot add collections as children of works: #{(@child_entries.length - members_works.length)} collections were discarded for parent entry #{@entry.id} (of #{@child_entries.length})")
41
- end
42
- work_parent_work_child(members_works) if members_works.present?
43
- end
44
-
45
- def entry
46
- @entry ||= Bulkrax::Entry.find(@args[0])
47
- end
48
-
49
- def child_entries
50
- @child_entries ||= @args[1].map { |e| Bulkrax::Entry.find(e) }
51
- end
52
-
53
- def child_works_hash
54
- @child_works_hash ||= child_entries.each_with_object({}) do |child_entry, hash|
55
- work = child_entry.factory.find
56
- # If we can't find the Work/Collection, raise a custom error
57
- raise ChildWorksError if work.blank?
58
- hash[work.id] = { class_name: work.class.to_s, entry.parser.source_identifier => child_entry.identifier }
59
- end
60
- end
61
-
62
- def importer_run_id
63
- @args[2]
64
- end
65
-
66
- def user
67
- @user ||= entry.importerexporter.user
68
- end
69
-
70
- private
71
-
72
- # rubocop:disable Rails/SkipsModelValidations
73
- # Work-Collection membership is added to the child as member_of_collection_ids
74
- # This is adding the reverse relationship, from the child to the parent
75
- def work_child_collection_parent(work_id)
76
- attrs = { id: work_id, collections: [{ id: entry&.factory&.find&.id }] }
77
- Bulkrax::ObjectFactory.new(attributes: attrs,
78
- source_identifier_value: child_works_hash[work_id][entry.parser.source_identifier],
79
- work_identifier: entry.parser.work_identifier,
80
- replace_files: false,
81
- user: user,
82
- klass: child_works_hash[work_id][:class_name].constantize).run
83
- ImporterRun.find(importer_run_id).increment!(:processed_children)
84
- rescue StandardError => e
85
- entry.status_info(e)
86
- ImporterRun.find(importer_run_id).increment!(:failed_children)
87
- end
88
-
89
- # Collection-Collection membership is added to the parent as member_ids
90
- def collection_parent_collection_child(member_ids)
91
- attrs = { id: entry&.factory&.find&.id, children: member_ids }
92
- Bulkrax::ObjectFactory.new(attributes: attrs,
93
- source_identifier_value: entry.identifier,
94
- work_identifier: entry.parser.work_identifier,
95
- replace_files: false,
96
- user: user,
97
- klass: entry.factory_class).run
98
- ImporterRun.find(importer_run_id).increment!(:processed_children)
99
- rescue StandardError => e
100
- entry.status_info(e)
101
- ImporterRun.find(importer_run_id).increment!(:failed_children)
102
- end
103
-
104
- # Work-Work membership is added to the parent as member_ids
105
- def work_parent_work_child(member_ids)
106
- # build work_members_attributes
107
- attrs = { id: entry&.factory&.find&.id,
108
- work_members_attributes: member_ids.each.with_index.each_with_object({}) do |(member, index), ids|
109
- ids[index] = { id: member }
110
- end }
111
- Bulkrax::ObjectFactory.new(attributes: attrs,
112
- source_identifier_value: entry.identifier,
113
- work_identifier: entry.parser.work_identifier,
114
- replace_files: false,
115
- user: user,
116
- klass: entry.factory_class).run
117
- ImporterRun.find(importer_run_id).increment!(:processed_children)
118
- rescue StandardError => e
119
- entry.status_info(e)
120
- ImporterRun.find(importer_run_id).increment!(:failed_children)
121
- end
122
- # rubocop:enable Rails/SkipsModelValidations
123
-
124
- def reschedule(entry_id, child_entry_ids, importer_run_id)
125
- ChildRelationshipsJob.set(wait: 10.minutes).perform_later(entry_id, child_entry_ids, importer_run_id)
126
- end
127
- end
128
- end