fedora-migrate 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56):
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +128 -0
  3. data/.rubocop_todo.yml +9 -0
  4. data/.travis.yml +3 -2
  5. data/Gemfile +2 -0
  6. data/fedora-migrate.gemspec +1 -0
  7. data/lib/fedora-migrate.rb +3 -5
  8. data/lib/fedora_migrate/content_mover.rb +7 -11
  9. data/lib/fedora_migrate/datastream_mover.rb +21 -24
  10. data/lib/fedora_migrate/datastream_verification.rb +15 -17
  11. data/lib/fedora_migrate/dates_mover.rb +0 -2
  12. data/lib/fedora_migrate/file_configurator.rb +3 -5
  13. data/lib/fedora_migrate/hooks.rb +0 -2
  14. data/lib/fedora_migrate/logger.rb +8 -11
  15. data/lib/fedora_migrate/migration_options.rb +6 -7
  16. data/lib/fedora_migrate/migration_report.rb +18 -22
  17. data/lib/fedora_migrate/mover.rb +4 -6
  18. data/lib/fedora_migrate/object_mover.rb +28 -34
  19. data/lib/fedora_migrate/permissions.rb +8 -10
  20. data/lib/fedora_migrate/permissions_mover.rb +7 -11
  21. data/lib/fedora_migrate/rdf_datastream_mover.rb +1 -2
  22. data/lib/fedora_migrate/rels_ext_datastream_mover.rb +29 -31
  23. data/lib/fedora_migrate/repository_migrator.rb +40 -43
  24. data/lib/fedora_migrate/rights_metadata.rb +109 -114
  25. data/lib/fedora_migrate/rubydora_connection.rb +4 -5
  26. data/lib/fedora_migrate/target_constructor.rb +19 -22
  27. data/lib/fedora_migrate/version.rb +1 -1
  28. data/spec/integration/content_versions_spec.rb +12 -14
  29. data/spec/integration/custom_target_spec.rb +44 -0
  30. data/spec/integration/fedora3_interface_spec.rb +7 -11
  31. data/spec/integration/missing_relationships_spec.rb +8 -10
  32. data/spec/integration/object_migration_spec.rb +20 -31
  33. data/spec/integration/permission_migration_spec.rb +4 -6
  34. data/spec/integration/rdf_migration_spec.rb +3 -6
  35. data/spec/integration/relationship_migration_spec.rb +6 -7
  36. data/spec/integration/repository_migration_spec.rb +14 -19
  37. data/spec/integration/versions_spec.rb +6 -8
  38. data/spec/spec_helper.rb +3 -3
  39. data/spec/support/example_model.rb +23 -25
  40. data/spec/unit/content_mover_spec.rb +21 -23
  41. data/spec/unit/datastream_mover_spec.rb +10 -14
  42. data/spec/unit/datastream_verification_spec.rb +7 -9
  43. data/spec/unit/dates_mover_spec.rb +3 -4
  44. data/spec/unit/fedora_migrate_spec.rb +2 -6
  45. data/spec/unit/file_configurator_spec.rb +4 -8
  46. data/spec/unit/migration_options_spec.rb +1 -3
  47. data/spec/unit/migration_report_spec.rb +5 -6
  48. data/spec/unit/mover_spec.rb +10 -12
  49. data/spec/unit/object_mover_spec.rb +9 -16
  50. data/spec/unit/permissions_mover_spec.rb +8 -11
  51. data/spec/unit/rels_ext_datastream_mover_spec.rb +4 -6
  52. data/spec/unit/repository_migrator_spec.rb +12 -14
  53. data/spec/unit/rubydora_connection_spec.rb +3 -5
  54. data/spec/unit/target_constructor_spec.rb +10 -16
  55. data/tasks/dev.rake +9 -1
  56. metadata +21 -3
@@ -1,10 +1,9 @@
1
1
  module FedoraMigrate
2
2
  module MigrationOptions
3
-
4
3
  attr_accessor :options, :conversions
5
4
 
6
5
  def conversion_options
7
- self.conversions = options.nil? ? [] : [options[:convert]].flatten
6
+ self.conversions = options.nil? ? [] : [options[:convert]].flatten
8
7
  end
9
8
 
10
9
  def forced?
@@ -23,12 +22,12 @@ module FedoraMigrate
23
22
  return [] if options.nil?
24
23
  options.fetch(:blacklist, [])
25
24
  end
26
-
25
+
27
26
  private
28
-
29
- def option_true?(name)
30
- !!(options && options[name])
31
- end
32
27
 
28
+ def option_true?(name)
29
+ return false unless options
30
+ options.fetch(name, false)
31
+ end
33
32
  end
34
33
  end
@@ -1,13 +1,12 @@
1
1
  module FedoraMigrate
2
2
  class MigrationReport
3
-
4
3
  attr_accessor :path, :results
5
4
 
6
5
  DEFAULT_PATH = "migration_report".freeze
7
6
 
8
- def initialize path=nil
7
+ def initialize(path = nil)
9
8
  @path = path.nil? ? DEFAULT_PATH : path
10
- FileUtils::mkdir_p(@path)
9
+ FileUtils.mkdir_p(@path)
11
10
  reload
12
11
  end
13
12
 
@@ -15,9 +14,7 @@ module FedoraMigrate
15
14
  @results = load_results_from_directory
16
15
  end
17
16
 
18
- def empty?
19
- results.empty?
20
- end
17
+ delegate :empty?, to: :results
21
18
 
22
19
  def failed_objects
23
20
  results.keys.map { |k| k unless results[k]["status"] }.compact
@@ -31,36 +28,35 @@ module FedoraMigrate
31
28
  results.keys.count
32
29
  end
33
30
 
34
- def report_failures output = String.new
31
+ def report_failures(output = '')
35
32
  failed_objects.each do |k|
36
- output << "#{k}:\n\tobject: #{results[k]["object"]}\n\trelationships: #{results[k]["relationships"]}\n\n"
33
+ output << "#{k}:\n\tobject: #{results[k]['object']}\n\trelationships: #{results[k]['relationships']}\n\n"
37
34
  end
38
35
  output
39
36
  end
40
37
 
41
38
  # Receives an individual report and writes it to the MigrationReport directory
42
- def save pid, report
43
- file = File.join(path,file_from_pid(pid))
39
+ def save(pid, report)
40
+ file = File.join(path, file_from_pid(pid))
44
41
  json = JSON.load(report.to_json)
45
42
  File.write(file, JSON.pretty_generate(json))
46
43
  end
47
44
 
48
45
  private
49
46
 
50
- def load_results_from_directory assembled = Hash.new
51
- Dir.glob(File.join(path,"*.json")).each do |file|
52
- assembled[pid_from_file(file)] = JSON.parse(File.read(file))
47
+ def load_results_from_directory(assembled = {})
48
+ Dir.glob(File.join(path, "*.json")).each do |file|
49
+ assembled[pid_from_file(file)] = JSON.parse(File.read(file))
50
+ end
51
+ assembled
53
52
  end
54
- assembled
55
- end
56
-
57
- def pid_from_file file
58
- File.basename(file, ".*").gsub(/_/,":")
59
- end
60
53
 
61
- def file_from_pid pid
62
- pid.gsub(/:/,"_")+".json"
63
- end
54
+ def pid_from_file(file)
55
+ File.basename(file, ".*").tr('_', ":")
56
+ end
64
57
 
58
+ def file_from_pid(pid)
59
+ pid.tr(':', "_") + ".json"
60
+ end
65
61
  end
66
62
  end
@@ -1,12 +1,11 @@
1
1
  module FedoraMigrate
2
2
  class Mover
3
-
4
3
  include MigrationOptions
5
4
  include Hooks
6
5
 
7
6
  attr_accessor :target, :source, :report
8
7
 
9
- def initialize *args
8
+ def initialize(*args)
10
9
  @source = args[0]
11
10
  @target = args[1]
12
11
  @options = args[2]
@@ -37,17 +36,16 @@ module FedoraMigrate
37
36
  end
38
37
  end
39
38
 
40
- def id_component object=nil
39
+ def id_component(object = nil)
41
40
  object ||= source
42
41
  raise FedoraMigrate::Errors::MigrationError, "can't get the id component without an object" if object.nil?
43
42
  self.class.id_component(object)
44
43
  end
45
44
 
46
- def self.id_component object
47
- return object.pid.split(/:/).last if object.kind_of?(Rubydora::DigitalObject)
45
+ def self.id_component(object)
46
+ return object.pid.split(/:/).last if object.is_a?(Rubydora::DigitalObject)
48
47
  return object.to_s.split(/:/).last if object.respond_to?(:to_s)
49
48
  nil
50
49
  end
51
-
52
50
  end
53
51
  end
@@ -1,6 +1,5 @@
1
1
  module FedoraMigrate
2
2
  class ObjectMover < Mover
3
-
4
3
  RIGHTS_DATASTREAM = "rightsMetadata".freeze
5
4
 
6
5
  ContentDatastreamReport = Struct.new(:ds, :versions)
@@ -38,52 +37,47 @@ module FedoraMigrate
38
37
  save
39
38
  end
40
39
 
40
+ def target
41
+ @target ||= FedoraMigrate::TargetConstructor.new(source).build
42
+ end
43
+
41
44
  private
42
45
 
43
- def migrate_datastreams
44
- migrate_content_datastreams
45
- migrate_permissions
46
- migrate_dates
47
- end
46
+ def migrate_datastreams
47
+ migrate_content_datastreams
48
+ migrate_permissions
49
+ migrate_dates
50
+ end
48
51
 
49
- # We have to call save before migrating content datastreams, otherwise versions aren't recorded
50
- # TODO: this will fail if required fields are defined in a descMetadata datastream that is not
51
- # converted to RDF (issue #8)
52
- def migrate_content_datastreams
53
- save
54
- target.attached_files.keys.each do |ds|
55
- mover = FedoraMigrate::DatastreamMover.new(source.datastreams[ds.to_s], target.attached_files[ds.to_s], options)
56
- report.content_datastreams << ContentDatastreamReport.new(ds, mover.migrate)
52
+ # We have to call save before migrating content datastreams, otherwise versions aren't recorded
53
+ # TODO: this will fail if required fields are defined in a descMetadata datastream that is not
54
+ # converted to RDF (issue #8)
55
+ def migrate_content_datastreams
56
+ save
57
+ target.attached_files.keys.each do |ds|
58
+ mover = FedoraMigrate::DatastreamMover.new(source.datastreams[ds.to_s], target.attached_files[ds.to_s], options)
59
+ report.content_datastreams << ContentDatastreamReport.new(ds, mover.migrate)
60
+ end
57
61
  end
58
- end
59
62
 
60
- def convert_rdf_datastream ds
61
- if source.datastreams.key?(ds)
63
+ def convert_rdf_datastream(ds)
64
+ return unless source.datastreams.key?(ds)
62
65
  mover = FedoraMigrate::RDFDatastreamMover.new(datastream_content(ds), target)
63
66
  report.rdf_datastreams << RDFDatastreamReport.new(ds, mover.migrate)
64
67
  end
65
- end
66
68
 
67
- def datastream_content(dsid)
68
- source.datastreams[dsid.to_s]
69
- end
69
+ def datastream_content(dsid)
70
+ source.datastreams[dsid.to_s]
71
+ end
70
72
 
71
- def migrate_permissions
72
- if source.datastreams.keys.include?(RIGHTS_DATASTREAM) && target.respond_to?(:permissions)
73
+ def migrate_permissions
74
+ return unless source.datastreams.keys.include?(RIGHTS_DATASTREAM) && target.respond_to?(:permissions)
73
75
  mover = FedoraMigrate::PermissionsMover.new(source.datastreams[RIGHTS_DATASTREAM], target)
74
76
  report.permissions = mover.migrate
75
77
  end
76
- end
77
-
78
- def migrate_dates
79
- report.dates = FedoraMigrate::DatesMover.new(source, target).migrate
80
- end
81
-
82
- def create_target_model
83
- builder = FedoraMigrate::TargetConstructor.new(source.models).build
84
- raise FedoraMigrate::Errors::MigrationError, "No qualified targets found in #{source.pid}" if builder.target.nil?
85
- @target = builder.target.new(id: id_component)
86
- end
87
78
 
79
+ def migrate_dates
80
+ report.dates = FedoraMigrate::DatesMover.new(source, target).migrate
81
+ end
88
82
  end
89
83
  end
@@ -1,32 +1,30 @@
1
1
  module FedoraMigrate::Permissions
2
-
3
2
  # Taken from Hydra::AccessControls::Permissions under version 7.2.2
4
3
  #
5
4
  # We need the reader methods to get permissions from the Fedora3
6
5
  # rightsMetadata datastreams
7
6
 
8
7
  def read_groups
9
- rightsMetadata.groups.map {|k, v| k if v == 'read'}.compact
8
+ rightsMetadata.groups.map { |k, v| k if v == 'read' }.compact
10
9
  end
11
-
10
+
12
11
  def edit_groups
13
- rightsMetadata.groups.map {|k, v| k if v == 'edit'}.compact
12
+ rightsMetadata.groups.map { |k, v| k if v == 'edit' }.compact
14
13
  end
15
-
14
+
16
15
  def discover_groups
17
- rightsMetadata.groups.map {|k, v| k if v == 'discover'}.compact
16
+ rightsMetadata.groups.map { |k, v| k if v == 'discover' }.compact
18
17
  end
19
18
 
20
19
  def read_users
21
- rightsMetadata.users.map {|k, v| k if v == 'read'}.compact
20
+ rightsMetadata.users.map { |k, v| k if v == 'read' }.compact
22
21
  end
23
22
 
24
23
  def edit_users
25
- rightsMetadata.users.map {|k, v| k if v == 'edit'}.compact
24
+ rightsMetadata.users.map { |k, v| k if v == 'edit' }.compact
26
25
  end
27
26
 
28
27
  def discover_users
29
- rightsMetadata.users.map {|k, v| k if v == 'discover'}.compact
28
+ rightsMetadata.users.map { |k, v| k if v == 'discover' }.compact
30
29
  end
31
-
32
30
  end
@@ -1,20 +1,17 @@
1
1
  module FedoraMigrate
2
2
  class PermissionsMover < Mover
3
-
4
3
  include FedoraMigrate::Permissions
5
4
 
6
5
  attr_accessor :rightsMetadata
7
6
 
8
7
  def post_initialize
9
- if source.respond_to?(:content)
10
- @rightsMetadata = datastream_from_content
11
- end
8
+ @rightsMetadata = datastream_from_content if source.respond_to?(:content)
12
9
  end
13
10
 
14
11
  def migrate
15
12
  FedoraMigrate::Permissions.instance_methods.each do |permission|
16
- report << "#{permission} = #{self.send(permission)}"
17
- target.send(permission.to_s+"=", self.send(permission))
13
+ report << "#{permission} = #{send(permission)}"
14
+ target.send(permission.to_s + "=", send(permission))
18
15
  end
19
16
  save
20
17
  super
@@ -22,10 +19,9 @@ module FedoraMigrate
22
19
 
23
20
  private
24
21
 
25
- def datastream_from_content ds = FedoraMigrate::RightsMetadata.new
26
- ds.ng_xml = source.content
27
- ds
28
- end
29
-
22
+ def datastream_from_content(ds = FedoraMigrate::RightsMetadata.new)
23
+ ds.ng_xml = source.content
24
+ ds
25
+ end
30
26
  end
31
27
  end
@@ -2,7 +2,6 @@ require 'rchardet'
2
2
 
3
3
  module FedoraMigrate
4
4
  class RDFDatastreamMover < Mover
5
-
6
5
  def migrate
7
6
  before_rdf_datastream_migration
8
7
  migrate_rdf_triples
@@ -22,7 +21,7 @@ module FedoraMigrate
22
21
  end
23
22
 
24
23
  def updated_datastream_content
25
- correct_encoding(datastream_content).gsub(/<.+#{source.pid}>/,"<#{target.uri}>")
24
+ correct_encoding(datastream_content).gsub(/<.+#{source.pid}>/, "<#{target.uri}>")
26
25
  end
27
26
 
28
27
  def datastream_content
@@ -1,6 +1,5 @@
1
1
  module FedoraMigrate
2
2
  class RelsExtDatastreamMover < Mover
3
-
4
3
  RELS_EXT_DATASTREAM = "RELS-EXT".freeze
5
4
 
6
5
  def migrate
@@ -18,42 +17,41 @@ module FedoraMigrate
18
17
 
19
18
  private
20
19
 
21
- def migrate_statements
22
- statements.each do |statement|
23
- triple = [target.rdf_subject, migrate_predicate(statement.predicate), migrate_object(statement.object)]
24
- target.ldp_source.graph << triple
25
- report << triple.join("--")
20
+ def migrate_statements
21
+ statements.each do |statement|
22
+ triple = [target.rdf_subject, migrate_predicate(statement.predicate), migrate_object(statement.object)]
23
+ target.ldp_source.graph << triple
24
+ report << triple.join("--")
25
+ end
26
26
  end
27
- end
28
-
29
- def update_index
30
- target.reload
31
- target.update_index
32
- end
33
27
 
34
- def graph
35
- @graph ||= RDF::Graph.new { |g| g.from_rdfxml(source.datastreams[RELS_EXT_DATASTREAM].content) }
36
- end
28
+ def update_index
29
+ target.reload
30
+ target.update_index
31
+ end
37
32
 
38
- # Override this if any predicate transformation is needed
39
- def migrate_predicate(fc3_uri)
40
- fc3_uri
41
- end
33
+ def graph
34
+ @graph ||= RDF::Graph.new { |g| g.from_rdfxml(source.datastreams[RELS_EXT_DATASTREAM].content) }
35
+ end
42
36
 
43
- def migrate_object(fc3_uri)
44
- RDF::URI.new(ActiveFedora::Base.id_to_uri(id_component(fc3_uri)))
45
- end
37
+ # Override this if any predicate transformation is needed
38
+ def migrate_predicate(fc3_uri)
39
+ fc3_uri
40
+ end
46
41
 
47
- def has_missing_object?(statement)
48
- return false if ActiveFedora::Base.exists?(id_component(statement.object))
49
- report << "could not migrate relationship #{statement.predicate} because #{statement.object} doesn't exist in Fedora 4"
50
- true
51
- end
42
+ def migrate_object(fc3_uri)
43
+ RDF::URI.new(ActiveFedora::Base.id_to_uri(id_component(fc3_uri)))
44
+ end
52
45
 
53
- # All the graph statements except hasModel and those with missing objects
54
- def statements
55
- graph.statements.reject { |stmt| stmt.predicate == ActiveFedora::RDF::Fcrepo::Model.hasModel || has_missing_object?(stmt) }
56
- end
46
+ def missing_object?(statement)
47
+ return false if ActiveFedora::Base.exists?(id_component(statement.object))
48
+ report << "could not migrate relationship #{statement.predicate} because #{statement.object} doesn't exist in Fedora 4"
49
+ true
50
+ end
57
51
 
52
+ # All the graph statements except hasModel and those with missing objects
53
+ def statements
54
+ graph.statements.reject { |stmt| stmt.predicate == ActiveFedora::RDF::Fcrepo::Model.hasModel || missing_object?(stmt) }
55
+ end
58
56
  end
59
57
  end
@@ -1,17 +1,15 @@
1
1
  module FedoraMigrate
2
2
  class RepositoryMigrator
3
-
4
3
  include MigrationOptions
5
4
 
6
5
  attr_accessor :source_objects, :namespace, :report, :source, :result
7
6
 
8
7
  SingleObjectReport = Struct.new(:status, :object, :relationships)
9
8
 
10
- def initialize namespace = nil, options = {}
9
+ def initialize(namespace = nil, options = {})
11
10
  @namespace = namespace || repository_namespace
12
11
  @options = options
13
12
  @report = MigrationReport.new(@options.fetch(:report, nil))
14
- @source_objects = get_source_objects
15
13
  conversion_options
16
14
  end
17
15
 
@@ -36,7 +34,7 @@ module FedoraMigrate
36
34
  end
37
35
 
38
36
  def migrate_relationships
39
- return "Relationship migration halted because #{failures.to_s} objects didn't migrate successfully." if failures > 0 && not_forced?
37
+ return "Relationship migration halted because #{failures} objects didn't migrate successfully." if failures > 0 && not_forced?
40
38
  source_objects.each do |object|
41
39
  @source = object
42
40
  @result = find_or_create_single_object_report
@@ -44,8 +42,8 @@ module FedoraMigrate
44
42
  end
45
43
  end
46
44
 
47
- def get_source_objects
48
- FedoraMigrate.source.connection.search(nil).collect { |o| qualifying_object(o) }.compact
45
+ def source_objects
46
+ @source_objects ||= FedoraMigrate.source.connection.search(nil).collect { |o| qualifying_object(o) }.compact
49
47
  end
50
48
 
51
49
  def failures
@@ -54,48 +52,47 @@ module FedoraMigrate
54
52
 
55
53
  private
56
54
 
57
- def migrate_object
58
- result.object = FedoraMigrate::ObjectMover.new(source, nil, options).migrate
59
- result.status = true
60
- rescue StandardError => e
61
- result.object = e.inspect
62
- result.status = false
63
- ensure
64
- report.save(source.pid, result)
65
- end
66
-
67
- def migrate_relationship
68
- result.relationships = FedoraMigrate::RelsExtDatastreamMover.new(source).migrate
69
- result.status = true
70
- rescue StandardError => e
71
- result.relationships = e.inspect
72
- result.status = false
73
- ensure
74
- report.save(source.pid, result)
75
- end
55
+ def migrate_object
56
+ result.object = FedoraMigrate::ObjectMover.new(source, nil, options).migrate
57
+ result.status = true
58
+ rescue StandardError => e
59
+ result.object = e.inspect
60
+ result.status = false
61
+ ensure
62
+ report.save(source.pid, result)
63
+ end
76
64
 
77
- def repository_namespace
78
- FedoraMigrate.source.connection.repository_profile["repositoryPID"]["repositoryPID"].split(/:/).first.strip
79
- end
65
+ def migrate_relationship
66
+ result.relationships = FedoraMigrate::RelsExtDatastreamMover.new(source).migrate
67
+ result.status = true
68
+ rescue StandardError => e
69
+ result.relationships = e.inspect
70
+ result.status = false
71
+ ensure
72
+ report.save(source.pid, result)
73
+ end
80
74
 
81
- def qualifying_object object
82
- name = object.pid.split(/:/).first
83
- return object if name.match(namespace)
84
- end
75
+ def repository_namespace
76
+ FedoraMigrate.source.connection.repository_profile["repositoryPID"]["repositoryPID"].split(/:/).first.strip
77
+ end
85
78
 
86
- def migration_required?
87
- return false if blacklist.include?(source.pid)
88
- return true if report.results[source.pid].nil?
89
- !report.results[source.pid]["status"]
90
- end
79
+ def qualifying_object(object)
80
+ name = object.pid.split(/:/).first
81
+ return object if name.match(namespace)
82
+ end
91
83
 
92
- def find_or_create_single_object_report
93
- if report.results[source.pid].nil?
94
- SingleObjectReport.new
95
- else
96
- SingleObjectReport.new(report.results[source.pid]["status"],report.results[source.pid]["object"],report.results[source.pid]["relationships"])
84
+ def migration_required?
85
+ return false if blacklist.include?(source.pid)
86
+ return true if report.results[source.pid].nil?
87
+ !report.results[source.pid]["status"]
97
88
  end
98
- end
99
89
 
90
+ def find_or_create_single_object_report
91
+ if report.results[source.pid].nil?
92
+ SingleObjectReport.new
93
+ else
94
+ SingleObjectReport.new(report.results[source.pid]["status"], report.results[source.pid]["object"], report.results[source.pid]["relationships"])
95
+ end
96
+ end
100
97
  end
101
98
  end