fedora-migrate 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +128 -0
  3. data/.rubocop_todo.yml +9 -0
  4. data/.travis.yml +3 -2
  5. data/Gemfile +2 -0
  6. data/fedora-migrate.gemspec +1 -0
  7. data/lib/fedora-migrate.rb +3 -5
  8. data/lib/fedora_migrate/content_mover.rb +7 -11
  9. data/lib/fedora_migrate/datastream_mover.rb +21 -24
  10. data/lib/fedora_migrate/datastream_verification.rb +15 -17
  11. data/lib/fedora_migrate/dates_mover.rb +0 -2
  12. data/lib/fedora_migrate/file_configurator.rb +3 -5
  13. data/lib/fedora_migrate/hooks.rb +0 -2
  14. data/lib/fedora_migrate/logger.rb +8 -11
  15. data/lib/fedora_migrate/migration_options.rb +6 -7
  16. data/lib/fedora_migrate/migration_report.rb +18 -22
  17. data/lib/fedora_migrate/mover.rb +4 -6
  18. data/lib/fedora_migrate/object_mover.rb +28 -34
  19. data/lib/fedora_migrate/permissions.rb +8 -10
  20. data/lib/fedora_migrate/permissions_mover.rb +7 -11
  21. data/lib/fedora_migrate/rdf_datastream_mover.rb +1 -2
  22. data/lib/fedora_migrate/rels_ext_datastream_mover.rb +29 -31
  23. data/lib/fedora_migrate/repository_migrator.rb +40 -43
  24. data/lib/fedora_migrate/rights_metadata.rb +109 -114
  25. data/lib/fedora_migrate/rubydora_connection.rb +4 -5
  26. data/lib/fedora_migrate/target_constructor.rb +19 -22
  27. data/lib/fedora_migrate/version.rb +1 -1
  28. data/spec/integration/content_versions_spec.rb +12 -14
  29. data/spec/integration/custom_target_spec.rb +44 -0
  30. data/spec/integration/fedora3_interface_spec.rb +7 -11
  31. data/spec/integration/missing_relationships_spec.rb +8 -10
  32. data/spec/integration/object_migration_spec.rb +20 -31
  33. data/spec/integration/permission_migration_spec.rb +4 -6
  34. data/spec/integration/rdf_migration_spec.rb +3 -6
  35. data/spec/integration/relationship_migration_spec.rb +6 -7
  36. data/spec/integration/repository_migration_spec.rb +14 -19
  37. data/spec/integration/versions_spec.rb +6 -8
  38. data/spec/spec_helper.rb +3 -3
  39. data/spec/support/example_model.rb +23 -25
  40. data/spec/unit/content_mover_spec.rb +21 -23
  41. data/spec/unit/datastream_mover_spec.rb +10 -14
  42. data/spec/unit/datastream_verification_spec.rb +7 -9
  43. data/spec/unit/dates_mover_spec.rb +3 -4
  44. data/spec/unit/fedora_migrate_spec.rb +2 -6
  45. data/spec/unit/file_configurator_spec.rb +4 -8
  46. data/spec/unit/migration_options_spec.rb +1 -3
  47. data/spec/unit/migration_report_spec.rb +5 -6
  48. data/spec/unit/mover_spec.rb +10 -12
  49. data/spec/unit/object_mover_spec.rb +9 -16
  50. data/spec/unit/permissions_mover_spec.rb +8 -11
  51. data/spec/unit/rels_ext_datastream_mover_spec.rb +4 -6
  52. data/spec/unit/repository_migrator_spec.rb +12 -14
  53. data/spec/unit/rubydora_connection_spec.rb +3 -5
  54. data/spec/unit/target_constructor_spec.rb +10 -16
  55. data/tasks/dev.rake +9 -1
  56. metadata +21 -3
@@ -1,10 +1,9 @@
1
1
  module FedoraMigrate
2
2
  module MigrationOptions
3
-
4
3
  attr_accessor :options, :conversions
5
4
 
6
5
  def conversion_options
7
- self.conversions = options.nil? ? [] : [options[:convert]].flatten
6
+ self.conversions = options.nil? ? [] : [options[:convert]].flatten
8
7
  end
9
8
 
10
9
  def forced?
@@ -23,12 +22,12 @@ module FedoraMigrate
23
22
  return [] if options.nil?
24
23
  options.fetch(:blacklist, [])
25
24
  end
26
-
25
+
27
26
  private
28
-
29
- def option_true?(name)
30
- !!(options && options[name])
31
- end
32
27
 
28
+ def option_true?(name)
29
+ return false unless options
30
+ options.fetch(name, false)
31
+ end
33
32
  end
34
33
  end
@@ -1,13 +1,12 @@
1
1
  module FedoraMigrate
2
2
  class MigrationReport
3
-
4
3
  attr_accessor :path, :results
5
4
 
6
5
  DEFAULT_PATH = "migration_report".freeze
7
6
 
8
- def initialize path=nil
7
+ def initialize(path = nil)
9
8
  @path = path.nil? ? DEFAULT_PATH : path
10
- FileUtils::mkdir_p(@path)
9
+ FileUtils.mkdir_p(@path)
11
10
  reload
12
11
  end
13
12
 
@@ -15,9 +14,7 @@ module FedoraMigrate
15
14
  @results = load_results_from_directory
16
15
  end
17
16
 
18
- def empty?
19
- results.empty?
20
- end
17
+ delegate :empty?, to: :results
21
18
 
22
19
  def failed_objects
23
20
  results.keys.map { |k| k unless results[k]["status"] }.compact
@@ -31,36 +28,35 @@ module FedoraMigrate
31
28
  results.keys.count
32
29
  end
33
30
 
34
- def report_failures output = String.new
31
+ def report_failures(output = '')
35
32
  failed_objects.each do |k|
36
- output << "#{k}:\n\tobject: #{results[k]["object"]}\n\trelationships: #{results[k]["relationships"]}\n\n"
33
+ output << "#{k}:\n\tobject: #{results[k]['object']}\n\trelationships: #{results[k]['relationships']}\n\n"
37
34
  end
38
35
  output
39
36
  end
40
37
 
41
38
  # Receives an individual report and writes it to the MigrationReport directory
42
- def save pid, report
43
- file = File.join(path,file_from_pid(pid))
39
+ def save(pid, report)
40
+ file = File.join(path, file_from_pid(pid))
44
41
  json = JSON.load(report.to_json)
45
42
  File.write(file, JSON.pretty_generate(json))
46
43
  end
47
44
 
48
45
  private
49
46
 
50
- def load_results_from_directory assembled = Hash.new
51
- Dir.glob(File.join(path,"*.json")).each do |file|
52
- assembled[pid_from_file(file)] = JSON.parse(File.read(file))
47
+ def load_results_from_directory(assembled = {})
48
+ Dir.glob(File.join(path, "*.json")).each do |file|
49
+ assembled[pid_from_file(file)] = JSON.parse(File.read(file))
50
+ end
51
+ assembled
53
52
  end
54
- assembled
55
- end
56
-
57
- def pid_from_file file
58
- File.basename(file, ".*").gsub(/_/,":")
59
- end
60
53
 
61
- def file_from_pid pid
62
- pid.gsub(/:/,"_")+".json"
63
- end
54
+ def pid_from_file(file)
55
+ File.basename(file, ".*").tr('_', ":")
56
+ end
64
57
 
58
+ def file_from_pid(pid)
59
+ pid.tr(':', "_") + ".json"
60
+ end
65
61
  end
66
62
  end
@@ -1,12 +1,11 @@
1
1
  module FedoraMigrate
2
2
  class Mover
3
-
4
3
  include MigrationOptions
5
4
  include Hooks
6
5
 
7
6
  attr_accessor :target, :source, :report
8
7
 
9
- def initialize *args
8
+ def initialize(*args)
10
9
  @source = args[0]
11
10
  @target = args[1]
12
11
  @options = args[2]
@@ -37,17 +36,16 @@ module FedoraMigrate
37
36
  end
38
37
  end
39
38
 
40
- def id_component object=nil
39
+ def id_component(object = nil)
41
40
  object ||= source
42
41
  raise FedoraMigrate::Errors::MigrationError, "can't get the id component without an object" if object.nil?
43
42
  self.class.id_component(object)
44
43
  end
45
44
 
46
- def self.id_component object
47
- return object.pid.split(/:/).last if object.kind_of?(Rubydora::DigitalObject)
45
+ def self.id_component(object)
46
+ return object.pid.split(/:/).last if object.is_a?(Rubydora::DigitalObject)
48
47
  return object.to_s.split(/:/).last if object.respond_to?(:to_s)
49
48
  nil
50
49
  end
51
-
52
50
  end
53
51
  end
@@ -1,6 +1,5 @@
1
1
  module FedoraMigrate
2
2
  class ObjectMover < Mover
3
-
4
3
  RIGHTS_DATASTREAM = "rightsMetadata".freeze
5
4
 
6
5
  ContentDatastreamReport = Struct.new(:ds, :versions)
@@ -38,52 +37,47 @@ module FedoraMigrate
38
37
  save
39
38
  end
40
39
 
40
+ def target
41
+ @target ||= FedoraMigrate::TargetConstructor.new(source).build
42
+ end
43
+
41
44
  private
42
45
 
43
- def migrate_datastreams
44
- migrate_content_datastreams
45
- migrate_permissions
46
- migrate_dates
47
- end
46
+ def migrate_datastreams
47
+ migrate_content_datastreams
48
+ migrate_permissions
49
+ migrate_dates
50
+ end
48
51
 
49
- # We have to call save before migrating content datastreams, otherwise versions aren't recorded
50
- # TODO: this will fail if required fields are defined in a descMetadata datastream that is not
51
- # converted to RDF (issue #8)
52
- def migrate_content_datastreams
53
- save
54
- target.attached_files.keys.each do |ds|
55
- mover = FedoraMigrate::DatastreamMover.new(source.datastreams[ds.to_s], target.attached_files[ds.to_s], options)
56
- report.content_datastreams << ContentDatastreamReport.new(ds, mover.migrate)
52
+ # We have to call save before migrating content datastreams, otherwise versions aren't recorded
53
+ # TODO: this will fail if required fields are defined in a descMetadata datastream that is not
54
+ # converted to RDF (issue #8)
55
+ def migrate_content_datastreams
56
+ save
57
+ target.attached_files.keys.each do |ds|
58
+ mover = FedoraMigrate::DatastreamMover.new(source.datastreams[ds.to_s], target.attached_files[ds.to_s], options)
59
+ report.content_datastreams << ContentDatastreamReport.new(ds, mover.migrate)
60
+ end
57
61
  end
58
- end
59
62
 
60
- def convert_rdf_datastream ds
61
- if source.datastreams.key?(ds)
63
+ def convert_rdf_datastream(ds)
64
+ return unless source.datastreams.key?(ds)
62
65
  mover = FedoraMigrate::RDFDatastreamMover.new(datastream_content(ds), target)
63
66
  report.rdf_datastreams << RDFDatastreamReport.new(ds, mover.migrate)
64
67
  end
65
- end
66
68
 
67
- def datastream_content(dsid)
68
- source.datastreams[dsid.to_s]
69
- end
69
+ def datastream_content(dsid)
70
+ source.datastreams[dsid.to_s]
71
+ end
70
72
 
71
- def migrate_permissions
72
- if source.datastreams.keys.include?(RIGHTS_DATASTREAM) && target.respond_to?(:permissions)
73
+ def migrate_permissions
74
+ return unless source.datastreams.keys.include?(RIGHTS_DATASTREAM) && target.respond_to?(:permissions)
73
75
  mover = FedoraMigrate::PermissionsMover.new(source.datastreams[RIGHTS_DATASTREAM], target)
74
76
  report.permissions = mover.migrate
75
77
  end
76
- end
77
-
78
- def migrate_dates
79
- report.dates = FedoraMigrate::DatesMover.new(source, target).migrate
80
- end
81
-
82
- def create_target_model
83
- builder = FedoraMigrate::TargetConstructor.new(source.models).build
84
- raise FedoraMigrate::Errors::MigrationError, "No qualified targets found in #{source.pid}" if builder.target.nil?
85
- @target = builder.target.new(id: id_component)
86
- end
87
78
 
79
+ def migrate_dates
80
+ report.dates = FedoraMigrate::DatesMover.new(source, target).migrate
81
+ end
88
82
  end
89
83
  end
@@ -1,32 +1,30 @@
1
1
  module FedoraMigrate::Permissions
2
-
3
2
  # Taken from Hydra::AccessControls::Permissions under version 7.2.2
4
3
  #
5
4
  # We need the reader methods to get permissions from the Fedora3
6
5
  # rightsMetadata datastreams
7
6
 
8
7
  def read_groups
9
- rightsMetadata.groups.map {|k, v| k if v == 'read'}.compact
8
+ rightsMetadata.groups.map { |k, v| k if v == 'read' }.compact
10
9
  end
11
-
10
+
12
11
  def edit_groups
13
- rightsMetadata.groups.map {|k, v| k if v == 'edit'}.compact
12
+ rightsMetadata.groups.map { |k, v| k if v == 'edit' }.compact
14
13
  end
15
-
14
+
16
15
  def discover_groups
17
- rightsMetadata.groups.map {|k, v| k if v == 'discover'}.compact
16
+ rightsMetadata.groups.map { |k, v| k if v == 'discover' }.compact
18
17
  end
19
18
 
20
19
  def read_users
21
- rightsMetadata.users.map {|k, v| k if v == 'read'}.compact
20
+ rightsMetadata.users.map { |k, v| k if v == 'read' }.compact
22
21
  end
23
22
 
24
23
  def edit_users
25
- rightsMetadata.users.map {|k, v| k if v == 'edit'}.compact
24
+ rightsMetadata.users.map { |k, v| k if v == 'edit' }.compact
26
25
  end
27
26
 
28
27
  def discover_users
29
- rightsMetadata.users.map {|k, v| k if v == 'discover'}.compact
28
+ rightsMetadata.users.map { |k, v| k if v == 'discover' }.compact
30
29
  end
31
-
32
30
  end
@@ -1,20 +1,17 @@
1
1
  module FedoraMigrate
2
2
  class PermissionsMover < Mover
3
-
4
3
  include FedoraMigrate::Permissions
5
4
 
6
5
  attr_accessor :rightsMetadata
7
6
 
8
7
  def post_initialize
9
- if source.respond_to?(:content)
10
- @rightsMetadata = datastream_from_content
11
- end
8
+ @rightsMetadata = datastream_from_content if source.respond_to?(:content)
12
9
  end
13
10
 
14
11
  def migrate
15
12
  FedoraMigrate::Permissions.instance_methods.each do |permission|
16
- report << "#{permission} = #{self.send(permission)}"
17
- target.send(permission.to_s+"=", self.send(permission))
13
+ report << "#{permission} = #{send(permission)}"
14
+ target.send(permission.to_s + "=", send(permission))
18
15
  end
19
16
  save
20
17
  super
@@ -22,10 +19,9 @@ module FedoraMigrate
22
19
 
23
20
  private
24
21
 
25
- def datastream_from_content ds = FedoraMigrate::RightsMetadata.new
26
- ds.ng_xml = source.content
27
- ds
28
- end
29
-
22
+ def datastream_from_content(ds = FedoraMigrate::RightsMetadata.new)
23
+ ds.ng_xml = source.content
24
+ ds
25
+ end
30
26
  end
31
27
  end
@@ -2,7 +2,6 @@ require 'rchardet'
2
2
 
3
3
  module FedoraMigrate
4
4
  class RDFDatastreamMover < Mover
5
-
6
5
  def migrate
7
6
  before_rdf_datastream_migration
8
7
  migrate_rdf_triples
@@ -22,7 +21,7 @@ module FedoraMigrate
22
21
  end
23
22
 
24
23
  def updated_datastream_content
25
- correct_encoding(datastream_content).gsub(/<.+#{source.pid}>/,"<#{target.uri}>")
24
+ correct_encoding(datastream_content).gsub(/<.+#{source.pid}>/, "<#{target.uri}>")
26
25
  end
27
26
 
28
27
  def datastream_content
@@ -1,6 +1,5 @@
1
1
  module FedoraMigrate
2
2
  class RelsExtDatastreamMover < Mover
3
-
4
3
  RELS_EXT_DATASTREAM = "RELS-EXT".freeze
5
4
 
6
5
  def migrate
@@ -18,42 +17,41 @@ module FedoraMigrate
18
17
 
19
18
  private
20
19
 
21
- def migrate_statements
22
- statements.each do |statement|
23
- triple = [target.rdf_subject, migrate_predicate(statement.predicate), migrate_object(statement.object)]
24
- target.ldp_source.graph << triple
25
- report << triple.join("--")
20
+ def migrate_statements
21
+ statements.each do |statement|
22
+ triple = [target.rdf_subject, migrate_predicate(statement.predicate), migrate_object(statement.object)]
23
+ target.ldp_source.graph << triple
24
+ report << triple.join("--")
25
+ end
26
26
  end
27
- end
28
-
29
- def update_index
30
- target.reload
31
- target.update_index
32
- end
33
27
 
34
- def graph
35
- @graph ||= RDF::Graph.new { |g| g.from_rdfxml(source.datastreams[RELS_EXT_DATASTREAM].content) }
36
- end
28
+ def update_index
29
+ target.reload
30
+ target.update_index
31
+ end
37
32
 
38
- # Override this if any predicate transformation is needed
39
- def migrate_predicate(fc3_uri)
40
- fc3_uri
41
- end
33
+ def graph
34
+ @graph ||= RDF::Graph.new { |g| g.from_rdfxml(source.datastreams[RELS_EXT_DATASTREAM].content) }
35
+ end
42
36
 
43
- def migrate_object(fc3_uri)
44
- RDF::URI.new(ActiveFedora::Base.id_to_uri(id_component(fc3_uri)))
45
- end
37
+ # Override this if any predicate transformation is needed
38
+ def migrate_predicate(fc3_uri)
39
+ fc3_uri
40
+ end
46
41
 
47
- def has_missing_object?(statement)
48
- return false if ActiveFedora::Base.exists?(id_component(statement.object))
49
- report << "could not migrate relationship #{statement.predicate} because #{statement.object} doesn't exist in Fedora 4"
50
- true
51
- end
42
+ def migrate_object(fc3_uri)
43
+ RDF::URI.new(ActiveFedora::Base.id_to_uri(id_component(fc3_uri)))
44
+ end
52
45
 
53
- # All the graph statements except hasModel and those with missing objects
54
- def statements
55
- graph.statements.reject { |stmt| stmt.predicate == ActiveFedora::RDF::Fcrepo::Model.hasModel || has_missing_object?(stmt) }
56
- end
46
+ def missing_object?(statement)
47
+ return false if ActiveFedora::Base.exists?(id_component(statement.object))
48
+ report << "could not migrate relationship #{statement.predicate} because #{statement.object} doesn't exist in Fedora 4"
49
+ true
50
+ end
57
51
 
52
+ # All the graph statements except hasModel and those with missing objects
53
+ def statements
54
+ graph.statements.reject { |stmt| stmt.predicate == ActiveFedora::RDF::Fcrepo::Model.hasModel || missing_object?(stmt) }
55
+ end
58
56
  end
59
57
  end
@@ -1,17 +1,15 @@
1
1
  module FedoraMigrate
2
2
  class RepositoryMigrator
3
-
4
3
  include MigrationOptions
5
4
 
6
5
  attr_accessor :source_objects, :namespace, :report, :source, :result
7
6
 
8
7
  SingleObjectReport = Struct.new(:status, :object, :relationships)
9
8
 
10
- def initialize namespace = nil, options = {}
9
+ def initialize(namespace = nil, options = {})
11
10
  @namespace = namespace || repository_namespace
12
11
  @options = options
13
12
  @report = MigrationReport.new(@options.fetch(:report, nil))
14
- @source_objects = get_source_objects
15
13
  conversion_options
16
14
  end
17
15
 
@@ -36,7 +34,7 @@ module FedoraMigrate
36
34
  end
37
35
 
38
36
  def migrate_relationships
39
- return "Relationship migration halted because #{failures.to_s} objects didn't migrate successfully." if failures > 0 && not_forced?
37
+ return "Relationship migration halted because #{failures} objects didn't migrate successfully." if failures > 0 && not_forced?
40
38
  source_objects.each do |object|
41
39
  @source = object
42
40
  @result = find_or_create_single_object_report
@@ -44,8 +42,8 @@ module FedoraMigrate
44
42
  end
45
43
  end
46
44
 
47
- def get_source_objects
48
- FedoraMigrate.source.connection.search(nil).collect { |o| qualifying_object(o) }.compact
45
+ def source_objects
46
+ @source_objects ||= FedoraMigrate.source.connection.search(nil).collect { |o| qualifying_object(o) }.compact
49
47
  end
50
48
 
51
49
  def failures
@@ -54,48 +52,47 @@ module FedoraMigrate
54
52
 
55
53
  private
56
54
 
57
- def migrate_object
58
- result.object = FedoraMigrate::ObjectMover.new(source, nil, options).migrate
59
- result.status = true
60
- rescue StandardError => e
61
- result.object = e.inspect
62
- result.status = false
63
- ensure
64
- report.save(source.pid, result)
65
- end
66
-
67
- def migrate_relationship
68
- result.relationships = FedoraMigrate::RelsExtDatastreamMover.new(source).migrate
69
- result.status = true
70
- rescue StandardError => e
71
- result.relationships = e.inspect
72
- result.status = false
73
- ensure
74
- report.save(source.pid, result)
75
- end
55
+ def migrate_object
56
+ result.object = FedoraMigrate::ObjectMover.new(source, nil, options).migrate
57
+ result.status = true
58
+ rescue StandardError => e
59
+ result.object = e.inspect
60
+ result.status = false
61
+ ensure
62
+ report.save(source.pid, result)
63
+ end
76
64
 
77
- def repository_namespace
78
- FedoraMigrate.source.connection.repository_profile["repositoryPID"]["repositoryPID"].split(/:/).first.strip
79
- end
65
+ def migrate_relationship
66
+ result.relationships = FedoraMigrate::RelsExtDatastreamMover.new(source).migrate
67
+ result.status = true
68
+ rescue StandardError => e
69
+ result.relationships = e.inspect
70
+ result.status = false
71
+ ensure
72
+ report.save(source.pid, result)
73
+ end
80
74
 
81
- def qualifying_object object
82
- name = object.pid.split(/:/).first
83
- return object if name.match(namespace)
84
- end
75
+ def repository_namespace
76
+ FedoraMigrate.source.connection.repository_profile["repositoryPID"]["repositoryPID"].split(/:/).first.strip
77
+ end
85
78
 
86
- def migration_required?
87
- return false if blacklist.include?(source.pid)
88
- return true if report.results[source.pid].nil?
89
- !report.results[source.pid]["status"]
90
- end
79
+ def qualifying_object(object)
80
+ name = object.pid.split(/:/).first
81
+ return object if name.match(namespace)
82
+ end
91
83
 
92
- def find_or_create_single_object_report
93
- if report.results[source.pid].nil?
94
- SingleObjectReport.new
95
- else
96
- SingleObjectReport.new(report.results[source.pid]["status"],report.results[source.pid]["object"],report.results[source.pid]["relationships"])
84
+ def migration_required?
85
+ return false if blacklist.include?(source.pid)
86
+ return true if report.results[source.pid].nil?
87
+ !report.results[source.pid]["status"]
97
88
  end
98
- end
99
89
 
90
+ def find_or_create_single_object_report
91
+ if report.results[source.pid].nil?
92
+ SingleObjectReport.new
93
+ else
94
+ SingleObjectReport.new(report.results[source.pid]["status"], report.results[source.pid]["object"], report.results[source.pid]["relationships"])
95
+ end
96
+ end
100
97
  end
101
98
  end