fedora-migrate 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/fedora-migrate.rb +3 -5
- data/lib/fedora_migrate/content_mover.rb +32 -10
- data/lib/fedora_migrate/datastream_mover.rb +4 -7
- data/lib/fedora_migrate/datastream_verification.rb +1 -3
- data/lib/fedora_migrate/dates_mover.rb +19 -6
- data/lib/fedora_migrate/migration_report.rb +40 -0
- data/lib/fedora_migrate/mover.rb +11 -14
- data/lib/fedora_migrate/object_mover.rb +25 -9
- data/lib/fedora_migrate/permissions_mover.rb +2 -2
- data/lib/fedora_migrate/rdf_datastream_mover.rb +1 -1
- data/lib/fedora_migrate/rels_ext_datastream_mover.rb +5 -2
- data/lib/fedora_migrate/repository_migrator.rb +35 -22
- data/lib/fedora_migrate/target_constructor.rb +1 -2
- data/lib/fedora_migrate/version.rb +1 -1
- data/lib/tasks/fedora-migrate.rake +8 -2
- data/spec/fixtures/failed-report.json +339 -0
- data/spec/fixtures/sample-report.json +166 -0
- data/spec/integration/missing_relationships_spec.rb +15 -6
- data/spec/integration/repository_migration_spec.rb +25 -6
- data/spec/unit/content_mover_spec.rb +17 -10
- data/spec/unit/datastream_verification_spec.rb +2 -8
- data/spec/unit/migration_report_spec.rb +58 -0
- data/spec/unit/object_mover_spec.rb +2 -2
- data/spec/unit/repository_migrator_spec.rb +39 -10
- data/spec/unit/rubydora_connection_spec.rb +4 -0
- data/tasks/dev.rake +9 -3
- metadata +10 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ce0bad9581c3b5b080d0a73a26792ee7942ce1b3
|
4
|
+
data.tar.gz: 79932ebfe5726d0ef5dd67b1a5598847ed056175
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 79d5f57909a5a92ea9155b6b09b2e85733df39212e62e9c227e9732d5c5eeb8c286e1c8905c52fb40d511e8c5641568c763327405593bf1c2583ef97e0a81222
|
7
|
+
data.tar.gz: 5a57b9afc653f14a33dcffaad44fc15bd5c9cde314b785e866b2786d8af92336dcfc8bcb212e24362cc6f7162e599bd9b7b41f0cd43761d6f26f23da0effab01
|
data/.gitignore
CHANGED
data/lib/fedora-migrate.rb
CHANGED
@@ -19,6 +19,7 @@ module FedoraMigrate
|
|
19
19
|
autoload :Hooks
|
20
20
|
autoload :Logger
|
21
21
|
autoload :MigrationOptions
|
22
|
+
autoload :MigrationReport
|
22
23
|
autoload :Mover
|
23
24
|
autoload :ObjectMover
|
24
25
|
autoload :Permissions
|
@@ -33,17 +34,13 @@ module FedoraMigrate
|
|
33
34
|
autoload :TripleConverter
|
34
35
|
|
35
36
|
class << self
|
36
|
-
attr_reader :fedora_config, :
|
37
|
+
attr_reader :fedora_config, :source
|
37
38
|
attr_accessor :configurator
|
38
39
|
|
39
40
|
def fedora_config
|
40
41
|
@fedora_config ||= ActiveFedora::Config.new(configurator.fedora3_config)
|
41
42
|
end
|
42
43
|
|
43
|
-
def config_options
|
44
|
-
@config_options ||= "comming soon!"
|
45
|
-
end
|
46
|
-
|
47
44
|
def source
|
48
45
|
@source ||= FedoraMigrate::RubydoraConnection.new(fedora_config.credentials)
|
49
46
|
end
|
@@ -56,6 +53,7 @@ module FedoraMigrate
|
|
56
53
|
migrator = FedoraMigrate::RepositoryMigrator.new(args[:namespace], args[:options])
|
57
54
|
migrator.migrate_objects
|
58
55
|
migrator.migrate_relationships
|
56
|
+
migrator
|
59
57
|
end
|
60
58
|
|
61
59
|
end
|
@@ -1,24 +1,44 @@
|
|
1
1
|
module FedoraMigrate
|
2
2
|
class ContentMover < Mover
|
3
3
|
|
4
|
+
include DatastreamVerification
|
5
|
+
|
6
|
+
class Report
|
7
|
+
attr_accessor :name, :mime_type, :original_date, :error
|
8
|
+
def success?
|
9
|
+
error.nil?
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
4
13
|
def migrate
|
5
|
-
return
|
14
|
+
return report if nil_source
|
6
15
|
move_content
|
16
|
+
report_results
|
7
17
|
insert_date_created_by_application
|
18
|
+
super
|
19
|
+
end
|
20
|
+
|
21
|
+
def results_report
|
22
|
+
Report.new
|
8
23
|
end
|
9
24
|
|
10
25
|
def move_content
|
11
26
|
target.content = source.content
|
12
27
|
target.original_name = source.label.try(:gsub, /"/, '\"')
|
13
28
|
target.mime_type = source.mimeType
|
14
|
-
Logger.info "#{target.inspect}"
|
15
29
|
save
|
30
|
+
report.error = "Failed checksum" unless valid?
|
31
|
+
end
|
32
|
+
|
33
|
+
def report_results
|
34
|
+
report.name = target.original_name
|
35
|
+
report.mime_type = target.mime_type
|
16
36
|
end
|
17
37
|
|
18
38
|
def insert_date_created_by_application
|
19
|
-
result = perform_sparql_insert
|
20
|
-
|
21
|
-
|
39
|
+
result = perform_sparql_insert
|
40
|
+
report.original_date = source.createDate.iso8601
|
41
|
+
report.error = "There was a problem with sparql #{result.status} #{result.body}" unless result.status == 204
|
22
42
|
end
|
23
43
|
|
24
44
|
def sparql_insert
|
@@ -35,15 +55,17 @@ EOF
|
|
35
55
|
|
36
56
|
private
|
37
57
|
|
38
|
-
def nil_content_message
|
39
|
-
Logger.info "datastream '#{source.dsid}' is nil. It's probably defined in the target but not present in the source"
|
40
|
-
true
|
41
|
-
end
|
42
|
-
|
43
58
|
def perform_sparql_insert
|
44
59
|
ActiveFedora.fedora.connection.patch(target.metadata.metadata_uri, sparql_insert, "Content-Type" => "application/sparql-update")
|
45
60
|
end
|
46
61
|
|
62
|
+
def nil_source
|
63
|
+
if source.content.nil?
|
64
|
+
report.error = "Nil source -- it's probably defined in the target but not present in the source"
|
65
|
+
true
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
47
69
|
end
|
48
70
|
|
49
71
|
end
|
@@ -1,8 +1,6 @@
|
|
1
1
|
module FedoraMigrate
|
2
2
|
class DatastreamMover < Mover
|
3
3
|
|
4
|
-
include DatastreamVerification
|
5
|
-
|
6
4
|
attr_accessor :versionable
|
7
5
|
|
8
6
|
def post_initialize
|
@@ -25,6 +23,7 @@ module FedoraMigrate
|
|
25
23
|
before_datastream_migration
|
26
24
|
migrate_datastream
|
27
25
|
after_datastream_migration
|
26
|
+
super
|
28
27
|
end
|
29
28
|
|
30
29
|
private
|
@@ -40,22 +39,20 @@ module FedoraMigrate
|
|
40
39
|
# Reload the target, otherwise the checksum is nil
|
41
40
|
def migrate_current
|
42
41
|
migrate_content
|
43
|
-
target.reload
|
44
|
-
valid?
|
42
|
+
target.reload if report.last.success?
|
45
43
|
end
|
46
44
|
|
47
45
|
# Rubydora stores the versions array as the most recent first. We explicitly sort them according to createDate
|
48
46
|
def migrate_versions
|
49
47
|
source.versions.sort { |a,b| a.createDate <=> b.createDate }.each do |version|
|
50
48
|
migrate_content(version)
|
51
|
-
target.create_version
|
52
|
-
valid?(version)
|
49
|
+
target.create_version if report.last.success? && !application_creates_versions?
|
53
50
|
end
|
54
51
|
end
|
55
52
|
|
56
53
|
def migrate_content datastream=nil
|
57
54
|
datastream ||= source
|
58
|
-
FedoraMigrate::ContentMover.new(datastream, target).migrate
|
55
|
+
report << FedoraMigrate::ContentMover.new(datastream, target).migrate
|
59
56
|
end
|
60
57
|
|
61
58
|
end
|
@@ -4,9 +4,7 @@ module FedoraMigrate::DatastreamVerification
|
|
4
4
|
|
5
5
|
def valid? datastream=nil
|
6
6
|
@datastream = datastream || @source
|
7
|
-
|
8
|
-
FedoraMigrate::Logger.warn "#{@datastream.pid} datastream #{@datastream.dsid} validation failed" unless check
|
9
|
-
check
|
7
|
+
has_matching_checksums? || has_matching_nokogiri_checksums?
|
10
8
|
end
|
11
9
|
|
12
10
|
def has_matching_checksums?
|
@@ -1,13 +1,26 @@
|
|
1
1
|
module FedoraMigrate
|
2
2
|
class DatesMover < Mover
|
3
3
|
|
4
|
+
Report = Struct.new(:uploaded, :modified)
|
5
|
+
|
4
6
|
def migrate
|
5
|
-
if source.respond_to?(:createdDate) && target.respond_to?(:date_uploaded)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
migrate_date_uploaded if source.respond_to?(:createdDate) && target.respond_to?(:date_uploaded)
|
8
|
+
migrate_date_modified if source.respond_to?(:lastModifiedDate) && target.respond_to?(:date_modified)
|
9
|
+
super
|
10
|
+
end
|
11
|
+
|
12
|
+
def results_report
|
13
|
+
Report.new
|
14
|
+
end
|
15
|
+
|
16
|
+
def migrate_date_uploaded
|
17
|
+
target.date_uploaded = source.createdDate
|
18
|
+
report.uploaded = source.createdDate
|
19
|
+
end
|
20
|
+
|
21
|
+
def migrate_date_modified
|
22
|
+
target.date_modified = source.lastModifiedDate
|
23
|
+
report.modified = source.lastModifiedDate
|
11
24
|
end
|
12
25
|
|
13
26
|
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module FedoraMigrate
|
2
|
+
class MigrationReport
|
3
|
+
|
4
|
+
attr_accessor :results
|
5
|
+
|
6
|
+
def initialize report=nil
|
7
|
+
@results = report.nil? ? Hash.new : JSON.parse(File.read(report))
|
8
|
+
end
|
9
|
+
|
10
|
+
def empty?
|
11
|
+
results.empty?
|
12
|
+
end
|
13
|
+
|
14
|
+
def failed_objects
|
15
|
+
results.keys.map { |k| k unless results[k]["status"] }.compact
|
16
|
+
end
|
17
|
+
|
18
|
+
def failures
|
19
|
+
failed_objects.count
|
20
|
+
end
|
21
|
+
|
22
|
+
def total_objects
|
23
|
+
results.keys.count
|
24
|
+
end
|
25
|
+
|
26
|
+
def report_failures output = String.new
|
27
|
+
failed_objects.each do |k|
|
28
|
+
output << "#{k}:\n\tobject: #{results[k]["object"]}\n\trelationships: #{results[k]["relationships"]}\n\n"
|
29
|
+
end
|
30
|
+
output
|
31
|
+
end
|
32
|
+
|
33
|
+
def save path=nil
|
34
|
+
json = JSON.load(results.to_json)
|
35
|
+
file = path.nil? ? "report.json" : File.join(path,"report.json")
|
36
|
+
File.write(file, JSON.pretty_generate(json))
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
end
|
data/lib/fedora_migrate/mover.rb
CHANGED
@@ -4,24 +4,29 @@ module FedoraMigrate
|
|
4
4
|
include MigrationOptions
|
5
5
|
include Hooks
|
6
6
|
|
7
|
-
attr_accessor :target, :source
|
7
|
+
attr_accessor :target, :source, :report
|
8
8
|
|
9
9
|
def initialize *args
|
10
10
|
@source = args[0]
|
11
11
|
@target = args[1]
|
12
12
|
@options = args[2]
|
13
|
+
@report = results_report
|
13
14
|
post_initialize
|
14
15
|
end
|
15
16
|
|
16
17
|
def post_initialize
|
17
18
|
end
|
18
19
|
|
20
|
+
def results_report
|
21
|
+
[]
|
22
|
+
end
|
23
|
+
|
24
|
+
def migrate
|
25
|
+
report
|
26
|
+
end
|
27
|
+
|
19
28
|
def save
|
20
|
-
|
21
|
-
Logger.info "success for target UID #{target_description}"
|
22
|
-
else
|
23
|
-
raise FedoraMigrate::Errors::MigrationError, "Failed to save target: #{target_errors}"
|
24
|
-
end
|
29
|
+
raise FedoraMigrate::Errors::MigrationError, "Failed to save target: #{target_errors}" unless target.save
|
25
30
|
end
|
26
31
|
|
27
32
|
def target_errors
|
@@ -32,14 +37,6 @@ module FedoraMigrate
|
|
32
37
|
end
|
33
38
|
end
|
34
39
|
|
35
|
-
def target_description
|
36
|
-
if target.respond_to?(:id)
|
37
|
-
target.id
|
38
|
-
else
|
39
|
-
target.inspect
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
40
|
def id_component object=nil
|
44
41
|
object ||= source
|
45
42
|
raise FedoraMigrate::Errors::MigrationError, "can't get the id component without an object" if object.nil?
|
@@ -3,13 +3,16 @@ module FedoraMigrate
|
|
3
3
|
|
4
4
|
RIGHTS_DATASTREAM = "rightsMetadata".freeze
|
5
5
|
|
6
|
+
ContentDatastreamReport = Struct.new(:ds, :versions)
|
7
|
+
RDFDatastreamReport = Struct.new(:ds, :status)
|
8
|
+
Report = Struct.new(:id, :class, :content_datastreams, :rdf_datastreams, :permissions, :dates)
|
9
|
+
|
6
10
|
def migrate
|
7
11
|
prepare_target
|
8
12
|
conversions.collect { |ds| convert_rdf_datastream(ds) }
|
9
|
-
|
10
|
-
migrate_permissions
|
11
|
-
migrate_dates
|
13
|
+
migrate_datastreams
|
12
14
|
complete_target
|
15
|
+
super
|
13
16
|
end
|
14
17
|
|
15
18
|
def post_initialize
|
@@ -17,19 +20,32 @@ module FedoraMigrate
|
|
17
20
|
create_target_model if target.nil?
|
18
21
|
end
|
19
22
|
|
23
|
+
def results_report
|
24
|
+
Report.new.tap do |report|
|
25
|
+
report.content_datastreams = []
|
26
|
+
report.rdf_datastreams = []
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
20
30
|
def prepare_target
|
21
|
-
|
31
|
+
report.class = target.class.to_s
|
32
|
+
report.id = target.id
|
22
33
|
before_object_migration
|
23
34
|
end
|
24
35
|
|
25
36
|
def complete_target
|
26
|
-
Logger.info "running after_object_migration hooks"
|
27
37
|
after_object_migration
|
28
38
|
save
|
29
39
|
end
|
30
40
|
|
31
41
|
private
|
32
42
|
|
43
|
+
def migrate_datastreams
|
44
|
+
migrate_content_datastreams
|
45
|
+
migrate_permissions
|
46
|
+
migrate_dates
|
47
|
+
end
|
48
|
+
|
33
49
|
# We have to call save before migrating content datastreams, otherwise versions aren't recorded
|
34
50
|
# TODO: this will fail if required fields are defined in a descMetadata datastream that is not
|
35
51
|
# converted to RDF (issue #8)
|
@@ -37,14 +53,14 @@ module FedoraMigrate
|
|
37
53
|
save
|
38
54
|
target.attached_files.keys.each do |ds|
|
39
55
|
mover = FedoraMigrate::DatastreamMover.new(source.datastreams[ds.to_s], target.attached_files[ds.to_s], options)
|
40
|
-
mover.migrate
|
56
|
+
report.content_datastreams << ContentDatastreamReport.new(ds, mover.migrate)
|
41
57
|
end
|
42
58
|
end
|
43
59
|
|
44
60
|
def convert_rdf_datastream ds
|
45
61
|
if source.datastreams.key?(ds)
|
46
62
|
mover = FedoraMigrate::RDFDatastreamMover.new(datastream_content(ds), target)
|
47
|
-
mover.migrate
|
63
|
+
report.rdf_datastreams << RDFDatastreamReport.new(ds, mover.migrate)
|
48
64
|
end
|
49
65
|
end
|
50
66
|
|
@@ -55,12 +71,12 @@ module FedoraMigrate
|
|
55
71
|
def migrate_permissions
|
56
72
|
if source.datastreams.keys.include?(RIGHTS_DATASTREAM) && target.respond_to?(:permissions)
|
57
73
|
mover = FedoraMigrate::PermissionsMover.new(source.datastreams[RIGHTS_DATASTREAM], target)
|
58
|
-
mover.migrate
|
74
|
+
report.permissions = mover.migrate
|
59
75
|
end
|
60
76
|
end
|
61
77
|
|
62
78
|
def migrate_dates
|
63
|
-
FedoraMigrate::DatesMover.new(source, target).migrate
|
79
|
+
report.dates = FedoraMigrate::DatesMover.new(source, target).migrate
|
64
80
|
end
|
65
81
|
|
66
82
|
def create_target_model
|
@@ -13,13 +13,13 @@ module FedoraMigrate
|
|
13
13
|
|
14
14
|
def migrate
|
15
15
|
FedoraMigrate::Permissions.instance_methods.each do |permission|
|
16
|
-
|
16
|
+
report << "#{permission} = #{self.send(permission)}"
|
17
17
|
target.send(permission.to_s+"=", self.send(permission))
|
18
18
|
end
|
19
19
|
save
|
20
|
+
super
|
20
21
|
end
|
21
22
|
|
22
|
-
|
23
23
|
private
|
24
24
|
|
25
25
|
def datastream_from_content ds = FedoraMigrate::RightsMetadata.new
|
@@ -4,11 +4,11 @@ module FedoraMigrate
|
|
4
4
|
class RDFDatastreamMover < Mover
|
5
5
|
|
6
6
|
def migrate
|
7
|
-
Logger.info "converting datastream '#{source.dsid}' to RDF"
|
8
7
|
before_rdf_datastream_migration
|
9
8
|
migrate_rdf_triples
|
10
9
|
after_rdf_datastream_migration
|
11
10
|
save
|
11
|
+
super
|
12
12
|
end
|
13
13
|
|
14
14
|
def migrate_rdf_triples
|
@@ -7,6 +7,7 @@ module FedoraMigrate
|
|
7
7
|
migrate_statements
|
8
8
|
target.ldp_source.update
|
9
9
|
update_index
|
10
|
+
super
|
10
11
|
end
|
11
12
|
|
12
13
|
def post_initialize
|
@@ -19,7 +20,9 @@ module FedoraMigrate
|
|
19
20
|
|
20
21
|
def migrate_statements
|
21
22
|
statements.each do |statement|
|
22
|
-
|
23
|
+
triple = [target.rdf_subject, migrate_predicate(statement.predicate), migrate_object(statement.object)]
|
24
|
+
target.ldp_source.graph << triple
|
25
|
+
report << triple.join("--")
|
23
26
|
end
|
24
27
|
end
|
25
28
|
|
@@ -43,7 +46,7 @@ module FedoraMigrate
|
|
43
46
|
|
44
47
|
def has_missing_object?(statement)
|
45
48
|
return false if ActiveFedora::Base.exists?(id_component(statement.object))
|
46
|
-
|
49
|
+
report << "could not migrate relationship #{statement.predicate} because #{statement.object} doesn't exist in Fedora 4"
|
47
50
|
true
|
48
51
|
end
|
49
52
|
|