fedora-migrate 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/fedora-migrate.rb +3 -5
- data/lib/fedora_migrate/content_mover.rb +32 -10
- data/lib/fedora_migrate/datastream_mover.rb +4 -7
- data/lib/fedora_migrate/datastream_verification.rb +1 -3
- data/lib/fedora_migrate/dates_mover.rb +19 -6
- data/lib/fedora_migrate/migration_report.rb +40 -0
- data/lib/fedora_migrate/mover.rb +11 -14
- data/lib/fedora_migrate/object_mover.rb +25 -9
- data/lib/fedora_migrate/permissions_mover.rb +2 -2
- data/lib/fedora_migrate/rdf_datastream_mover.rb +1 -1
- data/lib/fedora_migrate/rels_ext_datastream_mover.rb +5 -2
- data/lib/fedora_migrate/repository_migrator.rb +35 -22
- data/lib/fedora_migrate/target_constructor.rb +1 -2
- data/lib/fedora_migrate/version.rb +1 -1
- data/lib/tasks/fedora-migrate.rake +8 -2
- data/spec/fixtures/failed-report.json +339 -0
- data/spec/fixtures/sample-report.json +166 -0
- data/spec/integration/missing_relationships_spec.rb +15 -6
- data/spec/integration/repository_migration_spec.rb +25 -6
- data/spec/unit/content_mover_spec.rb +17 -10
- data/spec/unit/datastream_verification_spec.rb +2 -8
- data/spec/unit/migration_report_spec.rb +58 -0
- data/spec/unit/object_mover_spec.rb +2 -2
- data/spec/unit/repository_migrator_spec.rb +39 -10
- data/spec/unit/rubydora_connection_spec.rb +4 -0
- data/tasks/dev.rake +9 -3
- metadata +10 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ce0bad9581c3b5b080d0a73a26792ee7942ce1b3
|
4
|
+
data.tar.gz: 79932ebfe5726d0ef5dd67b1a5598847ed056175
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 79d5f57909a5a92ea9155b6b09b2e85733df39212e62e9c227e9732d5c5eeb8c286e1c8905c52fb40d511e8c5641568c763327405593bf1c2583ef97e0a81222
|
7
|
+
data.tar.gz: 5a57b9afc653f14a33dcffaad44fc15bd5c9cde314b785e866b2786d8af92336dcfc8bcb212e24362cc6f7162e599bd9b7b41f0cd43761d6f26f23da0effab01
|
data/.gitignore
CHANGED
data/lib/fedora-migrate.rb
CHANGED
@@ -19,6 +19,7 @@ module FedoraMigrate
|
|
19
19
|
autoload :Hooks
|
20
20
|
autoload :Logger
|
21
21
|
autoload :MigrationOptions
|
22
|
+
autoload :MigrationReport
|
22
23
|
autoload :Mover
|
23
24
|
autoload :ObjectMover
|
24
25
|
autoload :Permissions
|
@@ -33,17 +34,13 @@ module FedoraMigrate
|
|
33
34
|
autoload :TripleConverter
|
34
35
|
|
35
36
|
class << self
|
36
|
-
attr_reader :fedora_config, :
|
37
|
+
attr_reader :fedora_config, :source
|
37
38
|
attr_accessor :configurator
|
38
39
|
|
39
40
|
def fedora_config
|
40
41
|
@fedora_config ||= ActiveFedora::Config.new(configurator.fedora3_config)
|
41
42
|
end
|
42
43
|
|
43
|
-
def config_options
|
44
|
-
@config_options ||= "comming soon!"
|
45
|
-
end
|
46
|
-
|
47
44
|
def source
|
48
45
|
@source ||= FedoraMigrate::RubydoraConnection.new(fedora_config.credentials)
|
49
46
|
end
|
@@ -56,6 +53,7 @@ module FedoraMigrate
|
|
56
53
|
migrator = FedoraMigrate::RepositoryMigrator.new(args[:namespace], args[:options])
|
57
54
|
migrator.migrate_objects
|
58
55
|
migrator.migrate_relationships
|
56
|
+
migrator
|
59
57
|
end
|
60
58
|
|
61
59
|
end
|
data/lib/fedora_migrate/content_mover.rb
CHANGED
@@ -1,24 +1,44 @@
|
|
1
1
|
module FedoraMigrate
|
2
2
|
class ContentMover < Mover
|
3
3
|
|
4
|
+
include DatastreamVerification
|
5
|
+
|
6
|
+
class Report
|
7
|
+
attr_accessor :name, :mime_type, :original_date, :error
|
8
|
+
def success?
|
9
|
+
error.nil?
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
4
13
|
def migrate
|
5
|
-
return
|
14
|
+
return report if nil_source
|
6
15
|
move_content
|
16
|
+
report_results
|
7
17
|
insert_date_created_by_application
|
18
|
+
super
|
19
|
+
end
|
20
|
+
|
21
|
+
def results_report
|
22
|
+
Report.new
|
8
23
|
end
|
9
24
|
|
10
25
|
def move_content
|
11
26
|
target.content = source.content
|
12
27
|
target.original_name = source.label.try(:gsub, /"/, '\"')
|
13
28
|
target.mime_type = source.mimeType
|
14
|
-
Logger.info "#{target.inspect}"
|
15
29
|
save
|
30
|
+
report.error = "Failed checksum" unless valid?
|
31
|
+
end
|
32
|
+
|
33
|
+
def report_results
|
34
|
+
report.name = target.original_name
|
35
|
+
report.mime_type = target.mime_type
|
16
36
|
end
|
17
37
|
|
18
38
|
def insert_date_created_by_application
|
19
|
-
result = perform_sparql_insert
|
20
|
-
|
21
|
-
|
39
|
+
result = perform_sparql_insert
|
40
|
+
report.original_date = source.createDate.iso8601
|
41
|
+
report.error = "There was a problem with sparql #{result.status} #{result.body}" unless result.status == 204
|
22
42
|
end
|
23
43
|
|
24
44
|
def sparql_insert
|
@@ -35,15 +55,17 @@ EOF
|
|
35
55
|
|
36
56
|
private
|
37
57
|
|
38
|
-
def nil_content_message
|
39
|
-
Logger.info "datastream '#{source.dsid}' is nil. It's probably defined in the target but not present in the source"
|
40
|
-
true
|
41
|
-
end
|
42
|
-
|
43
58
|
def perform_sparql_insert
|
44
59
|
ActiveFedora.fedora.connection.patch(target.metadata.metadata_uri, sparql_insert, "Content-Type" => "application/sparql-update")
|
45
60
|
end
|
46
61
|
|
62
|
+
def nil_source
|
63
|
+
if source.content.nil?
|
64
|
+
report.error = "Nil source -- it's probably defined in the target but not present in the source"
|
65
|
+
true
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
47
69
|
end
|
48
70
|
|
49
71
|
end
|
data/lib/fedora_migrate/datastream_mover.rb
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
module FedoraMigrate
|
2
2
|
class DatastreamMover < Mover
|
3
3
|
|
4
|
-
include DatastreamVerification
|
5
|
-
|
6
4
|
attr_accessor :versionable
|
7
5
|
|
8
6
|
def post_initialize
|
@@ -25,6 +23,7 @@ module FedoraMigrate
|
|
25
23
|
before_datastream_migration
|
26
24
|
migrate_datastream
|
27
25
|
after_datastream_migration
|
26
|
+
super
|
28
27
|
end
|
29
28
|
|
30
29
|
private
|
@@ -40,22 +39,20 @@ module FedoraMigrate
|
|
40
39
|
# Reload the target, otherwise the checksum is nil
|
41
40
|
def migrate_current
|
42
41
|
migrate_content
|
43
|
-
target.reload
|
44
|
-
valid?
|
42
|
+
target.reload if report.last.success?
|
45
43
|
end
|
46
44
|
|
47
45
|
# Rubydora stores the versions array as the most recent first. We explicitly sort them according to createDate
|
48
46
|
def migrate_versions
|
49
47
|
source.versions.sort { |a,b| a.createDate <=> b.createDate }.each do |version|
|
50
48
|
migrate_content(version)
|
51
|
-
target.create_version
|
52
|
-
valid?(version)
|
49
|
+
target.create_version if report.last.success? && !application_creates_versions?
|
53
50
|
end
|
54
51
|
end
|
55
52
|
|
56
53
|
def migrate_content datastream=nil
|
57
54
|
datastream ||= source
|
58
|
-
FedoraMigrate::ContentMover.new(datastream, target).migrate
|
55
|
+
report << FedoraMigrate::ContentMover.new(datastream, target).migrate
|
59
56
|
end
|
60
57
|
|
61
58
|
end
|
data/lib/fedora_migrate/datastream_verification.rb
CHANGED
@@ -4,9 +4,7 @@ module FedoraMigrate::DatastreamVerification
|
|
4
4
|
|
5
5
|
def valid? datastream=nil
|
6
6
|
@datastream = datastream || @source
|
7
|
-
|
8
|
-
FedoraMigrate::Logger.warn "#{@datastream.pid} datastream #{@datastream.dsid} validation failed" unless check
|
9
|
-
check
|
7
|
+
has_matching_checksums? || has_matching_nokogiri_checksums?
|
10
8
|
end
|
11
9
|
|
12
10
|
def has_matching_checksums?
|
data/lib/fedora_migrate/dates_mover.rb
CHANGED
@@ -1,13 +1,26 @@
|
|
1
1
|
module FedoraMigrate
|
2
2
|
class DatesMover < Mover
|
3
3
|
|
4
|
+
Report = Struct.new(:uploaded, :modified)
|
5
|
+
|
4
6
|
def migrate
|
5
|
-
if source.respond_to?(:createdDate) && target.respond_to?(:date_uploaded)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
migrate_date_uploaded if source.respond_to?(:createdDate) && target.respond_to?(:date_uploaded)
|
8
|
+
migrate_date_modified if source.respond_to?(:lastModifiedDate) && target.respond_to?(:date_modified)
|
9
|
+
super
|
10
|
+
end
|
11
|
+
|
12
|
+
def results_report
|
13
|
+
Report.new
|
14
|
+
end
|
15
|
+
|
16
|
+
def migrate_date_uploaded
|
17
|
+
target.date_uploaded = source.createdDate
|
18
|
+
report.uploaded = source.createdDate
|
19
|
+
end
|
20
|
+
|
21
|
+
def migrate_date_modified
|
22
|
+
target.date_modified = source.lastModifiedDate
|
23
|
+
report.modified = source.lastModifiedDate
|
11
24
|
end
|
12
25
|
|
13
26
|
end
|
data/lib/fedora_migrate/migration_report.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
module FedoraMigrate
|
2
|
+
class MigrationReport
|
3
|
+
|
4
|
+
attr_accessor :results
|
5
|
+
|
6
|
+
def initialize report=nil
|
7
|
+
@results = report.nil? ? Hash.new : JSON.parse(File.read(report))
|
8
|
+
end
|
9
|
+
|
10
|
+
def empty?
|
11
|
+
results.empty?
|
12
|
+
end
|
13
|
+
|
14
|
+
def failed_objects
|
15
|
+
results.keys.map { |k| k unless results[k]["status"] }.compact
|
16
|
+
end
|
17
|
+
|
18
|
+
def failures
|
19
|
+
failed_objects.count
|
20
|
+
end
|
21
|
+
|
22
|
+
def total_objects
|
23
|
+
results.keys.count
|
24
|
+
end
|
25
|
+
|
26
|
+
def report_failures output = String.new
|
27
|
+
failed_objects.each do |k|
|
28
|
+
output << "#{k}:\n\tobject: #{results[k]["object"]}\n\trelationships: #{results[k]["relationships"]}\n\n"
|
29
|
+
end
|
30
|
+
output
|
31
|
+
end
|
32
|
+
|
33
|
+
def save path=nil
|
34
|
+
json = JSON.load(results.to_json)
|
35
|
+
file = path.nil? ? "report.json" : File.join(path,"report.json")
|
36
|
+
File.write(file, JSON.pretty_generate(json))
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
end
|
data/lib/fedora_migrate/mover.rb
CHANGED
@@ -4,24 +4,29 @@ module FedoraMigrate
|
|
4
4
|
include MigrationOptions
|
5
5
|
include Hooks
|
6
6
|
|
7
|
-
attr_accessor :target, :source
|
7
|
+
attr_accessor :target, :source, :report
|
8
8
|
|
9
9
|
def initialize *args
|
10
10
|
@source = args[0]
|
11
11
|
@target = args[1]
|
12
12
|
@options = args[2]
|
13
|
+
@report = results_report
|
13
14
|
post_initialize
|
14
15
|
end
|
15
16
|
|
16
17
|
def post_initialize
|
17
18
|
end
|
18
19
|
|
20
|
+
def results_report
|
21
|
+
[]
|
22
|
+
end
|
23
|
+
|
24
|
+
def migrate
|
25
|
+
report
|
26
|
+
end
|
27
|
+
|
19
28
|
def save
|
20
|
-
|
21
|
-
Logger.info "success for target UID #{target_description}"
|
22
|
-
else
|
23
|
-
raise FedoraMigrate::Errors::MigrationError, "Failed to save target: #{target_errors}"
|
24
|
-
end
|
29
|
+
raise FedoraMigrate::Errors::MigrationError, "Failed to save target: #{target_errors}" unless target.save
|
25
30
|
end
|
26
31
|
|
27
32
|
def target_errors
|
@@ -32,14 +37,6 @@ module FedoraMigrate
|
|
32
37
|
end
|
33
38
|
end
|
34
39
|
|
35
|
-
def target_description
|
36
|
-
if target.respond_to?(:id)
|
37
|
-
target.id
|
38
|
-
else
|
39
|
-
target.inspect
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
40
|
def id_component object=nil
|
44
41
|
object ||= source
|
45
42
|
raise FedoraMigrate::Errors::MigrationError, "can't get the id component without an object" if object.nil?
|
data/lib/fedora_migrate/object_mover.rb
CHANGED
@@ -3,13 +3,16 @@ module FedoraMigrate
|
|
3
3
|
|
4
4
|
RIGHTS_DATASTREAM = "rightsMetadata".freeze
|
5
5
|
|
6
|
+
ContentDatastreamReport = Struct.new(:ds, :versions)
|
7
|
+
RDFDatastreamReport = Struct.new(:ds, :status)
|
8
|
+
Report = Struct.new(:id, :class, :content_datastreams, :rdf_datastreams, :permissions, :dates)
|
9
|
+
|
6
10
|
def migrate
|
7
11
|
prepare_target
|
8
12
|
conversions.collect { |ds| convert_rdf_datastream(ds) }
|
9
|
-
|
10
|
-
migrate_permissions
|
11
|
-
migrate_dates
|
13
|
+
migrate_datastreams
|
12
14
|
complete_target
|
15
|
+
super
|
13
16
|
end
|
14
17
|
|
15
18
|
def post_initialize
|
@@ -17,19 +20,32 @@ module FedoraMigrate
|
|
17
20
|
create_target_model if target.nil?
|
18
21
|
end
|
19
22
|
|
23
|
+
def results_report
|
24
|
+
Report.new.tap do |report|
|
25
|
+
report.content_datastreams = []
|
26
|
+
report.rdf_datastreams = []
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
20
30
|
def prepare_target
|
21
|
-
|
31
|
+
report.class = target.class.to_s
|
32
|
+
report.id = target.id
|
22
33
|
before_object_migration
|
23
34
|
end
|
24
35
|
|
25
36
|
def complete_target
|
26
|
-
Logger.info "running after_object_migration hooks"
|
27
37
|
after_object_migration
|
28
38
|
save
|
29
39
|
end
|
30
40
|
|
31
41
|
private
|
32
42
|
|
43
|
+
def migrate_datastreams
|
44
|
+
migrate_content_datastreams
|
45
|
+
migrate_permissions
|
46
|
+
migrate_dates
|
47
|
+
end
|
48
|
+
|
33
49
|
# We have to call save before migrating content datastreams, otherwise versions aren't recorded
|
34
50
|
# TODO: this will fail if required fields are defined in a descMetadata datastream that is not
|
35
51
|
# converted to RDF (issue #8)
|
@@ -37,14 +53,14 @@ module FedoraMigrate
|
|
37
53
|
save
|
38
54
|
target.attached_files.keys.each do |ds|
|
39
55
|
mover = FedoraMigrate::DatastreamMover.new(source.datastreams[ds.to_s], target.attached_files[ds.to_s], options)
|
40
|
-
mover.migrate
|
56
|
+
report.content_datastreams << ContentDatastreamReport.new(ds, mover.migrate)
|
41
57
|
end
|
42
58
|
end
|
43
59
|
|
44
60
|
def convert_rdf_datastream ds
|
45
61
|
if source.datastreams.key?(ds)
|
46
62
|
mover = FedoraMigrate::RDFDatastreamMover.new(datastream_content(ds), target)
|
47
|
-
mover.migrate
|
63
|
+
report.rdf_datastreams << RDFDatastreamReport.new(ds, mover.migrate)
|
48
64
|
end
|
49
65
|
end
|
50
66
|
|
@@ -55,12 +71,12 @@ module FedoraMigrate
|
|
55
71
|
def migrate_permissions
|
56
72
|
if source.datastreams.keys.include?(RIGHTS_DATASTREAM) && target.respond_to?(:permissions)
|
57
73
|
mover = FedoraMigrate::PermissionsMover.new(source.datastreams[RIGHTS_DATASTREAM], target)
|
58
|
-
mover.migrate
|
74
|
+
report.permissions = mover.migrate
|
59
75
|
end
|
60
76
|
end
|
61
77
|
|
62
78
|
def migrate_dates
|
63
|
-
FedoraMigrate::DatesMover.new(source, target).migrate
|
79
|
+
report.dates = FedoraMigrate::DatesMover.new(source, target).migrate
|
64
80
|
end
|
65
81
|
|
66
82
|
def create_target_model
|
data/lib/fedora_migrate/permissions_mover.rb
CHANGED
@@ -13,13 +13,13 @@ module FedoraMigrate
|
|
13
13
|
|
14
14
|
def migrate
|
15
15
|
FedoraMigrate::Permissions.instance_methods.each do |permission|
|
16
|
-
|
16
|
+
report << "#{permission} = #{self.send(permission)}"
|
17
17
|
target.send(permission.to_s+"=", self.send(permission))
|
18
18
|
end
|
19
19
|
save
|
20
|
+
super
|
20
21
|
end
|
21
22
|
|
22
|
-
|
23
23
|
private
|
24
24
|
|
25
25
|
def datastream_from_content ds = FedoraMigrate::RightsMetadata.new
|
data/lib/fedora_migrate/rdf_datastream_mover.rb
CHANGED
@@ -4,11 +4,11 @@ module FedoraMigrate
|
|
4
4
|
class RDFDatastreamMover < Mover
|
5
5
|
|
6
6
|
def migrate
|
7
|
-
Logger.info "converting datastream '#{source.dsid}' to RDF"
|
8
7
|
before_rdf_datastream_migration
|
9
8
|
migrate_rdf_triples
|
10
9
|
after_rdf_datastream_migration
|
11
10
|
save
|
11
|
+
super
|
12
12
|
end
|
13
13
|
|
14
14
|
def migrate_rdf_triples
|
data/lib/fedora_migrate/rels_ext_datastream_mover.rb
CHANGED
@@ -7,6 +7,7 @@ module FedoraMigrate
|
|
7
7
|
migrate_statements
|
8
8
|
target.ldp_source.update
|
9
9
|
update_index
|
10
|
+
super
|
10
11
|
end
|
11
12
|
|
12
13
|
def post_initialize
|
@@ -19,7 +20,9 @@ module FedoraMigrate
|
|
19
20
|
|
20
21
|
def migrate_statements
|
21
22
|
statements.each do |statement|
|
22
|
-
|
23
|
+
triple = [target.rdf_subject, migrate_predicate(statement.predicate), migrate_object(statement.object)]
|
24
|
+
target.ldp_source.graph << triple
|
25
|
+
report << triple.join("--")
|
23
26
|
end
|
24
27
|
end
|
25
28
|
|
@@ -43,7 +46,7 @@ module FedoraMigrate
|
|
43
46
|
|
44
47
|
def has_missing_object?(statement)
|
45
48
|
return false if ActiveFedora::Base.exists?(id_component(statement.object))
|
46
|
-
|
49
|
+
report << "could not migrate relationship #{statement.predicate} because #{statement.object} doesn't exist in Fedora 4"
|
47
50
|
true
|
48
51
|
end
|
49
52
|
|