fedora-migrate 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a1c6f75dc48d80e784199985d948420c2c449de4
4
- data.tar.gz: 0002dcb8c3e0508510359bc427c70cd2cf0b1f76
3
+ metadata.gz: ce0bad9581c3b5b080d0a73a26792ee7942ce1b3
4
+ data.tar.gz: 79932ebfe5726d0ef5dd67b1a5598847ed056175
5
5
  SHA512:
6
- metadata.gz: cc62419d71580ecda28950eea55ada46b397d517730eff12f3d3bdbfb3b184565a1499f8ec2e219d24f61395b7be42ea2f357b18ec772d4887b6706786a97301
7
- data.tar.gz: e2adbf133dc201a4176958a53b83fe9dfc32351dfd59a72651e73ac527b4be01adf8ee762e76ac6233bb118b88daca67af1091ac4e09291fb2b888d28a56d4f6
6
+ metadata.gz: 79d5f57909a5a92ea9155b6b09b2e85733df39212e62e9c227e9732d5c5eeb8c286e1c8905c52fb40d511e8c5641568c763327405593bf1c2583ef97e0a81222
7
+ data.tar.gz: 5a57b9afc653f14a33dcffaad44fc15bd5c9cde314b785e866b2786d8af92336dcfc8bcb212e24362cc6f7162e599bd9b7b41f0cd43761d6f26f23da0effab01
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ mkmf.log
15
15
  jetty
16
16
  .ruby-version
17
17
  .rvmrc
18
+ report.json
@@ -19,6 +19,7 @@ module FedoraMigrate
19
19
  autoload :Hooks
20
20
  autoload :Logger
21
21
  autoload :MigrationOptions
22
+ autoload :MigrationReport
22
23
  autoload :Mover
23
24
  autoload :ObjectMover
24
25
  autoload :Permissions
@@ -33,17 +34,13 @@ module FedoraMigrate
33
34
  autoload :TripleConverter
34
35
 
35
36
  class << self
36
- attr_reader :fedora_config, :config_options, :source
37
+ attr_reader :fedora_config, :source
37
38
  attr_accessor :configurator
38
39
 
39
40
  def fedora_config
40
41
  @fedora_config ||= ActiveFedora::Config.new(configurator.fedora3_config)
41
42
  end
42
43
 
43
- def config_options
44
- @config_options ||= "comming soon!"
45
- end
46
-
47
44
  def source
48
45
  @source ||= FedoraMigrate::RubydoraConnection.new(fedora_config.credentials)
49
46
  end
@@ -56,6 +53,7 @@ module FedoraMigrate
56
53
  migrator = FedoraMigrate::RepositoryMigrator.new(args[:namespace], args[:options])
57
54
  migrator.migrate_objects
58
55
  migrator.migrate_relationships
56
+ migrator
59
57
  end
60
58
 
61
59
  end
@@ -1,24 +1,44 @@
1
1
  module FedoraMigrate
2
2
  class ContentMover < Mover
3
3
 
4
+ include DatastreamVerification
5
+
6
+ class Report
7
+ attr_accessor :name, :mime_type, :original_date, :error
8
+ def success?
9
+ error.nil?
10
+ end
11
+ end
12
+
4
13
  def migrate
5
- return nil_content_message if source.content.nil?
14
+ return report if nil_source
6
15
  move_content
16
+ report_results
7
17
  insert_date_created_by_application
18
+ super
19
+ end
20
+
21
+ def results_report
22
+ Report.new
8
23
  end
9
24
 
10
25
  def move_content
11
26
  target.content = source.content
12
27
  target.original_name = source.label.try(:gsub, /"/, '\"')
13
28
  target.mime_type = source.mimeType
14
- Logger.info "#{target.inspect}"
15
29
  save
30
+ report.error = "Failed checksum" unless valid?
31
+ end
32
+
33
+ def report_results
34
+ report.name = target.original_name
35
+ report.mime_type = target.mime_type
16
36
  end
17
37
 
18
38
  def insert_date_created_by_application
19
- result = perform_sparql_insert
20
- return true if result.status == 204
21
- raise FedoraMigrate::Errors::MigrationError, "problem with sparql #{result.status} #{result.body}"
39
+ result = perform_sparql_insert
40
+ report.original_date = source.createDate.iso8601
41
+ report.error = "There was a problem with sparql #{result.status} #{result.body}" unless result.status == 204
22
42
  end
23
43
 
24
44
  def sparql_insert
@@ -35,15 +55,17 @@ EOF
35
55
 
36
56
  private
37
57
 
38
- def nil_content_message
39
- Logger.info "datastream '#{source.dsid}' is nil. It's probably defined in the target but not present in the source"
40
- true
41
- end
42
-
43
58
  def perform_sparql_insert
44
59
  ActiveFedora.fedora.connection.patch(target.metadata.metadata_uri, sparql_insert, "Content-Type" => "application/sparql-update")
45
60
  end
46
61
 
62
+ def nil_source
63
+ if source.content.nil?
64
+ report.error = "Nil source -- it's probably defined in the target but not present in the source"
65
+ true
66
+ end
67
+ end
68
+
47
69
  end
48
70
 
49
71
  end
@@ -1,8 +1,6 @@
1
1
  module FedoraMigrate
2
2
  class DatastreamMover < Mover
3
3
 
4
- include DatastreamVerification
5
-
6
4
  attr_accessor :versionable
7
5
 
8
6
  def post_initialize
@@ -25,6 +23,7 @@ module FedoraMigrate
25
23
  before_datastream_migration
26
24
  migrate_datastream
27
25
  after_datastream_migration
26
+ super
28
27
  end
29
28
 
30
29
  private
@@ -40,22 +39,20 @@ module FedoraMigrate
40
39
  # Reload the target, otherwise the checksum is nil
41
40
  def migrate_current
42
41
  migrate_content
43
- target.reload
44
- valid?
42
+ target.reload if report.last.success?
45
43
  end
46
44
 
47
45
  # Rubydora stores the versions array as the most recent first. We explicitly sort them according to createDate
48
46
  def migrate_versions
49
47
  source.versions.sort { |a,b| a.createDate <=> b.createDate }.each do |version|
50
48
  migrate_content(version)
51
- target.create_version unless application_creates_versions?
52
- valid?(version)
49
+ target.create_version if report.last.success? && !application_creates_versions?
53
50
  end
54
51
  end
55
52
 
56
53
  def migrate_content datastream=nil
57
54
  datastream ||= source
58
- FedoraMigrate::ContentMover.new(datastream, target).migrate
55
+ report << FedoraMigrate::ContentMover.new(datastream, target).migrate
59
56
  end
60
57
 
61
58
  end
@@ -4,9 +4,7 @@ module FedoraMigrate::DatastreamVerification
4
4
 
5
5
  def valid? datastream=nil
6
6
  @datastream = datastream || @source
7
- check = has_matching_checksums? || has_matching_nokogiri_checksums?
8
- FedoraMigrate::Logger.warn "#{@datastream.pid} datastream #{@datastream.dsid} validation failed" unless check
9
- check
7
+ has_matching_checksums? || has_matching_nokogiri_checksums?
10
8
  end
11
9
 
12
10
  def has_matching_checksums?
@@ -1,13 +1,26 @@
1
1
  module FedoraMigrate
2
2
  class DatesMover < Mover
3
3
 
4
+ Report = Struct.new(:uploaded, :modified)
5
+
4
6
  def migrate
5
- if source.respond_to?(:createdDate) && target.respond_to?(:date_uploaded)
6
- target.date_uploaded = source.createdDate
7
- end
8
- if source.respond_to?(:lastModifiedDate) && target.respond_to?(:date_modified)
9
- target.date_modified = source.lastModifiedDate
10
- end
7
+ migrate_date_uploaded if source.respond_to?(:createdDate) && target.respond_to?(:date_uploaded)
8
+ migrate_date_modified if source.respond_to?(:lastModifiedDate) && target.respond_to?(:date_modified)
9
+ super
10
+ end
11
+
12
+ def results_report
13
+ Report.new
14
+ end
15
+
16
+ def migrate_date_uploaded
17
+ target.date_uploaded = source.createdDate
18
+ report.uploaded = source.createdDate
19
+ end
20
+
21
+ def migrate_date_modified
22
+ target.date_modified = source.lastModifiedDate
23
+ report.modified = source.lastModifiedDate
11
24
  end
12
25
 
13
26
  end
@@ -0,0 +1,40 @@
1
+ module FedoraMigrate
2
+ class MigrationReport
3
+
4
+ attr_accessor :results
5
+
6
+ def initialize report=nil
7
+ @results = report.nil? ? Hash.new : JSON.parse(File.read(report))
8
+ end
9
+
10
+ def empty?
11
+ results.empty?
12
+ end
13
+
14
+ def failed_objects
15
+ results.keys.map { |k| k unless results[k]["status"] }.compact
16
+ end
17
+
18
+ def failures
19
+ failed_objects.count
20
+ end
21
+
22
+ def total_objects
23
+ results.keys.count
24
+ end
25
+
26
+ def report_failures output = String.new
27
+ failed_objects.each do |k|
28
+ output << "#{k}:\n\tobject: #{results[k]["object"]}\n\trelationships: #{results[k]["relationships"]}\n\n"
29
+ end
30
+ output
31
+ end
32
+
33
+ def save path=nil
34
+ json = JSON.load(results.to_json)
35
+ file = path.nil? ? "report.json" : File.join(path,"report.json")
36
+ File.write(file, JSON.pretty_generate(json))
37
+ end
38
+
39
+ end
40
+ end
@@ -4,24 +4,29 @@ module FedoraMigrate
4
4
  include MigrationOptions
5
5
  include Hooks
6
6
 
7
- attr_accessor :target, :source
7
+ attr_accessor :target, :source, :report
8
8
 
9
9
  def initialize *args
10
10
  @source = args[0]
11
11
  @target = args[1]
12
12
  @options = args[2]
13
+ @report = results_report
13
14
  post_initialize
14
15
  end
15
16
 
16
17
  def post_initialize
17
18
  end
18
19
 
20
+ def results_report
21
+ []
22
+ end
23
+
24
+ def migrate
25
+ report
26
+ end
27
+
19
28
  def save
20
- if target.save
21
- Logger.info "success for target UID #{target_description}"
22
- else
23
- raise FedoraMigrate::Errors::MigrationError, "Failed to save target: #{target_errors}"
24
- end
29
+ raise FedoraMigrate::Errors::MigrationError, "Failed to save target: #{target_errors}" unless target.save
25
30
  end
26
31
 
27
32
  def target_errors
@@ -32,14 +37,6 @@ module FedoraMigrate
32
37
  end
33
38
  end
34
39
 
35
- def target_description
36
- if target.respond_to?(:id)
37
- target.id
38
- else
39
- target.inspect
40
- end
41
- end
42
-
43
40
  def id_component object=nil
44
41
  object ||= source
45
42
  raise FedoraMigrate::Errors::MigrationError, "can't get the id component without an object" if object.nil?
@@ -3,13 +3,16 @@ module FedoraMigrate
3
3
 
4
4
  RIGHTS_DATASTREAM = "rightsMetadata".freeze
5
5
 
6
+ ContentDatastreamReport = Struct.new(:ds, :versions)
7
+ RDFDatastreamReport = Struct.new(:ds, :status)
8
+ Report = Struct.new(:id, :class, :content_datastreams, :rdf_datastreams, :permissions, :dates)
9
+
6
10
  def migrate
7
11
  prepare_target
8
12
  conversions.collect { |ds| convert_rdf_datastream(ds) }
9
- migrate_content_datastreams
10
- migrate_permissions
11
- migrate_dates
13
+ migrate_datastreams
12
14
  complete_target
15
+ super
13
16
  end
14
17
 
15
18
  def post_initialize
@@ -17,19 +20,32 @@ module FedoraMigrate
17
20
  create_target_model if target.nil?
18
21
  end
19
22
 
23
+ def results_report
24
+ Report.new.tap do |report|
25
+ report.content_datastreams = []
26
+ report.rdf_datastreams = []
27
+ end
28
+ end
29
+
20
30
  def prepare_target
21
- Logger.info "running before_object_migration hooks"
31
+ report.class = target.class.to_s
32
+ report.id = target.id
22
33
  before_object_migration
23
34
  end
24
35
 
25
36
  def complete_target
26
- Logger.info "running after_object_migration hooks"
27
37
  after_object_migration
28
38
  save
29
39
  end
30
40
 
31
41
  private
32
42
 
43
+ def migrate_datastreams
44
+ migrate_content_datastreams
45
+ migrate_permissions
46
+ migrate_dates
47
+ end
48
+
33
49
  # We have to call save before migrating content datastreams, otherwise versions aren't recorded
34
50
  # TODO: this will fail if required fields are defined in a descMetadata datastream that is not
35
51
  # converted to RDF (issue #8)
@@ -37,14 +53,14 @@ module FedoraMigrate
37
53
  save
38
54
  target.attached_files.keys.each do |ds|
39
55
  mover = FedoraMigrate::DatastreamMover.new(source.datastreams[ds.to_s], target.attached_files[ds.to_s], options)
40
- mover.migrate
56
+ report.content_datastreams << ContentDatastreamReport.new(ds, mover.migrate)
41
57
  end
42
58
  end
43
59
 
44
60
  def convert_rdf_datastream ds
45
61
  if source.datastreams.key?(ds)
46
62
  mover = FedoraMigrate::RDFDatastreamMover.new(datastream_content(ds), target)
47
- mover.migrate
63
+ report.rdf_datastreams << RDFDatastreamReport.new(ds, mover.migrate)
48
64
  end
49
65
  end
50
66
 
@@ -55,12 +71,12 @@ module FedoraMigrate
55
71
  def migrate_permissions
56
72
  if source.datastreams.keys.include?(RIGHTS_DATASTREAM) && target.respond_to?(:permissions)
57
73
  mover = FedoraMigrate::PermissionsMover.new(source.datastreams[RIGHTS_DATASTREAM], target)
58
- mover.migrate
74
+ report.permissions = mover.migrate
59
75
  end
60
76
  end
61
77
 
62
78
  def migrate_dates
63
- FedoraMigrate::DatesMover.new(source, target).migrate
79
+ report.dates = FedoraMigrate::DatesMover.new(source, target).migrate
64
80
  end
65
81
 
66
82
  def create_target_model
@@ -13,13 +13,13 @@ module FedoraMigrate
13
13
 
14
14
  def migrate
15
15
  FedoraMigrate::Permissions.instance_methods.each do |permission|
16
- Logger.info "setting #{permission} to #{self.send(permission)}"
16
+ report << "#{permission} = #{self.send(permission)}"
17
17
  target.send(permission.to_s+"=", self.send(permission))
18
18
  end
19
19
  save
20
+ super
20
21
  end
21
22
 
22
-
23
23
  private
24
24
 
25
25
  def datastream_from_content ds = FedoraMigrate::RightsMetadata.new
@@ -4,11 +4,11 @@ module FedoraMigrate
4
4
  class RDFDatastreamMover < Mover
5
5
 
6
6
  def migrate
7
- Logger.info "converting datastream '#{source.dsid}' to RDF"
8
7
  before_rdf_datastream_migration
9
8
  migrate_rdf_triples
10
9
  after_rdf_datastream_migration
11
10
  save
11
+ super
12
12
  end
13
13
 
14
14
  def migrate_rdf_triples
@@ -7,6 +7,7 @@ module FedoraMigrate
7
7
  migrate_statements
8
8
  target.ldp_source.update
9
9
  update_index
10
+ super
10
11
  end
11
12
 
12
13
  def post_initialize
@@ -19,7 +20,9 @@ module FedoraMigrate
19
20
 
20
21
  def migrate_statements
21
22
  statements.each do |statement|
22
- target.ldp_source.graph << [target.rdf_subject, migrate_predicate(statement.predicate), migrate_object(statement.object)]
23
+ triple = [target.rdf_subject, migrate_predicate(statement.predicate), migrate_object(statement.object)]
24
+ target.ldp_source.graph << triple
25
+ report << triple.join("--")
23
26
  end
24
27
  end
25
28
 
@@ -43,7 +46,7 @@ module FedoraMigrate
43
46
 
44
47
  def has_missing_object?(statement)
45
48
  return false if ActiveFedora::Base.exists?(id_component(statement.object))
46
- Logger.warn "#{source.pid} could not migrate relationship #{statement.predicate} because #{statement.object} doesn't exist in Fedora 4"
49
+ report << "could not migrate relationship #{statement.predicate} because #{statement.object} doesn't exist in Fedora 4"
47
50
  true
48
51
  end
49
52