fedora-migrate 0.0.1 → 0.1.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/Gemfile +1 -0
  4. data/LICENSE +14 -0
  5. data/README.md +201 -6
  6. data/config/jetty.yml +2 -2
  7. data/fedora-migrate.gemspec +7 -6
  8. data/lib/fedora-migrate.rb +7 -24
  9. data/lib/fedora_migrate/content_mover.rb +49 -0
  10. data/lib/fedora_migrate/datastream_mover.rb +19 -34
  11. data/lib/fedora_migrate/datastream_verification.rb +36 -0
  12. data/lib/fedora_migrate/dates_mover.rb +14 -0
  13. data/lib/fedora_migrate/hooks.rb +23 -0
  14. data/lib/fedora_migrate/migration_options.rb +18 -0
  15. data/lib/fedora_migrate/mover.rb +12 -0
  16. data/lib/fedora_migrate/object_mover.rb +20 -9
  17. data/lib/fedora_migrate/rdf_datastream_mover.rb +31 -14
  18. data/lib/fedora_migrate/rels_ext_datastream_mover.rb +28 -62
  19. data/lib/fedora_migrate/repository_migrator.rb +30 -25
  20. data/lib/fedora_migrate/rubydora_connection.rb +0 -2
  21. data/lib/fedora_migrate/target_constructor.rb +39 -0
  22. data/lib/fedora_migrate/version.rb +1 -1
  23. data/spec/fixtures/objects/scholarsphere_5712mc568.xml +7284 -0
  24. data/spec/fixtures/objects/scholarsphere_7d279232g.xml +20120 -0
  25. data/spec/fixtures/objects/scholarsphere_sf2686078.xml +8823 -0
  26. data/spec/fixtures/objects/scholarsphere_x346dj04v.xml +188 -0
  27. data/spec/fixtures/objects/scholarsphere_x346dj06d.xml +255 -0
  28. data/spec/fixtures/objects/scholarsphere_x346dj08z.xml +1242 -0
  29. data/spec/fixtures/objects/sufia_5m60qr94g.xml +68 -0
  30. data/spec/fixtures/objects/sufia_5m60qr95r.xml +133 -0
  31. data/spec/fixtures/objects/sufia_5m60qr961.xml +133 -0
  32. data/spec/fixtures/objects/sufia_5m60qr979.xml +118 -0
  33. data/spec/integration/content_versions_spec.rb +24 -1
  34. data/spec/integration/missing_relationships_spec.rb +30 -0
  35. data/spec/integration/object_migration_spec.rb +49 -5
  36. data/spec/integration/rdf_migration_spec.rb +38 -13
  37. data/spec/integration/relationship_migration_spec.rb +10 -9
  38. data/spec/integration/repository_migration_spec.rb +46 -19
  39. data/spec/integration/versions_spec.rb +32 -0
  40. data/spec/spec_helper.rb +8 -1
  41. data/spec/support/example_model.rb +56 -0
  42. data/spec/unit/content_mover_spec.rb +78 -0
  43. data/spec/unit/datastream_verification_spec.rb +60 -0
  44. data/spec/unit/dates_mover_spec.rb +33 -0
  45. data/spec/unit/migration_options_spec.rb +61 -0
  46. data/spec/unit/mover_spec.rb +35 -1
  47. data/spec/unit/object_mover_spec.rb +1 -3
  48. data/spec/unit/rels_ext_datastream_mover_spec.rb +28 -18
  49. data/spec/unit/repository_migrator_spec.rb +16 -5
  50. data/spec/unit/target_constructor_spec.rb +34 -0
  51. data/tasks/dev.rake +1 -1
  52. metadata +80 -38
  53. data/LICENSE.txt +0 -22
  54. data/lib/fedora_migrate/rdf_datastream_parser.rb +0 -29
  55. data/lib/fedora_migrate/triple_converter.rb +0 -39
  56. data/spec/fixtures/datastreams/rdf_ntriples_datastream.txt +0 -2
  57. data/spec/unit/rdf_datastream_mover_spec.rb +0 -8
  58. data/spec/unit/rdf_datastream_parser_spec.rb +0 -38
  59. data/spec/unit/triple_converter_spec.rb +0 -35
@@ -0,0 +1,78 @@
1
+ require 'spec_helper'
2
+
3
+ describe FedoraMigrate::ContentMover do
4
+
5
+ let(:nil_source) { double("Source", content: nil, dsid: "datastream") }
6
+ let(:source) do
7
+ double("Source",
8
+ content: "foo",
9
+ dsid: "datastream",
10
+ label: "label",
11
+ mimeType: "mimetype",
12
+ createDate: Time.new(1993, 02, 24, 12, 0, 0, "+09:00") # Rubydora returns Time objects for datastreams' creation dates
13
+ )
14
+ end
15
+ let(:target) { double("Target", content: "") }
16
+
17
+ describe "#migrate" do
18
+ context "without content" do
19
+ subject { FedoraMigrate::ContentMover.new(nil_source, target).migrate }
20
+ it { is_expected.to be true }
21
+ end
22
+ context "with content" do
23
+ subject { FedoraMigrate::ContentMover.new(source, target).migrate }
24
+ before do
25
+ allow_any_instance_of(FedoraMigrate::ContentMover).to receive(:move_content).and_return(true)
26
+ allow_any_instance_of(FedoraMigrate::ContentMover).to receive(:insert_date_created_by_application).and_return(true)
27
+ end
28
+ it { is_expected.to be true }
29
+ end
30
+ end
31
+
32
+ describe "#move_content" do
33
+ before do
34
+ allow(target).to receive(:content=).with("foo")
35
+ allow(target).to receive(:original_name=).with("label")
36
+ allow(target).to receive(:mime_type=).with("mimetype")
37
+ allow(target).to receive(:save).and_return(true)
38
+ allow_any_instance_of(FedoraMigrate::ContentMover).to receive(:insert_date_created_by_application).and_return(true)
39
+ end
40
+ subject do
41
+ FedoraMigrate::ContentMover.new(source, target).move_content
42
+ end
43
+ it { is_expected.to be true }
44
+ end
45
+
46
+ describe "#insert_date_created_by_application" do
47
+ subject { FedoraMigrate::ContentMover.new(source, target).insert_date_created_by_application }
48
+ context "with a successful update" do
49
+ let(:successful_status) { double("Result", status: 204) }
50
+ before { allow_any_instance_of(FedoraMigrate::ContentMover).to receive(:perform_sparql_insert).and_return(successful_status) }
51
+ it { is_expected.to be true }
52
+ end
53
+ context "with an unsuccessful update" do
54
+ let(:unsuccessful_status) { double("Result", status: 404, body: "Error!") }
55
+ before { allow_any_instance_of(FedoraMigrate::ContentMover).to receive(:perform_sparql_insert).and_return(unsuccessful_status) }
56
+ it "should raise an error" do
57
+ expect { subject }.to raise_error FedoraMigrate::Errors::MigrationError
58
+ end
59
+ end
60
+ end
61
+
62
+ describe "#sparql_insert" do
63
+ let(:sample_sparql_query) do
64
+ <<-EOF
65
+ PREFIX premis: <http://www.loc.gov/premis/rdf/v1#>
66
+ PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
67
+ DELETE WHERE { ?s premis:hasDateCreatedByApplication ?o } ;
68
+ INSERT {
69
+ <> premis:hasDateCreatedByApplication "1993-02-24T12:00:00+09:00"^^xsd:dateTime .
70
+ }
71
+ WHERE { }
72
+ EOF
73
+ end
74
+ subject { FedoraMigrate::ContentMover.new(source, target).sparql_insert }
75
+ it { is_expected.to eql sample_sparql_query }
76
+ end
77
+
78
+ end
@@ -0,0 +1,60 @@
1
+ require 'spec_helper'
2
+
3
+ describe FedoraMigrate::DatastreamVerification do
4
+
5
+ class TestSubject
6
+ include FedoraMigrate::DatastreamVerification
7
+ def initialize datastream
8
+ @datastream = datastream
9
+ @source = datastream
10
+ end
11
+ end
12
+
13
+ describe "binary sources from Fedora3" do
14
+ let(:bad_binary_source) { double("Datastream", checksum: "bad", mimeType: "binary", content: "XXXXXX", dsid: "content", pid: "abc123") }
15
+ let(:good_binary_source) { double("Datastream", checksum: "foo", mimeType: "binary", content: "foo", dsid: "content", pid: "abc123") }
16
+ let(:missing_checksum) { double("Datastream", checksum: "missing", mimeType: "binary", content: "foo", dsid: "content", pid: "abc123") }
17
+ context "that match Fedora4's checksum" do
18
+ subject { TestSubject.new(good_binary_source) }
19
+ before { allow(subject).to receive(:target_checksum).once.and_return("foo") }
20
+ it { is_expected.to have_matching_checksums }
21
+ it { is_expected.to be_valid }
22
+ end
23
+ context "that do not match Fedora4's checksum" do
24
+ subject { TestSubject.new(bad_binary_source) }
25
+ before { allow(subject).to receive(:target_checksum).twice.and_return("bar") }
26
+ specify "are not valid and logged" do
27
+ expect(FedoraMigrate::Logger).to receive(:warn)
28
+ expect(subject).to_not be_valid
29
+ end
30
+ end
31
+ context "when the checksum is missing" do
32
+ subject { TestSubject.new(missing_checksum) }
33
+ context "and a newly calculated checksum matches" do
34
+ before { allow(subject).to receive(:target_checksum).twice.and_return(Digest::SHA1.hexdigest("foo")) }
35
+ it { is_expected.to have_matching_checksums }
36
+ it { is_expected.to be_valid }
37
+ end
38
+ context "and a newly calculated checksum does not match" do
39
+ before { expect_any_instance_of(TestSubject).to receive(:target_checksum).twice.and_return(Digest::SHA1.hexdigest("bar")) }
40
+ specify "are not valid and logged" do
41
+ expect(FedoraMigrate::Logger).to receive(:warn)
42
+ expect(subject).to_not be_valid
43
+ end
44
+ end
45
+ end
46
+ end
47
+
48
+ describe "xml sources from Fedora3" do
49
+ subject { TestSubject.new(double("Datastream", checksum: "invalid", mimeType: "text/xml", content: "<bar></bar>")) }
50
+ context "when the datastream content is correctly altered upon migration" do
51
+ before { allow(subject).to receive(:target_content).once.and_return("<?xml version=\"1.0\"?>\n<bar></bar>") }
52
+ it { is_expected.to have_matching_nokogiri_checksums }
53
+ end
54
+ context "when the datastream content is incorrectly altered upon migration" do
55
+ before { allow(subject).to receive(:target_content).once.and_return("<?xml version=\"1.0\"?>\n<baz></baz>") }
56
+ it { is_expected.to_not have_matching_nokogiri_checksums }
57
+ end
58
+ end
59
+
60
+ end
@@ -0,0 +1,33 @@
1
+ require 'spec_helper'
2
+
3
+ describe FedoraMigrate::DatesMover do
4
+ let(:target) { ExampleModel::RDFObject.new }
5
+ let(:source) { instance_double('Source', createdDate: 'yesterday', lastModifiedDate: 'today') }
6
+
7
+ subject { FedoraMigrate::DatesMover.new(source, target) }
8
+
9
+ describe '#migrate' do
10
+
11
+ it 'migrates the create and mod dates' do
12
+ subject.migrate
13
+ expect(target.date_uploaded).to eq 'yesterday'
14
+ expect(target.date_modified).to eq 'today'
15
+ end
16
+
17
+ context "when the source methods don't exist" do
18
+ let(:source) { instance_double('Source with no date methods') }
19
+ it 'gracefully does nothing' do
20
+ expect{ subject.migrate }.to_not raise_error
21
+ expect(target.date_uploaded).to be_nil
22
+ expect(target.date_modified).to be_nil
23
+ end
24
+ end
25
+
26
+ context "when the target methods don't exist" do
27
+ let(:target) { instance_double('Target with no date methods') }
28
+ it 'gracefully does nothing' do
29
+ expect{ subject.migrate }.to_not raise_error
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
+ describe FedoraMigrate::MigrationOptions do
4
+
5
+ class TestCase
6
+ include FedoraMigrate::MigrationOptions
7
+ end
8
+
9
+ describe "#conversion_options" do
10
+ subject do
11
+ TestCase.new.tap do |example|
12
+ example.options = { convert: 'datastream' }
13
+ end
14
+ end
15
+ specify "sets the name of the datastream to convert" do
16
+ expect(subject.conversion_options).to include "datastream"
17
+ end
18
+ it { is_expected.to be_not_forced }
19
+ end
20
+
21
+ describe "#forced?" do
22
+ context "when set to true" do
23
+ subject do
24
+ TestCase.new.tap do |example|
25
+ example.options = { convert: "datastream", force: true }
26
+ end
27
+ end
28
+ it { is_expected.to be_forced }
29
+ end
30
+ context "when set to false" do
31
+ subject do
32
+ TestCase.new.tap do |example|
33
+ example.options = { force: false }
34
+ end
35
+ end
36
+ it { is_expected.to be_not_forced }
37
+ end
38
+ context "by default" do
39
+ subject { TestCase.new }
40
+ it { is_expected.to be_not_forced }
41
+ end
42
+ end
43
+
44
+ describe "#application_creates_versions" do
45
+ context "by default" do
46
+ subject do
47
+ TestCase.new.application_creates_versions?
48
+ end
49
+ it { is_expected.to be false }
50
+ end
51
+ context "when our own Hydra application creates versions" do
52
+ subject do
53
+ TestCase.new.tap do |example|
54
+ example.options = { application_creates_versions: true }
55
+ end
56
+ end
57
+ it { is_expected.to be_application_creates_versions }
58
+ end
59
+ end
60
+
61
+ end
@@ -7,7 +7,6 @@ describe FedoraMigrate::Mover do
7
7
  it { is_expected.to respond_to :options }
8
8
 
9
9
  describe "#new" do
10
-
11
10
  context "with two arguments" do
12
11
  subject { FedoraMigrate::Mover.new("foo", "bar") }
13
12
  specify "has a source" do
@@ -34,6 +33,41 @@ describe FedoraMigrate::Mover do
34
33
  expect(subject.target).to be_nil
35
34
  end
36
35
  end
36
+ end
37
37
 
38
+ describe "::id_component" do
39
+ context "with a Rubydora object" do
40
+ let(:id) { "rb68xc11m" }
41
+ let(:object) { FedoraMigrate.source.connection.find("sufia:#{id}") }
42
+ subject { FedoraMigrate::Mover.id_component(object) }
43
+ it { is_expected.to eql(id) }
44
+ end
45
+ context "with a URI" do
46
+ let(:object) { RDF::URI.new("foo:bar") }
47
+ subject { FedoraMigrate::Mover.id_component(object) }
48
+ it { is_expected.to eql("bar") }
49
+ end
50
+ context "with a string" do
51
+ let(:object) { "foo:bar" }
52
+ subject { FedoraMigrate::Mover.id_component(object) }
53
+ it { is_expected.to eql("bar") }
54
+ end
38
55
  end
56
+
57
+ describe "#id_component" do
58
+ context "with a source" do
59
+ subject { FedoraMigrate::Mover.new("source:pid").id_component }
60
+ it { is_expected.to eql("pid") }
61
+ end
62
+ context "object, but no source" do
63
+ subject { FedoraMigrate::Mover.new.id_component("source:pid") }
64
+ it { is_expected.to eql("pid") }
65
+ end
66
+ context "neither object, nor source" do
67
+ specify "raises an error" do
68
+ expect { FedoraMigrate::Mover.new.id_component }.to raise_error(FedoraMigrate::Errors::MigrationError)
69
+ end
70
+ end
71
+ end
72
+
39
73
  end
@@ -7,7 +7,6 @@ describe FedoraMigrate::ObjectMover do
7
7
  end
8
8
 
9
9
  describe "#new" do
10
-
11
10
  it { is_expected.to respond_to :source }
12
11
  it { is_expected.to respond_to :target }
13
12
  it { is_expected.to respond_to :post_initialize }
@@ -19,8 +18,7 @@ describe FedoraMigrate::ObjectMover do
19
18
  end
20
19
  it "should call the before hook and save the target" do
21
20
  expect_any_instance_of(FedoraMigrate::ObjectMover).to receive(:before_object_migration)
22
- expect_any_instance_of(FedoraMigrate::ObjectMover).to receive(:save).and_return(true)
23
- expect(subject).to be true
21
+ expect(subject).to be nil
24
22
  end
25
23
  end
26
24
 
@@ -2,35 +2,45 @@ require 'spec_helper'
2
2
 
3
3
  describe FedoraMigrate::RelsExtDatastreamMover do
4
4
 
5
- context "with a target" do
5
+ let(:file_id) { "rb68xc11m" }
6
+ let(:batch_id) { "rb68xc09k" }
7
+ let(:source) { FedoraMigrate.source.connection.find("sufia:#{file_id}") }
8
+ let(:query) { subject.target }
6
9
 
10
+ context "with target objects present in Fedora 4" do
7
11
  before do
8
- allow_any_instance_of(FedoraMigrate::RelsExtDatastreamMover).to receive(:retrieve_subject).and_return("subject")
9
- allow_any_instance_of(FedoraMigrate::RelsExtDatastreamMover).to receive(:retrieve_object).and_return("object")
12
+ ActiveFedora::Base.create(id: file_id)
13
+ ActiveFedora::Base.create(id: batch_id)
10
14
  end
11
15
 
12
- subject do
13
- FedoraMigrate::RelsExtDatastreamMover.new(
14
- FedoraMigrate.source.connection.find("sufia:rb68xc11m")
15
- )
16
- end
17
-
18
- describe "#relationships" do
19
- it "should parse the source's RELS-EXT datastream for relationships" do
20
- expect(subject.relationships).to include(:part_of => ["object"])
16
+ describe "#initialize" do
17
+ context "without a target" do
18
+ subject { FedoraMigrate::RelsExtDatastreamMover.new(source).target }
19
+ it { is_expected.to be_kind_of(ActiveFedora::Base) }
20
+ end
21
+ context "with a supplied target" do
22
+ subject { FedoraMigrate::RelsExtDatastreamMover.new(source, "a target").target }
23
+ it { is_expected.to eql "a target" }
21
24
  end
22
25
  end
23
26
 
24
- describe "#ng_xml" do
25
- it "should return a Nokogiri document of the object's RELS-EXT datastream" do
26
- expect(subject.ng_xml).to be_kind_of(Nokogiri::XML::Document)
27
+ describe "#migrate" do
28
+ context "with an existing target" do
29
+ before { FedoraMigrate::RelsExtDatastreamMover.new(source).migrate }
30
+ subject { ActiveFedora::Base.find(file_id).ldp_source.graph.query([nil, ActiveFedora::RDF::Fcrepo::RelsExt.isPartOf, nil]) }
31
+ it "migrates RDF relationships" do
32
+ expect(subject.first.subject).to eq RDF::URI.new("http://localhost:8983/fedora/rest/test/#{file_id}")
33
+ expect(subject.first.object).to eq RDF::URI.new("http://localhost:8983/fedora/rest/test/#{batch_id}")
34
+ end
27
35
  end
28
36
  end
37
+ end
29
38
 
30
- describe "#has_relationships?" do
31
- it { is_expected.to have_relationships }
39
+ context "with a non-existent target" do
40
+ let(:error_message) { "Target object was not found in Fedora 4. Did you migrate it?" }
41
+ it "raises an error" do
42
+ expect { FedoraMigrate::RelsExtDatastreamMover.new(source) }.to raise_error(FedoraMigrate::Errors::MigrationError, error_message)
32
43
  end
33
-
34
44
  end
35
45
 
36
46
  end
@@ -5,12 +5,23 @@ describe FedoraMigrate::RepositoryMigrator do
5
5
  let(:namespace) { "sufia" }
6
6
 
7
7
  it { is_expected.to respond_to(:source_objects) }
8
- it { is_expected.to respond_to(:results) }
8
+ it { is_expected.to respond_to(:failed) }
9
9
  it { is_expected.to respond_to(:namespace) }
10
10
 
11
- describe "#results" do
12
- specify "are initially empty" do
13
- expect(subject.results).to eql([])
11
+ describe "#failed" do
12
+ specify "sets to zero" do
13
+ expect(subject.failed).to eql(0)
14
+ end
15
+ end
16
+
17
+ context "when forcing" do
18
+ before do
19
+ allow_any_instance_of(FedoraMigrate::RepositoryMigrator).to receive(:source_objects).and_return([])
20
+ allow_any_instance_of(FedoraMigrate::RepositoryMigrator).to receive(:failed).and_return(1)
21
+ end
22
+ subject { FedoraMigrate::RepositoryMigrator.new(namespace, { force: true }) }
23
+ specify "migrate relationships if failures are present" do
24
+ expect(subject.migrate_relationships).to be true
14
25
  end
15
26
  end
16
27
 
@@ -35,7 +46,7 @@ describe FedoraMigrate::RepositoryMigrator do
35
46
  end
36
47
  it "should exclude fedora-system objects" do
37
48
  expect(subject.source_objects).to_not include("fedora-system:ContentModel-3.0")
38
- expect(subject.source_objects.count).to eql 5
49
+ expect(subject.source_objects.count).to eql 9
39
50
  end
40
51
  end
41
52
  end
@@ -0,0 +1,34 @@
1
+ require 'spec_helper'
2
+
3
+ describe FedoraMigrate::TargetConstructor do
4
+
5
+ context "with one qualified model" do
6
+ let(:list) { ["info:fedora/fedora-system:FedoraObject-3.0", "info:fedora/afmodel:String"] }
7
+ subject { FedoraMigrate::TargetConstructor.new(list).build }
8
+ it "should choose the one that is valid" do
9
+ expect(subject.target).to eql String
10
+ end
11
+ end
12
+
13
+ context "with multiple qualified models" do
14
+ let(:list) { ["info:fedora/fedora-system:FedoraObject-3.0", "info:fedora/afmodel:Array", "info:fedora/afmodel:String"] }
15
+ subject { FedoraMigrate::TargetConstructor.new(list).build }
16
+ it "should choose the first one that is valid" do
17
+ expect(subject.target).to eql Array
18
+ end
19
+ end
20
+
21
+ context "with a single qualified model" do
22
+ subject { FedoraMigrate::TargetConstructor.new("info:fedora/afmodel:Array").build }
23
+ it "should be valid" do
24
+ expect(subject.target).to eql Array
25
+ end
26
+ end
27
+
28
+ context "with multiple unqualified models" do
29
+ let(:list) { ["info:fedora/fedora-system:FedoraObject-3.0", "info:fedora/fedora-system:FooObject"] }
30
+ subject { FedoraMigrate::TargetConstructor.new(list).build.target }
31
+ it { is_expected.to be_nil }
32
+ end
33
+
34
+ end
data/tasks/dev.rake CHANGED
@@ -14,7 +14,7 @@ namespace :fixtures do
14
14
  desc "Load Fedora3 fixtures for testing; use FIXTURE_PATH= for your own"
15
15
  task :load do
16
16
  repo = FedoraMigrate.source
17
- path = ENV["FIXTURE_PATH"] || "spec/fixtures/objects"
17
+ path = ENV["FIXTURE_PATH"] || "spec/fixtures/objects"
18
18
  Dir.glob(File.join(path,"*.xml")).each do |f|
19
19
  fixture = File.open(f)
20
20
  begin