fedora-migrate 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/Gemfile +1 -0
  4. data/LICENSE +14 -0
  5. data/README.md +201 -6
  6. data/config/jetty.yml +2 -2
  7. data/fedora-migrate.gemspec +7 -6
  8. data/lib/fedora-migrate.rb +7 -24
  9. data/lib/fedora_migrate/content_mover.rb +49 -0
  10. data/lib/fedora_migrate/datastream_mover.rb +19 -34
  11. data/lib/fedora_migrate/datastream_verification.rb +36 -0
  12. data/lib/fedora_migrate/dates_mover.rb +14 -0
  13. data/lib/fedora_migrate/hooks.rb +23 -0
  14. data/lib/fedora_migrate/migration_options.rb +18 -0
  15. data/lib/fedora_migrate/mover.rb +12 -0
  16. data/lib/fedora_migrate/object_mover.rb +20 -9
  17. data/lib/fedora_migrate/rdf_datastream_mover.rb +31 -14
  18. data/lib/fedora_migrate/rels_ext_datastream_mover.rb +28 -62
  19. data/lib/fedora_migrate/repository_migrator.rb +30 -25
  20. data/lib/fedora_migrate/rubydora_connection.rb +0 -2
  21. data/lib/fedora_migrate/target_constructor.rb +39 -0
  22. data/lib/fedora_migrate/version.rb +1 -1
  23. data/spec/fixtures/objects/scholarsphere_5712mc568.xml +7284 -0
  24. data/spec/fixtures/objects/scholarsphere_7d279232g.xml +20120 -0
  25. data/spec/fixtures/objects/scholarsphere_sf2686078.xml +8823 -0
  26. data/spec/fixtures/objects/scholarsphere_x346dj04v.xml +188 -0
  27. data/spec/fixtures/objects/scholarsphere_x346dj06d.xml +255 -0
  28. data/spec/fixtures/objects/scholarsphere_x346dj08z.xml +1242 -0
  29. data/spec/fixtures/objects/sufia_5m60qr94g.xml +68 -0
  30. data/spec/fixtures/objects/sufia_5m60qr95r.xml +133 -0
  31. data/spec/fixtures/objects/sufia_5m60qr961.xml +133 -0
  32. data/spec/fixtures/objects/sufia_5m60qr979.xml +118 -0
  33. data/spec/integration/content_versions_spec.rb +24 -1
  34. data/spec/integration/missing_relationships_spec.rb +30 -0
  35. data/spec/integration/object_migration_spec.rb +49 -5
  36. data/spec/integration/rdf_migration_spec.rb +38 -13
  37. data/spec/integration/relationship_migration_spec.rb +10 -9
  38. data/spec/integration/repository_migration_spec.rb +46 -19
  39. data/spec/integration/versions_spec.rb +32 -0
  40. data/spec/spec_helper.rb +8 -1
  41. data/spec/support/example_model.rb +56 -0
  42. data/spec/unit/content_mover_spec.rb +78 -0
  43. data/spec/unit/datastream_verification_spec.rb +60 -0
  44. data/spec/unit/dates_mover_spec.rb +33 -0
  45. data/spec/unit/migration_options_spec.rb +61 -0
  46. data/spec/unit/mover_spec.rb +35 -1
  47. data/spec/unit/object_mover_spec.rb +1 -3
  48. data/spec/unit/rels_ext_datastream_mover_spec.rb +28 -18
  49. data/spec/unit/repository_migrator_spec.rb +16 -5
  50. data/spec/unit/target_constructor_spec.rb +34 -0
  51. data/tasks/dev.rake +1 -1
  52. metadata +80 -38
  53. data/LICENSE.txt +0 -22
  54. data/lib/fedora_migrate/rdf_datastream_parser.rb +0 -29
  55. data/lib/fedora_migrate/triple_converter.rb +0 -39
  56. data/spec/fixtures/datastreams/rdf_ntriples_datastream.txt +0 -2
  57. data/spec/unit/rdf_datastream_mover_spec.rb +0 -8
  58. data/spec/unit/rdf_datastream_parser_spec.rb +0 -38
  59. data/spec/unit/triple_converter_spec.rb +0 -35
@@ -0,0 +1,78 @@
1
+ require 'spec_helper'
2
+
3
+ describe FedoraMigrate::ContentMover do
4
+
5
+ let(:nil_source) { double("Source", content: nil, dsid: "datastream") }
6
+ let(:source) do
7
+ double("Source",
8
+ content: "foo",
9
+ dsid: "datastream",
10
+ label: "label",
11
+ mimeType: "mimetype",
12
+ createDate: Time.new(1993, 02, 24, 12, 0, 0, "+09:00") # Rubydora returns Time objects for datastreams' creation dates
13
+ )
14
+ end
15
+ let(:target) { double("Target", content: "") }
16
+
17
+ describe "#migrate" do
18
+ context "without content" do
19
+ subject { FedoraMigrate::ContentMover.new(nil_source, target).migrate }
20
+ it { is_expected.to be true }
21
+ end
22
+ context "with content" do
23
+ subject { FedoraMigrate::ContentMover.new(source, target).migrate }
24
+ before do
25
+ allow_any_instance_of(FedoraMigrate::ContentMover).to receive(:move_content).and_return(true)
26
+ allow_any_instance_of(FedoraMigrate::ContentMover).to receive(:insert_date_created_by_application).and_return(true)
27
+ end
28
+ it { is_expected.to be true }
29
+ end
30
+ end
31
+
32
+ describe "#move_content" do
33
+ before do
34
+ allow(target).to receive(:content=).with("foo")
35
+ allow(target).to receive(:original_name=).with("label")
36
+ allow(target).to receive(:mime_type=).with("mimetype")
37
+ allow(target).to receive(:save).and_return(true)
38
+ allow_any_instance_of(FedoraMigrate::ContentMover).to receive(:insert_date_created_by_application).and_return(true)
39
+ end
40
+ subject do
41
+ FedoraMigrate::ContentMover.new(source, target).move_content
42
+ end
43
+ it { is_expected.to be true }
44
+ end
45
+
46
+ describe "#insert_date_created_by_application" do
47
+ subject { FedoraMigrate::ContentMover.new(source, target).insert_date_created_by_application }
48
+ context "with a successful update" do
49
+ let(:successful_status) { double("Result", status: 204) }
50
+ before { allow_any_instance_of(FedoraMigrate::ContentMover).to receive(:perform_sparql_insert).and_return(successful_status) }
51
+ it { is_expected.to be true }
52
+ end
53
+ context "with an unsuccessful update" do
54
+ let(:unsuccessful_status) { double("Result", status: 404, body: "Error!") }
55
+ before { allow_any_instance_of(FedoraMigrate::ContentMover).to receive(:perform_sparql_insert).and_return(unsuccessful_status) }
56
+ it "should raise an error" do
57
+ expect { subject }.to raise_error FedoraMigrate::Errors::MigrationError
58
+ end
59
+ end
60
+ end
61
+
62
+ describe "#sparql_insert" do
63
+ let(:sample_sparql_query) do
64
+ <<-EOF
65
+ PREFIX premis: <http://www.loc.gov/premis/rdf/v1#>
66
+ PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
67
+ DELETE WHERE { ?s premis:hasDateCreatedByApplication ?o } ;
68
+ INSERT {
69
+ <> premis:hasDateCreatedByApplication "1993-02-24T12:00:00+09:00"^^xsd:dateTime .
70
+ }
71
+ WHERE { }
72
+ EOF
73
+ end
74
+ subject { FedoraMigrate::ContentMover.new(source, target).sparql_insert }
75
+ it { is_expected.to eql sample_sparql_query }
76
+ end
77
+
78
+ end
@@ -0,0 +1,60 @@
1
+ require 'spec_helper'
2
+
3
+ describe FedoraMigrate::DatastreamVerification do
4
+
5
+ class TestSubject
6
+ include FedoraMigrate::DatastreamVerification
7
+ def initialize datastream
8
+ @datastream = datastream
9
+ @source = datastream
10
+ end
11
+ end
12
+
13
+ describe "binary sources from Fedora3" do
14
+ let(:bad_binary_source) { double("Datastream", checksum: "bad", mimeType: "binary", content: "XXXXXX", dsid: "content", pid: "abc123") }
15
+ let(:good_binary_source) { double("Datastream", checksum: "foo", mimeType: "binary", content: "foo", dsid: "content", pid: "abc123") }
16
+ let(:missing_checksum) { double("Datastream", checksum: "missing", mimeType: "binary", content: "foo", dsid: "content", pid: "abc123") }
17
+ context "that match Fedora4's checksum" do
18
+ subject { TestSubject.new(good_binary_source) }
19
+ before { allow(subject).to receive(:target_checksum).once.and_return("foo") }
20
+ it { is_expected.to have_matching_checksums }
21
+ it { is_expected.to be_valid }
22
+ end
23
+ context "that do not match Fedora4's checksum" do
24
+ subject { TestSubject.new(bad_binary_source) }
25
+ before { allow(subject).to receive(:target_checksum).twice.and_return("bar") }
26
+ specify "are not valid and logged" do
27
+ expect(FedoraMigrate::Logger).to receive(:warn)
28
+ expect(subject).to_not be_valid
29
+ end
30
+ end
31
+ context "when the checksum is missing" do
32
+ subject { TestSubject.new(missing_checksum) }
33
+ context "and a newly calculated checksum matches" do
34
+ before { allow(subject).to receive(:target_checksum).twice.and_return(Digest::SHA1.hexdigest("foo")) }
35
+ it { is_expected.to have_matching_checksums }
36
+ it { is_expected.to be_valid }
37
+ end
38
+ context "and a newly calculated checksum does not match" do
39
+ before { expect_any_instance_of(TestSubject).to receive(:target_checksum).twice.and_return(Digest::SHA1.hexdigest("bar")) }
40
+ specify "are not valid and logged" do
41
+ expect(FedoraMigrate::Logger).to receive(:warn)
42
+ expect(subject).to_not be_valid
43
+ end
44
+ end
45
+ end
46
+ end
47
+
48
+ describe "xml sources from Fedora3" do
49
+ subject { TestSubject.new(double("Datastream", checksum: "invalid", mimeType: "text/xml", content: "<bar></bar>")) }
50
+ context "when the datastream content is correctly altered upon migration" do
51
+ before { allow(subject).to receive(:target_content).once.and_return("<?xml version=\"1.0\"?>\n<bar></bar>") }
52
+ it { is_expected.to have_matching_nokogiri_checksums }
53
+ end
54
+ context "when the datastream content is incorrectly altered upon migration" do
55
+ before { allow(subject).to receive(:target_content).once.and_return("<?xml version=\"1.0\"?>\n<baz></baz>") }
56
+ it { is_expected.to_not have_matching_nokogiri_checksums }
57
+ end
58
+ end
59
+
60
+ end
@@ -0,0 +1,33 @@
1
+ require 'spec_helper'
2
+
3
+ describe FedoraMigrate::DatesMover do
4
+ let(:target) { ExampleModel::RDFObject.new }
5
+ let(:source) { instance_double('Source', createdDate: 'yesterday', lastModifiedDate: 'today') }
6
+
7
+ subject { FedoraMigrate::DatesMover.new(source, target) }
8
+
9
+ describe '#migrate' do
10
+
11
+ it 'migrates the create and mod dates' do
12
+ subject.migrate
13
+ expect(target.date_uploaded).to eq 'yesterday'
14
+ expect(target.date_modified).to eq 'today'
15
+ end
16
+
17
+ context "when the source methods don't exist" do
18
+ let(:source) { instance_double('Source with no date methods') }
19
+ it 'gracefully does nothing' do
20
+ expect{ subject.migrate }.to_not raise_error
21
+ expect(target.date_uploaded).to be_nil
22
+ expect(target.date_modified).to be_nil
23
+ end
24
+ end
25
+
26
+ context "when the target methods don't exist" do
27
+ let(:target) { instance_double('Target with no date methods') }
28
+ it 'gracefully does nothing' do
29
+ expect{ subject.migrate }.to_not raise_error
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
+ describe FedoraMigrate::MigrationOptions do
4
+
5
+ class TestCase
6
+ include FedoraMigrate::MigrationOptions
7
+ end
8
+
9
+ describe "#conversion_options" do
10
+ subject do
11
+ TestCase.new.tap do |example|
12
+ example.options = { convert: 'datastream' }
13
+ end
14
+ end
15
+ specify "sets the name of the datastream to convert" do
16
+ expect(subject.conversion_options).to include "datastream"
17
+ end
18
+ it { is_expected.to be_not_forced }
19
+ end
20
+
21
+ describe "#forced?" do
22
+ context "when set to true" do
23
+ subject do
24
+ TestCase.new.tap do |example|
25
+ example.options = { convert: "datastream", force: true }
26
+ end
27
+ end
28
+ it { is_expected.to be_forced }
29
+ end
30
+ context "when set to false" do
31
+ subject do
32
+ TestCase.new.tap do |example|
33
+ example.options = { force: false }
34
+ end
35
+ end
36
+ it { is_expected.to be_not_forced }
37
+ end
38
+ context "by default" do
39
+ subject { TestCase.new }
40
+ it { is_expected.to be_not_forced }
41
+ end
42
+ end
43
+
44
+ describe "#application_creates_versions" do
45
+ context "by default" do
46
+ subject do
47
+ TestCase.new.application_creates_versions?
48
+ end
49
+ it { is_expected.to be false }
50
+ end
51
+ context "when our own Hydra application creates versions" do
52
+ subject do
53
+ TestCase.new.tap do |example|
54
+ example.options = { application_creates_versions: true }
55
+ end
56
+ end
57
+ it { is_expected.to be_application_creates_versions }
58
+ end
59
+ end
60
+
61
+ end
@@ -7,7 +7,6 @@ describe FedoraMigrate::Mover do
7
7
  it { is_expected.to respond_to :options }
8
8
 
9
9
  describe "#new" do
10
-
11
10
  context "with two arguments" do
12
11
  subject { FedoraMigrate::Mover.new("foo", "bar") }
13
12
  specify "has a source" do
@@ -34,6 +33,41 @@ describe FedoraMigrate::Mover do
34
33
  expect(subject.target).to be_nil
35
34
  end
36
35
  end
36
+ end
37
37
 
38
+ describe "::id_component" do
39
+ context "with a Rubydora object" do
40
+ let(:id) { "rb68xc11m" }
41
+ let(:object) { FedoraMigrate.source.connection.find("sufia:#{id}") }
42
+ subject { FedoraMigrate::Mover.id_component(object) }
43
+ it { is_expected.to eql(id) }
44
+ end
45
+ context "with a URI" do
46
+ let(:object) { RDF::URI.new("foo:bar") }
47
+ subject { FedoraMigrate::Mover.id_component(object) }
48
+ it { is_expected.to eql("bar") }
49
+ end
50
+ context "with a string" do
51
+ let(:object) { "foo:bar" }
52
+ subject { FedoraMigrate::Mover.id_component(object) }
53
+ it { is_expected.to eql("bar") }
54
+ end
38
55
  end
56
+
57
+ describe "#id_component" do
58
+ context "with a source" do
59
+ subject { FedoraMigrate::Mover.new("source:pid").id_component }
60
+ it { is_expected.to eql("pid") }
61
+ end
62
+ context "object, but no source" do
63
+ subject { FedoraMigrate::Mover.new.id_component("source:pid") }
64
+ it { is_expected.to eql("pid") }
65
+ end
66
+ context "neither object, nor source" do
67
+ specify "raises an error" do
68
+ expect { FedoraMigrate::Mover.new.id_component }.to raise_error(FedoraMigrate::Errors::MigrationError)
69
+ end
70
+ end
71
+ end
72
+
39
73
  end
@@ -7,7 +7,6 @@ describe FedoraMigrate::ObjectMover do
7
7
  end
8
8
 
9
9
  describe "#new" do
10
-
11
10
  it { is_expected.to respond_to :source }
12
11
  it { is_expected.to respond_to :target }
13
12
  it { is_expected.to respond_to :post_initialize }
@@ -19,8 +18,7 @@ describe FedoraMigrate::ObjectMover do
19
18
  end
20
19
  it "should call the before hook and save the target" do
21
20
  expect_any_instance_of(FedoraMigrate::ObjectMover).to receive(:before_object_migration)
22
- expect_any_instance_of(FedoraMigrate::ObjectMover).to receive(:save).and_return(true)
23
- expect(subject).to be true
21
+ expect(subject).to be nil
24
22
  end
25
23
  end
26
24
 
@@ -2,35 +2,45 @@ require 'spec_helper'
2
2
 
3
3
  describe FedoraMigrate::RelsExtDatastreamMover do
4
4
 
5
- context "with a target" do
5
+ let(:file_id) { "rb68xc11m" }
6
+ let(:batch_id) { "rb68xc09k" }
7
+ let(:source) { FedoraMigrate.source.connection.find("sufia:#{file_id}") }
8
+ let(:query) { subject.target }
6
9
 
10
+ context "with target objects present in Fedora 4" do
7
11
  before do
8
- allow_any_instance_of(FedoraMigrate::RelsExtDatastreamMover).to receive(:retrieve_subject).and_return("subject")
9
- allow_any_instance_of(FedoraMigrate::RelsExtDatastreamMover).to receive(:retrieve_object).and_return("object")
12
+ ActiveFedora::Base.create(id: file_id)
13
+ ActiveFedora::Base.create(id: batch_id)
10
14
  end
11
15
 
12
- subject do
13
- FedoraMigrate::RelsExtDatastreamMover.new(
14
- FedoraMigrate.source.connection.find("sufia:rb68xc11m")
15
- )
16
- end
17
-
18
- describe "#relationships" do
19
- it "should parse the source's RELS-EXT datastream for relationships" do
20
- expect(subject.relationships).to include(:part_of => ["object"])
16
+ describe "#initialize" do
17
+ context "without a target" do
18
+ subject { FedoraMigrate::RelsExtDatastreamMover.new(source).target }
19
+ it { is_expected.to be_kind_of(ActiveFedora::Base) }
20
+ end
21
+ context "with a supplied target" do
22
+ subject { FedoraMigrate::RelsExtDatastreamMover.new(source, "a target").target }
23
+ it { is_expected.to eql "a target" }
21
24
  end
22
25
  end
23
26
 
24
- describe "#ng_xml" do
25
- it "should return a Nokogiri document of the object's RELS-EXT datastream" do
26
- expect(subject.ng_xml).to be_kind_of(Nokogiri::XML::Document)
27
+ describe "#migrate" do
28
+ context "with an existing target" do
29
+ before { FedoraMigrate::RelsExtDatastreamMover.new(source).migrate }
30
+ subject { ActiveFedora::Base.find(file_id).ldp_source.graph.query([nil, ActiveFedora::RDF::Fcrepo::RelsExt.isPartOf, nil]) }
31
+ it "migrates RDF relationships" do
32
+ expect(subject.first.subject).to eq RDF::URI.new("http://localhost:8983/fedora/rest/test/#{file_id}")
33
+ expect(subject.first.object).to eq RDF::URI.new("http://localhost:8983/fedora/rest/test/#{batch_id}")
34
+ end
27
35
  end
28
36
  end
37
+ end
29
38
 
30
- describe "#has_relationships?" do
31
- it { is_expected.to have_relationships }
39
+ context "with a non-existent target" do
40
+ let(:error_message) { "Target object was not found in Fedora 4. Did you migrate it?" }
41
+ it "raises an error" do
42
+ expect { FedoraMigrate::RelsExtDatastreamMover.new(source) }.to raise_error(FedoraMigrate::Errors::MigrationError, error_message)
32
43
  end
33
-
34
44
  end
35
45
 
36
46
  end
@@ -5,12 +5,23 @@ describe FedoraMigrate::RepositoryMigrator do
5
5
  let(:namespace) { "sufia" }
6
6
 
7
7
  it { is_expected.to respond_to(:source_objects) }
8
- it { is_expected.to respond_to(:results) }
8
+ it { is_expected.to respond_to(:failed) }
9
9
  it { is_expected.to respond_to(:namespace) }
10
10
 
11
- describe "#results" do
12
- specify "are initially empty" do
13
- expect(subject.results).to eql([])
11
+ describe "#failed" do
12
+ specify "sets to zero" do
13
+ expect(subject.failed).to eql(0)
14
+ end
15
+ end
16
+
17
+ context "when forcing" do
18
+ before do
19
+ allow_any_instance_of(FedoraMigrate::RepositoryMigrator).to receive(:source_objects).and_return([])
20
+ allow_any_instance_of(FedoraMigrate::RepositoryMigrator).to receive(:failed).and_return(1)
21
+ end
22
+ subject { FedoraMigrate::RepositoryMigrator.new(namespace, { force: true }) }
23
+ specify "migrate relationships if failures are present" do
24
+ expect(subject.migrate_relationships).to be true
14
25
  end
15
26
  end
16
27
 
@@ -35,7 +46,7 @@ describe FedoraMigrate::RepositoryMigrator do
35
46
  end
36
47
  it "should exclude fedora-system objects" do
37
48
  expect(subject.source_objects).to_not include("fedora-system:ContentModel-3.0")
38
- expect(subject.source_objects.count).to eql 5
49
+ expect(subject.source_objects.count).to eql 9
39
50
  end
40
51
  end
41
52
  end
@@ -0,0 +1,34 @@
1
+ require 'spec_helper'
2
+
3
+ describe FedoraMigrate::TargetConstructor do
4
+
5
+ context "with one qualified model" do
6
+ let(:list) { ["info:fedora/fedora-system:FedoraObject-3.0", "info:fedora/afmodel:String"] }
7
+ subject { FedoraMigrate::TargetConstructor.new(list).build }
8
+ it "should choose the one that is valid" do
9
+ expect(subject.target).to eql String
10
+ end
11
+ end
12
+
13
+ context "with multiple qualified models" do
14
+ let(:list) { ["info:fedora/fedora-system:FedoraObject-3.0", "info:fedora/afmodel:Array", "info:fedora/afmodel:String"] }
15
+ subject { FedoraMigrate::TargetConstructor.new(list).build }
16
+ it "should choose the first one that is valid" do
17
+ expect(subject.target).to eql Array
18
+ end
19
+ end
20
+
21
+ context "with a single qualified model" do
22
+ subject { FedoraMigrate::TargetConstructor.new("info:fedora/afmodel:Array").build }
23
+ it "should be valid" do
24
+ expect(subject.target).to eql Array
25
+ end
26
+ end
27
+
28
+ context "with multiple unqualified models" do
29
+ let(:list) { ["info:fedora/fedora-system:FedoraObject-3.0", "info:fedora/fedora-system:FooObject"] }
30
+ subject { FedoraMigrate::TargetConstructor.new(list).build.target }
31
+ it { is_expected.to be_nil }
32
+ end
33
+
34
+ end
data/tasks/dev.rake CHANGED
@@ -14,7 +14,7 @@ namespace :fixtures do
14
14
  desc "Load Fedora3 fixtures for testing; use FIXTURE_PATH= for your own"
15
15
  task :load do
16
16
  repo = FedoraMigrate.source
17
- path = ENV["FIXTURE_PATH"] || "spec/fixtures/objects"
17
+ path = ENV["FIXTURE_PATH"] || "spec/fixtures/objects"
18
18
  Dir.glob(File.join(path,"*.xml")).each do |f|
19
19
  fixture = File.open(f)
20
20
  begin