rof 0.0.1.pre → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/.ruby-version +1 -1
  3. data/.travis.yml +12 -2
  4. data/Gemfile +1 -0
  5. data/README.md +87 -0
  6. data/bin/.ruby-version +1 -0
  7. data/bin/csv_to_rof +26 -0
  8. data/bin/fedora_to_rof +57 -0
  9. data/bin/osf_to_rof +40 -0
  10. data/bin/rof +78 -0
  11. data/bulk-ingest.md +242 -0
  12. data/labels.md +111 -0
  13. data/lib/rof.rb +20 -1
  14. data/lib/rof/access.rb +57 -0
  15. data/lib/rof/cli.rb +122 -0
  16. data/lib/rof/collection.rb +109 -0
  17. data/lib/rof/compare_rof.rb +92 -0
  18. data/lib/rof/filters/bendo.rb +33 -0
  19. data/lib/rof/filters/date_stamp.rb +36 -0
  20. data/lib/rof/filters/file_to_url.rb +27 -0
  21. data/lib/rof/filters/label.rb +153 -0
  22. data/lib/rof/filters/work.rb +111 -0
  23. data/lib/rof/get_from_fedora.rb +196 -0
  24. data/lib/rof/ingest.rb +204 -0
  25. data/lib/rof/ingesters/rels_ext_ingester.rb +78 -0
  26. data/lib/rof/ingesters/rights_metadata_ingester.rb +68 -0
  27. data/lib/rof/osf_context.rb +19 -0
  28. data/lib/rof/osf_to_rof.rb +122 -0
  29. data/lib/rof/rdf_context.rb +36 -0
  30. data/lib/rof/translate_csv.rb +112 -0
  31. data/lib/rof/utility.rb +84 -0
  32. data/lib/rof/version.rb +2 -2
  33. data/rof.gemspec +17 -0
  34. data/spec/fixtures/a.json +4 -0
  35. data/spec/fixtures/label.json +20 -0
  36. data/spec/fixtures/osf/b6psa.tar.gz +0 -0
  37. data/spec/fixtures/rof/dev0012829m.rof +45 -0
  38. data/spec/fixtures/vcr_tests/fedora_to_rof1.yml +5274 -0
  39. data/spec/fixtures/vecnet-citation.json +73 -0
  40. data/spec/lib/rof/access_spec.rb +36 -0
  41. data/spec/lib/rof/cli_spec.rb +66 -0
  42. data/spec/lib/rof/collection_spec.rb +90 -0
  43. data/spec/lib/rof/compare_rof_spec.rb +263 -0
  44. data/spec/lib/rof/filters/date_stamp_spec.rb +90 -0
  45. data/spec/lib/rof/filters/file_to_url_spec.rb +70 -0
  46. data/spec/lib/rof/filters/label_spec.rb +94 -0
  47. data/spec/lib/rof/filters/work_spec.rb +87 -0
  48. data/spec/lib/rof/ingest_spec.rb +117 -0
  49. data/spec/lib/rof/ingesters/rels_ext_ingester_spec.rb +62 -0
  50. data/spec/lib/rof/ingesters/rights_metadata_ingester_spec.rb +114 -0
  51. data/spec/lib/rof/osf_to_rof_spec.rb +76 -0
  52. data/spec/lib/rof/translate_csv_spec.rb +109 -0
  53. data/spec/lib/rof/utility_spec.rb +64 -0
  54. data/spec/lib/rof_spec.rb +14 -0
  55. data/spec/spec_helper.rb +11 -11
  56. metadata +283 -18
@@ -0,0 +1,90 @@
1
+ require 'spec_helper'
2
+
3
+ module ROF
4
+ module Filters
5
+ describe DateStamp do
6
+ before(:all) do
7
+ @today = Date.new(2015, 1, 23)
8
+ @today_s = "2015-01-23Z"
9
+ @w = DateStamp.new(@today)
10
+ end
11
+
12
+ it "it adds a metadata section if needed" do
13
+ items = [{
14
+ "type" => "ABC"
15
+ }]
16
+ after = @w.process(items, '')
17
+ expect(after.length).to eq(1)
18
+ expect(after.first).to eq({
19
+ "type" => "ABC",
20
+ "metadata" => {
21
+ "@context" => ROF::RdfContext,
22
+ "dc:dateSubmitted" => @today_s,
23
+ "dc:modified" => @today_s
24
+ }
25
+ })
26
+ end
27
+
28
+ it "adds a metadata relation if missing" do
29
+ items = [{
30
+ "type" => "BCD",
31
+ "metadata" => {
32
+ "dc:title" => "something"
33
+ }
34
+ }]
35
+ after = @w.process(items, '')
36
+ expect(after.length).to eq(1)
37
+ expect(after.first).to eq({
38
+ "type" => "BCD",
39
+ "metadata" => {
40
+ "dc:title" => "something",
41
+ "dc:dateSubmitted" => @today_s,
42
+ "dc:modified" => @today_s
43
+ }
44
+ })
45
+ end
46
+
47
+ it "doesn't mess with exsiting values" do
48
+ items = [{
49
+ "type" => "CDE",
50
+ "metadata" => {
51
+ "dc:title" => "anotherthing",
52
+ "dc:dateSubmitted" => "any date"
53
+ }
54
+ }]
55
+ after = @w.process(items, '')
56
+ expect(after.length).to eq(1)
57
+ expect(after.first).to eq({
58
+ "type" => "CDE",
59
+ "metadata" => {
60
+ "dc:title" => "anotherthing",
61
+ "dc:dateSubmitted" => "any date",
62
+ "dc:modified" => @today_s
63
+ }
64
+ })
65
+ end
66
+
67
+ it "always update the date modified" do
68
+ items = [{
69
+ "type" => "CDE",
70
+ "metadata" => {
71
+ "dc:title" => "anotherthing",
72
+ "dc:dateSubmitted" => "any date",
73
+ "dc:modified" => "any date"
74
+ }
75
+ }]
76
+ after = @w.process(items, '')
77
+ expect(after.length).to eq(1)
78
+ expect(after.first).to eq({
79
+ "type" => "CDE",
80
+ "metadata" => {
81
+ "dc:title" => "anotherthing",
82
+ "dc:dateSubmitted" => "any date",
83
+ "dc:modified" => @today_s
84
+ }
85
+ })
86
+ end
87
+
88
+ end
89
+ end
90
+ end
@@ -0,0 +1,70 @@
1
+ require 'spec_helper'
2
+
3
+ module ROF
4
+ module Filters
5
+ describe FileToUrl do
6
+ before(:all) do
7
+ @w = FileToUrl.new
8
+ end
9
+
10
+ it "skips rof objects which don't have bendo-items" do
11
+ items = [{
12
+ "type" => "ABC"
13
+ }]
14
+ after = @w.process(items, '')
15
+ expect(after.length).to eq(1)
16
+ expect(after.first).to eq({
17
+ "type" => "ABC"
18
+ })
19
+ end
20
+
21
+ it "skips rof object which don't have a content-file" do
22
+ items = [{
23
+ "type" => "ABC",
24
+ "bendo-item" => "12345"
25
+ },
26
+ {
27
+ "bendo-item" => "12345",
28
+ "thumbnail-content" => "a_file.png"
29
+ }]
30
+ after = @w.process(items, '')
31
+ expect(after.length).to eq(2)
32
+ expect(after.first).to eq({
33
+ "type" => "ABC",
34
+ "bendo-item" => "12345"
35
+ })
36
+ expect(after.last).to eq({
37
+ "bendo-item" => "12345",
38
+ "thumbnail-content" => "a_file.png"
39
+ })
40
+ end
41
+
42
+ it "converts content files into URLs" do
43
+ items = [{
44
+ "bendo-item" => "12345",
45
+ "content-file" => "a/file.txt"
46
+ },{
47
+ "bendo-item" => "12345",
48
+ "content-file" => "b/file.png",
49
+ "content-meta" => {
50
+ "mime-type" => "image/png"
51
+ }}]
52
+ after = @w.process(items, '')
53
+ expect(after.length).to eq(2)
54
+ expect(after.first).to eq({
55
+ "bendo-item" => "12345",
56
+ "content-meta" => {
57
+ "URL" => "bendo:/item/12345/a/file.txt"
58
+ }
59
+ })
60
+ expect(after.last).to eq({
61
+ "bendo-item" => "12345",
62
+ "content-meta" => {
63
+ "mime-type" => "image/png",
64
+ "URL" => "bendo:/item/12345/b/file.png"
65
+ }
66
+ })
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,94 @@
1
+ require 'spec_helper'
2
+
3
+ module ROF
4
+ module Filters
5
+ describe Label do
6
+ let(:ids) { ["101", "102", "103", "104", "105"] }
7
+ before(:each) {
8
+ @labeler = Label.new(nil, id_list: ids)
9
+ }
10
+ it "ignores non-fojects" do
11
+ list = [{"type" => "not fobject"}]
12
+ expect(@labeler.process(list, '')).to eq([{"type" => "not fobject"}])
13
+ end
14
+ it "skips already assigned ids" do
15
+ list = [{"type" => "fobject", "pid" => "123"}]
16
+ expect(@labeler.process(list, '')).to eq([{"type" => "fobject", "pid" => "123", "bendo-item" => "123"}])
17
+ end
18
+ it "assignes missing pids" do
19
+ list = [{"type" => "fobject"}]
20
+ expect(@labeler.process(list, '')).to eq([{"type" => "fobject", "pid" => "101", "bendo-item" => "101"}])
21
+ end
22
+ it "assignes pids which are labels" do
23
+ list = [{"type" => "fobject", "pid" => "$(zzz)"}]
24
+ expect(@labeler.process(list, '')).to eq([{"type" => "fobject", "pid" => "101", "bendo-item" => "101"}])
25
+ end
26
+ it "resolves loops" do
27
+ list = [{"type" => "fobject",
28
+ "pid" => "$(zzz)",
29
+ "rels-ext" => {
30
+ "partOf" => ["123", "$(zzz)"]
31
+ }}]
32
+ expect(@labeler.process(list, '')).to eq([{"type" => "fobject",
33
+ "pid" => "101",
34
+ "bendo-item"=>"101",
35
+ "rels-ext" => {
36
+ "partOf" => ["123", "101"]}}])
37
+ end
38
+ it "handles multiple items" do
39
+ list = [{"type" => "fobject",
40
+ "pid" => "$(zzz)",
41
+ "rels-ext" => {
42
+ "partOf" => ["123", "$(zzz)"]
43
+ }},
44
+ {"type" => "fobject",
45
+ "rels-ext" => { "memberOf" => ["$(zzz)"]}}]
46
+ expect(@labeler.process(list, '')).to eq([
47
+ {"type" => "fobject",
48
+ "pid" => "101",
49
+ "bendo-item" => "101",
50
+ "rels-ext" => {
51
+ "partOf" => ["123", "101"]
52
+ }},
53
+ {"type" => "fobject",
54
+ "pid" => "102",
55
+ "bendo-item" => "101",
56
+ "rels-ext" => {
57
+ "memberOf" => ["101"]
58
+ }}
59
+ ])
60
+ end
61
+ it "errors on undefined labels" do
62
+ list = [{"type" => "fobject",
63
+ "rels-ext" => {
64
+ "partOf" => ["123", "$(zzz)"]
65
+ }}]
66
+ expect { @labeler.process(list, '') }.to raise_error(Label::MissingLabel)
67
+ end
68
+
69
+ it "replaces labels in arrays" do
70
+ list = ["a", "something $(b) and $(a)", "$(not a label)"]
71
+ labels = {"a" => "abc", "b" => "qwe"}
72
+ expect(@labeler.replace_labels(list, labels, false)).to eq(["a", "something qwe and abc", "$(not a label)"])
73
+
74
+ hash = {"$(a)" => "this should $(b)", sym: :symbol, b: {b: "$(a) $(z)"}}
75
+ expect(@labeler.replace_labels(hash, labels, false)).to eq({
76
+ "$(a)" => "this should qwe",
77
+ sym: :symbol,
78
+ b: {b: "abc $(z)"}
79
+ })
80
+ end
81
+
82
+ it "handles pids in isMemberOf" do
83
+ list = [
84
+ {"type" => "fobject", "pid" => "$(zzz)"},
85
+ {"type" => "fobject", "rels-ext" => { "isMemberOfCollection" => ["$(zzz)"]}}
86
+ ]
87
+ expect(@labeler.process(list, '')).to eq([
88
+ {"type" => "fobject", "pid" => "101", "bendo-item" =>"101"},
89
+ {"type" => "fobject", "pid" => "102", "bendo-item" =>"101", "rels-ext" => { "isMemberOfCollection" => ["101"]}}
90
+ ])
91
+ end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,87 @@
1
+ require 'spec_helper'
2
+
3
+ module ROF
4
+ module Filters
5
+ describe Work do
6
+ it "handles variant work types" do
7
+ w = Work.new
8
+
9
+ item = {"type" => "Work", "owner" => "user1"}
10
+ after = w.process_one_work(item)
11
+ expect(after.first).to include("type" => "fobject", "af-model" => "GenericWork")
12
+
13
+ item = {"type" => "Work-Image", "owner" => "user1"}
14
+ after = w.process_one_work(item)
15
+ expect(after.first).to include("type" => "fobject", "af-model" => "Image")
16
+
17
+ item = {"type" => "work-image", "owner" => "user1"}
18
+ after = w.process_one_work(item)
19
+ expect(after.first).to include("type" => "fobject", "af-model" => "image")
20
+
21
+ item = {"type" => "Image", "owner" => "user1"}
22
+ after = w.process_one_work(item)
23
+ expect(after.first).to include("type" => "fobject", "af-model" => "Image")
24
+
25
+ item = {"type" => "image", "owner" => "user1"}
26
+ after = w.process_one_work(item)
27
+ expect(after.first).to include("type" => "fobject", "af-model" => "Image")
28
+
29
+ item = {"type" => "Other", "owner" => "user1"}
30
+ after = w.process_one_work(item)
31
+ expect(after.first).to eq(item)
32
+ end
33
+
34
+ it "makes the first file be the representative" do
35
+ w = Work.new
36
+
37
+ item = {"type" => "Work", "owner" => "user1", "files" => ["a.txt", "b.jpeg"]}
38
+ after = w.process_one_work(item)
39
+ expect(after.length).to eq(3)
40
+ expect(after[0]).to include("type" => "fobject",
41
+ "af-model" => "GenericWork",
42
+ "pid" => "$(pid--0)",
43
+ "properties" => ROF::Utility.prop_ds("user1", "$(pid--1)"))
44
+ expect(after[1]).to include("type" => "fobject",
45
+ "af-model" => "GenericFile",
46
+ "pid" => "$(pid--1)")
47
+ expect(after[2]).to include("type" => "fobject",
48
+ "af-model" => "GenericFile")
49
+ expect(after[2]["metadata"]).to include("dc:title" => "b.jpeg")
50
+ end
51
+
52
+ it "decodes files correctly" do
53
+ w = Work.new
54
+
55
+ item = {
56
+ "type" => "Work",
57
+ "owner" => "user1",
58
+ "rights" => {"edit" => ["user1"]},
59
+ "metadata" => {
60
+ "@context" => RdfContext,
61
+ "dc:title" => "Q, A Letter"},
62
+ "files" => [
63
+ "thumb",
64
+ {
65
+ "type" => "+",
66
+ "owner" => "user1",
67
+ "files" => ["extra file.txt"],
68
+ "rights" => {"edit" => ["user1"]}
69
+ }]
70
+ }
71
+ after = w.process_one_work(item)
72
+ expect(after.length).to eq(3)
73
+ expect(after[0]).to include("type" => "fobject",
74
+ "af-model" => "GenericWork",
75
+ "rels-ext" => {},
76
+ "pid" => "$(pid--0)")
77
+ expect(after[1]).to include("type" => "fobject",
78
+ "af-model" => "GenericFile",
79
+ "pid" => "$(pid--1)",
80
+ "content-file" => "thumb")
81
+ expect(after[2]).to include("type" => "fobject",
82
+ "af-model" => "GenericFile",
83
+ "content-file" => "extra file.txt")
84
+ end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,117 @@
1
+ require 'spec_helper'
2
+
3
+ module ROF
4
+ describe "Ingest" do
5
+ it "requires a fobject" do
6
+ item = {"type" => "not fobject"}
7
+ expect {ROF.Ingest(item)}.to raise_error(ROF::NotFobjectError)
8
+ end
9
+ it "requires a pid" do
10
+ item = {"type" => "fobject"}
11
+ expect {ROF.Ingest(item)}.to raise_error(ROF::MissingPidError)
12
+ end
13
+ it "disallows both id and pid" do
14
+ item = {"type" => "fobject", "id" => '1', "pid" => '1'}
15
+ expect {ROF.Ingest(item)}.to raise_error(ROF::TooManyIdentitiesError)
16
+ end
17
+ it "rejects two ways of giving a datastream" do
18
+ item = {"type" => "fobject",
19
+ "pid" => "test:1",
20
+ "content" => "hello",
21
+ "content-file" => "filename.txt"
22
+ }
23
+ expect {ROF.Ingest(item)}.to raise_error(ROF::SourceError)
24
+ end
25
+ it "uploads datastreams with apropos metadata" do
26
+ item = {"type" => "fobject",
27
+ "pid" => "test:1",
28
+ "af-model" => "GenericFile",
29
+ "content" => "jello",
30
+ "content-meta" => {"label" => "test stream 1",
31
+ "mime-type" => "application/jello"},
32
+ "other-meta" => {"label" => "test stream 2"},
33
+ }
34
+ expect(ROF.Ingest(item)).to eq(["rels-ext", "content", "other"])
35
+ end
36
+
37
+ it "treats id as a surrogate for pid when pid is missing" do
38
+ item = {"type" => "fobject",
39
+ "id" => "test:1",
40
+ "af-model" => "GenericFile",
41
+ "content" => "jello",
42
+ "content-meta" => {"label" => "test stream 1",
43
+ "mime-type" => "application/jello"},
44
+ "other-meta" => {"label" => "test stream 2"},
45
+ }
46
+ expect(ROF.Ingest(item)).to eq(["rels-ext", "content", "other"])
47
+ end
48
+
49
+ it "doesn't touch the rels ext if the model and rels-ext key are missing" do
50
+ item = {"type" => "fobject",
51
+ "id" => "test:1",
52
+ "content" => "jello",
53
+ "content-meta" => {"label" => "test stream 1",
54
+ "mime-type" => "application/jello"},
55
+ "other-meta" => {"label" => "test stream 2"},
56
+ }
57
+ expect(ROF.Ingest(item)).to eq(["content", "other"])
58
+ end
59
+
60
+ it "raises an error if content is not a string" do
61
+ item = {"type" => "fobject",
62
+ "id" => "test:1",
63
+ "af-model" => "GenericFile",
64
+ "content" => ["list", "of", "items"]
65
+ }
66
+ expect {ROF.Ingest(item)}.to raise_error(ROF::SourceError)
67
+ end
68
+
69
+ it "ignores null data streams" do
70
+ item = {"type" => "fobject",
71
+ "id" => "test:1",
72
+ "af-model" => "GenericFile",
73
+ "content" => nil
74
+ }
75
+ expect(ROF.Ingest(item)).to eq(["rels-ext", "content"])
76
+ end
77
+
78
+ describe "RDF Metadata" do
79
+ it "loads JSON-LD" do
80
+ item = {"pid" => "test:1",
81
+ "metadata" => {
82
+ "@context" => {
83
+ "dc" => "http://purl.org/dc/terms/",
84
+ },
85
+ "dc:title" => "Hello Z",
86
+ }
87
+ }
88
+ s = ROF.ingest_ld_metadata(item, nil)
89
+ expect(s).to eq %(<info:fedora/test:1> <http://purl.org/dc/terms/title> "Hello Z" .\n)
90
+ end
91
+
92
+ it "handles @graph objects" do
93
+ item = {"pid" => "test:1",
94
+ "metadata" => {
95
+ "@context" => {
96
+ "dc" => "http://purl.org/dc/terms/",
97
+ "dc:creator" => {"@type" => "@id"},
98
+ },
99
+ "@graph" => [
100
+ {"@id" => "_:b0",
101
+ "dc:title" => "Hello"},
102
+ {"@id" => "info:fedora/test:1",
103
+ "dc:creator" => "_:b0"},
104
+ ]}}
105
+ s = ROF.ingest_ld_metadata(item, nil)
106
+ s = s.split("\n").sort.join("\n") # canonicalize the line ordering
107
+ expect(s).to eq %(<info:fedora/test:1> <http://purl.org/dc/terms/creator> _:b0 .\n_:b0 <http://purl.org/dc/terms/title> "Hello" .)
108
+ end
109
+ end
110
+ end
111
+
112
+ describe "file_searching" do
113
+ it "raises an error on missing files" do
114
+ expect {ROF.find_file_and_open("file.txt",[],"r")}.to raise_error(Errno::ENOENT)
115
+ end
116
+ end
117
+ end