rof 0.0.1.pre → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +4 -4
  2. data/.ruby-version +1 -1
  3. data/.travis.yml +12 -2
  4. data/Gemfile +1 -0
  5. data/README.md +87 -0
  6. data/bin/.ruby-version +1 -0
  7. data/bin/csv_to_rof +26 -0
  8. data/bin/fedora_to_rof +57 -0
  9. data/bin/osf_to_rof +40 -0
  10. data/bin/rof +78 -0
  11. data/bulk-ingest.md +242 -0
  12. data/labels.md +111 -0
  13. data/lib/rof.rb +20 -1
  14. data/lib/rof/access.rb +57 -0
  15. data/lib/rof/cli.rb +122 -0
  16. data/lib/rof/collection.rb +109 -0
  17. data/lib/rof/compare_rof.rb +92 -0
  18. data/lib/rof/filters/bendo.rb +33 -0
  19. data/lib/rof/filters/date_stamp.rb +36 -0
  20. data/lib/rof/filters/file_to_url.rb +27 -0
  21. data/lib/rof/filters/label.rb +153 -0
  22. data/lib/rof/filters/work.rb +111 -0
  23. data/lib/rof/get_from_fedora.rb +196 -0
  24. data/lib/rof/ingest.rb +204 -0
  25. data/lib/rof/ingesters/rels_ext_ingester.rb +78 -0
  26. data/lib/rof/ingesters/rights_metadata_ingester.rb +68 -0
  27. data/lib/rof/osf_context.rb +19 -0
  28. data/lib/rof/osf_to_rof.rb +122 -0
  29. data/lib/rof/rdf_context.rb +36 -0
  30. data/lib/rof/translate_csv.rb +112 -0
  31. data/lib/rof/utility.rb +84 -0
  32. data/lib/rof/version.rb +2 -2
  33. data/rof.gemspec +17 -0
  34. data/spec/fixtures/a.json +4 -0
  35. data/spec/fixtures/label.json +20 -0
  36. data/spec/fixtures/osf/b6psa.tar.gz +0 -0
  37. data/spec/fixtures/rof/dev0012829m.rof +45 -0
  38. data/spec/fixtures/vcr_tests/fedora_to_rof1.yml +5274 -0
  39. data/spec/fixtures/vecnet-citation.json +73 -0
  40. data/spec/lib/rof/access_spec.rb +36 -0
  41. data/spec/lib/rof/cli_spec.rb +66 -0
  42. data/spec/lib/rof/collection_spec.rb +90 -0
  43. data/spec/lib/rof/compare_rof_spec.rb +263 -0
  44. data/spec/lib/rof/filters/date_stamp_spec.rb +90 -0
  45. data/spec/lib/rof/filters/file_to_url_spec.rb +70 -0
  46. data/spec/lib/rof/filters/label_spec.rb +94 -0
  47. data/spec/lib/rof/filters/work_spec.rb +87 -0
  48. data/spec/lib/rof/ingest_spec.rb +117 -0
  49. data/spec/lib/rof/ingesters/rels_ext_ingester_spec.rb +62 -0
  50. data/spec/lib/rof/ingesters/rights_metadata_ingester_spec.rb +114 -0
  51. data/spec/lib/rof/osf_to_rof_spec.rb +76 -0
  52. data/spec/lib/rof/translate_csv_spec.rb +109 -0
  53. data/spec/lib/rof/utility_spec.rb +64 -0
  54. data/spec/lib/rof_spec.rb +14 -0
  55. data/spec/spec_helper.rb +11 -11
  56. metadata +283 -18
@@ -0,0 +1,90 @@
1
+ require 'spec_helper'
2
+
3
+ module ROF
4
+ module Filters
5
+ describe DateStamp do
6
+ before(:all) do
7
+ @today = Date.new(2015, 1, 23)
8
+ @today_s = "2015-01-23Z"
9
+ @w = DateStamp.new(@today)
10
+ end
11
+
12
+ it "adds a metadata section if needed" do
13
+ items = [{
14
+ "type" => "ABC"
15
+ }]
16
+ after = @w.process(items, '')
17
+ expect(after.length).to eq(1)
18
+ expect(after.first).to eq({
19
+ "type" => "ABC",
20
+ "metadata" => {
21
+ "@context" => ROF::RdfContext,
22
+ "dc:dateSubmitted" => @today_s,
23
+ "dc:modified" => @today_s
24
+ }
25
+ })
26
+ end
27
+
28
+ it "adds a metadata relation if missing" do
29
+ items = [{
30
+ "type" => "BCD",
31
+ "metadata" => {
32
+ "dc:title" => "something"
33
+ }
34
+ }]
35
+ after = @w.process(items, '')
36
+ expect(after.length).to eq(1)
37
+ expect(after.first).to eq({
38
+ "type" => "BCD",
39
+ "metadata" => {
40
+ "dc:title" => "something",
41
+ "dc:dateSubmitted" => @today_s,
42
+ "dc:modified" => @today_s
43
+ }
44
+ })
45
+ end
46
+
47
+ it "doesn't mess with existing values" do
48
+ items = [{
49
+ "type" => "CDE",
50
+ "metadata" => {
51
+ "dc:title" => "anotherthing",
52
+ "dc:dateSubmitted" => "any date"
53
+ }
54
+ }]
55
+ after = @w.process(items, '')
56
+ expect(after.length).to eq(1)
57
+ expect(after.first).to eq({
58
+ "type" => "CDE",
59
+ "metadata" => {
60
+ "dc:title" => "anotherthing",
61
+ "dc:dateSubmitted" => "any date",
62
+ "dc:modified" => @today_s
63
+ }
64
+ })
65
+ end
66
+
67
+ it "always updates the date modified" do
68
+ items = [{
69
+ "type" => "CDE",
70
+ "metadata" => {
71
+ "dc:title" => "anotherthing",
72
+ "dc:dateSubmitted" => "any date",
73
+ "dc:modified" => "any date"
74
+ }
75
+ }]
76
+ after = @w.process(items, '')
77
+ expect(after.length).to eq(1)
78
+ expect(after.first).to eq({
79
+ "type" => "CDE",
80
+ "metadata" => {
81
+ "dc:title" => "anotherthing",
82
+ "dc:dateSubmitted" => "any date",
83
+ "dc:modified" => @today_s
84
+ }
85
+ })
86
+ end
87
+
88
+ end
89
+ end
90
+ end
@@ -0,0 +1,70 @@
1
+ require 'spec_helper'
2
+
3
+ module ROF
4
+ module Filters
5
+ describe FileToUrl do
6
+ before(:all) do
7
+ @w = FileToUrl.new
8
+ end
9
+
10
+ it "skips rof objects which don't have bendo-items" do
11
+ items = [{
12
+ "type" => "ABC"
13
+ }]
14
+ after = @w.process(items, '')
15
+ expect(after.length).to eq(1)
16
+ expect(after.first).to eq({
17
+ "type" => "ABC"
18
+ })
19
+ end
20
+
21
+ it "skips rof objects which don't have a content-file" do
22
+ items = [{
23
+ "type" => "ABC",
24
+ "bendo-item" => "12345"
25
+ },
26
+ {
27
+ "bendo-item" => "12345",
28
+ "thumbnail-content" => "a_file.png"
29
+ }]
30
+ after = @w.process(items, '')
31
+ expect(after.length).to eq(2)
32
+ expect(after.first).to eq({
33
+ "type" => "ABC",
34
+ "bendo-item" => "12345"
35
+ })
36
+ expect(after.last).to eq({
37
+ "bendo-item" => "12345",
38
+ "thumbnail-content" => "a_file.png"
39
+ })
40
+ end
41
+
42
+ it "converts content files into URLs" do
43
+ items = [{
44
+ "bendo-item" => "12345",
45
+ "content-file" => "a/file.txt"
46
+ },{
47
+ "bendo-item" => "12345",
48
+ "content-file" => "b/file.png",
49
+ "content-meta" => {
50
+ "mime-type" => "image/png"
51
+ }}]
52
+ after = @w.process(items, '')
53
+ expect(after.length).to eq(2)
54
+ expect(after.first).to eq({
55
+ "bendo-item" => "12345",
56
+ "content-meta" => {
57
+ "URL" => "bendo:/item/12345/a/file.txt"
58
+ }
59
+ })
60
+ expect(after.last).to eq({
61
+ "bendo-item" => "12345",
62
+ "content-meta" => {
63
+ "mime-type" => "image/png",
64
+ "URL" => "bendo:/item/12345/b/file.png"
65
+ }
66
+ })
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,94 @@
1
+ require 'spec_helper'
2
+
3
+ module ROF
4
+ module Filters
5
+ describe Label do
6
+ let(:ids) { ["101", "102", "103", "104", "105"] }
7
+ before(:each) {
8
+ @labeler = Label.new(nil, id_list: ids)
9
+ }
10
+ it "ignores non-fobjects" do
11
+ list = [{"type" => "not fobject"}]
12
+ expect(@labeler.process(list, '')).to eq([{"type" => "not fobject"}])
13
+ end
14
+ it "skips already assigned ids" do
15
+ list = [{"type" => "fobject", "pid" => "123"}]
16
+ expect(@labeler.process(list, '')).to eq([{"type" => "fobject", "pid" => "123", "bendo-item" => "123"}])
17
+ end
18
+ it "assigns missing pids" do
19
+ list = [{"type" => "fobject"}]
20
+ expect(@labeler.process(list, '')).to eq([{"type" => "fobject", "pid" => "101", "bendo-item" => "101"}])
21
+ end
22
+ it "assigns pids which are labels" do
23
+ list = [{"type" => "fobject", "pid" => "$(zzz)"}]
24
+ expect(@labeler.process(list, '')).to eq([{"type" => "fobject", "pid" => "101", "bendo-item" => "101"}])
25
+ end
26
+ it "resolves loops" do
27
+ list = [{"type" => "fobject",
28
+ "pid" => "$(zzz)",
29
+ "rels-ext" => {
30
+ "partOf" => ["123", "$(zzz)"]
31
+ }}]
32
+ expect(@labeler.process(list, '')).to eq([{"type" => "fobject",
33
+ "pid" => "101",
34
+ "bendo-item"=>"101",
35
+ "rels-ext" => {
36
+ "partOf" => ["123", "101"]}}])
37
+ end
38
+ it "handles multiple items" do
39
+ list = [{"type" => "fobject",
40
+ "pid" => "$(zzz)",
41
+ "rels-ext" => {
42
+ "partOf" => ["123", "$(zzz)"]
43
+ }},
44
+ {"type" => "fobject",
45
+ "rels-ext" => { "memberOf" => ["$(zzz)"]}}]
46
+ expect(@labeler.process(list, '')).to eq([
47
+ {"type" => "fobject",
48
+ "pid" => "101",
49
+ "bendo-item" => "101",
50
+ "rels-ext" => {
51
+ "partOf" => ["123", "101"]
52
+ }},
53
+ {"type" => "fobject",
54
+ "pid" => "102",
55
+ "bendo-item" => "101",
56
+ "rels-ext" => {
57
+ "memberOf" => ["101"]
58
+ }}
59
+ ])
60
+ end
61
+ it "errors on undefined labels" do
62
+ list = [{"type" => "fobject",
63
+ "rels-ext" => {
64
+ "partOf" => ["123", "$(zzz)"]
65
+ }}]
66
+ expect { @labeler.process(list, '') }.to raise_error(Label::MissingLabel)
67
+ end
68
+
69
+ it "replaces labels in arrays and hashes" do
70
+ list = ["a", "something $(b) and $(a)", "$(not a label)"]
71
+ labels = {"a" => "abc", "b" => "qwe"}
72
+ expect(@labeler.replace_labels(list, labels, false)).to eq(["a", "something qwe and abc", "$(not a label)"])
73
+
74
+ hash = {"$(a)" => "this should $(b)", sym: :symbol, b: {b: "$(a) $(z)"}}
75
+ expect(@labeler.replace_labels(hash, labels, false)).to eq({
76
+ "$(a)" => "this should qwe",
77
+ sym: :symbol,
78
+ b: {b: "abc $(z)"}
79
+ })
80
+ end
81
+
82
+ it "handles pids in isMemberOfCollection" do
83
+ list = [
84
+ {"type" => "fobject", "pid" => "$(zzz)"},
85
+ {"type" => "fobject", "rels-ext" => { "isMemberOfCollection" => ["$(zzz)"]}}
86
+ ]
87
+ expect(@labeler.process(list, '')).to eq([
88
+ {"type" => "fobject", "pid" => "101", "bendo-item" =>"101"},
89
+ {"type" => "fobject", "pid" => "102", "bendo-item" =>"101", "rels-ext" => { "isMemberOfCollection" => ["101"]}}
90
+ ])
91
+ end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,87 @@
1
+ require 'spec_helper'
2
+
3
+ module ROF
4
+ module Filters
5
+ describe Work do
6
+ it "handles variant work types" do
7
+ w = Work.new
8
+
9
+ item = {"type" => "Work", "owner" => "user1"}
10
+ after = w.process_one_work(item)
11
+ expect(after.first).to include("type" => "fobject", "af-model" => "GenericWork")
12
+
13
+ item = {"type" => "Work-Image", "owner" => "user1"}
14
+ after = w.process_one_work(item)
15
+ expect(after.first).to include("type" => "fobject", "af-model" => "Image")
16
+
17
+ item = {"type" => "work-image", "owner" => "user1"}
18
+ after = w.process_one_work(item)
19
+ expect(after.first).to include("type" => "fobject", "af-model" => "image")
20
+
21
+ item = {"type" => "Image", "owner" => "user1"}
22
+ after = w.process_one_work(item)
23
+ expect(after.first).to include("type" => "fobject", "af-model" => "Image")
24
+
25
+ item = {"type" => "image", "owner" => "user1"}
26
+ after = w.process_one_work(item)
27
+ expect(after.first).to include("type" => "fobject", "af-model" => "Image")
28
+
29
+ item = {"type" => "Other", "owner" => "user1"}
30
+ after = w.process_one_work(item)
31
+ expect(after.first).to eq(item)
32
+ end
33
+
34
+ it "makes the first file be the representative" do
35
+ w = Work.new
36
+
37
+ item = {"type" => "Work", "owner" => "user1", "files" => ["a.txt", "b.jpeg"]}
38
+ after = w.process_one_work(item)
39
+ expect(after.length).to eq(3)
40
+ expect(after[0]).to include("type" => "fobject",
41
+ "af-model" => "GenericWork",
42
+ "pid" => "$(pid--0)",
43
+ "properties" => ROF::Utility.prop_ds("user1", "$(pid--1)"))
44
+ expect(after[1]).to include("type" => "fobject",
45
+ "af-model" => "GenericFile",
46
+ "pid" => "$(pid--1)")
47
+ expect(after[2]).to include("type" => "fobject",
48
+ "af-model" => "GenericFile")
49
+ expect(after[2]["metadata"]).to include("dc:title" => "b.jpeg")
50
+ end
51
+
52
+ it "decodes files correctly" do
53
+ w = Work.new
54
+
55
+ item = {
56
+ "type" => "Work",
57
+ "owner" => "user1",
58
+ "rights" => {"edit" => ["user1"]},
59
+ "metadata" => {
60
+ "@context" => RdfContext,
61
+ "dc:title" => "Q, A Letter"},
62
+ "files" => [
63
+ "thumb",
64
+ {
65
+ "type" => "+",
66
+ "owner" => "user1",
67
+ "files" => ["extra file.txt"],
68
+ "rights" => {"edit" => ["user1"]}
69
+ }]
70
+ }
71
+ after = w.process_one_work(item)
72
+ expect(after.length).to eq(3)
73
+ expect(after[0]).to include("type" => "fobject",
74
+ "af-model" => "GenericWork",
75
+ "rels-ext" => {},
76
+ "pid" => "$(pid--0)")
77
+ expect(after[1]).to include("type" => "fobject",
78
+ "af-model" => "GenericFile",
79
+ "pid" => "$(pid--1)",
80
+ "content-file" => "thumb")
81
+ expect(after[2]).to include("type" => "fobject",
82
+ "af-model" => "GenericFile",
83
+ "content-file" => "extra file.txt")
84
+ end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,117 @@
1
+ require 'spec_helper'
2
+
3
+ module ROF
4
+ describe "Ingest" do
5
+ it "requires a fobject" do
6
+ item = {"type" => "not fobject"}
7
+ expect {ROF.Ingest(item)}.to raise_error(ROF::NotFobjectError)
8
+ end
9
+ it "requires a pid" do
10
+ item = {"type" => "fobject"}
11
+ expect {ROF.Ingest(item)}.to raise_error(ROF::MissingPidError)
12
+ end
13
+ it "disallows both id and pid" do
14
+ item = {"type" => "fobject", "id" => '1', "pid" => '1'}
15
+ expect {ROF.Ingest(item)}.to raise_error(ROF::TooManyIdentitiesError)
16
+ end
17
+ it "rejects two ways of giving a datastream" do
18
+ item = {"type" => "fobject",
19
+ "pid" => "test:1",
20
+ "content" => "hello",
21
+ "content-file" => "filename.txt"
22
+ }
23
+ expect {ROF.Ingest(item)}.to raise_error(ROF::SourceError)
24
+ end
25
+ it "uploads datastreams with apropos metadata" do
26
+ item = {"type" => "fobject",
27
+ "pid" => "test:1",
28
+ "af-model" => "GenericFile",
29
+ "content" => "jello",
30
+ "content-meta" => {"label" => "test stream 1",
31
+ "mime-type" => "application/jello"},
32
+ "other-meta" => {"label" => "test stream 2"},
33
+ }
34
+ expect(ROF.Ingest(item)).to eq(["rels-ext", "content", "other"])
35
+ end
36
+
37
+ it "treats id as a surrogate for pid when pid is missing" do
38
+ item = {"type" => "fobject",
39
+ "id" => "test:1",
40
+ "af-model" => "GenericFile",
41
+ "content" => "jello",
42
+ "content-meta" => {"label" => "test stream 1",
43
+ "mime-type" => "application/jello"},
44
+ "other-meta" => {"label" => "test stream 2"},
45
+ }
46
+ expect(ROF.Ingest(item)).to eq(["rels-ext", "content", "other"])
47
+ end
48
+
49
+ it "doesn't touch the rels ext if the model and rels-ext key are missing" do
50
+ item = {"type" => "fobject",
51
+ "id" => "test:1",
52
+ "content" => "jello",
53
+ "content-meta" => {"label" => "test stream 1",
54
+ "mime-type" => "application/jello"},
55
+ "other-meta" => {"label" => "test stream 2"},
56
+ }
57
+ expect(ROF.Ingest(item)).to eq(["content", "other"])
58
+ end
59
+
60
+ it "raises an error if content is not a string" do
61
+ item = {"type" => "fobject",
62
+ "id" => "test:1",
63
+ "af-model" => "GenericFile",
64
+ "content" => ["list", "of", "items"]
65
+ }
66
+ expect {ROF.Ingest(item)}.to raise_error(ROF::SourceError)
67
+ end
68
+
69
+ it "ignores null data streams" do
70
+ item = {"type" => "fobject",
71
+ "id" => "test:1",
72
+ "af-model" => "GenericFile",
73
+ "content" => nil
74
+ }
75
+ expect(ROF.Ingest(item)).to eq(["rels-ext", "content"])
76
+ end
77
+
78
+ describe "RDF Metadata" do
79
+ it "loads JSON-LD" do
80
+ item = {"pid" => "test:1",
81
+ "metadata" => {
82
+ "@context" => {
83
+ "dc" => "http://purl.org/dc/terms/",
84
+ },
85
+ "dc:title" => "Hello Z",
86
+ }
87
+ }
88
+ s = ROF.ingest_ld_metadata(item, nil)
89
+ expect(s).to eq %(<info:fedora/test:1> <http://purl.org/dc/terms/title> "Hello Z" .\n)
90
+ end
91
+
92
+ it "handles @graph objects" do
93
+ item = {"pid" => "test:1",
94
+ "metadata" => {
95
+ "@context" => {
96
+ "dc" => "http://purl.org/dc/terms/",
97
+ "dc:creator" => {"@type" => "@id"},
98
+ },
99
+ "@graph" => [
100
+ {"@id" => "_:b0",
101
+ "dc:title" => "Hello"},
102
+ {"@id" => "info:fedora/test:1",
103
+ "dc:creator" => "_:b0"},
104
+ ]}}
105
+ s = ROF.ingest_ld_metadata(item, nil)
106
+ s = s.split("\n").sort.join("\n") # canonicalize the line ordering
107
+ expect(s).to eq %(<info:fedora/test:1> <http://purl.org/dc/terms/creator> _:b0 .\n_:b0 <http://purl.org/dc/terms/title> "Hello" .)
108
+ end
109
+ end
110
+ end
111
+
112
+ describe "file_searching" do
113
+ it "raises an error on missing files" do
114
+ expect {ROF.find_file_and_open("file.txt",[],"r")}.to raise_error(Errno::ENOENT)
115
+ end
116
+ end
117
+ end