briard 2.4.2 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/codeql-analysis.yml +72 -0
  3. data/.github/workflows/rubocop.yml +50 -0
  4. data/.rubocop.yml +144 -620
  5. data/.rubocop_todo.yml +76 -0
  6. data/CHANGELOG.md +18 -0
  7. data/Gemfile +2 -0
  8. data/Gemfile.lock +40 -6
  9. data/Rakefile +1 -1
  10. data/{bolognese.gemspec → briard.gemspec} +46 -39
  11. data/lib/briard/array.rb +2 -2
  12. data/lib/briard/author_utils.rb +79 -71
  13. data/lib/briard/cli.rb +12 -13
  14. data/lib/briard/crossref_utils.rb +73 -61
  15. data/lib/briard/datacite_utils.rb +132 -106
  16. data/lib/briard/doi_utils.rb +10 -10
  17. data/lib/briard/metadata.rb +96 -106
  18. data/lib/briard/metadata_utils.rb +87 -78
  19. data/lib/briard/readers/bibtex_reader.rb +65 -65
  20. data/lib/briard/readers/cff_reader.rb +88 -70
  21. data/lib/briard/readers/citeproc_reader.rb +90 -84
  22. data/lib/briard/readers/codemeta_reader.rb +68 -50
  23. data/lib/briard/readers/crosscite_reader.rb +2 -2
  24. data/lib/briard/readers/crossref_reader.rb +249 -210
  25. data/lib/briard/readers/datacite_json_reader.rb +3 -3
  26. data/lib/briard/readers/datacite_reader.rb +225 -189
  27. data/lib/briard/readers/npm_reader.rb +49 -42
  28. data/lib/briard/readers/ris_reader.rb +82 -80
  29. data/lib/briard/readers/schema_org_reader.rb +182 -159
  30. data/lib/briard/string.rb +1 -1
  31. data/lib/briard/utils.rb +4 -4
  32. data/lib/briard/version.rb +3 -1
  33. data/lib/briard/whitelist_scrubber.rb +11 -4
  34. data/lib/briard/writers/bibtex_writer.rb +14 -8
  35. data/lib/briard/writers/cff_writer.rb +33 -26
  36. data/lib/briard/writers/codemeta_writer.rb +19 -15
  37. data/lib/briard/writers/csv_writer.rb +6 -4
  38. data/lib/briard/writers/datacite_json_writer.rb +8 -2
  39. data/lib/briard/writers/jats_writer.rb +33 -28
  40. data/lib/briard/writers/rdf_xml_writer.rb +1 -1
  41. data/lib/briard/writers/ris_writer.rb +30 -18
  42. data/lib/briard/writers/turtle_writer.rb +1 -1
  43. data/lib/briard.rb +6 -6
  44. data/rubocop.sarif +0 -0
  45. data/spec/array_spec.rb +5 -5
  46. data/spec/author_utils_spec.rb +151 -132
  47. data/spec/datacite_utils_spec.rb +135 -83
  48. data/spec/doi_utils_spec.rb +168 -164
  49. data/spec/find_from_format_spec.rb +69 -69
  50. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
  51. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
  52. data/spec/metadata_spec.rb +91 -90
  53. data/spec/readers/bibtex_reader_spec.rb +43 -38
  54. data/spec/readers/cff_reader_spec.rb +165 -153
  55. data/spec/readers/citeproc_reader_spec.rb +45 -40
  56. data/spec/readers/codemeta_reader_spec.rb +128 -115
  57. data/spec/readers/crosscite_reader_spec.rb +34 -24
  58. data/spec/readers/crossref_reader_spec.rb +1098 -939
  59. data/spec/readers/datacite_json_reader_spec.rb +53 -40
  60. data/spec/readers/datacite_reader_spec.rb +1541 -1337
  61. data/spec/readers/npm_reader_spec.rb +48 -43
  62. data/spec/readers/ris_reader_spec.rb +53 -47
  63. data/spec/readers/schema_org_reader_spec.rb +329 -267
  64. data/spec/spec_helper.rb +6 -5
  65. data/spec/utils_spec.rb +371 -347
  66. data/spec/writers/bibtex_writer_spec.rb +143 -143
  67. data/spec/writers/cff_writer_spec.rb +96 -90
  68. data/spec/writers/citation_writer_spec.rb +34 -33
  69. data/spec/writers/citeproc_writer_spec.rb +226 -224
  70. data/spec/writers/codemeta_writer_spec.rb +18 -16
  71. data/spec/writers/crosscite_writer_spec.rb +91 -73
  72. data/spec/writers/crossref_writer_spec.rb +99 -91
  73. data/spec/writers/csv_writer_spec.rb +70 -70
  74. data/spec/writers/datacite_json_writer_spec.rb +78 -68
  75. data/spec/writers/datacite_writer_spec.rb +417 -322
  76. data/spec/writers/jats_writer_spec.rb +177 -161
  77. data/spec/writers/rdf_xml_writer_spec.rb +68 -63
  78. data/spec/writers/ris_writer_spec.rb +162 -162
  79. data/spec/writers/turtle_writer_spec.rb +47 -47
  80. metadata +242 -166
  81. data/.github/workflows/release.yml +0 -47
@@ -3,130 +3,143 @@
3
3
  require 'spec_helper'
4
4
 
5
5
  describe Briard::Metadata, vcr: true do
6
- let(:fixture_path) { "spec/fixtures/" }
6
+ let(:fixture_path) { 'spec/fixtures/' }
7
7
 
8
- context "get schema_org raw" do
9
- it "BlogPosting" do
10
- input = fixture_path + 'schema_org.json'
11
- subject = Briard::Metadata.new(input: input)
12
- expect(subject.raw).to eq(IO.read(input).strip)
8
+ context 'get schema_org raw' do
9
+ it 'BlogPosting' do
10
+ input = "#{fixture_path}schema_org.json"
11
+ subject = described_class.new(input: input)
12
+ expect(subject.raw).to eq(File.read(input).strip)
13
13
  end
14
14
  end
15
15
 
16
- context "get schema_org metadata" do
17
- it "BlogPosting" do
18
- input = "https://blog.front-matter.io/posts/eating-your-own-dog-food"
19
- subject = Briard::Metadata.new(input: input, from: "schema_org")
16
+ context 'get schema_org metadata' do
17
+ it 'BlogPosting' do
18
+ input = 'https://blog.front-matter.io/posts/eating-your-own-dog-food'
19
+ subject = described_class.new(input: input, from: 'schema_org')
20
20
  expect(subject.valid?).to be true
21
- expect(subject.id).to eq("https://doi.org/10.53731/r79vxn1-97aq74v-ag58n")
22
- expect(subject.url).to eq("https://blog.front-matter.io/posts/eating-your-own-dog-food")
23
- expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"article-newspaper", "resourceTypeGeneral"=>"Preprint", "ris"=>"GEN", "schemaOrg"=>"Article")
24
- expect(subject.creators).to eq([{"affiliation"=>[],"familyName"=>"Fenner", "givenName"=>"Martin", "name"=>"Fenner, Martin", "nameIdentifiers"=> [{"nameIdentifier"=>"https://orcid.org/0000-0003-1419-2405", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}], "nameType"=>"Personal"}])
25
- expect(subject.titles).to eq([{"title"=>"Eating your own Dog Food"}])
26
- expect(subject.descriptions.first["description"]).to start_with("Eating your own dog food")
27
- expect(subject.subjects).to eq([{"subject"=>"feature"}])
28
- expect(subject.dates).to eq([{"date"=>"2016-12-20T00:00:00Z", "dateType"=>"Issued"}, {"date"=>"2022-08-15T09:06:22Z", "dateType"=>"Updated"}])
29
- expect(subject.publication_year).to eq("2016")
21
+ expect(subject.id).to eq('https://doi.org/10.53731/r79vxn1-97aq74v-ag58n')
22
+ expect(subject.url).to eq('https://blog.front-matter.io/posts/eating-your-own-dog-food')
23
+ expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'article-newspaper',
24
+ 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'Article')
25
+ expect(subject.creators).to eq([{ 'affiliation' => [], 'familyName' => 'Fenner',
26
+ 'givenName' => 'Martin', 'name' => 'Fenner, Martin', 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-1419-2405', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }], 'nameType' => 'Personal' }])
27
+ expect(subject.titles).to eq([{ 'title' => 'Eating your own Dog Food' }])
28
+ expect(subject.descriptions.first['description']).to start_with('Eating your own dog food')
29
+ expect(subject.subjects).to eq([{ 'subject' => 'feature' }])
30
+ expect(subject.dates).to eq([{ 'date' => '2016-12-20T00:00:00Z', 'dateType' => 'Issued' },
31
+ { 'date' => '2022-08-15T09:06:22Z', 'dateType' => 'Updated' }])
32
+ expect(subject.publication_year).to eq('2016')
30
33
  expect(subject.related_identifiers.length).to eq(0)
31
- expect(subject.publisher).to eq("Front Matter")
34
+ expect(subject.publisher).to eq('Front Matter')
32
35
  end
33
36
 
34
- it "BlogPosting with new DOI" do
35
- input = "https://blog.front-matter.io/posts/eating-your-own-dog-food"
36
- subject = Briard::Metadata.new(input: input, doi: "10.5438/0000-00ss")
37
+ it 'BlogPosting with new DOI' do
38
+ input = 'https://blog.front-matter.io/posts/eating-your-own-dog-food'
39
+ subject = described_class.new(input: input, doi: '10.5438/0000-00ss')
37
40
  expect(subject.valid?).to be true
38
- expect(subject.id).to eq("https://doi.org/10.5438/0000-00ss")
39
- expect(subject.doi).to eq("10.5438/0000-00ss")
40
- expect(subject.url).to eq("https://blog.front-matter.io/posts/eating-your-own-dog-food")
41
- expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"article-newspaper", "resourceTypeGeneral"=>"Preprint", "ris"=>"GEN", "schemaOrg"=>"Article")
41
+ expect(subject.id).to eq('https://doi.org/10.5438/0000-00ss')
42
+ expect(subject.doi).to eq('10.5438/0000-00ss')
43
+ expect(subject.url).to eq('https://blog.front-matter.io/posts/eating-your-own-dog-food')
44
+ expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'article-newspaper',
45
+ 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'Article')
42
46
  end
43
47
 
44
- it "BlogPosting with type as array" do
45
- input = fixture_path + 'schema_org_type_as_array.json'
46
- subject = Briard::Metadata.new(input: input)
48
+ it 'BlogPosting with type as array' do
49
+ input = "#{fixture_path}schema_org_type_as_array.json"
50
+ subject = described_class.new(input: input)
47
51
  expect(subject.valid?).to be true
48
- expect(subject.id).to eq("https://doi.org/10.5438/4k3m-nyvg")
49
- expect(subject.url).to eq("https://blog.datacite.org/eating-your-own-dog-food")
50
- expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"post-weblog", "resourceTypeGeneral"=>"Preprint", "ris"=>"GEN", "schemaOrg"=>"BlogPosting")
51
- expect(subject.creators).to eq([{"affiliation"=>[{"name"=>"DataCite"}],"familyName"=>"Fenner", "givenName"=>"Martin", "name"=>"Fenner, Martin", "nameIdentifiers"=> [{"nameIdentifier"=>"https://orcid.org/0000-0003-1419-2405", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}], "nameType"=>"Personal"}])
52
- expect(subject.titles).to eq([{"title"=>"Eating your own Dog Food"}])
53
- expect(subject.descriptions.first["description"]).to start_with("Eating your own dog food")
54
- expect(subject.subjects).to eq([{"subject"=>"datacite"}, {"subject"=>"doi"}, {"subject"=>"metadata"}, {"subject"=>"featured"}])
55
- expect(subject.dates).to eq([{"date"=>"2016-12-20", "dateType"=>"Issued"},
56
- {"date"=>"2016-12-20", "dateType"=>"Created"},
57
- {"date"=>"2016-12-20", "dateType"=>"Updated"}])
58
- expect(subject.publication_year).to eq("2016")
52
+ expect(subject.id).to eq('https://doi.org/10.5438/4k3m-nyvg')
53
+ expect(subject.url).to eq('https://blog.datacite.org/eating-your-own-dog-food')
54
+ expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'post-weblog',
55
+ 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'BlogPosting')
56
+ expect(subject.creators).to eq([{ 'affiliation' => [{ 'name' => 'DataCite' }],
57
+ 'familyName' => 'Fenner', 'givenName' => 'Martin', 'name' => 'Fenner, Martin', 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-1419-2405', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }], 'nameType' => 'Personal' }])
58
+ expect(subject.titles).to eq([{ 'title' => 'Eating your own Dog Food' }])
59
+ expect(subject.descriptions.first['description']).to start_with('Eating your own dog food')
60
+ expect(subject.subjects).to eq([{ 'subject' => 'datacite' }, { 'subject' => 'doi' },
61
+ { 'subject' => 'metadata' }, { 'subject' => 'featured' }])
62
+ expect(subject.dates).to eq([{ 'date' => '2016-12-20', 'dateType' => 'Issued' },
63
+ { 'date' => '2016-12-20', 'dateType' => 'Created' },
64
+ { 'date' => '2016-12-20', 'dateType' => 'Updated' }])
65
+ expect(subject.publication_year).to eq('2016')
59
66
  expect(subject.related_identifiers.length).to eq(3)
60
- expect(subject.related_identifiers.last).to eq("relatedIdentifier"=>"10.5438/55e5-t5c0", "relatedIdentifierType"=>"DOI", "relationType"=>"References", "resourceTypeGeneral" => "Text")
61
- expect(subject.publisher).to eq("DataCite")
67
+ expect(subject.related_identifiers.last).to eq('relatedIdentifier' => '10.5438/55e5-t5c0',
68
+ 'relatedIdentifierType' => 'DOI', 'relationType' => 'References', 'resourceTypeGeneral' => 'Text')
69
+ expect(subject.publisher).to eq('DataCite')
62
70
  end
63
71
 
64
- context "get schema_org metadata front matter" do
65
- it "BlogPosting" do
66
- input = "https://blog.front-matter.io/posts/step-forward-for-software-citation"
67
- subject = Briard::Metadata.new(input: input, from: "schema_org")
72
+ context 'get schema_org metadata front matter' do
73
+ it 'BlogPosting' do
74
+ input = 'https://blog.front-matter.io/posts/step-forward-for-software-citation'
75
+ subject = described_class.new(input: input, from: 'schema_org')
68
76
  expect(subject.valid?).to be true
69
- expect(subject.id).to eq("https://doi.org/10.53731/r9531p1-97aq74v-ag78v")
70
- expect(subject.url).to eq("https://blog.front-matter.io/posts/step-forward-for-software-citation")
71
- expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"article-newspaper", "resourceTypeGeneral"=>"Preprint", "ris"=>"GEN", "schemaOrg"=>"Article")
72
- expect(subject.creators).to eq([{"affiliation"=>[],
73
- "familyName"=>"Fenner", "givenName"=>"Martin", "name"=>"Fenner, Martin", "nameIdentifiers"=> [{"nameIdentifier"=>"https://orcid.org/0000-0003-1419-2405", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}], "nameType"=>"Personal"}])
74
- expect(subject.titles).to eq([{"title"=>"A step forward for software citation: GitHub's enhanced software citation support"}])
75
- expect(subject.descriptions.first["description"]).to start_with("On August 19, GitHub announced software citation")
76
- expect(subject.subjects).to eq([{"subject"=>"news"}])
77
- expect(subject.dates).to eq([{"date"=>"2021-08-24T16:57:24Z", "dateType"=>"Issued"},
78
- {"date"=>"2022-08-15T19:05:14Z", "dateType"=>"Updated"}])
79
- expect(subject.publication_year).to eq("2021")
77
+ expect(subject.id).to eq('https://doi.org/10.53731/r9531p1-97aq74v-ag78v')
78
+ expect(subject.url).to eq('https://blog.front-matter.io/posts/step-forward-for-software-citation')
79
+ expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'article-newspaper',
80
+ 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'Article')
81
+ expect(subject.creators).to eq([{ 'affiliation' => [],
82
+ 'familyName' => 'Fenner', 'givenName' => 'Martin', 'name' => 'Fenner, Martin', 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-1419-2405', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }], 'nameType' => 'Personal' }])
83
+ expect(subject.titles).to eq([{ 'title' => "A step forward for software citation: GitHub's enhanced software citation support" }])
84
+ expect(subject.descriptions.first['description']).to start_with('On August 19, GitHub announced software citation')
85
+ expect(subject.subjects).to eq([{ 'subject' => 'news' }])
86
+ expect(subject.dates).to eq([{ 'date' => '2021-08-24T16:57:24Z', 'dateType' => 'Issued' },
87
+ { 'date' => '2022-08-15T19:05:14Z', 'dateType' => 'Updated' }])
88
+ expect(subject.publication_year).to eq('2021')
80
89
  expect(subject.related_identifiers.length).to eq(0)
81
- expect(subject.container).to eq("identifier"=>"2749-9952", "identifierType"=>"ISSN", "title"=>"Front Matter", "type"=>"Blog")
82
- expect(subject.publisher).to eq("Front Matter")
90
+ expect(subject.container).to eq('identifier' => '2749-9952', 'identifierType' => 'ISSN',
91
+ 'title' => 'Front Matter', 'type' => 'Blog')
92
+ expect(subject.publisher).to eq('Front Matter')
83
93
  end
84
94
  end
85
95
 
86
- it "zenodo" do
87
- input = "https://www.zenodo.org/record/1196821"
88
- subject = Briard::Metadata.new(input: input, from: "schema_org")
96
+ it 'zenodo' do
97
+ input = 'https://www.zenodo.org/record/1196821'
98
+ subject = described_class.new(input: input, from: 'schema_org')
89
99
  expect(subject.valid?).to be false
90
- expect(subject.language).to eq("eng")
100
+ expect(subject.language).to eq('eng')
91
101
  expect(subject.errors).to eq("49:0: ERROR: Element '{http://datacite.org/schema/kernel-4}publisher': [facet 'minLength'] The value has a length of '0'; this underruns the allowed minimum length of '1'.")
92
- expect(subject.id).to eq("https://doi.org/10.5281/zenodo.1196821")
93
- expect(subject.doi).to eq("10.5281/zenodo.1196821")
94
- expect(subject.url).to eq("https://zenodo.org/record/1196821")
95
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
96
- expect(subject.titles).to eq([{"title"=>"PsPM-SC4B: SCR, ECG, EMG, PSR and respiration measurements in a delay fear conditioning task with auditory CS and electrical US"}])
102
+ expect(subject.id).to eq('https://doi.org/10.5281/zenodo.1196821')
103
+ expect(subject.doi).to eq('10.5281/zenodo.1196821')
104
+ expect(subject.url).to eq('https://zenodo.org/record/1196821')
105
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
106
+ 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
107
+ expect(subject.titles).to eq([{ 'title' => 'PsPM-SC4B: SCR, ECG, EMG, PSR and respiration measurements in a delay fear conditioning task with auditory CS and electrical US' }])
97
108
  expect(subject.creators.size).to eq(6)
98
- expect(subject.creators.first).to eq("name" => "Staib, Matthias",
99
- "nameIdentifiers" => [{"nameIdentifier"=>"https://orcid.org/0000-0001-9688-838X", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}],
100
- "nameType" => "Personal", "givenName"=>"Matthias", "familyName"=>"Staib", "affiliation" => [{"name"=>"University of Zurich, Zurich, Switzerland"}])
101
- expect(subject.publisher).to be_nil
102
- expect(subject.publication_year).to eq("2018")
103
- expect(subject.subjects).to eq([{"subject"=>"pupil size response"},
104
- {"subject"=>"skin conductance response"},
105
- {"subject"=>"electrocardiogram"},
106
- {"subject"=>"electromyogram"},
107
- {"subject"=>"electrodermal activity"},
108
- {"subject"=>"galvanic skin response"},
109
- {"subject"=>"psr"},
110
- {"subject"=>"scr"},
111
- {"subject"=>"ecg"},
112
- {"subject"=>"emg"},
113
- {"subject"=>"eda"},
114
- {"subject"=>"gsr"}])
109
+ expect(subject.creators.first).to eq('name' => 'Staib, Matthias',
110
+ 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0001-9688-838X', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }],
111
+ 'nameType' => 'Personal', 'givenName' => 'Matthias', 'familyName' => 'Staib', 'affiliation' => [{ 'name' => 'University of Zurich, Zurich, Switzerland' }])
112
+ expect(subject.publisher.nil?).to be(true)
113
+ expect(subject.publication_year).to eq('2018')
114
+ expect(subject.subjects).to eq([{ 'subject' => 'pupil size response' },
115
+ { 'subject' => 'skin conductance response' },
116
+ { 'subject' => 'electrocardiogram' },
117
+ { 'subject' => 'electromyogram' },
118
+ { 'subject' => 'electrodermal activity' },
119
+ { 'subject' => 'galvanic skin response' },
120
+ { 'subject' => 'psr' },
121
+ { 'subject' => 'scr' },
122
+ { 'subject' => 'ecg' },
123
+ { 'subject' => 'emg' },
124
+ { 'subject' => 'eda' },
125
+ { 'subject' => 'gsr' }])
115
126
  end
116
127
 
117
- it "pangaea" do
118
- input = "https://doi.pangaea.de/10.1594/PANGAEA.836178"
119
- subject = Briard::Metadata.new(input: input, from: "schema_org")
128
+ it 'pangaea' do
129
+ input = 'https://doi.pangaea.de/10.1594/PANGAEA.836178'
130
+ subject = described_class.new(input: input, from: 'schema_org')
120
131
  expect(subject.valid?).to be true
121
- expect(subject.id).to eq("https://doi.org/10.1594/pangaea.836178")
122
- expect(subject.doi).to eq("10.1594/pangaea.836178")
123
- expect(subject.url).to eq("https://doi.pangaea.de/10.1594/PANGAEA.836178")
124
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
125
- expect(subject.titles).to eq([{"title"=>"Hydrological and meteorological investigations in a lake near Kangerlussuaq, west Greenland"}])
132
+ expect(subject.id).to eq('https://doi.org/10.1594/pangaea.836178')
133
+ expect(subject.doi).to eq('10.1594/pangaea.836178')
134
+ expect(subject.url).to eq('https://doi.pangaea.de/10.1594/PANGAEA.836178')
135
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
136
+ 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
137
+ expect(subject.titles).to eq([{ 'title' => 'Hydrological and meteorological investigations in a lake near Kangerlussuaq, west Greenland' }])
126
138
  expect(subject.creators.size).to eq(8)
127
- expect(subject.creators.first).to eq("nameType" => "Personal", "name"=>"Johansson, Emma", "givenName"=>"Emma", "familyName"=>"Johansson")
128
- expect(subject.publisher).to eq("PANGAEA")
129
- expect(subject.publication_year).to eq("2014")
139
+ expect(subject.creators.first).to eq('nameType' => 'Personal', 'name' => 'Johansson, Emma',
140
+ 'givenName' => 'Emma', 'familyName' => 'Johansson')
141
+ expect(subject.publisher).to eq('PANGAEA')
142
+ expect(subject.publication_year).to eq('2014')
130
143
  end
131
144
 
132
145
  # TODO: check redirections
@@ -143,22 +156,26 @@ describe Briard::Metadata, vcr: true do
143
156
  # expect(subject.creators.first).to eq("familyName"=>"MOGHADDAM", "givenName"=>"M.", "name"=>"MOGHADDAM, M.", "nameType"=>"Personal", "nameIdentifiers"=>[], "affiliation" => [])
144
157
  # end
145
158
 
146
- it "harvard dataverse" do
147
- input = "https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/NJ7XSO"
148
- subject = Briard::Metadata.new(input: input, from: "schema_org")
159
+ it 'harvard dataverse' do
160
+ input = 'https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/NJ7XSO'
161
+ subject = described_class.new(input: input, from: 'schema_org')
149
162
  expect(subject.valid?).to be true
150
- expect(subject.id).to eq("https://doi.org/10.7910/dvn/nj7xso")
151
- expect(subject.doi).to eq("10.7910/dvn/nj7xso")
152
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
153
- expect(subject.titles).to eq([{"title"=>"Summary data ankylosing spondylitis GWAS"}])
154
- expect(subject.container).to eq("identifier"=>"https://dataverse.harvard.edu", "identifierType"=>"URL", "title"=>"Harvard Dataverse", "type"=>"DataRepository")
155
- expect(subject.creators).to eq([{"name" => "International Genetics Of Ankylosing Spondylitis Consortium (IGAS)", "nameIdentifiers"=>[], "affiliation" => []}])
156
- expect(subject.subjects).to eq([{"subject"=>"medicine, health and life sciences"},
157
- {"subject"=>"genome-wide association studies"},
158
- {"subject"=>"ankylosing spondylitis"}])
163
+ expect(subject.id).to eq('https://doi.org/10.7910/dvn/nj7xso')
164
+ expect(subject.doi).to eq('10.7910/dvn/nj7xso')
165
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
166
+ 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
167
+ expect(subject.titles).to eq([{ 'title' => 'Summary data ankylosing spondylitis GWAS' }])
168
+ expect(subject.container).to eq('identifier' => 'https://dataverse.harvard.edu',
169
+ 'identifierType' => 'URL', 'title' => 'Harvard Dataverse', 'type' => 'DataRepository')
170
+ expect(subject.creators).to eq([{
171
+ 'name' => 'International Genetics Of Ankylosing Spondylitis Consortium (IGAS)', 'nameIdentifiers' => [], 'affiliation' => []
172
+ }])
173
+ expect(subject.subjects).to eq([{ 'subject' => 'medicine, health and life sciences' },
174
+ { 'subject' => 'genome-wide association studies' },
175
+ { 'subject' => 'ankylosing spondylitis' }])
159
176
  end
160
177
 
161
- # TODO check 403 status in DOI resolver
178
+ # TODO: check 403 status in DOI resolver
162
179
  # it "harvard dataverse via identifiers.org" do
163
180
  # input = "https://identifiers.org/doi/10.7910/DVN/NJ7XSO"
164
181
  # subject = Briard::Metadata.new(input: input, from: "schema_org")
@@ -172,199 +189,244 @@ describe Briard::Metadata, vcr: true do
172
189
  # end
173
190
  end
174
191
 
175
- context "get schema_org metadata as string" do
176
- it "BlogPosting" do
177
- input = fixture_path + 'schema_org.json'
178
- subject = Briard::Metadata.new(input: input)
192
+ context 'get schema_org metadata as string' do
193
+ it 'BlogPosting' do
194
+ input = "#{fixture_path}schema_org.json"
195
+ subject = described_class.new(input: input)
179
196
  expect(subject.valid?).to be true
180
- expect(subject.language).to eq("en")
181
- expect(subject.id).to eq("https://doi.org/10.5438/4k3m-nyvg")
182
- expect(subject.url).to eq("https://blog.datacite.org/eating-your-own-dog-food")
183
- expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"post-weblog", "resourceTypeGeneral"=>"Preprint", "ris"=>"GEN", "schemaOrg"=>"BlogPosting")
184
- expect(subject.creators).to eq([{"familyName"=>"Fenner", "givenName"=>"Martin", "name"=>"Fenner, Martin", "nameIdentifiers"=> [{"nameIdentifier"=>"https://orcid.org/0000-0003-1419-2405", "nameIdentifierScheme"=>"ORCID",
185
- + "schemeUri"=>"https://orcid.org"}], "nameType"=>"Personal"}])
186
- expect(subject.titles).to eq([{"title"=>"Eating your own Dog Food"}])
187
- expect(subject.descriptions.first["description"]).to start_with("Eating your own dog food")
188
- expect(subject.subjects).to eq([{"subject"=>"datacite"}, {"subject"=>"doi"}, {"subject"=>"metadata"}, {"subject"=>"featured"}])
189
- expect(subject.dates).to eq([{"date"=>"2016-12-20", "dateType"=>"Issued"},
190
- {"date"=>"2016-12-20", "dateType"=>"Created"},
191
- {"date"=>"2016-12-20", "dateType"=>"Updated"}])
192
- expect(subject.publication_year).to eq("2016")
197
+ expect(subject.language).to eq('en')
198
+ expect(subject.id).to eq('https://doi.org/10.5438/4k3m-nyvg')
199
+ expect(subject.url).to eq('https://blog.datacite.org/eating-your-own-dog-food')
200
+ expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'post-weblog',
201
+ 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'BlogPosting')
202
+ expect(subject.creators).to eq([{ 'familyName' => 'Fenner', 'givenName' => 'Martin', 'name' => 'Fenner, Martin', 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-1419-2405', 'nameIdentifierScheme' => 'ORCID',
203
+ + 'schemeUri' => 'https://orcid.org' }], 'nameType' => 'Personal' }])
204
+ expect(subject.titles).to eq([{ 'title' => 'Eating your own Dog Food' }])
205
+ expect(subject.descriptions.first['description']).to start_with('Eating your own dog food')
206
+ expect(subject.subjects).to eq([{ 'subject' => 'datacite' }, { 'subject' => 'doi' },
207
+ { 'subject' => 'metadata' }, { 'subject' => 'featured' }])
208
+ expect(subject.dates).to eq([{ 'date' => '2016-12-20', 'dateType' => 'Issued' },
209
+ { 'date' => '2016-12-20', 'dateType' => 'Created' },
210
+ { 'date' => '2016-12-20', 'dateType' => 'Updated' }])
211
+ expect(subject.publication_year).to eq('2016')
193
212
  expect(subject.related_identifiers.length).to eq(3)
194
- expect(subject.related_identifiers.last).to eq("relatedIdentifier"=>"10.5438/55e5-t5c0", "relatedIdentifierType"=>"DOI", "relationType"=>"References", "resourceTypeGeneral" => "Text")
195
- expect(subject.publisher).to eq("DataCite")
213
+ expect(subject.related_identifiers.last).to eq('relatedIdentifier' => '10.5438/55e5-t5c0',
214
+ 'relatedIdentifierType' => 'DOI', 'relationType' => 'References', 'resourceTypeGeneral' => 'Text')
215
+ expect(subject.publisher).to eq('DataCite')
196
216
  end
197
217
 
198
- it "GTEx dataset" do
199
- input = fixture_path + 'schema_org_gtex.json'
200
- subject = Briard::Metadata.new(input: input)
218
+ it 'GTEx dataset' do
219
+ input = "#{fixture_path}schema_org_gtex.json"
220
+ subject = described_class.new(input: input)
201
221
 
202
222
  expect(subject.valid?).to be true
203
- expect(subject.id).to eq("https://doi.org/10.25491/d50j-3083")
204
- expect(subject.identifiers).to eq([{"identifier"=>"687610993", "identifierType"=>"md5"}])
205
- expect(subject.url).to eq("https://ors.datacite.org/doi:/10.25491/d50j-3083")
206
- expect(subject.content_url).to eq(["https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_expression_matrices.tar.gz"])
207
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceType"=>"Gene expression matrices", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
208
- expect(subject.creators).to eq([{"name"=>"The GTEx Consortium", "nameType"=>"Organizational", "nameIdentifiers"=>[], "affiliation" => []}])
209
- expect(subject.titles).to eq([{"title"=>"Fully processed, filtered and normalized gene expression matrices (in BED format) for each tissue, which were used as input into FastQTL for eQTL discovery"}])
210
- expect(subject.version_info).to eq("v7")
211
- expect(subject.subjects).to eq([{"subject"=>"gtex"}, {"subject"=>"annotation"}, {"subject"=>"phenotype"}, {"subject"=>"gene regulation"}, {"subject"=>"transcriptomics"}])
212
- expect(subject.dates).to eq([{"date"=>"2017", "dateType"=>"Issued"}])
213
- expect(subject.publication_year).to eq("2017")
214
- expect(subject.container).to eq("title"=>"GTEx", "type"=>"DataRepository")
215
- expect(subject.publisher).to eq("GTEx")
223
+ expect(subject.id).to eq('https://doi.org/10.25491/d50j-3083')
224
+ expect(subject.identifiers).to eq([{ 'identifier' => '687610993',
225
+ 'identifierType' => 'md5' }])
226
+ expect(subject.url).to eq('https://ors.datacite.org/doi:/10.25491/d50j-3083')
227
+ expect(subject.content_url).to eq(['https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_expression_matrices.tar.gz'])
228
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
229
+ 'resourceType' => 'Gene expression matrices', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
230
+ expect(subject.creators).to eq([{ 'name' => 'The GTEx Consortium', 'nameType' => 'Organizational',
231
+ 'nameIdentifiers' => [], 'affiliation' => [] }])
232
+ expect(subject.titles).to eq([{ 'title' => 'Fully processed, filtered and normalized gene expression matrices (in BED format) for each tissue, which were used as input into FastQTL for eQTL discovery' }])
233
+ expect(subject.version_info).to eq('v7')
234
+ expect(subject.subjects).to eq([{ 'subject' => 'gtex' }, { 'subject' => 'annotation' },
235
+ { 'subject' => 'phenotype' }, { 'subject' => 'gene regulation' }, { 'subject' => 'transcriptomics' }])
236
+ expect(subject.dates).to eq([{ 'date' => '2017', 'dateType' => 'Issued' }])
237
+ expect(subject.publication_year).to eq('2017')
238
+ expect(subject.container).to eq('title' => 'GTEx', 'type' => 'DataRepository')
239
+ expect(subject.publisher).to eq('GTEx')
216
240
  expect(subject.funding_references.length).to eq(7)
217
- expect(subject.funding_references.first).to eq("funderIdentifier"=>"https://doi.org/10.13039/100000052", "funderIdentifierType"=>"Crossref Funder ID", "funderName"=>"Common Fund of the Office of the Director of the NIH")
241
+ expect(subject.funding_references.first).to eq(
242
+ 'funderIdentifier' => 'https://doi.org/10.13039/100000052', 'funderIdentifierType' => 'Crossref Funder ID', 'funderName' => 'Common Fund of the Office of the Director of the NIH'
243
+ )
218
244
  end
219
245
 
220
- it "TOPMed dataset" do
221
- input = fixture_path + 'schema_org_topmed.json'
222
- subject = Briard::Metadata.new(input: input)
246
+ it 'TOPMed dataset' do
247
+ input = "#{fixture_path}schema_org_topmed.json"
248
+ subject = described_class.new(input: input)
223
249
  expect(subject.valid?).to be true
224
- expect(subject.identifiers).to eq([{"identifier"=>"3b33f6b9338fccab0901b7d317577ea3", "identifierType"=>"md5"},
225
- {"identifier"=>"ark:/99999/fk41CrU4eszeLUDe", "identifierType"=>"minid"},
226
- {"identifier"=>"dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7", "identifierType"=>"dataguid"}])
227
- expect(subject.url).to eq("https://ors.datacite.org/doi:/10.23725/8na3-9s47")
228
- expect(subject.content_url).to eq(["s3://cgp-commons-public/topmed_open_access/197bc047-e917-55ed-852d-d563cdbc50e4/NWD165827.recab.cram", "gs://topmed-irc-share/public/NWD165827.recab.cram"])
229
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceType"=>"CRAM file", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
230
- expect(subject.creators).to eq([{"name"=>"TOPMed IRC", "nameType"=>"Organizational", "nameIdentifiers"=>[], "affiliation" => []}])
231
- expect(subject.titles).to eq([{"title"=>"NWD165827.recab.cram"}])
232
- expect(subject.subjects).to eq([{"subject"=>"topmed"}, {"subject"=>"whole genome sequencing"}])
233
- expect(subject.dates).to eq([{"date"=>"2017-11-30", "dateType"=>"Issued"}])
234
- expect(subject.publication_year).to eq("2017")
235
- expect(subject.publisher).to eq("TOPMed")
236
- expect(subject.related_identifiers).to eq([{"relatedIdentifier"=>"10.23725/2g4s-qv04", "relatedIdentifierType"=>"DOI", "relationType"=>"References", "resourceTypeGeneral"=>"Dataset"}])
237
- expect(subject.funding_references).to eq([{"funderIdentifier"=>"https://doi.org/10.13039/100000050", "funderIdentifierType"=>"Crossref Funder ID", "funderName"=>"National Heart, Lung, and Blood Institute (NHLBI)"}])
250
+ expect(subject.identifiers).to eq([{ 'identifier' => '3b33f6b9338fccab0901b7d317577ea3', 'identifierType' => 'md5' },
251
+ { 'identifier' => 'ark:/99999/fk41CrU4eszeLUDe',
252
+ 'identifierType' => 'minid' },
253
+ { 'identifier' => 'dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7',
254
+ 'identifierType' => 'dataguid' }])
255
+ expect(subject.url).to eq('https://ors.datacite.org/doi:/10.23725/8na3-9s47')
256
+ expect(subject.content_url).to eq([
257
+ 's3://cgp-commons-public/topmed_open_access/197bc047-e917-55ed-852d-d563cdbc50e4/NWD165827.recab.cram', 'gs://topmed-irc-share/public/NWD165827.recab.cram'
258
+ ])
259
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
260
+ 'resourceType' => 'CRAM file', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
261
+ expect(subject.creators).to eq([{ 'name' => 'TOPMed IRC', 'nameType' => 'Organizational',
262
+ 'nameIdentifiers' => [], 'affiliation' => [] }])
263
+ expect(subject.titles).to eq([{ 'title' => 'NWD165827.recab.cram' }])
264
+ expect(subject.subjects).to eq([{ 'subject' => 'topmed' },
265
+ { 'subject' => 'whole genome sequencing' }])
266
+ expect(subject.dates).to eq([{ 'date' => '2017-11-30', 'dateType' => 'Issued' }])
267
+ expect(subject.publication_year).to eq('2017')
268
+ expect(subject.publisher).to eq('TOPMed')
269
+ expect(subject.related_identifiers).to eq([{ 'relatedIdentifier' => '10.23725/2g4s-qv04',
270
+ 'relatedIdentifierType' => 'DOI', 'relationType' => 'References', 'resourceTypeGeneral' => 'Dataset' }])
271
+ expect(subject.funding_references).to eq([{
272
+ 'funderIdentifier' => 'https://doi.org/10.13039/100000050', 'funderIdentifierType' => 'Crossref Funder ID', 'funderName' => 'National Heart, Lung, and Blood Institute (NHLBI)'
273
+ }])
238
274
  end
239
275
 
240
- it "tdl_iodp dataset" do
241
- input = fixture_path + 'schema_org_tdl_iodp_invalid_authors.json'
242
- subject = Briard::Metadata.new(input: input)
276
+ it 'tdl_iodp dataset' do
277
+ input = "#{fixture_path}schema_org_tdl_iodp_invalid_authors.json"
278
+ subject = described_class.new(input: input)
243
279
  expect(subject.valid?).to be false
244
280
  end
245
281
 
246
- it "geolocation" do
247
- input = fixture_path + 'schema_org_geolocation.json'
248
- subject = Briard::Metadata.new(input: input)
282
+ it 'geolocation' do
283
+ input = "#{fixture_path}schema_org_geolocation.json"
284
+ subject = described_class.new(input: input)
249
285
 
250
286
  expect(subject.valid?).to be true
251
- expect(subject.identifiers).to eq([{"identifier"=>"https://doi.org/10.6071/z7wc73", "identifierType"=>"DOI"}])
252
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceType"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
287
+ expect(subject.identifiers).to eq([{ 'identifier' => 'https://doi.org/10.6071/z7wc73',
288
+ 'identifierType' => 'DOI' }])
289
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
290
+ 'resourceType' => 'dataset', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
253
291
  expect(subject.creators.length).to eq(6)
254
- expect(subject.creators.first).to eq("familyName"=>"Bales", "givenName"=>"Roger", "name"=>"Bales, Roger", "nameType"=>"Personal")
255
- expect(subject.titles).to eq([{"title"=>"Southern Sierra Critical Zone Observatory (SSCZO), Providence Creek meteorological data, soil moisture and temperature, snow depth and air temperature"}])
256
- expect(subject.subjects).to eq([{"subject"=>"earth sciences"},
257
- {"subject"=>"soil moisture"},
258
- {"subject"=>"soil temperature"},
259
- {"subject"=>"snow depth"},
260
- {"subject"=>"air temperature"},
261
- {"subject"=>"water balance"},
262
- {"subject"=>"nevada"},
263
- {"subject"=>"sierra (mountain range)"}])
264
- expect(subject.dates).to eq([{"date"=>"2013", "dateType"=>"Issued"},
265
- {"date"=>"2014-10-17", "dateType"=>"Updated"}])
266
- expect(subject.publication_year).to eq("2013")
267
- expect(subject.publisher).to eq("UC Merced")
268
- expect(subject.funding_references).to eq([{"funderName"=>"National Science Foundation, Division of Earth Sciences, Critical Zone Observatories"}])
269
- expect(subject.geo_locations).to eq([{"geoLocationPlace"=>"Providence Creek (Lower, Upper and P301)", "geoLocationPoint"=>{"pointLatitude"=>"37.047756", "pointLongitude"=>"-119.221094"}}])
292
+ expect(subject.creators.first).to eq('familyName' => 'Bales', 'givenName' => 'Roger',
293
+ 'name' => 'Bales, Roger', 'nameType' => 'Personal')
294
+ expect(subject.titles).to eq([{ 'title' => 'Southern Sierra Critical Zone Observatory (SSCZO), Providence Creek meteorological data, soil moisture and temperature, snow depth and air temperature' }])
295
+ expect(subject.subjects).to eq([{ 'subject' => 'earth sciences' },
296
+ { 'subject' => 'soil moisture' },
297
+ { 'subject' => 'soil temperature' },
298
+ { 'subject' => 'snow depth' },
299
+ { 'subject' => 'air temperature' },
300
+ { 'subject' => 'water balance' },
301
+ { 'subject' => 'nevada' },
302
+ { 'subject' => 'sierra (mountain range)' }])
303
+ expect(subject.dates).to eq([{ 'date' => '2013', 'dateType' => 'Issued' },
304
+ { 'date' => '2014-10-17', 'dateType' => 'Updated' }])
305
+ expect(subject.publication_year).to eq('2013')
306
+ expect(subject.publisher).to eq('UC Merced')
307
+ expect(subject.funding_references).to eq([{ 'funderName' => 'National Science Foundation, Division of Earth Sciences, Critical Zone Observatories' }])
308
+ expect(subject.geo_locations).to eq([{
309
+ 'geoLocationPlace' => 'Providence Creek (Lower, Upper and P301)', 'geoLocationPoint' => {
310
+ 'pointLatitude' => '37.047756', 'pointLongitude' => '-119.221094'
311
+ }
312
+ }])
270
313
  end
271
314
 
272
- it "geolocation geoshape" do
273
- input = fixture_path + 'schema_org_geoshape.json'
274
- subject = Briard::Metadata.new(input: input)
315
+ it 'geolocation geoshape' do
316
+ input = "#{fixture_path}schema_org_geoshape.json"
317
+ subject = described_class.new(input: input)
275
318
 
276
319
  expect(subject.valid?).to be true
277
- expect(subject.language).to eq("en")
278
- expect(subject.id).to eq("https://doi.org/10.1594/pangaea.842237")
279
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
320
+ expect(subject.language).to eq('en')
321
+ expect(subject.id).to eq('https://doi.org/10.1594/pangaea.842237')
322
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
323
+ 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
280
324
  expect(subject.creators.length).to eq(2)
281
- expect(subject.creators.first).to eq("name"=>"Tara Oceans Consortium, Coordinators", "nameType"=>"Organizational", "nameIdentifiers"=>[], "affiliation" => [])
282
- expect(subject.titles).to eq([{"title"=>"Registry of all stations from the Tara Oceans Expedition (2009-2013)"}])
283
- expect(subject.dates).to eq([{"date"=>"2015-02-03", "dateType"=>"Issued"}])
284
- expect(subject.publication_year).to eq("2015")
285
- expect(subject.publisher).to eq("PANGAEA")
286
- expect(subject.geo_locations).to eq([{"geoLocationBox"=>{"eastBoundLongitude"=>"174.9006", "northBoundLatitude"=>"79.6753", "southBoundLatitude"=>"-64.3088", "westBoundLongitude"=>"-168.5182"}}])
325
+ expect(subject.creators.first).to eq('name' => 'Tara Oceans Consortium, Coordinators',
326
+ 'nameType' => 'Organizational', 'nameIdentifiers' => [], 'affiliation' => [])
327
+ expect(subject.titles).to eq([{ 'title' => 'Registry of all stations from the Tara Oceans Expedition (2009-2013)' }])
328
+ expect(subject.dates).to eq([{ 'date' => '2015-02-03', 'dateType' => 'Issued' }])
329
+ expect(subject.publication_year).to eq('2015')
330
+ expect(subject.publisher).to eq('PANGAEA')
331
+ expect(subject.geo_locations).to eq([{ 'geoLocationBox' => { 'eastBoundLongitude' => '174.9006',
332
+ 'northBoundLatitude' => '79.6753', 'southBoundLatitude' => '-64.3088', 'westBoundLongitude' => '-168.5182' } }])
287
333
  end
288
334
 
289
- it "schema_org list" do
290
- data = IO.read(fixture_path + 'schema_org_list.json').strip
335
+ it 'schema_org list' do
336
+ data = File.read("#{fixture_path}schema_org_list.json").strip
291
337
  input = JSON.parse(data).first.to_json
292
- subject = Briard::Metadata.new(input: input)
338
+ subject = described_class.new(input: input)
293
339
  expect(subject.valid?).to be true
294
- expect(subject.id).to eq("https://doi.org/10.23725/7jg3-v803")
295
- expect(subject.identifiers).to eq([{"identifier"=>"ark:/99999/fk4E1n6n1YHKxPk", "identifierType"=>"minid"},
296
- {"identifier"=>"dg.4503/01b048d0-e128-4cb0-94e9-b2d2cab7563d",
297
- "identifierType"=>"dataguid"},
298
- {"identifier"=>"f9e72bdf25bf4b4f0e581d9218fec2eb", "identifierType"=>"md5"}])
299
- expect(subject.url).to eq("https://ors.datacite.org/doi:/10.23725/7jg3-v803")
300
- expect(subject.content_url).to eq(["s3://cgp-commons-public/topmed_open_access/44a8837b-4456-5709-b56b-54e23000f13a/NWD100953.recab.cram","gs://topmed-irc-share/public/NWD100953.recab.cram","dos://dos.commons.ucsc-cgp.org/01b048d0-e128-4cb0-94e9-b2d2cab7563d?version=2018-05-26T133719.491772Z"])
301
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceType"=>"CRAM file", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
302
- expect(subject.creators).to eq([{"name"=>"TOPMed", "nameType"=>"Organizational", "nameIdentifiers"=>[], "affiliation" => []}])
303
- expect(subject.titles).to eq([{"title"=>"NWD100953.recab.cram"}])
304
- expect(subject.subjects).to eq([{"subject"=>"topmed"}, {"subject"=>"whole genome sequencing"}])
305
- expect(subject.dates).to eq([{"date"=>"2017-11-30", "dateType"=>"Issued"}])
306
- expect(subject.publication_year).to eq("2017")
307
- expect(subject.publisher).to eq("TOPMed")
308
- expect(subject.funding_references).to eq([{"funderIdentifier"=>"https://doi.org/10.13039/100000050", "funderIdentifierType"=>"Crossref Funder ID", "funderName"=>"National Heart, Lung, and Blood Institute (NHLBI)"}])
340
+ expect(subject.id).to eq('https://doi.org/10.23725/7jg3-v803')
341
+ expect(subject.identifiers).to eq([{ 'identifier' => 'ark:/99999/fk4E1n6n1YHKxPk', 'identifierType' => 'minid' },
342
+ { 'identifier' => 'dg.4503/01b048d0-e128-4cb0-94e9-b2d2cab7563d',
343
+ 'identifierType' => 'dataguid' },
344
+ { 'identifier' => 'f9e72bdf25bf4b4f0e581d9218fec2eb',
345
+ 'identifierType' => 'md5' }])
346
+ expect(subject.url).to eq('https://ors.datacite.org/doi:/10.23725/7jg3-v803')
347
+ expect(subject.content_url).to eq([
348
+ 's3://cgp-commons-public/topmed_open_access/44a8837b-4456-5709-b56b-54e23000f13a/NWD100953.recab.cram', 'gs://topmed-irc-share/public/NWD100953.recab.cram', 'dos://dos.commons.ucsc-cgp.org/01b048d0-e128-4cb0-94e9-b2d2cab7563d?version=2018-05-26T133719.491772Z'
349
+ ])
350
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
351
+ 'resourceType' => 'CRAM file', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
352
+ expect(subject.creators).to eq([{ 'name' => 'TOPMed', 'nameType' => 'Organizational',
353
+ 'nameIdentifiers' => [], 'affiliation' => [] }])
354
+ expect(subject.titles).to eq([{ 'title' => 'NWD100953.recab.cram' }])
355
+ expect(subject.subjects).to eq([{ 'subject' => 'topmed' },
356
+ { 'subject' => 'whole genome sequencing' }])
357
+ expect(subject.dates).to eq([{ 'date' => '2017-11-30', 'dateType' => 'Issued' }])
358
+ expect(subject.publication_year).to eq('2017')
359
+ expect(subject.publisher).to eq('TOPMed')
360
+ expect(subject.funding_references).to eq([{
361
+ 'funderIdentifier' => 'https://doi.org/10.13039/100000050', 'funderIdentifierType' => 'Crossref Funder ID', 'funderName' => 'National Heart, Lung, and Blood Institute (NHLBI)'
362
+ }])
309
363
  end
310
364
 
311
- it "aida dataset" do
312
- input = fixture_path + 'aida.json'
313
- subject = Briard::Metadata.new(input: input)
365
+ it 'aida dataset' do
366
+ input = "#{fixture_path}aida.json"
367
+ subject = described_class.new(input: input)
314
368
 
315
369
  expect(subject.valid?).to be true
316
- expect(subject.id).to eq("https://doi.org/10.23698/aida/drov")
317
- expect(subject.url).to eq("https://doi.aida.medtech4health.se/10.23698/aida/drov")
318
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
370
+ expect(subject.id).to eq('https://doi.org/10.23698/aida/drov')
371
+ expect(subject.url).to eq('https://doi.aida.medtech4health.se/10.23698/aida/drov')
372
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
373
+ 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
319
374
  # expect(subject.creators).to eq([{"familyName"=>"Lindman", "givenName"=>"Karin", "name"=>"Lindman, Karin", "nameIdentifiers"=>[{"nameIdentifier"=> "https://orcid.org/0000-0003-1298-517X", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}], "nameType"=>"Personal"}])
320
- expect(subject.titles).to eq([{"title"=>"Ovary data from the Visual Sweden project DROID"}])
321
- expect(subject.version_info).to eq("1.0")
322
- expect(subject.subjects).to eq([{"subject"=>"pathology"}, {"subject"=>"whole slide imaging"}, {"subject"=>"annotated"}])
323
- expect(subject.dates).to eq([{"date"=>"2019-01-09", "dateType"=>"Issued"},
324
- {"date"=>"2019-01-09", "dateType"=>"Created"},
325
- {"date"=>"2019-01-09", "dateType"=>"Updated"}])
326
- expect(subject.publication_year).to eq("2019")
327
- expect(subject.id).to eq("https://doi.org/10.23698/aida/drov")
328
- expect(subject.publisher).to eq("AIDA")
329
- expect(subject.rights_list).to eq([{"rights"=>"Restricted access", "rightsUri"=>"https://datasets.aida.medtech4health.se/10.23698/aida/drov#license"}])
330
- expect(subject.id).to eq("https://doi.org/10.23698/aida/drov")
375
+ expect(subject.titles).to eq([{ 'title' => 'Ovary data from the Visual Sweden project DROID' }])
376
+ expect(subject.version_info).to eq('1.0')
377
+ expect(subject.subjects).to eq([{ 'subject' => 'pathology' }, { 'subject' => 'whole slide imaging' },
378
+ { 'subject' => 'annotated' }])
379
+ expect(subject.dates).to eq([{ 'date' => '2019-01-09', 'dateType' => 'Issued' },
380
+ { 'date' => '2019-01-09', 'dateType' => 'Created' },
381
+ { 'date' => '2019-01-09', 'dateType' => 'Updated' }])
382
+ expect(subject.publication_year).to eq('2019')
383
+ expect(subject.id).to eq('https://doi.org/10.23698/aida/drov')
384
+ expect(subject.publisher).to eq('AIDA')
385
+ expect(subject.rights_list).to eq([{ 'rights' => 'Restricted access',
386
+ 'rightsUri' => 'https://datasets.aida.medtech4health.se/10.23698/aida/drov#license' }])
387
+ expect(subject.id).to eq('https://doi.org/10.23698/aida/drov')
331
388
  end
332
389
 
333
- it "from attributes" do
334
- subject = Briard::Metadata.new(input: nil,
335
- from: "schema_org",
336
- doi: "10.5281/zenodo.1239",
337
- creators: [{"type"=>"Person", "name"=>"Jahn, Najko", "givenName"=>"Najko", "familyName"=>"Jahn"}],
338
- titles: [{ "title" => "Publication Fp7 Funding Acknowledgment - Plos Openaire" }],
339
- descriptions: [{ "description" => "The dataset contains a sample of metadata describing papers" }],
340
- publisher: "Zenodo",
341
- publication_year: "2013",
342
- dates: [{"date"=>"2013-04-03", "dateType"=>"Issued"}],
343
- funding_references: [{"awardNumber"=>"246686",
344
- "awardTitle"=>"Open Access Infrastructure for Research in Europe",
345
- "awardUri"=>"info:eu-repo/grantAgreement/EC/FP7/246686/",
346
- "funderIdentifier"=>"https://doi.org/10.13039/501100000780",
347
- "funderIdentifierType"=>"Crossref Funder ID",
348
- "funderName"=>"European Commission"}],
349
- types: { "resourceTypeGeneral" => "Dataset", "schemaOrg" => "Dataset" })
390
+ it 'from attributes' do
391
+ subject = described_class.new(input: nil,
392
+ from: 'schema_org',
393
+ doi: '10.5281/zenodo.1239',
394
+ creators: [{ 'type' => 'Person', 'name' => 'Jahn, Najko', 'givenName' => 'Najko',
395
+ 'familyName' => 'Jahn' }],
396
+ titles: [{ 'title' => 'Publication Fp7 Funding Acknowledgment - Plos Openaire' }],
397
+ descriptions: [{ 'description' => 'The dataset contains a sample of metadata describing papers' }],
398
+ publisher: 'Zenodo',
399
+ publication_year: '2013',
400
+ dates: [{ 'date' => '2013-04-03',
401
+ 'dateType' => 'Issued' }],
402
+ funding_references: [{ 'awardNumber' => '246686',
403
+ 'awardTitle' => 'Open Access Infrastructure for Research in Europe',
404
+ 'awardUri' => 'info:eu-repo/grantAgreement/EC/FP7/246686/',
405
+ 'funderIdentifier' => 'https://doi.org/10.13039/501100000780',
406
+ 'funderIdentifierType' => 'Crossref Funder ID',
407
+ 'funderName' => 'European Commission' }],
408
+ types: {
409
+ 'resourceTypeGeneral' => 'Dataset', 'schemaOrg' => 'Dataset'
410
+ })
350
411
 
351
412
  expect(subject.valid?).to be true
352
- expect(subject.doi).to eq("10.5281/zenodo.1239")
353
- expect(subject.id).to eq("https://doi.org/10.5281/zenodo.1239")
354
- expect(subject.types["schemaOrg"]).to eq("Dataset")
355
- expect(subject.types["resourceTypeGeneral"]).to eq("Dataset")
356
- expect(subject.creators).to eq([{"familyName"=>"Jahn", "givenName"=>"Najko", "name"=>"Jahn, Najko", "type"=>"Person"}])
357
- expect(subject.titles).to eq([{"title"=>"Publication Fp7 Funding Acknowledgment - Plos Openaire"}])
358
- expect(subject.descriptions.first["description"]).to start_with("The dataset contains a sample of metadata describing papers")
359
- expect(subject.dates).to eq([{"date"=>"2013-04-03", "dateType"=>"Issued"}])
360
- expect(subject.publication_year).to eq("2013")
361
- expect(subject.publisher).to eq("Zenodo")
362
- expect(subject.funding_references).to eq([{"awardNumber"=>"246686",
363
- "awardTitle"=>"Open Access Infrastructure for Research in Europe",
364
- "awardUri"=>"info:eu-repo/grantAgreement/EC/FP7/246686/",
365
- "funderIdentifier"=>"https://doi.org/10.13039/501100000780",
366
- "funderIdentifierType"=>"Crossref Funder ID",
367
- "funderName"=>"European Commission"}])
413
+ expect(subject.doi).to eq('10.5281/zenodo.1239')
414
+ expect(subject.id).to eq('https://doi.org/10.5281/zenodo.1239')
415
+ expect(subject.types['schemaOrg']).to eq('Dataset')
416
+ expect(subject.types['resourceTypeGeneral']).to eq('Dataset')
417
+ expect(subject.creators).to eq([{ 'familyName' => 'Jahn', 'givenName' => 'Najko',
418
+ 'name' => 'Jahn, Najko', 'type' => 'Person' }])
419
+ expect(subject.titles).to eq([{ 'title' => 'Publication Fp7 Funding Acknowledgment - Plos Openaire' }])
420
+ expect(subject.descriptions.first['description']).to start_with('The dataset contains a sample of metadata describing papers')
421
+ expect(subject.dates).to eq([{ 'date' => '2013-04-03', 'dateType' => 'Issued' }])
422
+ expect(subject.publication_year).to eq('2013')
423
+ expect(subject.publisher).to eq('Zenodo')
424
+ expect(subject.funding_references).to eq([{ 'awardNumber' => '246686',
425
+ 'awardTitle' => 'Open Access Infrastructure for Research in Europe',
426
+ 'awardUri' => 'info:eu-repo/grantAgreement/EC/FP7/246686/',
427
+ 'funderIdentifier' => 'https://doi.org/10.13039/501100000780',
428
+ 'funderIdentifierType' => 'Crossref Funder ID',
429
+ 'funderName' => 'European Commission' }])
368
430
  end
369
431
  end
370
432
  end