briard 2.4.1 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/codeql-analysis.yml +72 -0
  3. data/.github/workflows/rubocop.yml +50 -0
  4. data/.rubocop.yml +144 -620
  5. data/.rubocop_todo.yml +76 -0
  6. data/CHANGELOG.md +22 -0
  7. data/Gemfile +2 -0
  8. data/Gemfile.lock +43 -6
  9. data/Rakefile +1 -1
  10. data/{bolognese.gemspec → briard.gemspec} +46 -38
  11. data/lib/briard/array.rb +2 -2
  12. data/lib/briard/author_utils.rb +79 -71
  13. data/lib/briard/cli.rb +12 -13
  14. data/lib/briard/crossref_utils.rb +73 -61
  15. data/lib/briard/datacite_utils.rb +132 -106
  16. data/lib/briard/doi_utils.rb +10 -10
  17. data/lib/briard/metadata.rb +96 -106
  18. data/lib/briard/metadata_utils.rb +87 -78
  19. data/lib/briard/readers/bibtex_reader.rb +65 -65
  20. data/lib/briard/readers/cff_reader.rb +88 -70
  21. data/lib/briard/readers/citeproc_reader.rb +90 -84
  22. data/lib/briard/readers/codemeta_reader.rb +68 -50
  23. data/lib/briard/readers/crosscite_reader.rb +2 -2
  24. data/lib/briard/readers/crossref_reader.rb +249 -210
  25. data/lib/briard/readers/datacite_json_reader.rb +3 -3
  26. data/lib/briard/readers/datacite_reader.rb +225 -189
  27. data/lib/briard/readers/npm_reader.rb +49 -42
  28. data/lib/briard/readers/ris_reader.rb +82 -80
  29. data/lib/briard/readers/schema_org_reader.rb +182 -159
  30. data/lib/briard/string.rb +1 -1
  31. data/lib/briard/utils.rb +4 -4
  32. data/lib/briard/version.rb +3 -1
  33. data/lib/briard/whitelist_scrubber.rb +11 -4
  34. data/lib/briard/writers/bibtex_writer.rb +14 -8
  35. data/lib/briard/writers/cff_writer.rb +33 -26
  36. data/lib/briard/writers/codemeta_writer.rb +19 -15
  37. data/lib/briard/writers/csv_writer.rb +6 -4
  38. data/lib/briard/writers/datacite_json_writer.rb +8 -2
  39. data/lib/briard/writers/jats_writer.rb +33 -28
  40. data/lib/briard/writers/rdf_xml_writer.rb +1 -1
  41. data/lib/briard/writers/ris_writer.rb +30 -18
  42. data/lib/briard/writers/turtle_writer.rb +1 -1
  43. data/lib/briard.rb +6 -6
  44. data/rubocop.sarif +0 -0
  45. data/spec/array_spec.rb +5 -5
  46. data/spec/author_utils_spec.rb +151 -132
  47. data/spec/datacite_utils_spec.rb +135 -83
  48. data/spec/doi_utils_spec.rb +168 -164
  49. data/spec/find_from_format_spec.rb +69 -69
  50. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/onlies_keep_specific_tags.yml +65 -0
  51. data/spec/fixtures/vcr_cassettes/Briard_Metadata/sanitize/removes_a_tags.yml +65 -0
  52. data/spec/metadata_spec.rb +91 -90
  53. data/spec/readers/bibtex_reader_spec.rb +43 -38
  54. data/spec/readers/cff_reader_spec.rb +165 -153
  55. data/spec/readers/citeproc_reader_spec.rb +45 -40
  56. data/spec/readers/codemeta_reader_spec.rb +128 -115
  57. data/spec/readers/crosscite_reader_spec.rb +34 -24
  58. data/spec/readers/crossref_reader_spec.rb +1098 -939
  59. data/spec/readers/datacite_json_reader_spec.rb +53 -40
  60. data/spec/readers/datacite_reader_spec.rb +1541 -1337
  61. data/spec/readers/npm_reader_spec.rb +48 -43
  62. data/spec/readers/ris_reader_spec.rb +53 -47
  63. data/spec/readers/schema_org_reader_spec.rb +329 -267
  64. data/spec/spec_helper.rb +6 -5
  65. data/spec/utils_spec.rb +371 -347
  66. data/spec/writers/bibtex_writer_spec.rb +143 -143
  67. data/spec/writers/cff_writer_spec.rb +96 -90
  68. data/spec/writers/citation_writer_spec.rb +34 -33
  69. data/spec/writers/citeproc_writer_spec.rb +226 -224
  70. data/spec/writers/codemeta_writer_spec.rb +18 -16
  71. data/spec/writers/crosscite_writer_spec.rb +91 -73
  72. data/spec/writers/crossref_writer_spec.rb +99 -91
  73. data/spec/writers/csv_writer_spec.rb +70 -70
  74. data/spec/writers/datacite_json_writer_spec.rb +78 -68
  75. data/spec/writers/datacite_writer_spec.rb +417 -322
  76. data/spec/writers/jats_writer_spec.rb +177 -161
  77. data/spec/writers/rdf_xml_writer_spec.rb +68 -63
  78. data/spec/writers/ris_writer_spec.rb +162 -162
  79. data/spec/writers/turtle_writer_spec.rb +47 -47
  80. metadata +250 -160
  81. data/.github/workflows/release.yml +0 -47
@@ -3,130 +3,143 @@
3
3
  require 'spec_helper'
4
4
 
5
5
  describe Briard::Metadata, vcr: true do
6
- let(:fixture_path) { "spec/fixtures/" }
6
+ let(:fixture_path) { 'spec/fixtures/' }
7
7
 
8
- context "get schema_org raw" do
9
- it "BlogPosting" do
10
- input = fixture_path + 'schema_org.json'
11
- subject = Briard::Metadata.new(input: input)
12
- expect(subject.raw).to eq(IO.read(input).strip)
8
+ context 'get schema_org raw' do
9
+ it 'BlogPosting' do
10
+ input = "#{fixture_path}schema_org.json"
11
+ subject = described_class.new(input: input)
12
+ expect(subject.raw).to eq(File.read(input).strip)
13
13
  end
14
14
  end
15
15
 
16
- context "get schema_org metadata" do
17
- it "BlogPosting" do
18
- input = "https://blog.front-matter.io/posts/eating-your-own-dog-food"
19
- subject = Briard::Metadata.new(input: input, from: "schema_org")
16
+ context 'get schema_org metadata' do
17
+ it 'BlogPosting' do
18
+ input = 'https://blog.front-matter.io/posts/eating-your-own-dog-food'
19
+ subject = described_class.new(input: input, from: 'schema_org')
20
20
  expect(subject.valid?).to be true
21
- expect(subject.id).to eq("https://doi.org/10.53731/r79vxn1-97aq74v-ag58n")
22
- expect(subject.url).to eq("https://blog.front-matter.io/posts/eating-your-own-dog-food")
23
- expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"article-newspaper", "resourceTypeGeneral"=>"Preprint", "ris"=>"GEN", "schemaOrg"=>"Article")
24
- expect(subject.creators).to eq([{"affiliation"=>[],"familyName"=>"Fenner", "givenName"=>"Martin", "name"=>"Fenner, Martin", "nameIdentifiers"=> [{"nameIdentifier"=>"https://orcid.org/0000-0003-1419-2405", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}], "nameType"=>"Personal"}])
25
- expect(subject.titles).to eq([{"title"=>"Eating your own Dog Food"}])
26
- expect(subject.descriptions.first["description"]).to start_with("Eating your own dog food")
27
- expect(subject.subjects).to eq([{"subject"=>"feature"}])
28
- expect(subject.dates).to eq([{"date"=>"2016-12-20T00:00:00Z", "dateType"=>"Issued"}, {"date"=>"2022-08-15T09:06:22Z", "dateType"=>"Updated"}])
29
- expect(subject.publication_year).to eq("2016")
21
+ expect(subject.id).to eq('https://doi.org/10.53731/r79vxn1-97aq74v-ag58n')
22
+ expect(subject.url).to eq('https://blog.front-matter.io/posts/eating-your-own-dog-food')
23
+ expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'article-newspaper',
24
+ 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'Article')
25
+ expect(subject.creators).to eq([{ 'affiliation' => [], 'familyName' => 'Fenner',
26
+ 'givenName' => 'Martin', 'name' => 'Fenner, Martin', 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-1419-2405', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }], 'nameType' => 'Personal' }])
27
+ expect(subject.titles).to eq([{ 'title' => 'Eating your own Dog Food' }])
28
+ expect(subject.descriptions.first['description']).to start_with('Eating your own dog food')
29
+ expect(subject.subjects).to eq([{ 'subject' => 'feature' }])
30
+ expect(subject.dates).to eq([{ 'date' => '2016-12-20T00:00:00Z', 'dateType' => 'Issued' },
31
+ { 'date' => '2022-08-15T09:06:22Z', 'dateType' => 'Updated' }])
32
+ expect(subject.publication_year).to eq('2016')
30
33
  expect(subject.related_identifiers.length).to eq(0)
31
- expect(subject.publisher).to eq("Front Matter")
34
+ expect(subject.publisher).to eq('Front Matter')
32
35
  end
33
36
 
34
- it "BlogPosting with new DOI" do
35
- input = "https://blog.front-matter.io/posts/eating-your-own-dog-food"
36
- subject = Briard::Metadata.new(input: input, doi: "10.5438/0000-00ss")
37
+ it 'BlogPosting with new DOI' do
38
+ input = 'https://blog.front-matter.io/posts/eating-your-own-dog-food'
39
+ subject = described_class.new(input: input, doi: '10.5438/0000-00ss')
37
40
  expect(subject.valid?).to be true
38
- expect(subject.id).to eq("https://doi.org/10.5438/0000-00ss")
39
- expect(subject.doi).to eq("10.5438/0000-00ss")
40
- expect(subject.url).to eq("https://blog.front-matter.io/posts/eating-your-own-dog-food")
41
- expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"article-newspaper", "resourceTypeGeneral"=>"Preprint", "ris"=>"GEN", "schemaOrg"=>"Article")
41
+ expect(subject.id).to eq('https://doi.org/10.5438/0000-00ss')
42
+ expect(subject.doi).to eq('10.5438/0000-00ss')
43
+ expect(subject.url).to eq('https://blog.front-matter.io/posts/eating-your-own-dog-food')
44
+ expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'article-newspaper',
45
+ 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'Article')
42
46
  end
43
47
 
44
- it "BlogPosting with type as array" do
45
- input = fixture_path + 'schema_org_type_as_array.json'
46
- subject = Briard::Metadata.new(input: input)
48
+ it 'BlogPosting with type as array' do
49
+ input = "#{fixture_path}schema_org_type_as_array.json"
50
+ subject = described_class.new(input: input)
47
51
  expect(subject.valid?).to be true
48
- expect(subject.id).to eq("https://doi.org/10.5438/4k3m-nyvg")
49
- expect(subject.url).to eq("https://blog.datacite.org/eating-your-own-dog-food")
50
- expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"post-weblog", "resourceTypeGeneral"=>"Preprint", "ris"=>"GEN", "schemaOrg"=>"BlogPosting")
51
- expect(subject.creators).to eq([{"affiliation"=>[{"name"=>"DataCite"}],"familyName"=>"Fenner", "givenName"=>"Martin", "name"=>"Fenner, Martin", "nameIdentifiers"=> [{"nameIdentifier"=>"https://orcid.org/0000-0003-1419-2405", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}], "nameType"=>"Personal"}])
52
- expect(subject.titles).to eq([{"title"=>"Eating your own Dog Food"}])
53
- expect(subject.descriptions.first["description"]).to start_with("Eating your own dog food")
54
- expect(subject.subjects).to eq([{"subject"=>"datacite"}, {"subject"=>"doi"}, {"subject"=>"metadata"}, {"subject"=>"featured"}])
55
- expect(subject.dates).to eq([{"date"=>"2016-12-20", "dateType"=>"Issued"},
56
- {"date"=>"2016-12-20", "dateType"=>"Created"},
57
- {"date"=>"2016-12-20", "dateType"=>"Updated"}])
58
- expect(subject.publication_year).to eq("2016")
52
+ expect(subject.id).to eq('https://doi.org/10.5438/4k3m-nyvg')
53
+ expect(subject.url).to eq('https://blog.datacite.org/eating-your-own-dog-food')
54
+ expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'post-weblog',
55
+ 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'BlogPosting')
56
+ expect(subject.creators).to eq([{ 'affiliation' => [{ 'name' => 'DataCite' }],
57
+ 'familyName' => 'Fenner', 'givenName' => 'Martin', 'name' => 'Fenner, Martin', 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-1419-2405', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }], 'nameType' => 'Personal' }])
58
+ expect(subject.titles).to eq([{ 'title' => 'Eating your own Dog Food' }])
59
+ expect(subject.descriptions.first['description']).to start_with('Eating your own dog food')
60
+ expect(subject.subjects).to eq([{ 'subject' => 'datacite' }, { 'subject' => 'doi' },
61
+ { 'subject' => 'metadata' }, { 'subject' => 'featured' }])
62
+ expect(subject.dates).to eq([{ 'date' => '2016-12-20', 'dateType' => 'Issued' },
63
+ { 'date' => '2016-12-20', 'dateType' => 'Created' },
64
+ { 'date' => '2016-12-20', 'dateType' => 'Updated' }])
65
+ expect(subject.publication_year).to eq('2016')
59
66
  expect(subject.related_identifiers.length).to eq(3)
60
- expect(subject.related_identifiers.last).to eq("relatedIdentifier"=>"10.5438/55e5-t5c0", "relatedIdentifierType"=>"DOI", "relationType"=>"References", "resourceTypeGeneral" => "Text")
61
- expect(subject.publisher).to eq("DataCite")
67
+ expect(subject.related_identifiers.last).to eq('relatedIdentifier' => '10.5438/55e5-t5c0',
68
+ 'relatedIdentifierType' => 'DOI', 'relationType' => 'References', 'resourceTypeGeneral' => 'Text')
69
+ expect(subject.publisher).to eq('DataCite')
62
70
  end
63
71
 
64
- context "get schema_org metadata front matter" do
65
- it "BlogPosting" do
66
- input = "https://blog.front-matter.io/posts/step-forward-for-software-citation"
67
- subject = Briard::Metadata.new(input: input, from: "schema_org")
72
+ context 'get schema_org metadata front matter' do
73
+ it 'BlogPosting' do
74
+ input = 'https://blog.front-matter.io/posts/step-forward-for-software-citation'
75
+ subject = described_class.new(input: input, from: 'schema_org')
68
76
  expect(subject.valid?).to be true
69
- expect(subject.id).to eq("https://doi.org/10.53731/r9531p1-97aq74v-ag78v")
70
- expect(subject.url).to eq("https://blog.front-matter.io/posts/step-forward-for-software-citation")
71
- expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"article-newspaper", "resourceTypeGeneral"=>"Preprint", "ris"=>"GEN", "schemaOrg"=>"Article")
72
- expect(subject.creators).to eq([{"affiliation"=>[],
73
- "familyName"=>"Fenner", "givenName"=>"Martin", "name"=>"Fenner, Martin", "nameIdentifiers"=> [{"nameIdentifier"=>"https://orcid.org/0000-0003-1419-2405", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}], "nameType"=>"Personal"}])
74
- expect(subject.titles).to eq([{"title"=>"A step forward for software citation: GitHub's enhanced software citation support"}])
75
- expect(subject.descriptions.first["description"]).to start_with("On August 19, GitHub announced software citation")
76
- expect(subject.subjects).to eq([{"subject"=>"news"}])
77
- expect(subject.dates).to eq([{"date"=>"2021-08-24T16:57:24Z", "dateType"=>"Issued"},
78
- {"date"=>"2022-08-15T19:05:14Z", "dateType"=>"Updated"}])
79
- expect(subject.publication_year).to eq("2021")
77
+ expect(subject.id).to eq('https://doi.org/10.53731/r9531p1-97aq74v-ag78v')
78
+ expect(subject.url).to eq('https://blog.front-matter.io/posts/step-forward-for-software-citation')
79
+ expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'article-newspaper',
80
+ 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'Article')
81
+ expect(subject.creators).to eq([{ 'affiliation' => [],
82
+ 'familyName' => 'Fenner', 'givenName' => 'Martin', 'name' => 'Fenner, Martin', 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-1419-2405', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }], 'nameType' => 'Personal' }])
83
+ expect(subject.titles).to eq([{ 'title' => "A step forward for software citation: GitHub's enhanced software citation support" }])
84
+ expect(subject.descriptions.first['description']).to start_with('On August 19, GitHub announced software citation')
85
+ expect(subject.subjects).to eq([{ 'subject' => 'news' }])
86
+ expect(subject.dates).to eq([{ 'date' => '2021-08-24T16:57:24Z', 'dateType' => 'Issued' },
87
+ { 'date' => '2022-08-15T19:05:14Z', 'dateType' => 'Updated' }])
88
+ expect(subject.publication_year).to eq('2021')
80
89
  expect(subject.related_identifiers.length).to eq(0)
81
- expect(subject.container).to eq("identifier"=>"2749-9952", "identifierType"=>"ISSN", "title"=>"Front Matter", "type"=>"Blog")
82
- expect(subject.publisher).to eq("Front Matter")
90
+ expect(subject.container).to eq('identifier' => '2749-9952', 'identifierType' => 'ISSN',
91
+ 'title' => 'Front Matter', 'type' => 'Blog')
92
+ expect(subject.publisher).to eq('Front Matter')
83
93
  end
84
94
  end
85
95
 
86
- it "zenodo" do
87
- input = "https://www.zenodo.org/record/1196821"
88
- subject = Briard::Metadata.new(input: input, from: "schema_org")
96
+ it 'zenodo' do
97
+ input = 'https://www.zenodo.org/record/1196821'
98
+ subject = described_class.new(input: input, from: 'schema_org')
89
99
  expect(subject.valid?).to be false
90
- expect(subject.language).to eq("eng")
100
+ expect(subject.language).to eq('eng')
91
101
  expect(subject.errors).to eq("49:0: ERROR: Element '{http://datacite.org/schema/kernel-4}publisher': [facet 'minLength'] The value has a length of '0'; this underruns the allowed minimum length of '1'.")
92
- expect(subject.id).to eq("https://doi.org/10.5281/zenodo.1196821")
93
- expect(subject.doi).to eq("10.5281/zenodo.1196821")
94
- expect(subject.url).to eq("https://zenodo.org/record/1196821")
95
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
96
- expect(subject.titles).to eq([{"title"=>"PsPM-SC4B: SCR, ECG, EMG, PSR and respiration measurements in a delay fear conditioning task with auditory CS and electrical US"}])
102
+ expect(subject.id).to eq('https://doi.org/10.5281/zenodo.1196821')
103
+ expect(subject.doi).to eq('10.5281/zenodo.1196821')
104
+ expect(subject.url).to eq('https://zenodo.org/record/1196821')
105
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
106
+ 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
107
+ expect(subject.titles).to eq([{ 'title' => 'PsPM-SC4B: SCR, ECG, EMG, PSR and respiration measurements in a delay fear conditioning task with auditory CS and electrical US' }])
97
108
  expect(subject.creators.size).to eq(6)
98
- expect(subject.creators.first).to eq("name" => "Staib, Matthias",
99
- "nameIdentifiers" => [{"nameIdentifier"=>"https://orcid.org/0000-0001-9688-838X", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}],
100
- "nameType" => "Personal", "givenName"=>"Matthias", "familyName"=>"Staib", "affiliation" => [{"name"=>"University of Zurich, Zurich, Switzerland"}])
101
- expect(subject.publisher).to be_nil
102
- expect(subject.publication_year).to eq("2018")
103
- expect(subject.subjects).to eq([{"subject"=>"pupil size response"},
104
- {"subject"=>"skin conductance response"},
105
- {"subject"=>"electrocardiogram"},
106
- {"subject"=>"electromyogram"},
107
- {"subject"=>"electrodermal activity"},
108
- {"subject"=>"galvanic skin response"},
109
- {"subject"=>"psr"},
110
- {"subject"=>"scr"},
111
- {"subject"=>"ecg"},
112
- {"subject"=>"emg"},
113
- {"subject"=>"eda"},
114
- {"subject"=>"gsr"}])
109
+ expect(subject.creators.first).to eq('name' => 'Staib, Matthias',
110
+ 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0001-9688-838X', 'nameIdentifierScheme' => 'ORCID', 'schemeUri' => 'https://orcid.org' }],
111
+ 'nameType' => 'Personal', 'givenName' => 'Matthias', 'familyName' => 'Staib', 'affiliation' => [{ 'name' => 'University of Zurich, Zurich, Switzerland' }])
112
+ expect(subject.publisher.nil?).to be(true)
113
+ expect(subject.publication_year).to eq('2018')
114
+ expect(subject.subjects).to eq([{ 'subject' => 'pupil size response' },
115
+ { 'subject' => 'skin conductance response' },
116
+ { 'subject' => 'electrocardiogram' },
117
+ { 'subject' => 'electromyogram' },
118
+ { 'subject' => 'electrodermal activity' },
119
+ { 'subject' => 'galvanic skin response' },
120
+ { 'subject' => 'psr' },
121
+ { 'subject' => 'scr' },
122
+ { 'subject' => 'ecg' },
123
+ { 'subject' => 'emg' },
124
+ { 'subject' => 'eda' },
125
+ { 'subject' => 'gsr' }])
115
126
  end
116
127
 
117
- it "pangaea" do
118
- input = "https://doi.pangaea.de/10.1594/PANGAEA.836178"
119
- subject = Briard::Metadata.new(input: input, from: "schema_org")
128
+ it 'pangaea' do
129
+ input = 'https://doi.pangaea.de/10.1594/PANGAEA.836178'
130
+ subject = described_class.new(input: input, from: 'schema_org')
120
131
  expect(subject.valid?).to be true
121
- expect(subject.id).to eq("https://doi.org/10.1594/pangaea.836178")
122
- expect(subject.doi).to eq("10.1594/pangaea.836178")
123
- expect(subject.url).to eq("https://doi.pangaea.de/10.1594/PANGAEA.836178")
124
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
125
- expect(subject.titles).to eq([{"title"=>"Hydrological and meteorological investigations in a lake near Kangerlussuaq, west Greenland"}])
132
+ expect(subject.id).to eq('https://doi.org/10.1594/pangaea.836178')
133
+ expect(subject.doi).to eq('10.1594/pangaea.836178')
134
+ expect(subject.url).to eq('https://doi.pangaea.de/10.1594/PANGAEA.836178')
135
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
136
+ 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
137
+ expect(subject.titles).to eq([{ 'title' => 'Hydrological and meteorological investigations in a lake near Kangerlussuaq, west Greenland' }])
126
138
  expect(subject.creators.size).to eq(8)
127
- expect(subject.creators.first).to eq("nameType" => "Personal", "name"=>"Johansson, Emma", "givenName"=>"Emma", "familyName"=>"Johansson")
128
- expect(subject.publisher).to eq("PANGAEA")
129
- expect(subject.publication_year).to eq("2014")
139
+ expect(subject.creators.first).to eq('nameType' => 'Personal', 'name' => 'Johansson, Emma',
140
+ 'givenName' => 'Emma', 'familyName' => 'Johansson')
141
+ expect(subject.publisher).to eq('PANGAEA')
142
+ expect(subject.publication_year).to eq('2014')
130
143
  end
131
144
 
132
145
  # TODO: check redirections
@@ -143,22 +156,26 @@ describe Briard::Metadata, vcr: true do
143
156
  # expect(subject.creators.first).to eq("familyName"=>"MOGHADDAM", "givenName"=>"M.", "name"=>"MOGHADDAM, M.", "nameType"=>"Personal", "nameIdentifiers"=>[], "affiliation" => [])
144
157
  # end
145
158
 
146
- it "harvard dataverse" do
147
- input = "https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/NJ7XSO"
148
- subject = Briard::Metadata.new(input: input, from: "schema_org")
159
+ it 'harvard dataverse' do
160
+ input = 'https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/NJ7XSO'
161
+ subject = described_class.new(input: input, from: 'schema_org')
149
162
  expect(subject.valid?).to be true
150
- expect(subject.id).to eq("https://doi.org/10.7910/dvn/nj7xso")
151
- expect(subject.doi).to eq("10.7910/dvn/nj7xso")
152
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
153
- expect(subject.titles).to eq([{"title"=>"Summary data ankylosing spondylitis GWAS"}])
154
- expect(subject.container).to eq("identifier"=>"https://dataverse.harvard.edu", "identifierType"=>"URL", "title"=>"Harvard Dataverse", "type"=>"DataRepository")
155
- expect(subject.creators).to eq([{"name" => "International Genetics Of Ankylosing Spondylitis Consortium (IGAS)", "nameIdentifiers"=>[], "affiliation" => []}])
156
- expect(subject.subjects).to eq([{"subject"=>"medicine, health and life sciences"},
157
- {"subject"=>"genome-wide association studies"},
158
- {"subject"=>"ankylosing spondylitis"}])
163
+ expect(subject.id).to eq('https://doi.org/10.7910/dvn/nj7xso')
164
+ expect(subject.doi).to eq('10.7910/dvn/nj7xso')
165
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
166
+ 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
167
+ expect(subject.titles).to eq([{ 'title' => 'Summary data ankylosing spondylitis GWAS' }])
168
+ expect(subject.container).to eq('identifier' => 'https://dataverse.harvard.edu',
169
+ 'identifierType' => 'URL', 'title' => 'Harvard Dataverse', 'type' => 'DataRepository')
170
+ expect(subject.creators).to eq([{
171
+ 'name' => 'International Genetics Of Ankylosing Spondylitis Consortium (IGAS)', 'nameIdentifiers' => [], 'affiliation' => []
172
+ }])
173
+ expect(subject.subjects).to eq([{ 'subject' => 'medicine, health and life sciences' },
174
+ { 'subject' => 'genome-wide association studies' },
175
+ { 'subject' => 'ankylosing spondylitis' }])
159
176
  end
160
177
 
161
- # TODO check 403 status in DOI resolver
178
+ # TODO: check 403 status in DOI resolver
162
179
  # it "harvard dataverse via identifiers.org" do
163
180
  # input = "https://identifiers.org/doi/10.7910/DVN/NJ7XSO"
164
181
  # subject = Briard::Metadata.new(input: input, from: "schema_org")
@@ -172,199 +189,244 @@ describe Briard::Metadata, vcr: true do
172
189
  # end
173
190
  end
174
191
 
175
- context "get schema_org metadata as string" do
176
- it "BlogPosting" do
177
- input = fixture_path + 'schema_org.json'
178
- subject = Briard::Metadata.new(input: input)
192
+ context 'get schema_org metadata as string' do
193
+ it 'BlogPosting' do
194
+ input = "#{fixture_path}schema_org.json"
195
+ subject = described_class.new(input: input)
179
196
  expect(subject.valid?).to be true
180
- expect(subject.language).to eq("en")
181
- expect(subject.id).to eq("https://doi.org/10.5438/4k3m-nyvg")
182
- expect(subject.url).to eq("https://blog.datacite.org/eating-your-own-dog-food")
183
- expect(subject.types).to eq("bibtex"=>"article", "citeproc"=>"post-weblog", "resourceTypeGeneral"=>"Preprint", "ris"=>"GEN", "schemaOrg"=>"BlogPosting")
184
- expect(subject.creators).to eq([{"familyName"=>"Fenner", "givenName"=>"Martin", "name"=>"Fenner, Martin", "nameIdentifiers"=> [{"nameIdentifier"=>"https://orcid.org/0000-0003-1419-2405", "nameIdentifierScheme"=>"ORCID",
185
- + "schemeUri"=>"https://orcid.org"}], "nameType"=>"Personal"}])
186
- expect(subject.titles).to eq([{"title"=>"Eating your own Dog Food"}])
187
- expect(subject.descriptions.first["description"]).to start_with("Eating your own dog food")
188
- expect(subject.subjects).to eq([{"subject"=>"datacite"}, {"subject"=>"doi"}, {"subject"=>"metadata"}, {"subject"=>"featured"}])
189
- expect(subject.dates).to eq([{"date"=>"2016-12-20", "dateType"=>"Issued"},
190
- {"date"=>"2016-12-20", "dateType"=>"Created"},
191
- {"date"=>"2016-12-20", "dateType"=>"Updated"}])
192
- expect(subject.publication_year).to eq("2016")
197
+ expect(subject.language).to eq('en')
198
+ expect(subject.id).to eq('https://doi.org/10.5438/4k3m-nyvg')
199
+ expect(subject.url).to eq('https://blog.datacite.org/eating-your-own-dog-food')
200
+ expect(subject.types).to eq('bibtex' => 'article', 'citeproc' => 'post-weblog',
201
+ 'resourceTypeGeneral' => 'Preprint', 'ris' => 'GEN', 'schemaOrg' => 'BlogPosting')
202
+ expect(subject.creators).to eq([{ 'familyName' => 'Fenner', 'givenName' => 'Martin', 'name' => 'Fenner, Martin', 'nameIdentifiers' => [{ 'nameIdentifier' => 'https://orcid.org/0000-0003-1419-2405', 'nameIdentifierScheme' => 'ORCID',
203
+ + 'schemeUri' => 'https://orcid.org' }], 'nameType' => 'Personal' }])
204
+ expect(subject.titles).to eq([{ 'title' => 'Eating your own Dog Food' }])
205
+ expect(subject.descriptions.first['description']).to start_with('Eating your own dog food')
206
+ expect(subject.subjects).to eq([{ 'subject' => 'datacite' }, { 'subject' => 'doi' },
207
+ { 'subject' => 'metadata' }, { 'subject' => 'featured' }])
208
+ expect(subject.dates).to eq([{ 'date' => '2016-12-20', 'dateType' => 'Issued' },
209
+ { 'date' => '2016-12-20', 'dateType' => 'Created' },
210
+ { 'date' => '2016-12-20', 'dateType' => 'Updated' }])
211
+ expect(subject.publication_year).to eq('2016')
193
212
  expect(subject.related_identifiers.length).to eq(3)
194
- expect(subject.related_identifiers.last).to eq("relatedIdentifier"=>"10.5438/55e5-t5c0", "relatedIdentifierType"=>"DOI", "relationType"=>"References", "resourceTypeGeneral" => "Text")
195
- expect(subject.publisher).to eq("DataCite")
213
+ expect(subject.related_identifiers.last).to eq('relatedIdentifier' => '10.5438/55e5-t5c0',
214
+ 'relatedIdentifierType' => 'DOI', 'relationType' => 'References', 'resourceTypeGeneral' => 'Text')
215
+ expect(subject.publisher).to eq('DataCite')
196
216
  end
197
217
 
198
- it "GTEx dataset" do
199
- input = fixture_path + 'schema_org_gtex.json'
200
- subject = Briard::Metadata.new(input: input)
218
+ it 'GTEx dataset' do
219
+ input = "#{fixture_path}schema_org_gtex.json"
220
+ subject = described_class.new(input: input)
201
221
 
202
222
  expect(subject.valid?).to be true
203
- expect(subject.id).to eq("https://doi.org/10.25491/d50j-3083")
204
- expect(subject.identifiers).to eq([{"identifier"=>"687610993", "identifierType"=>"md5"}])
205
- expect(subject.url).to eq("https://ors.datacite.org/doi:/10.25491/d50j-3083")
206
- expect(subject.content_url).to eq(["https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_expression_matrices.tar.gz"])
207
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceType"=>"Gene expression matrices", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
208
- expect(subject.creators).to eq([{"name"=>"The GTEx Consortium", "nameType"=>"Organizational", "nameIdentifiers"=>[], "affiliation" => []}])
209
- expect(subject.titles).to eq([{"title"=>"Fully processed, filtered and normalized gene expression matrices (in BED format) for each tissue, which were used as input into FastQTL for eQTL discovery"}])
210
- expect(subject.version_info).to eq("v7")
211
- expect(subject.subjects).to eq([{"subject"=>"gtex"}, {"subject"=>"annotation"}, {"subject"=>"phenotype"}, {"subject"=>"gene regulation"}, {"subject"=>"transcriptomics"}])
212
- expect(subject.dates).to eq([{"date"=>"2017", "dateType"=>"Issued"}])
213
- expect(subject.publication_year).to eq("2017")
214
- expect(subject.container).to eq("title"=>"GTEx", "type"=>"DataRepository")
215
- expect(subject.publisher).to eq("GTEx")
223
+ expect(subject.id).to eq('https://doi.org/10.25491/d50j-3083')
224
+ expect(subject.identifiers).to eq([{ 'identifier' => '687610993',
225
+ 'identifierType' => 'md5' }])
226
+ expect(subject.url).to eq('https://ors.datacite.org/doi:/10.25491/d50j-3083')
227
+ expect(subject.content_url).to eq(['https://storage.googleapis.com/gtex_analysis_v7/single_tissue_eqtl_data/GTEx_Analysis_v7_eQTL_expression_matrices.tar.gz'])
228
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
229
+ 'resourceType' => 'Gene expression matrices', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
230
+ expect(subject.creators).to eq([{ 'name' => 'The GTEx Consortium', 'nameType' => 'Organizational',
231
+ 'nameIdentifiers' => [], 'affiliation' => [] }])
232
+ expect(subject.titles).to eq([{ 'title' => 'Fully processed, filtered and normalized gene expression matrices (in BED format) for each tissue, which were used as input into FastQTL for eQTL discovery' }])
233
+ expect(subject.version_info).to eq('v7')
234
+ expect(subject.subjects).to eq([{ 'subject' => 'gtex' }, { 'subject' => 'annotation' },
235
+ { 'subject' => 'phenotype' }, { 'subject' => 'gene regulation' }, { 'subject' => 'transcriptomics' }])
236
+ expect(subject.dates).to eq([{ 'date' => '2017', 'dateType' => 'Issued' }])
237
+ expect(subject.publication_year).to eq('2017')
238
+ expect(subject.container).to eq('title' => 'GTEx', 'type' => 'DataRepository')
239
+ expect(subject.publisher).to eq('GTEx')
216
240
  expect(subject.funding_references.length).to eq(7)
217
- expect(subject.funding_references.first).to eq("funderIdentifier"=>"https://doi.org/10.13039/100000052", "funderIdentifierType"=>"Crossref Funder ID", "funderName"=>"Common Fund of the Office of the Director of the NIH")
241
+ expect(subject.funding_references.first).to eq(
242
+ 'funderIdentifier' => 'https://doi.org/10.13039/100000052', 'funderIdentifierType' => 'Crossref Funder ID', 'funderName' => 'Common Fund of the Office of the Director of the NIH'
243
+ )
218
244
  end
219
245
 
220
- it "TOPMed dataset" do
221
- input = fixture_path + 'schema_org_topmed.json'
222
- subject = Briard::Metadata.new(input: input)
246
+ it 'TOPMed dataset' do
247
+ input = "#{fixture_path}schema_org_topmed.json"
248
+ subject = described_class.new(input: input)
223
249
  expect(subject.valid?).to be true
224
- expect(subject.identifiers).to eq([{"identifier"=>"3b33f6b9338fccab0901b7d317577ea3", "identifierType"=>"md5"},
225
- {"identifier"=>"ark:/99999/fk41CrU4eszeLUDe", "identifierType"=>"minid"},
226
- {"identifier"=>"dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7", "identifierType"=>"dataguid"}])
227
- expect(subject.url).to eq("https://ors.datacite.org/doi:/10.23725/8na3-9s47")
228
- expect(subject.content_url).to eq(["s3://cgp-commons-public/topmed_open_access/197bc047-e917-55ed-852d-d563cdbc50e4/NWD165827.recab.cram", "gs://topmed-irc-share/public/NWD165827.recab.cram"])
229
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceType"=>"CRAM file", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
230
- expect(subject.creators).to eq([{"name"=>"TOPMed IRC", "nameType"=>"Organizational", "nameIdentifiers"=>[], "affiliation" => []}])
231
- expect(subject.titles).to eq([{"title"=>"NWD165827.recab.cram"}])
232
- expect(subject.subjects).to eq([{"subject"=>"topmed"}, {"subject"=>"whole genome sequencing"}])
233
- expect(subject.dates).to eq([{"date"=>"2017-11-30", "dateType"=>"Issued"}])
234
- expect(subject.publication_year).to eq("2017")
235
- expect(subject.publisher).to eq("TOPMed")
236
- expect(subject.related_identifiers).to eq([{"relatedIdentifier"=>"10.23725/2g4s-qv04", "relatedIdentifierType"=>"DOI", "relationType"=>"References", "resourceTypeGeneral"=>"Dataset"}])
237
- expect(subject.funding_references).to eq([{"funderIdentifier"=>"https://doi.org/10.13039/100000050", "funderIdentifierType"=>"Crossref Funder ID", "funderName"=>"National Heart, Lung, and Blood Institute (NHLBI)"}])
250
+ expect(subject.identifiers).to eq([{ 'identifier' => '3b33f6b9338fccab0901b7d317577ea3', 'identifierType' => 'md5' },
251
+ { 'identifier' => 'ark:/99999/fk41CrU4eszeLUDe',
252
+ 'identifierType' => 'minid' },
253
+ { 'identifier' => 'dg.4503/c3d66dc9-58da-411c-83c4-dd656aa3c4b7',
254
+ 'identifierType' => 'dataguid' }])
255
+ expect(subject.url).to eq('https://ors.datacite.org/doi:/10.23725/8na3-9s47')
256
+ expect(subject.content_url).to eq([
257
+ 's3://cgp-commons-public/topmed_open_access/197bc047-e917-55ed-852d-d563cdbc50e4/NWD165827.recab.cram', 'gs://topmed-irc-share/public/NWD165827.recab.cram'
258
+ ])
259
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
260
+ 'resourceType' => 'CRAM file', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
261
+ expect(subject.creators).to eq([{ 'name' => 'TOPMed IRC', 'nameType' => 'Organizational',
262
+ 'nameIdentifiers' => [], 'affiliation' => [] }])
263
+ expect(subject.titles).to eq([{ 'title' => 'NWD165827.recab.cram' }])
264
+ expect(subject.subjects).to eq([{ 'subject' => 'topmed' },
265
+ { 'subject' => 'whole genome sequencing' }])
266
+ expect(subject.dates).to eq([{ 'date' => '2017-11-30', 'dateType' => 'Issued' }])
267
+ expect(subject.publication_year).to eq('2017')
268
+ expect(subject.publisher).to eq('TOPMed')
269
+ expect(subject.related_identifiers).to eq([{ 'relatedIdentifier' => '10.23725/2g4s-qv04',
270
+ 'relatedIdentifierType' => 'DOI', 'relationType' => 'References', 'resourceTypeGeneral' => 'Dataset' }])
271
+ expect(subject.funding_references).to eq([{
272
+ 'funderIdentifier' => 'https://doi.org/10.13039/100000050', 'funderIdentifierType' => 'Crossref Funder ID', 'funderName' => 'National Heart, Lung, and Blood Institute (NHLBI)'
273
+ }])
238
274
  end
239
275
 
240
- it "tdl_iodp dataset" do
241
- input = fixture_path + 'schema_org_tdl_iodp_invalid_authors.json'
242
- subject = Briard::Metadata.new(input: input)
276
+ it 'tdl_iodp dataset' do
277
+ input = "#{fixture_path}schema_org_tdl_iodp_invalid_authors.json"
278
+ subject = described_class.new(input: input)
243
279
  expect(subject.valid?).to be false
244
280
  end
245
281
 
246
- it "geolocation" do
247
- input = fixture_path + 'schema_org_geolocation.json'
248
- subject = Briard::Metadata.new(input: input)
282
+ it 'geolocation' do
283
+ input = "#{fixture_path}schema_org_geolocation.json"
284
+ subject = described_class.new(input: input)
249
285
 
250
286
  expect(subject.valid?).to be true
251
- expect(subject.identifiers).to eq([{"identifier"=>"https://doi.org/10.6071/z7wc73", "identifierType"=>"DOI"}])
252
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceType"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
287
+ expect(subject.identifiers).to eq([{ 'identifier' => 'https://doi.org/10.6071/z7wc73',
288
+ 'identifierType' => 'DOI' }])
289
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
290
+ 'resourceType' => 'dataset', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
253
291
  expect(subject.creators.length).to eq(6)
254
- expect(subject.creators.first).to eq("familyName"=>"Bales", "givenName"=>"Roger", "name"=>"Bales, Roger", "nameType"=>"Personal")
255
- expect(subject.titles).to eq([{"title"=>"Southern Sierra Critical Zone Observatory (SSCZO), Providence Creek meteorological data, soil moisture and temperature, snow depth and air temperature"}])
256
- expect(subject.subjects).to eq([{"subject"=>"earth sciences"},
257
- {"subject"=>"soil moisture"},
258
- {"subject"=>"soil temperature"},
259
- {"subject"=>"snow depth"},
260
- {"subject"=>"air temperature"},
261
- {"subject"=>"water balance"},
262
- {"subject"=>"nevada"},
263
- {"subject"=>"sierra (mountain range)"}])
264
- expect(subject.dates).to eq([{"date"=>"2013", "dateType"=>"Issued"},
265
- {"date"=>"2014-10-17", "dateType"=>"Updated"}])
266
- expect(subject.publication_year).to eq("2013")
267
- expect(subject.publisher).to eq("UC Merced")
268
- expect(subject.funding_references).to eq([{"funderName"=>"National Science Foundation, Division of Earth Sciences, Critical Zone Observatories"}])
269
- expect(subject.geo_locations).to eq([{"geoLocationPlace"=>"Providence Creek (Lower, Upper and P301)", "geoLocationPoint"=>{"pointLatitude"=>"37.047756", "pointLongitude"=>"-119.221094"}}])
292
+ expect(subject.creators.first).to eq('familyName' => 'Bales', 'givenName' => 'Roger',
293
+ 'name' => 'Bales, Roger', 'nameType' => 'Personal')
294
+ expect(subject.titles).to eq([{ 'title' => 'Southern Sierra Critical Zone Observatory (SSCZO), Providence Creek meteorological data, soil moisture and temperature, snow depth and air temperature' }])
295
+ expect(subject.subjects).to eq([{ 'subject' => 'earth sciences' },
296
+ { 'subject' => 'soil moisture' },
297
+ { 'subject' => 'soil temperature' },
298
+ { 'subject' => 'snow depth' },
299
+ { 'subject' => 'air temperature' },
300
+ { 'subject' => 'water balance' },
301
+ { 'subject' => 'nevada' },
302
+ { 'subject' => 'sierra (mountain range)' }])
303
+ expect(subject.dates).to eq([{ 'date' => '2013', 'dateType' => 'Issued' },
304
+ { 'date' => '2014-10-17', 'dateType' => 'Updated' }])
305
+ expect(subject.publication_year).to eq('2013')
306
+ expect(subject.publisher).to eq('UC Merced')
307
+ expect(subject.funding_references).to eq([{ 'funderName' => 'National Science Foundation, Division of Earth Sciences, Critical Zone Observatories' }])
308
+ expect(subject.geo_locations).to eq([{
309
+ 'geoLocationPlace' => 'Providence Creek (Lower, Upper and P301)', 'geoLocationPoint' => {
310
+ 'pointLatitude' => '37.047756', 'pointLongitude' => '-119.221094'
311
+ }
312
+ }])
270
313
  end
271
314
 
272
- it "geolocation geoshape" do
273
- input = fixture_path + 'schema_org_geoshape.json'
274
- subject = Briard::Metadata.new(input: input)
315
+ it 'geolocation geoshape' do
316
+ input = "#{fixture_path}schema_org_geoshape.json"
317
+ subject = described_class.new(input: input)
275
318
 
276
319
  expect(subject.valid?).to be true
277
- expect(subject.language).to eq("en")
278
- expect(subject.id).to eq("https://doi.org/10.1594/pangaea.842237")
279
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
320
+ expect(subject.language).to eq('en')
321
+ expect(subject.id).to eq('https://doi.org/10.1594/pangaea.842237')
322
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
323
+ 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
280
324
  expect(subject.creators.length).to eq(2)
281
- expect(subject.creators.first).to eq("name"=>"Tara Oceans Consortium, Coordinators", "nameType"=>"Organizational", "nameIdentifiers"=>[], "affiliation" => [])
282
- expect(subject.titles).to eq([{"title"=>"Registry of all stations from the Tara Oceans Expedition (2009-2013)"}])
283
- expect(subject.dates).to eq([{"date"=>"2015-02-03", "dateType"=>"Issued"}])
284
- expect(subject.publication_year).to eq("2015")
285
- expect(subject.publisher).to eq("PANGAEA")
286
- expect(subject.geo_locations).to eq([{"geoLocationBox"=>{"eastBoundLongitude"=>"174.9006", "northBoundLatitude"=>"79.6753", "southBoundLatitude"=>"-64.3088", "westBoundLongitude"=>"-168.5182"}}])
325
+ expect(subject.creators.first).to eq('name' => 'Tara Oceans Consortium, Coordinators',
326
+ 'nameType' => 'Organizational', 'nameIdentifiers' => [], 'affiliation' => [])
327
+ expect(subject.titles).to eq([{ 'title' => 'Registry of all stations from the Tara Oceans Expedition (2009-2013)' }])
328
+ expect(subject.dates).to eq([{ 'date' => '2015-02-03', 'dateType' => 'Issued' }])
329
+ expect(subject.publication_year).to eq('2015')
330
+ expect(subject.publisher).to eq('PANGAEA')
331
+ expect(subject.geo_locations).to eq([{ 'geoLocationBox' => { 'eastBoundLongitude' => '174.9006',
332
+ 'northBoundLatitude' => '79.6753', 'southBoundLatitude' => '-64.3088', 'westBoundLongitude' => '-168.5182' } }])
287
333
  end
288
334
 
289
- it "schema_org list" do
290
- data = IO.read(fixture_path + 'schema_org_list.json').strip
335
+ it 'schema_org list' do
336
+ data = File.read("#{fixture_path}schema_org_list.json").strip
291
337
  input = JSON.parse(data).first.to_json
292
- subject = Briard::Metadata.new(input: input)
338
+ subject = described_class.new(input: input)
293
339
  expect(subject.valid?).to be true
294
- expect(subject.id).to eq("https://doi.org/10.23725/7jg3-v803")
295
- expect(subject.identifiers).to eq([{"identifier"=>"ark:/99999/fk4E1n6n1YHKxPk", "identifierType"=>"minid"},
296
- {"identifier"=>"dg.4503/01b048d0-e128-4cb0-94e9-b2d2cab7563d",
297
- "identifierType"=>"dataguid"},
298
- {"identifier"=>"f9e72bdf25bf4b4f0e581d9218fec2eb", "identifierType"=>"md5"}])
299
- expect(subject.url).to eq("https://ors.datacite.org/doi:/10.23725/7jg3-v803")
300
- expect(subject.content_url).to eq(["s3://cgp-commons-public/topmed_open_access/44a8837b-4456-5709-b56b-54e23000f13a/NWD100953.recab.cram","gs://topmed-irc-share/public/NWD100953.recab.cram","dos://dos.commons.ucsc-cgp.org/01b048d0-e128-4cb0-94e9-b2d2cab7563d?version=2018-05-26T133719.491772Z"])
301
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceType"=>"CRAM file", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
302
- expect(subject.creators).to eq([{"name"=>"TOPMed", "nameType"=>"Organizational", "nameIdentifiers"=>[], "affiliation" => []}])
303
- expect(subject.titles).to eq([{"title"=>"NWD100953.recab.cram"}])
304
- expect(subject.subjects).to eq([{"subject"=>"topmed"}, {"subject"=>"whole genome sequencing"}])
305
- expect(subject.dates).to eq([{"date"=>"2017-11-30", "dateType"=>"Issued"}])
306
- expect(subject.publication_year).to eq("2017")
307
- expect(subject.publisher).to eq("TOPMed")
308
- expect(subject.funding_references).to eq([{"funderIdentifier"=>"https://doi.org/10.13039/100000050", "funderIdentifierType"=>"Crossref Funder ID", "funderName"=>"National Heart, Lung, and Blood Institute (NHLBI)"}])
340
+ expect(subject.id).to eq('https://doi.org/10.23725/7jg3-v803')
341
+ expect(subject.identifiers).to eq([{ 'identifier' => 'ark:/99999/fk4E1n6n1YHKxPk', 'identifierType' => 'minid' },
342
+ { 'identifier' => 'dg.4503/01b048d0-e128-4cb0-94e9-b2d2cab7563d',
343
+ 'identifierType' => 'dataguid' },
344
+ { 'identifier' => 'f9e72bdf25bf4b4f0e581d9218fec2eb',
345
+ 'identifierType' => 'md5' }])
346
+ expect(subject.url).to eq('https://ors.datacite.org/doi:/10.23725/7jg3-v803')
347
+ expect(subject.content_url).to eq([
348
+ 's3://cgp-commons-public/topmed_open_access/44a8837b-4456-5709-b56b-54e23000f13a/NWD100953.recab.cram', 'gs://topmed-irc-share/public/NWD100953.recab.cram', 'dos://dos.commons.ucsc-cgp.org/01b048d0-e128-4cb0-94e9-b2d2cab7563d?version=2018-05-26T133719.491772Z'
349
+ ])
350
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
351
+ 'resourceType' => 'CRAM file', 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
352
+ expect(subject.creators).to eq([{ 'name' => 'TOPMed', 'nameType' => 'Organizational',
353
+ 'nameIdentifiers' => [], 'affiliation' => [] }])
354
+ expect(subject.titles).to eq([{ 'title' => 'NWD100953.recab.cram' }])
355
+ expect(subject.subjects).to eq([{ 'subject' => 'topmed' },
356
+ { 'subject' => 'whole genome sequencing' }])
357
+ expect(subject.dates).to eq([{ 'date' => '2017-11-30', 'dateType' => 'Issued' }])
358
+ expect(subject.publication_year).to eq('2017')
359
+ expect(subject.publisher).to eq('TOPMed')
360
+ expect(subject.funding_references).to eq([{
361
+ 'funderIdentifier' => 'https://doi.org/10.13039/100000050', 'funderIdentifierType' => 'Crossref Funder ID', 'funderName' => 'National Heart, Lung, and Blood Institute (NHLBI)'
362
+ }])
309
363
  end
310
364
 
311
- it "aida dataset" do
312
- input = fixture_path + 'aida.json'
313
- subject = Briard::Metadata.new(input: input)
365
+ it 'aida dataset' do
366
+ input = "#{fixture_path}aida.json"
367
+ subject = described_class.new(input: input)
314
368
 
315
369
  expect(subject.valid?).to be true
316
- expect(subject.id).to eq("https://doi.org/10.23698/aida/drov")
317
- expect(subject.url).to eq("https://doi.aida.medtech4health.se/10.23698/aida/drov")
318
- expect(subject.types).to eq("bibtex"=>"misc", "citeproc"=>"dataset", "resourceTypeGeneral"=>"Dataset", "ris"=>"DATA", "schemaOrg"=>"Dataset")
370
+ expect(subject.id).to eq('https://doi.org/10.23698/aida/drov')
371
+ expect(subject.url).to eq('https://doi.aida.medtech4health.se/10.23698/aida/drov')
372
+ expect(subject.types).to eq('bibtex' => 'misc', 'citeproc' => 'dataset',
373
+ 'resourceTypeGeneral' => 'Dataset', 'ris' => 'DATA', 'schemaOrg' => 'Dataset')
319
374
  # expect(subject.creators).to eq([{"familyName"=>"Lindman", "givenName"=>"Karin", "name"=>"Lindman, Karin", "nameIdentifiers"=>[{"nameIdentifier"=> "https://orcid.org/0000-0003-1298-517X", "nameIdentifierScheme"=>"ORCID", "schemeUri"=>"https://orcid.org"}], "nameType"=>"Personal"}])
320
- expect(subject.titles).to eq([{"title"=>"Ovary data from the Visual Sweden project DROID"}])
321
- expect(subject.version_info).to eq("1.0")
322
- expect(subject.subjects).to eq([{"subject"=>"pathology"}, {"subject"=>"whole slide imaging"}, {"subject"=>"annotated"}])
323
- expect(subject.dates).to eq([{"date"=>"2019-01-09", "dateType"=>"Issued"},
324
- {"date"=>"2019-01-09", "dateType"=>"Created"},
325
- {"date"=>"2019-01-09", "dateType"=>"Updated"}])
326
- expect(subject.publication_year).to eq("2019")
327
- expect(subject.id).to eq("https://doi.org/10.23698/aida/drov")
328
- expect(subject.publisher).to eq("AIDA")
329
- expect(subject.rights_list).to eq([{"rights"=>"Restricted access", "rightsUri"=>"https://datasets.aida.medtech4health.se/10.23698/aida/drov#license"}])
330
- expect(subject.id).to eq("https://doi.org/10.23698/aida/drov")
375
+ expect(subject.titles).to eq([{ 'title' => 'Ovary data from the Visual Sweden project DROID' }])
376
+ expect(subject.version_info).to eq('1.0')
377
+ expect(subject.subjects).to eq([{ 'subject' => 'pathology' }, { 'subject' => 'whole slide imaging' },
378
+ { 'subject' => 'annotated' }])
379
+ expect(subject.dates).to eq([{ 'date' => '2019-01-09', 'dateType' => 'Issued' },
380
+ { 'date' => '2019-01-09', 'dateType' => 'Created' },
381
+ { 'date' => '2019-01-09', 'dateType' => 'Updated' }])
382
+ expect(subject.publication_year).to eq('2019')
383
+ expect(subject.id).to eq('https://doi.org/10.23698/aida/drov')
384
+ expect(subject.publisher).to eq('AIDA')
385
+ expect(subject.rights_list).to eq([{ 'rights' => 'Restricted access',
386
+ 'rightsUri' => 'https://datasets.aida.medtech4health.se/10.23698/aida/drov#license' }])
387
+ expect(subject.id).to eq('https://doi.org/10.23698/aida/drov')
331
388
  end
332
389
 
333
- it "from attributes" do
334
- subject = Briard::Metadata.new(input: nil,
335
- from: "schema_org",
336
- doi: "10.5281/zenodo.1239",
337
- creators: [{"type"=>"Person", "name"=>"Jahn, Najko", "givenName"=>"Najko", "familyName"=>"Jahn"}],
338
- titles: [{ "title" => "Publication Fp7 Funding Acknowledgment - Plos Openaire" }],
339
- descriptions: [{ "description" => "The dataset contains a sample of metadata describing papers" }],
340
- publisher: "Zenodo",
341
- publication_year: "2013",
342
- dates: [{"date"=>"2013-04-03", "dateType"=>"Issued"}],
343
- funding_references: [{"awardNumber"=>"246686",
344
- "awardTitle"=>"Open Access Infrastructure for Research in Europe",
345
- "awardUri"=>"info:eu-repo/grantAgreement/EC/FP7/246686/",
346
- "funderIdentifier"=>"https://doi.org/10.13039/501100000780",
347
- "funderIdentifierType"=>"Crossref Funder ID",
348
- "funderName"=>"European Commission"}],
349
- types: { "resourceTypeGeneral" => "Dataset", "schemaOrg" => "Dataset" })
390
+ it 'from attributes' do
391
+ subject = described_class.new(input: nil,
392
+ from: 'schema_org',
393
+ doi: '10.5281/zenodo.1239',
394
+ creators: [{ 'type' => 'Person', 'name' => 'Jahn, Najko', 'givenName' => 'Najko',
395
+ 'familyName' => 'Jahn' }],
396
+ titles: [{ 'title' => 'Publication Fp7 Funding Acknowledgment - Plos Openaire' }],
397
+ descriptions: [{ 'description' => 'The dataset contains a sample of metadata describing papers' }],
398
+ publisher: 'Zenodo',
399
+ publication_year: '2013',
400
+ dates: [{ 'date' => '2013-04-03',
401
+ 'dateType' => 'Issued' }],
402
+ funding_references: [{ 'awardNumber' => '246686',
403
+ 'awardTitle' => 'Open Access Infrastructure for Research in Europe',
404
+ 'awardUri' => 'info:eu-repo/grantAgreement/EC/FP7/246686/',
405
+ 'funderIdentifier' => 'https://doi.org/10.13039/501100000780',
406
+ 'funderIdentifierType' => 'Crossref Funder ID',
407
+ 'funderName' => 'European Commission' }],
408
+ types: {
409
+ 'resourceTypeGeneral' => 'Dataset', 'schemaOrg' => 'Dataset'
410
+ })
350
411
 
351
412
  expect(subject.valid?).to be true
352
- expect(subject.doi).to eq("10.5281/zenodo.1239")
353
- expect(subject.id).to eq("https://doi.org/10.5281/zenodo.1239")
354
- expect(subject.types["schemaOrg"]).to eq("Dataset")
355
- expect(subject.types["resourceTypeGeneral"]).to eq("Dataset")
356
- expect(subject.creators).to eq([{"familyName"=>"Jahn", "givenName"=>"Najko", "name"=>"Jahn, Najko", "type"=>"Person"}])
357
- expect(subject.titles).to eq([{"title"=>"Publication Fp7 Funding Acknowledgment - Plos Openaire"}])
358
- expect(subject.descriptions.first["description"]).to start_with("The dataset contains a sample of metadata describing papers")
359
- expect(subject.dates).to eq([{"date"=>"2013-04-03", "dateType"=>"Issued"}])
360
- expect(subject.publication_year).to eq("2013")
361
- expect(subject.publisher).to eq("Zenodo")
362
- expect(subject.funding_references).to eq([{"awardNumber"=>"246686",
363
- "awardTitle"=>"Open Access Infrastructure for Research in Europe",
364
- "awardUri"=>"info:eu-repo/grantAgreement/EC/FP7/246686/",
365
- "funderIdentifier"=>"https://doi.org/10.13039/501100000780",
366
- "funderIdentifierType"=>"Crossref Funder ID",
367
- "funderName"=>"European Commission"}])
413
+ expect(subject.doi).to eq('10.5281/zenodo.1239')
414
+ expect(subject.id).to eq('https://doi.org/10.5281/zenodo.1239')
415
+ expect(subject.types['schemaOrg']).to eq('Dataset')
416
+ expect(subject.types['resourceTypeGeneral']).to eq('Dataset')
417
+ expect(subject.creators).to eq([{ 'familyName' => 'Jahn', 'givenName' => 'Najko',
418
+ 'name' => 'Jahn, Najko', 'type' => 'Person' }])
419
+ expect(subject.titles).to eq([{ 'title' => 'Publication Fp7 Funding Acknowledgment - Plos Openaire' }])
420
+ expect(subject.descriptions.first['description']).to start_with('The dataset contains a sample of metadata describing papers')
421
+ expect(subject.dates).to eq([{ 'date' => '2013-04-03', 'dateType' => 'Issued' }])
422
+ expect(subject.publication_year).to eq('2013')
423
+ expect(subject.publisher).to eq('Zenodo')
424
+ expect(subject.funding_references).to eq([{ 'awardNumber' => '246686',
425
+ 'awardTitle' => 'Open Access Infrastructure for Research in Europe',
426
+ 'awardUri' => 'info:eu-repo/grantAgreement/EC/FP7/246686/',
427
+ 'funderIdentifier' => 'https://doi.org/10.13039/501100000780',
428
+ 'funderIdentifierType' => 'Crossref Funder ID',
429
+ 'funderName' => 'European Commission' }])
368
430
  end
369
431
  end
370
432
  end