commonmeta-ruby 3.5 → 3.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,19 +4,19 @@ module Commonmeta
4
4
  module Readers
5
5
  module CrossrefReader
6
6
  def get_crossref(id: nil, **options)
7
- return { 'string' => nil, 'state' => 'not_found' } unless id.present?
7
+ return { "string" => nil, "state" => "not_found" } unless id.present?
8
8
 
9
9
  api_url = crossref_api_url(id, options)
10
10
  response = HTTP.get(api_url)
11
- return { 'string' => nil, 'state' => 'not_found' } unless response.status.success?
11
+ return { "string" => nil, "state" => "not_found" } unless response.status.success?
12
12
 
13
- { 'string' => response.body.to_s }
13
+ { "string" => response.body.to_s }
14
14
  end
15
15
 
16
16
  def read_crossref(string: nil, **options)
17
17
  if string.present?
18
18
  errors = jsonlint(string)
19
- return { 'errors' => errors } if errors.present?
19
+ return { "errors" => errors } if errors.present?
20
20
  end
21
21
 
22
22
  read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url,
@@ -24,159 +24,168 @@ module Commonmeta
24
24
  meta = string.present? ? JSON.parse(string) : {}
25
25
 
26
26
  # optionally strip out the message wrapper from API
27
- meta = meta.dig('message') if meta.dig('message').present?
27
+ meta = meta.dig("message") if meta.dig("message").present?
28
28
 
29
- resource_type = meta.fetch('type', nil)
29
+ resource_type = meta.fetch("type", nil)
30
30
  resource_type = resource_type.present? ? resource_type.underscore.camelcase : nil
31
- type = Commonmeta::Utils::CR_TO_CM_TRANSLATIONS.fetch(resource_type, 'Other')
31
+ type = Commonmeta::Utils::CR_TO_CM_TRANSLATIONS.fetch(resource_type, "Other")
32
32
 
33
- member_id = meta.fetch('member', nil)
33
+ member_id = meta.fetch("member", nil)
34
34
  # TODO: get publisher from member_id almost always return publisher name, but sometimes does not
35
35
  publisher = if member_id.present?
36
- get_crossref_member(member_id)
37
- else
38
- meta.fetch('publisher', nil)
39
- end
40
-
41
- contributors = if meta.fetch('author', nil).present?
42
- get_authors(from_csl(Array.wrap(meta.fetch('author', nil))))
43
- else
44
- []
45
- end
46
- editors = Array.wrap(meta.fetch('editor', nil)).each { |e| e['contributorType'] = 'Editor' }
36
+ get_crossref_member(member_id)
37
+ else
38
+ meta.fetch("publisher", nil)
39
+ end
40
+
41
+ contributors = if meta.fetch("author", nil).present?
42
+ get_authors(from_csl(Array.wrap(meta.fetch("author", nil))))
43
+ else
44
+ []
45
+ end
46
+ editors = Array.wrap(meta.fetch("editor", nil)).each { |e| e["contributorType"] = "Editor" }
47
47
  contributors += get_authors(from_csl(editors))
48
48
 
49
49
  date = {}
50
- date['submitted'] = nil
51
- date['accepted'] = meta.dig('accepted', 'date-time')
52
- date['published'] =
53
- meta.dig('issued',
54
- 'date-time') || get_date_from_date_parts(meta.fetch('issued',
50
+ date["submitted"] = nil
51
+ date["accepted"] = meta.dig("accepted", "date-time")
52
+ date["published"] =
53
+ meta.dig("issued",
54
+ "date-time") || get_date_from_date_parts(meta.fetch("issued",
55
55
  nil)) || get_date_from_date_parts(meta.fetch(
56
- 'created', nil
57
- ))
58
- date['updated'] =
59
- meta.dig('updated',
60
- 'date-time') || meta.dig('deposited',
61
- 'date-time') || get_date_from_date_parts(meta.fetch(
62
- 'deposited', nil
63
- ))
56
+ "created", nil
57
+ ))
58
+ date["updated"] =
59
+ meta.dig("updated",
60
+ "date-time") || meta.dig("deposited",
61
+ "date-time") || get_date_from_date_parts(meta.fetch(
62
+ "deposited", nil
63
+ ))
64
64
 
65
65
  # TODO: fix timestamp. Until then, remove time as this is not always stable with Crossref (different server timezones)
66
- date['published'] = get_iso8601_date(date['published']) if date['published'].present?
67
- date['updated'] = get_iso8601_date(date['updated']) if date['updated'].present?
66
+ date["published"] = get_iso8601_date(date["published"]) if date["published"].present?
67
+ date["updated"] = get_iso8601_date(date["updated"]) if date["updated"].present?
68
68
 
69
- license = if meta.fetch('license', nil)
70
- hsh_to_spdx('rightsURI' => meta.dig('license', 0, 'URL'))
71
- end
72
- issn = Array.wrap(meta.fetch('issn-type', nil)).find { |i| i['type'] == 'electronic' } ||
73
- Array.wrap(meta.fetch('issn-type', nil)).find { |i| i['type'] == 'print' } || {}
74
- issn = issn.fetch('value', nil) if issn.present?
69
+ license = if meta.fetch("license", nil)
70
+ hsh_to_spdx("rightsURI" => meta.dig("license", 0, "URL"))
71
+ end
72
+ issn = Array.wrap(meta.fetch("issn-type", nil)).find { |i| i["type"] == "electronic" } ||
73
+ Array.wrap(meta.fetch("issn-type", nil)).find { |i| i["type"] == "print" } || {}
74
+ issn = issn.fetch("value", nil) if issn.present?
75
75
 
76
- references = Array.wrap(meta.fetch('reference', nil)).map { |r| get_reference(r) }
76
+ references = Array.wrap(meta.fetch("reference", nil)).map { |r| get_reference(r) }
77
77
 
78
- funding_references = Array.wrap(meta.fetch('funder', nil)).reduce([]) do |sum, funding|
78
+ funding_references = Array.wrap(meta.fetch("funder", nil)).reduce([]) do |sum, funding|
79
79
  funding_reference = {
80
- 'funderName' => funding['name'],
81
- 'funderIdentifier' => funding['DOI'] ? doi_as_url(funding['DOI']) : nil,
82
- 'funderIdentifierType' => funding['DOI'].to_s.starts_with?('10.13039') ? 'Crossref Funder ID' : nil
80
+ "funderName" => funding["name"],
81
+ "funderIdentifier" => funding["DOI"] ? doi_as_url(funding["DOI"]) : nil,
82
+ "funderIdentifierType" => funding["DOI"].to_s.starts_with?("10.13039") ? "Crossref Funder ID" : nil,
83
83
  }.compact
84
- if funding['name'].present? && funding['award'].present?
85
- Array.wrap(funding['award']).each do |award|
86
- funding_reference['awardNumber'] = award
84
+ if funding["name"].present? && funding["award"].present?
85
+ Array.wrap(funding["award"]).each do |award|
86
+ funding_reference["awardNumber"] = award
87
87
  end
88
88
  end
89
89
 
90
90
  sum += [funding_reference] if funding_reference.present?
91
91
  sum
92
92
  end
93
+ files = Array.wrap(meta.fetch("link", nil)).reduce([]) do |sum, file|
94
+ if file["content-type"] != "unspecified"
95
+ file = { "url" => file.fetch("URL", nil), "mimeType" => file.fetch("content-type", nil) }
96
+ sum += [file]
97
+ end
98
+ sum
99
+ end
100
+
93
101
  container_type = case resource_type
94
- when 'JournalArticle', 'JournalIssue'
95
- 'Journal'
96
- when 'BookChapter'
97
- 'Book'
98
- when 'Monograph'
99
- 'BookSeries'
100
- end
101
-
102
- first_page = if meta.fetch('page', nil).present?
103
- meta.fetch('page').split('-').map(&:strip)[0]
104
- end
105
- last_page = if meta.fetch('page', nil).present?
106
- meta.fetch('page').split('-').map(&:strip)[1]
107
- end
108
-
109
- container = { 'type' => container_type,
110
- 'title' => parse_attributes(meta.fetch('container-title', nil),
102
+ when "JournalArticle", "JournalIssue"
103
+ "Journal"
104
+ when "BookChapter"
105
+ "Book"
106
+ when "Monograph"
107
+ "BookSeries"
108
+ end
109
+
110
+ first_page = if meta.fetch("page", nil).present?
111
+ meta.fetch("page").split("-").map(&:strip)[0]
112
+ end
113
+ last_page = if meta.fetch("page", nil).present?
114
+ meta.fetch("page").split("-").map(&:strip)[1]
115
+ end
116
+
117
+ container = { "type" => container_type,
118
+ "title" => parse_attributes(meta.fetch("container-title", nil),
111
119
  first: true).to_s.squish.presence,
112
- 'identifier' => issn.present? ? issn : nil,
113
- 'identifierType' => issn.present? ? 'ISSN' : nil,
114
- 'volume' => meta.fetch('volume', nil),
115
- 'issue' => meta.fetch('issue', nil),
116
- 'firstPage' => first_page,
117
- 'lastPage' => last_page }.compact
118
-
119
- id = normalize_id(meta.fetch('id', nil) || meta.fetch('DOI', nil))
120
-
121
- id = normalize_doi(options[:doi] || options[:id] || meta.fetch('DOI', nil))
122
- title = if meta.fetch('title', nil).is_a?(Array)
123
- meta.fetch('title', nil)[0]
124
- else
125
- meta.fetch('title', nil)
126
- end
127
- title = title.blank? ? ':(unav)' : title.squish
128
- state = meta.present? || read_options.present? ? 'findable' : 'not_found'
129
- subjects = Array.wrap(meta.fetch('categories', nil)).reduce([]) do |sum, subject|
120
+ "identifier" => issn.present? ? issn : nil,
121
+ "identifierType" => issn.present? ? "ISSN" : nil,
122
+ "volume" => meta.fetch("volume", nil),
123
+ "issue" => meta.fetch("issue", nil),
124
+ "firstPage" => first_page,
125
+ "lastPage" => last_page }.compact
126
+
127
+ id = normalize_id(meta.fetch("id", nil) || meta.fetch("DOI", nil))
128
+
129
+ id = normalize_doi(options[:doi] || options[:id] || meta.fetch("DOI", nil))
130
+ title = if meta.fetch("title", nil).is_a?(Array)
131
+ meta.fetch("title", nil)[0]
132
+ else
133
+ meta.fetch("title", nil)
134
+ end
135
+ title = title.blank? ? ":(unav)" : title.squish
136
+ state = meta.present? || read_options.present? ? "findable" : "not_found"
137
+ subjects = Array.wrap(meta.fetch("categories", nil)).reduce([]) do |sum, subject|
130
138
  sum += name_to_fos(subject)
131
139
 
132
140
  sum
133
141
  end
134
- abstract = meta.fetch('abstract', nil)
142
+ abstract = meta.fetch("abstract", nil)
135
143
  provider = get_doi_ra(id)
136
144
 
137
- { 'id' => id,
138
- 'type' => type,
139
- 'url' => normalize_id(meta.dig('resource', 'primary', 'URL')),
140
- 'titles' => [{ 'title' => title }],
141
- 'contributors' => contributors,
142
- 'container' => container,
143
- 'publisher' => publisher,
144
- 'references' => references,
145
- 'date' => date.compact,
146
- 'descriptions' => if abstract.present?
147
- [{ 'description' => sanitize(abstract),
148
- 'descriptionType' => 'Abstract' }]
149
- else
150
- []
151
- end,
152
- 'license' => license,
153
- 'alternate_identifiers' => [],
154
- 'funding_references' => funding_references,
155
- 'version' => meta.fetch('version', nil),
156
- 'subjects' => subjects,
157
- 'provider' => provider,
158
- 'schema_version' => 'http://datacite.org/schema/kernel-4',
159
- 'state' => state }.compact.merge(read_options)
145
+ { "id" => id,
146
+ "type" => type,
147
+ "url" => normalize_id(meta.dig("resource", "primary", "URL")),
148
+ "titles" => [{ "title" => title }],
149
+ "contributors" => contributors,
150
+ "container" => container,
151
+ "publisher" => publisher,
152
+ "references" => references,
153
+ "date" => date.compact,
154
+ "descriptions" => if abstract.present?
155
+ [{ "description" => sanitize(abstract),
156
+ "descriptionType" => "Abstract" }]
157
+ else
158
+ []
159
+ end,
160
+ "license" => license,
161
+ "alternate_identifiers" => [],
162
+ "funding_references" => funding_references,
163
+ "files" => files.presence,
164
+ "version" => meta.fetch("version", nil),
165
+ "subjects" => subjects,
166
+ "provider" => provider,
167
+ "schema_version" => "http://datacite.org/schema/kernel-4",
168
+ "state" => state }.compact.merge(read_options)
160
169
  end
161
170
 
162
171
  def get_reference(reference)
163
172
  return nil unless reference.present? || !reference.is_a?(Hash)
164
173
 
165
- doi = reference.dig('DOI')
174
+ doi = reference.dig("DOI")
166
175
  {
167
- 'key' => reference.dig('key'),
168
- 'doi' => doi ? normalize_doi(doi) : nil,
169
- 'contributor' => reference.dig('author'),
170
- 'title' => reference.dig('article-title'),
171
- 'publisher' => reference.dig('publisher'),
172
- 'publicationYear' => reference.dig('year'),
173
- 'volume' => reference.dig('volume'),
174
- 'issue' => reference.dig('issue'),
175
- 'firstPage' => reference.dig('first-page'),
176
- 'lastPage' => reference.dig('last-page'),
177
- 'containerTitle' => reference.dig('journal-title'),
178
- 'edition' => nil,
179
- 'unstructured' => doi.nil? ? reference.dig('unstructured') : nil
176
+ "key" => reference.dig("key"),
177
+ "doi" => doi ? normalize_doi(doi) : nil,
178
+ "contributor" => reference.dig("author"),
179
+ "title" => reference.dig("article-title"),
180
+ "publisher" => reference.dig("publisher"),
181
+ "publicationYear" => reference.dig("year"),
182
+ "volume" => reference.dig("volume"),
183
+ "issue" => reference.dig("issue"),
184
+ "firstPage" => reference.dig("first-page"),
185
+ "lastPage" => reference.dig("last-page"),
186
+ "containerTitle" => reference.dig("journal-title"),
187
+ "edition" => nil,
188
+ "unstructured" => doi.nil? ? reference.dig("unstructured") : nil,
180
189
  }.compact
181
190
  end
182
191
  end
@@ -88,6 +88,7 @@ module Commonmeta
88
88
  end.map do |reference|
89
89
  get_datacite_reference(reference)
90
90
  end
91
+ files = Array.wrap(meta.fetch("content_url", nil)).map { |file| { "url" => file } }
91
92
  formats = meta.fetch('formats', nil)
92
93
  sizes = meta.fetch('sizes', nil)
93
94
  schema_version = meta.fetch('schema_version', nil) || 'http://datacite.org/schema/kernel-4'
@@ -105,6 +106,7 @@ module Commonmeta
105
106
  'alternate_identifiers' => alternate_identifiers.presence,
106
107
  'references' => references,
107
108
  'funding_references' => funding_references,
109
+ 'files' => files.presence,
108
110
  'date' => date.compact,
109
111
  'descriptions' => descriptions,
110
112
  'license' => license,
@@ -20,9 +20,13 @@ module Commonmeta
20
20
 
21
21
  meta = string.present? ? JSON.parse(string) : {}
22
22
 
23
- url = normalize_url(meta.fetch("url", nil))
23
+ if (meta.dig("blog", "status") == "archived")
24
+ url = normalize_url(meta.fetch("archive_url", nil))
25
+ else
26
+ url = normalize_url(meta.fetch("url", nil))
27
+ end
24
28
  id = options[:doi] ? normalize_doi(options[:doi]) : normalize_id(meta.fetch("doi", nil))
25
- id = url if id.blank? && url.present?
29
+ id = normalize_url(meta.fetch("url", nil)) if id.blank?
26
30
 
27
31
  type = "Article"
28
32
  contributors = if meta.fetch("authors", nil).present?
@@ -125,8 +129,19 @@ module Commonmeta
125
129
  end
126
130
 
127
131
  def get_funding_references(meta)
128
- # check that relationships resolve and have type "HasAward"
129
- Array.wrap(meta["relationships"]).reduce([]) do |sum, relationship|
132
+ # check that relationships resolve and have type "HasAward" or funding is provided by blog metadata
133
+ if funding = meta.dig("blog", "funding")
134
+ fundref = {
135
+ "funderIdentifier" => funding["funder_id"],
136
+ "funderIdentifierType" => "Crossref Funder ID",
137
+ "funderName" => funding["funder_name"],
138
+ "awardNumber" => funding["award_number"]
139
+ }
140
+
141
+ else
142
+ fundref = nil
143
+ end
144
+ Array.wrap(fundref) + Array.wrap(meta["relationships"]).reduce([]) do |sum, relationship|
130
145
  begin
131
146
  # funder is European Commission
132
147
  if validate_prefix(relationship["url"]) == "10.3030" || URI.parse(relationship["url"]).host == "cordis.europa.eu"
@@ -207,6 +207,8 @@ module Commonmeta
207
207
  }.compact
208
208
  end
209
209
 
210
+ files = Array.wrap(meta.fetch("contentUrl", nil)).map { |file| { "url" => file } }
211
+
210
212
  # handle keywords as array and as comma-separated string
211
213
  subjects = meta.fetch('keywords', nil)
212
214
  subjects = subjects.to_s.split(', ') if subjects.is_a?(String)
@@ -222,7 +224,7 @@ module Commonmeta
222
224
  'additional_type' => additional_type,
223
225
  'alternate_identifiers' => alternate_identifiers.presence,
224
226
  'url' => normalize_id(meta.fetch('url', nil)),
225
- 'content_url' => Array.wrap(meta.fetch('contentUrl', nil)),
227
+ 'files' => files.presence,
226
228
  'sizes' => Array.wrap(meta.fetch('contenSize', nil)),
227
229
  'formats' => Array.wrap(meta.fetch('encodingFormat',
228
230
  nil) || meta.fetch('fileFormat', nil)),
@@ -5,7 +5,7 @@ require "pathname"
5
5
 
6
6
  module Commonmeta
7
7
  module SchemaUtils
8
- COMMONMETA = File.read(File.expand_path("../../resources/commonmeta_v0.10.json",
8
+ COMMONMETA = File.read(File.expand_path("../../resources/commonmeta_v0.10.3.json",
9
9
  __dir__))
10
10
 
11
11
  def json_schema_errors
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Commonmeta
4
- VERSION = '3.5'
4
+ VERSION = '3.5.2'
5
5
  end
@@ -35,7 +35,7 @@ module Commonmeta
35
35
  'spatialCoverage' => to_schema_org_spatial_coverage(geo_locations),
36
36
  'citation' => Array.wrap(references).map { |r| to_schema_org_citation(r) },
37
37
  '@reverse' => reverse.presence,
38
- 'contentUrl' => Array.wrap(content_url).unwrap,
38
+ 'contentUrl' => Array.wrap(files).map { |f| f["url"] }.unwrap,
39
39
  'schemaVersion' => schema_version,
40
40
  'periodical' => if type == 'Dataset'
41
41
  nil
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "http://json-schema.org/draft-07/schema#",
3
- "$id": "https://commonmeta.org/commonmeta_v0.10.json",
4
- "title": "Commonmeta v0.10",
3
+ "$id": "https://commonmeta.org/commonmeta_v0.10.3.json",
4
+ "title": "Commonmeta v0.10.3",
5
5
  "description": "JSON representation of the Commonmeta schema.",
6
6
  "additionalProperties": false,
7
7
  "definitions": {
@@ -37,7 +37,21 @@
37
37
  "Reader",
38
38
  "Translator",
39
39
  "ContactPerson",
40
- "Maintainer",
40
+ "DataManager",
41
+ "Distributor",
42
+ "HostingInstitution",
43
+ "Producer",
44
+ "ProjectLeader",
45
+ "ProjectManager",
46
+ "ProjectMember",
47
+ "RegistrationAgency",
48
+ "RegistrationAuthority",
49
+ "RelatedPerson",
50
+ "ResearchGroup",
51
+ "RightsHolder",
52
+ "Researcher",
53
+ "Sponsor",
54
+ "WorkPackageLeader",
41
55
  "Conceptualization",
42
56
  "DataCuration",
43
57
  "FormalAnalysis",
@@ -52,6 +66,7 @@
52
66
  "Visualization",
53
67
  "WritingOriginalDraft",
54
68
  "WritingReviewEditing",
69
+ "Maintainer",
55
70
  "Other"
56
71
  ],
57
72
  "type": "string"
@@ -190,8 +205,38 @@
190
205
  },
191
206
  "date": {
192
207
  "description": "The dates for the resource.",
208
+ "$comment": "The date fields are not required. Dates should be formatted as ISO 8601 dates.",
193
209
  "type": "object",
194
- "format": "date"
210
+ "properties": {
211
+ "created": {
212
+ "description": "The date the resource was created.",
213
+ "type": "string"
214
+ },
215
+ "submitted": {
216
+ "description": "The date the resource was submitted.",
217
+ "type": "string"
218
+ },
219
+ "accepted": {
220
+ "description": "The date the resource was accepted.",
221
+ "type": "string"
222
+ },
223
+ "published": {
224
+ "description": "The date the resource was published.",
225
+ "type": "string"
226
+ },
227
+ "updated": {
228
+ "description": "The date the resource was updated.",
229
+ "type": "string"
230
+ },
231
+ "available": {
232
+ "description": "The date the resource was made available.",
233
+ "type": "string"
234
+ },
235
+ "withdrawn": {
236
+ "description": "The date the resource was withdrawn.",
237
+ "type": "string"
238
+ }
239
+ }
195
240
  },
196
241
  "titles": {
197
242
  "description": "The titles of the resource.",
@@ -226,9 +271,11 @@
226
271
  "enum": [
227
272
  "Book",
228
273
  "BookSeries",
274
+ "DataCatalog",
229
275
  "Journal",
230
276
  "Periodical",
231
277
  "ProceedingsSeries",
278
+ "Repository",
232
279
  "Series"
233
280
  ]
234
281
  },
@@ -316,17 +363,21 @@
316
363
  "type": {
317
364
  "type": "string",
318
365
  "enum": [
319
- "IsIdenticalTo",
320
366
  "IsNewVersionOf",
321
367
  "IsPreviousVersionOf",
368
+ "IsVersionOf",
369
+ "HasVersion",
322
370
  "IsPartOf",
323
371
  "HasPart",
324
372
  "IsVariantFormOf",
373
+ "IsOriginalFormOf",
374
+ "IsIdenticalTo",
325
375
  "IsTranslationOf",
326
- "IsVersionOf",
327
- "IsReviewOf",
376
+ "IsReviewedBy",
377
+ "Reviews",
328
378
  "IsPreprintOf",
329
- "HasPreprint"
379
+ "HasPreprint",
380
+ "isSupplementTo"
330
381
  ]
331
382
  }
332
383
  },
@@ -437,12 +488,20 @@
437
488
  "required": ["alternateIdentifier", "alternateIdentifierType"]
438
489
  }
439
490
  },
440
- "content_url": {
441
- "description": "The URL of the content of the resource.",
491
+ "files": {
492
+ "description": "The downloadable files for the resource.",
442
493
  "type": "array",
443
494
  "items": {
444
- "type": "string",
445
- "format": "uri"
495
+ "type": "object",
496
+ "properties": {
497
+ "bucket": { "type": "string" },
498
+ "key": { "type": "string" },
499
+ "checksum": { "type": "string" },
500
+ "url": { "type": "string", "format": "uri" },
501
+ "size": { "type": "integer" },
502
+ "mimeType": { "type": "string" }
503
+ },
504
+ "required": ["url"]
446
505
  }
447
506
  },
448
507
  "schema_version": {
@@ -459,5 +518,13 @@
459
518
  "enum": ["findable", "not_found"]
460
519
  }
461
520
  },
462
- "required": ["id", "type", "url", "contributors", "titles", "publisher", "date"]
521
+ "required": [
522
+ "id",
523
+ "type",
524
+ "url",
525
+ "contributors",
526
+ "titles",
527
+ "publisher",
528
+ "date"
529
+ ]
463
530
  }