bolognese 0.15.9 → 1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (136) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +7 -6
  3. data/bolognese.gemspec +1 -0
  4. data/lib/bolognese/datacite_utils.rb +19 -51
  5. data/lib/bolognese/doi_utils.rb +1 -1
  6. data/lib/bolognese/metadata_utils.rb +43 -125
  7. data/lib/bolognese/readers/bibtex_reader.rb +21 -10
  8. data/lib/bolognese/readers/citeproc_reader.rb +20 -12
  9. data/lib/bolognese/readers/codemeta_reader.rb +8 -3
  10. data/lib/bolognese/readers/crossref_reader.rb +41 -50
  11. data/lib/bolognese/readers/datacite_json_reader.rb +17 -40
  12. data/lib/bolognese/readers/datacite_reader.rb +78 -93
  13. data/lib/bolognese/readers/ris_reader.rb +20 -11
  14. data/lib/bolognese/readers/schema_org_reader.rb +62 -29
  15. data/lib/bolognese/utils.rb +90 -17
  16. data/lib/bolognese/version.rb +1 -1
  17. data/lib/bolognese/writers/bibtex_writer.rb +2 -2
  18. data/lib/bolognese/writers/codemeta_writer.rb +1 -2
  19. data/lib/bolognese/writers/crosscite_writer.rb +10 -29
  20. data/lib/bolognese/writers/datacite_json_writer.rb +18 -28
  21. data/lib/bolognese/writers/jats_writer.rb +4 -4
  22. data/lib/bolognese/writers/ris_writer.rb +3 -2
  23. data/lib/bolognese/writers/schema_org_writer.rb +16 -15
  24. data/spec/author_utils_spec.rb +3 -3
  25. data/spec/datacite_utils_spec.rb +4 -14
  26. data/spec/fixtures/crosscite.json +3 -3
  27. data/spec/fixtures/crossref.ris +1 -0
  28. data/spec/fixtures/datacite-example-geolocation-2.xml +140 -0
  29. data/spec/fixtures/datacite-example-geolocation.xml +66 -0
  30. data/spec/fixtures/datacite.json +33 -12
  31. data/spec/fixtures/datacite_software_missing_comma.json +1 -1
  32. data/spec/fixtures/schema_org_geolocation.json +82 -0
  33. data/spec/fixtures/schema_org_geoshape.json +550 -0
  34. data/spec/fixtures/schema_org_gtex.json +1 -1
  35. data/spec/fixtures/schema_org_list.json +1 -1
  36. data/spec/fixtures/schema_org_topmed.json +1 -1
  37. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/default.yml +3 -3
  38. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/to_bibtex.yml +3 -3
  39. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/to_citation.yml +4 -4
  40. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/to_crossref.yml +3 -3
  41. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/to_datacite.yml +3 -3
  42. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/to_jats.yml +3 -3
  43. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/crossref/to_schema_org.yml +3 -3
  44. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/default.yml +3 -3
  45. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_bibtex.yml +3 -3
  46. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_citation.yml +4 -4
  47. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_datacite.yml +3 -3
  48. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_datacite_json.yml +3 -3
  49. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_jats.yml +3 -3
  50. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/convert_from_id/datacite/to_schema_org.yml +3 -3
  51. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/find_from_format_by_id/crossref.yml +2 -2
  52. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/find_from_format_by_id/datacite.yml +2 -2
  53. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/change_state.yml +3 -3
  54. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/change_datacite_metadata/change_title.yml +3 -3
  55. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/doi_registration_agency/crossref.yml +3 -3
  56. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/doi_registration_agency/datacite.yml +11 -11
  57. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/doi_registration_agency/medra.yml +3 -3
  58. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/doi_registration_agency/not_found.yml +3 -3
  59. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/crossref.yml +5 -5
  60. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/crossref_doi_not_url.yml +5 -5
  61. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/datacite.yml +5 -5
  62. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/datacite_doi_http.yml +5 -5
  63. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/github.yml +3 -3
  64. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/orcid.yml +3 -3
  65. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/schema_org.yml +3 -3
  66. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_by_ID/unknown_DOI_registration_agency.yml +5 -5
  67. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/bibtex.yml +3 -3
  68. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/citeproc.yml +3 -3
  69. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/codemeta.yml +3 -3
  70. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/crosscite.yml +3 -3
  71. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/crossref.yml +3 -3
  72. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/datacite.yml +3 -3
  73. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/datacite_json.yml +3 -3
  74. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/ris.yml +3 -3
  75. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_file/schema_org.yml +3 -3
  76. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_from_format_from_string/crosscite.yml +3 -3
  77. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/DOI_with_ORCID_ID.yml +3 -3
  78. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/DOI_with_SICI_DOI.yml +3 -3
  79. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/DOI_with_data_citation.yml +3 -3
  80. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/book_chapter.yml +3 -3
  81. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/dataset.yml +3 -3
  82. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/date_in_future.yml +3 -3
  83. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/journal_article.yml +3 -3
  84. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/journal_article_with.yml +3 -3
  85. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/not_found_error.yml +3 -3
  86. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_crossref_metadata/posted_content.yml +3 -3
  87. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/BlogPosting.yml +3 -3
  88. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/DOI_in_test_system.yml +3 -3
  89. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/DOI_in_with_related_id_system.yml +3 -3
  90. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/DOI_not_found.yml +3 -3
  91. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/Dataset.yml +3 -3
  92. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/Funding.yml +3 -3
  93. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/Funding_schema_version_4.yml +3 -3
  94. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/Referee_report_in_test_system.yml +3 -3
  95. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/author_only_full_name.yml +3 -3
  96. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/author_with_scheme.yml +3 -3
  97. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/author_with_wrong_orcid_scheme.yml +3 -3
  98. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/content_url.yml +12 -12
  99. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/date.yml +3 -3
  100. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/dissertation.yml +3 -3
  101. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/doi_with_sign.yml +3 -3
  102. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/empty_subject.yml +3 -3
  103. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/funding_schema_version_3.yml +3 -3
  104. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/is_identical_to.yml +3 -3
  105. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/keywords_with_attributes.yml +3 -3
  106. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/leading_and_trailing_whitespace.yml +3 -3
  107. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/multiple_author_names_in_one_creatorName.yml +3 -3
  108. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/multiple_licenses.yml +3 -3
  109. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/series-information.yml +3 -3
  110. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/subject_scheme.yml +3 -3
  111. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/get_datacite_metadata/xs_string_attributes.yml +3 -3
  112. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/handle_input/DOI_RA_not_Crossref_or_DataCite.yml +2 -2
  113. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/handle_input/unknown_DOI_prefix.yml +2 -2
  114. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/jsonlint/missing_comma.yml +3 -3
  115. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/jsonlint/nil.yml +3 -3
  116. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/jsonlint/overlapping_keys.yml +3 -3
  117. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/jsonlint/valid.yml +3 -3
  118. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_ris/BlogPosting_schema_org.yml +16 -20
  119. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/write_metadata_as_schema_org/geo_location_box.yml +42 -0
  120. data/spec/readers/bibtex_reader_spec.rb +5 -5
  121. data/spec/readers/citeproc_reader_spec.rb +1 -2
  122. data/spec/readers/codemeta_reader_spec.rb +9 -9
  123. data/spec/readers/crosscite_reader_spec.rb +2 -2
  124. data/spec/readers/crossref_reader_spec.rb +55 -48
  125. data/spec/readers/datacite_json_reader_spec.rb +5 -4
  126. data/spec/readers/datacite_reader_spec.rb +150 -114
  127. data/spec/readers/ris_reader_spec.rb +5 -5
  128. data/spec/readers/schema_org_reader_spec.rb +62 -48
  129. data/spec/utils_spec.rb +4 -4
  130. data/spec/writers/crosscite_writer_spec.rb +21 -17
  131. data/spec/writers/datacite_json_writer_spec.rb +8 -8
  132. data/spec/writers/datacite_writer_spec.rb +17 -18
  133. data/spec/writers/ris_writer_spec.rb +26 -18
  134. data/spec/writers/schema_org_writer_spec.rb +45 -18
  135. data/spec/writers/turtle_writer_spec.rb +1 -1
  136. metadata +22 -3
@@ -41,16 +41,24 @@ module Bolognese
41
41
 
42
42
  doi = validate_doi(meta.fetch("DO", nil))
43
43
  author = Array.wrap(meta.fetch("AU", nil)).map { |a| { "name" => a } }
44
- container_title = meta.fetch("T2", nil)
45
44
  date_parts = meta.fetch("PY", nil).to_s.split("/")
46
45
  date_published = get_date_from_parts(*date_parts)
47
- is_part_of = if container_title.present?
48
- { "type" => "Periodical",
49
- "title" => container_title,
50
- "issn" => meta.fetch("SN", nil) }.compact
51
- else
52
- nil
53
- end
46
+ related_identifiers = if meta.fetch("T2", nil).present? && meta.fetch("SN", nil).present?
47
+ [{ "type" => "Periodical",
48
+ "id" => meta.fetch("SN", nil),
49
+ "related_identifier_type" => "ISSN",
50
+ "relation_type" => "IsPartOf",
51
+ "title" => meta.fetch("T2", nil), }.compact]
52
+ else
53
+ []
54
+ end
55
+ periodical = if meta.fetch("T2", nil).present?
56
+ { "type" => "Periodical",
57
+ "title" => meta.fetch("T2", nil),
58
+ "id" => meta.fetch("SN", nil) }.compact
59
+ else
60
+ nil
61
+ end
54
62
  state = doi.present? ? "findable" : "not_found"
55
63
 
56
64
  { "id" => normalize_doi(doi),
@@ -61,9 +69,10 @@ module Bolognese
61
69
  "doi" => doi,
62
70
  "b_url" => meta.fetch("UR", nil),
63
71
  "title" => meta.fetch("T1", nil),
64
- "author" => get_authors(author),
65
- "publisher" => meta.fetch("PB", nil),
66
- "is_part_of" => is_part_of,
72
+ "creator" => get_authors(author),
73
+ "publisher" => meta.fetch("PB", "(:unav)"),
74
+ "periodical" => periodical,
75
+ "related_identifiers" => related_identifiers,
67
76
  "date_created" => meta.fetch("Y1", nil),
68
77
  "date_published" => date_published,
69
78
  "date_accessed" => meta.fetch("Y2", nil),
@@ -37,7 +37,7 @@ module Bolognese
37
37
 
38
38
  identifier = Array.wrap(meta.fetch("identifier", nil))
39
39
  if identifier.length > 1
40
- alternate_identifier = identifier[1..-1].map do |r|
40
+ alternate_identifiers = identifier[1..-1].map do |r|
41
41
  if r.is_a?(String)
42
42
  { "type" => "URL", "name" => r }
43
43
  elsif r.is_a?(Hash)
@@ -45,7 +45,7 @@ module Bolognese
45
45
  end
46
46
  end.unwrap
47
47
  else
48
- alternate_identifier = nil
48
+ alternate_identifiers = nil
49
49
  end
50
50
  identifier = identifier.first
51
51
 
@@ -57,24 +57,61 @@ module Bolognese
57
57
  editor = get_authors(from_schema_org(Array.wrap(meta.fetch("editor", nil))))
58
58
  publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)
59
59
 
60
- included_in_data_catalog = from_schema_org(Array.wrap(meta.fetch("includedInDataCatalog", nil)))
61
- included_in_data_catalog = Array.wrap(included_in_data_catalog).reduce([]) do |sum, dc|
62
- sum << { "title" => dc["name"], "url" => dc["url"] } if dc["url"].present?
63
- sum
64
- end.unwrap
65
- is_part_of = schema_org_is_part_of(meta) || included_in_data_catalog
60
+ ct = (type == "Dataset") ? "includedInDataCatalog" : "Periodical"
61
+ periodical = if meta.fetch(ct, nil).present?
62
+ {
63
+ "type" => (type == "Dataset") ? "DataCatalog" : "Periodical",
64
+ "title" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "name", first: true),
65
+ "url" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "url", first: true)
66
+ }.compact
67
+ else
68
+ nil
69
+ end
66
70
 
67
- license = {
71
+ related_identifiers = Array.wrap(schema_org_is_identical_to(meta)) +
72
+ Array.wrap(schema_org_is_part_of(meta)) +
73
+ Array.wrap(schema_org_has_part(meta)) +
74
+ Array.wrap(schema_org_is_previous_version_of(meta)) +
75
+ Array.wrap(schema_org_is_new_version_of(meta)) +
76
+ Array.wrap(schema_org_references(meta)) +
77
+ Array.wrap(schema_org_is_referenced_by(meta)) +
78
+ Array.wrap(schema_org_is_supplement_to(meta)) +
79
+ Array.wrap(schema_org_is_supplemented_by(meta))
80
+
81
+ rights = {
68
82
  "id" => parse_attributes(meta.fetch("license", nil), content: "id", first: true),
69
83
  "name" => parse_attributes(meta.fetch("license", nil), content: "name", first: true)
70
84
  }
71
85
 
72
- funding = from_schema_org(Array.wrap(meta.fetch("funding", nil)))
86
+ funding_references = from_schema_org(Array.wrap(meta.fetch("funder", nil)))
87
+ funding_references = Array.wrap(meta.fetch("funder", nil)).compact.map do |fr|
88
+ {
89
+ "funder_name" => fr["name"],
90
+ "funder_identifier" => fr["@id"],
91
+ "funder_identifier_type" => fr["@id"].to_s.start_with?("https://doi.org/10.13039") ? "Crossref Funder ID" : nil }.compact
92
+ end
73
93
  date_published = meta.fetch("datePublished", nil)
74
94
  state = meta.present? ? "findable" : "not_found"
75
-
76
- ct = (type == "Dataset") ? "includedInDataCatalog" : "Periodical"
77
- container_title = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "name", first: true)
95
+ geo_location = Array.wrap(meta.fetch("spatialCoverage", nil)).map do |gl|
96
+ if gl.dig("geo", "box")
97
+ s, w, n, e = gl.dig("geo", "box").split(" ", 4)
98
+ geo_location_box = {
99
+ "west_bound_longitude" => w,
100
+ "east_bound_longitude" => e,
101
+ "south_bound_latitude" => s,
102
+ "north_bound_latitude" => n
103
+ }.compact.presence
104
+ else
105
+ geo_location_box = nil
106
+ end
107
+ geo_location_point = { "point_longitude" => gl.dig("geo", "longitude"), "point_latitude" => gl.dig("geo", "latitude") }.compact.presence
108
+
109
+ {
110
+ "geo_location_place" => gl.dig("geo", "address"),
111
+ "geo_location_point" => geo_location_point,
112
+ "geo_location_box" => geo_location_box
113
+ }.compact
114
+ end
78
115
 
79
116
  { "id" => id,
80
117
  "type" => type,
@@ -85,43 +122,38 @@ module Bolognese
85
122
  "resource_type_general" => resource_type_general,
86
123
  "doi" => validate_doi(id),
87
124
  "identifier" => identifier,
88
- "alternate_identifier" => alternate_identifier,
125
+ "alternate_identifiers" => alternate_identifiers,
89
126
  "b_url" => normalize_id(meta.fetch("url", nil)),
90
127
  "content_url" => Array.wrap(meta.fetch("contentUrl", nil)).unwrap,
91
- "content_size" => meta.fetch("contenSize", nil),
92
- "content_format" => Array.wrap(meta.fetch("encodingFormat", nil) || meta.fetch("fileFormat", nil)).unwrap,
128
+ "size" => meta.fetch("contenSize", nil),
129
+ "format" => Array.wrap(meta.fetch("encodingFormat", nil) || meta.fetch("fileFormat", nil)).unwrap,
93
130
  "title" => meta.fetch("name", nil),
94
- "author" => author,
131
+ "creator" => author,
95
132
  "editor" => editor,
96
133
  "publisher" => publisher,
97
134
  "service_provider" => parse_attributes(meta.fetch("provider", nil), content: "name", first: true),
98
- "container_title" => container_title,
99
- "is_identical_to" => schema_org_is_identical_to(meta),
100
- "is_part_of" => is_part_of,
101
- "has_part" => schema_org_has_part(meta),
102
- "references" => schema_org_references(meta),
103
- "is_referenced_by" => schema_org_is_referenced_by(meta),
104
- "is_supplement_to" => schema_org_is_supplement_to(meta),
105
- "is_supplemented_by" => schema_org_is_supplemented_by(meta),
135
+ "periodical" => periodical,
136
+ "related_identifiers" => related_identifiers,
106
137
  "date_created" => meta.fetch("dateCreated", nil),
107
138
  "date_published" => date_published,
108
139
  "date_modified" => meta.fetch("dateModified", nil),
109
140
  "description" => meta.fetch("description", nil).present? ? { "text" => sanitize(meta.fetch("description")) } : nil,
110
- "license" => license,
141
+ "rights" => rights,
111
142
  "b_version" => meta.fetch("version", nil),
112
143
  "keywords" => meta.fetch("keywords", nil).to_s.split(", "),
113
144
  "state" => state,
114
145
  "schema_version" => meta.fetch("schemaVersion", nil),
115
- "funding" => funding
146
+ "funding_references" => funding_references,
147
+ "geo_location" => geo_location
116
148
  }
117
149
  end
118
150
 
119
151
  def schema_org_related_identifier(meta, relation_type: nil)
120
- normalize_ids(ids: meta.fetch(relation_type, nil))
152
+ normalize_ids(ids: meta.fetch(relation_type, nil), relation_type: SO_TO_DC_RELATION_TYPES[relation_type])
121
153
  end
122
154
 
123
155
  def schema_org_reverse_related_identifier(meta, relation_type: nil)
124
- normalize_ids(ids: meta.dig("@reverse", relation_type))
156
+ normalize_ids(ids: meta.dig("@reverse", relation_type), relation_type: SO_TO_DC_RELATION_TYPES[relation_type])
125
157
  end
126
158
 
127
159
  def schema_org_is_identical_to(meta)
@@ -159,6 +191,7 @@ module Bolognese
159
191
  def schema_org_is_supplemented_by(meta)
160
192
  schema_org_related_identifier(meta, relation_type: "isBasedOn")
161
193
  end
194
+
162
195
  end
163
196
  end
164
197
  end
@@ -25,7 +25,10 @@ module Bolognese
25
25
  "Sound" => "AudioObject",
26
26
  "Text" => "ScholarlyArticle",
27
27
  "Workflow" => nil,
28
- "Other" => "CreativeWork"
28
+ "Other" => "CreativeWork",
29
+ # not part of DataCite schema, but used internally
30
+ "Periodical" => "Periodical",
31
+ "DataCatalog" => "DataCatalog"
29
32
  }
30
33
 
31
34
  DC_TO_CP_TRANSLATIONS = {
@@ -173,7 +176,6 @@ module Bolognese
173
176
  "BlogPosting" => "Text",
174
177
  "Chapter" => "Text",
175
178
  "Collection" => "Collection",
176
- "CreativeWork" => "Other",
177
179
  "DataCatalog" => "Dataset",
178
180
  "Dataset" => "Dataset",
179
181
  "Event" => "Event",
@@ -362,7 +364,7 @@ module Bolognese
362
364
  "codemeta"
363
365
  elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("ris_type")
364
366
  "crosscite"
365
- elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("schemaVersion").to_s.start_with?("http://datacite.org/schema/kernel")
367
+ elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("schema-version").to_s.start_with?("http://datacite.org/schema/kernel")
366
368
  "datacite_json"
367
369
  elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("issued", "date-parts").present?
368
370
  "citeproc"
@@ -471,11 +473,17 @@ module Bolognese
471
473
  "http://orcid.org/" + Addressable::URI.encode(orcid)
472
474
  end
473
475
 
474
- def normalize_ids(ids: nil)
475
- Array.wrap(ids).map do |id|
476
- { "id" => normalize_id(id["@id"]),
477
- "type" => id["@type"] || Metadata::DC_TO_SO_TRANSLATIONS[id["resourceTypeGeneral"]] || "CreativeWork",
478
- "title" => id["title"] || id["name"] }.compact
476
+ def normalize_ids(ids: nil, relation_type: nil)
477
+ Array.wrap(ids).select { |idx| idx["@id"].present? }.map do |idx|
478
+ id = normalize_id(idx["@id"])
479
+ related_identifier_type = doi_from_url(id).present? ? "DOI" : "URL"
480
+ id = doi_from_url(id) || id
481
+
482
+ { "id" => id,
483
+ "relation_type" => relation_type,
484
+ "related_identifier_type" => related_identifier_type,
485
+ "resource_type_general" => Metadata::SO_TO_DC_TRANSLATIONS[idx["@type"]],
486
+ "title" => idx["title"] || idx["name"] }.compact
479
487
  end.unwrap
480
488
  end
481
489
 
@@ -525,13 +533,10 @@ module Bolognese
525
533
  def to_schema_org_container(element, options={})
526
534
  return nil unless (element.is_a?(Hash) || (element.nil? && options[:container_title].present?))
527
535
 
528
- mapping = { "type" => "@type", "id" => "@id", "title" => "name" }
529
-
530
- element ||= {}
531
- element["type"] = (options[:type] == "Dataset") ? "DataCatalog" : "Periodical"
532
- element["title"] ||= options[:container_title]
533
-
534
- map_hash_keys(element: element, mapping: mapping)
536
+ {
537
+ "@id" => element["id"],
538
+ "@type" => (options[:type] == "Dataset") ? "DataCatalog" : "Periodical",
539
+ "name" => element["title"] || options[:container_title] }
535
540
  end
536
541
 
537
542
  def to_schema_org_identifier(element, options={})
@@ -540,8 +545,8 @@ module Bolognese
540
545
  "propertyID" => normalize_doi(element) ? "doi" : "url",
541
546
  "value" => element }
542
547
 
543
- if options[:alternate_identifier].present?
544
- [ident] + Array.wrap(options[:alternate_identifier]).map do |ai|
548
+ if options[:alternate_identifiers].present?
549
+ [ident] + Array.wrap(options[:alternate_identifiers]).map do |ai|
545
550
  if ai["type"].to_s.downcase == "url"
546
551
  ai["name"]
547
552
  else
@@ -556,6 +561,67 @@ module Bolognese
556
561
  end
557
562
  end
558
563
 
564
+ def to_schema_org_relation(related_identifiers: nil, relation_type: nil)
565
+ return nil unless related_identifiers.present? && relation_type.present?
566
+
567
+ relation_type = relation_type == "References" ? ["References", "Cites", "Documents"] : [relation_type]
568
+
569
+ Array.wrap(related_identifiers).select { |ri| relation_type.include?(ri["relation_type"]) }.map do |r|
570
+ if r["related_identifier_type"] == "ISSN" && r["relation_type"] == "IsPartOf"
571
+ {
572
+ "@type" => "Periodical",
573
+ "issn" => r["id"],
574
+ "name" => r["title"] }.compact
575
+ else
576
+ {
577
+ "@id" => normalize_id(r["id"]),
578
+ "@type" => DC_TO_SO_TRANSLATIONS[r["resource_type_general"]] || "CreativeWork",
579
+ "name" => r["title"] }.compact
580
+ end
581
+ end.unwrap
582
+ end
583
+
584
+ def to_schema_org_funder(funding_references)
585
+ return nil unless funding_references.present?
586
+
587
+ Array.wrap(funding_references).map do |fr|
588
+ {
589
+ "@id" => fr["funder_identifier"],
590
+ "@type" => "Organization",
591
+ "name" => fr["funder_name"] }.compact
592
+ end.unwrap
593
+ end
594
+
595
+ def to_schema_org_spatial_coverage(geo_location)
596
+ return nil unless geo_location.present?
597
+
598
+ Array.wrap(geo_location).map do |gl|
599
+ if gl.fetch("geo_location_point", nil)
600
+ {
601
+ "@type" => "Place",
602
+ "geo" => {
603
+ "@type" => "GeoCoordinates",
604
+ "address" => gl["geo_location_place"],
605
+ "latitude" => gl.dig("geo_location_point", "point_latitude"),
606
+ "longitude" => gl.dig("geo_location_point", "point_longitude")
607
+ }.compact
608
+ }
609
+ elsif gl.fetch("geo_location_box", nil)
610
+ {
611
+ "@type" => "Place",
612
+ "geo" => {
613
+ "@type" => "GeoShape",
614
+ "address" => gl["geo_location_place"],
615
+ "box" => [gl.dig("geo_location_box", "south_bound_latitude"),
616
+ gl.dig("geo_location_box", "west_bound_longitude"),
617
+ gl.dig("geo_location_box", "north_bound_latitude"),
618
+ gl.dig("geo_location_box", "east_bound_longitude")].join(" ")
619
+ }.compact
620
+ }
621
+ end
622
+ end.compact.unwrap
623
+ end
624
+
559
625
  def from_schema_org(element)
560
626
  mapping = { "@type" => "type", "@id" => "id" }
561
627
 
@@ -576,6 +642,13 @@ module Bolognese
576
642
  end.unwrap
577
643
  end
578
644
 
645
+ def to_identifier(identifier)
646
+ {
647
+ "@type" => "PropertyValue",
648
+ "propertyID" => identifier["related_identifier_type"],
649
+ "value" => identifier["id"] }
650
+ end
651
+
579
652
  def from_citeproc(element)
580
653
  Array.wrap(element).map do |a|
581
654
  if a["literal"].present?
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "0.15.9"
2
+ VERSION = "1.0"
3
3
  end
@@ -11,11 +11,11 @@ module Bolognese
11
11
  bibtex_key: identifier,
12
12
  doi: doi,
13
13
  url: b_url,
14
- author: authors_as_string(author),
14
+ author: authors_as_string(creator),
15
15
  keywords: keywords.present? ? Array.wrap(keywords).map { |k| parse_attributes(k, content: "text", first: true) }.join(", ") : nil,
16
16
  language: language,
17
17
  title: parse_attributes(title, content: "text", first: true),
18
- journal: container_title,
18
+ journal: periodical && periodical["title"],
19
19
  volume: volume,
20
20
  issue: issue,
21
21
  pages: [first_page, last_page].compact.join("-").presence,
@@ -13,11 +13,10 @@ module Bolognese
13
13
  "identifier" => identifier,
14
14
  "codeRepository" => b_url,
15
15
  "title" => title,
16
- "agents" => author,
16
+ "agents" => creator,
17
17
  "description" => parse_attributes(description, content: "text", first: true),
18
18
  "version" => b_version,
19
19
  "tags" => keywords.to_s.split(", ").presence,
20
- "dateCreated" => date_created,
21
20
  "datePublished" => date_published,
22
21
  "dateModified" => date_modified,
23
22
  "publisher" => publisher
@@ -15,48 +15,29 @@ module Bolognese
15
15
  "ris_type" => ris_type,
16
16
  "resource_type_general" => resource_type_general,
17
17
  "resource_type" => additional_type,
18
- "author" => author,
18
+ "creator" => creator,
19
19
  "title" => title,
20
20
  "publisher" => publisher,
21
- "container_title" => container_title,
21
+ "container_title" => periodical && periodical["title"],
22
22
  "keywords" => keywords,
23
23
  "contributor" => contributor,
24
- "date_accepted" => date_accepted,
25
- "date_available" => date_available,
26
- "date_copyrighted" => date_copyrighted,
27
- "date_collected" => date_collected,
28
- "date_created" => date_created,
24
+ "dates" => dates,
29
25
  "date_published" => date_published,
30
26
  "date_modified" => date_modified,
31
- "date_submitted" => date_submitted,
32
- "date_registered" => date_registered,
33
- "date_updated" => date_updated,
34
- "date_valid" => date_valid,
35
27
  "language" => language,
36
- "alternate_identifier" => alternate_identifier,
37
- "content_size" => content_size,
28
+ "alternate_identifiers" => alternate_identifiers,
29
+ "size" => size,
30
+ "format" => b_format,
38
31
  "version" => b_version,
39
- "license" => license,
32
+ "rights" => rights,
40
33
  "description" => description,
41
34
  "volume" => volume,
42
35
  "issue" => issue,
43
36
  "first_page" => first_page,
44
37
  "last_page" => last_page,
45
- "spatial_coverage" => spatial_coverage,
46
- "funding" => funding,
47
- "is_identical_to" => is_identical_to,
48
- "is_part_of" => is_part_of,
49
- "has_part" => has_part,
50
- "is_previous_version_of" => is_previous_version_of,
51
- "is_new_version_of" => is_new_version_of,
52
- "is_variant_form_of" => is_variant_form_of,
53
- "is_original_form_of" => is_original_form_of,
54
- "references" => references,
55
- "is_referenced_by" => is_referenced_by,
56
- "is_supplement_to" => is_supplement_to,
57
- "is_supplemented_by" => is_supplemented_by,
58
- "reviews" => reviews,
59
- "is_reviewed_by" => is_reviewed_by,
38
+ "geo_location" => geo_location,
39
+ "funding_references" => funding_references,
40
+ "related_identifiers" => related_identifiers,
60
41
  "schema_version" => schema_version,
61
42
  "provider_id" => provider_id,
62
43
  "client_id" => client_id,