bolognese 0.4.3 → 2.6.1

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/build.yml +9 -0
  3. data/.github/workflows/changelog.yml +36 -0
  4. data/.github/workflows/ci.yml +22 -0
  5. data/.github/workflows/pull-request.yml +9 -0
  6. data/.github/workflows/release.yml +32 -0
  7. data/.gitignore +4 -0
  8. data/.rubocop.yml +658 -0
  9. data/CHANGELOG.md +1864 -0
  10. data/CITATION +17 -0
  11. data/Gemfile +1 -1
  12. data/Gemfile.lock +250 -98
  13. data/README.md +963 -21
  14. data/Rakefile +1 -0
  15. data/bin/bolognese +5 -1
  16. data/bolognese.gemspec +33 -21
  17. data/lib/bolognese/array.rb +13 -0
  18. data/lib/bolognese/author_utils.rb +147 -33
  19. data/lib/bolognese/citeproc_extensions.rb +48 -0
  20. data/lib/bolognese/cli.rb +28 -21
  21. data/lib/bolognese/datacite_utils.rb +292 -104
  22. data/lib/bolognese/doi_utils.rb +44 -20
  23. data/lib/bolognese/metadata.rb +249 -23
  24. data/lib/bolognese/metadata_utils.rb +228 -0
  25. data/lib/bolognese/pubmed.rb +2 -0
  26. data/lib/bolognese/readers/bibtex_reader.rb +100 -0
  27. data/lib/bolognese/readers/citeproc_reader.rb +125 -0
  28. data/lib/bolognese/readers/codemeta_reader.rb +108 -0
  29. data/lib/bolognese/readers/crosscite_reader.rb +17 -0
  30. data/lib/bolognese/readers/crossref_reader.rb +413 -0
  31. data/lib/bolognese/readers/datacite_json_reader.rb +17 -0
  32. data/lib/bolognese/readers/datacite_reader.rb +341 -0
  33. data/lib/bolognese/readers/npm_reader.rb +115 -0
  34. data/lib/bolognese/readers/ris_reader.rb +114 -0
  35. data/lib/bolognese/readers/schema_org_reader.rb +264 -0
  36. data/lib/bolognese/string.rb +3 -1
  37. data/lib/bolognese/utils.rb +1463 -25
  38. data/lib/bolognese/version.rb +1 -1
  39. data/lib/bolognese/whitelist_scrubber.rb +47 -0
  40. data/lib/bolognese/writers/bibtex_writer.rb +32 -0
  41. data/lib/bolognese/writers/citation_writer.rb +14 -0
  42. data/lib/bolognese/writers/citeproc_writer.rb +11 -0
  43. data/lib/bolognese/writers/codemeta_writer.rb +29 -0
  44. data/lib/bolognese/writers/crosscite_writer.rb +11 -0
  45. data/lib/bolognese/writers/crossref_writer.rb +11 -0
  46. data/lib/bolognese/writers/csv_writer.rb +24 -0
  47. data/lib/bolognese/writers/datacite_json_writer.rb +13 -0
  48. data/lib/bolognese/writers/datacite_writer.rb +12 -0
  49. data/lib/bolognese/writers/jats_writer.rb +137 -0
  50. data/lib/bolognese/writers/rdf_xml_writer.rb +11 -0
  51. data/lib/bolognese/writers/ris_writer.rb +29 -0
  52. data/lib/bolognese/writers/schema_org_writer.rb +55 -0
  53. data/lib/bolognese/writers/turtle_writer.rb +11 -0
  54. data/lib/bolognese.rb +19 -5
  55. data/package.json +12 -0
  56. data/resources/2008/09/xsd.xsl +997 -0
  57. data/resources/datacite-contributorType-v4.xsd +35 -0
  58. data/resources/datacite-dateType-v4.xsd +25 -0
  59. data/resources/datacite-descriptionType-v4.xsd +19 -0
  60. data/resources/datacite-funderIdentifierType-v4.xsd +15 -0
  61. data/resources/datacite-nameType-v4.xsd +10 -0
  62. data/resources/datacite-relatedIdentifierType-v4.xsd +34 -0
  63. data/resources/datacite-relationType-v4.xsd +49 -0
  64. data/resources/datacite-resourceType-v4.xsd +28 -0
  65. data/resources/datacite-titleType-v4.xsd +14 -0
  66. data/resources/kernel-2.1/include/datacite-contributorType-v1.1.xsd +22 -0
  67. data/resources/kernel-2.1/include/datacite-contributorType-v2.xsd +3 -0
  68. data/resources/kernel-2.1/include/datacite-dateType-v1.1.xsd +31 -0
  69. data/resources/kernel-2.1/include/datacite-dateType-v2.xsd +3 -0
  70. data/resources/kernel-2.1/include/datacite-descriptionType-v1.1.xsd +14 -0
  71. data/resources/kernel-2.1/include/datacite-descriptionType-v2.xsd +3 -0
  72. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v1.1.xsd +24 -0
  73. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v2.xsd +3 -0
  74. data/resources/kernel-2.1/include/datacite-relationType-v1.1.xsd +29 -0
  75. data/resources/kernel-2.1/include/datacite-relationType-v2.xsd +3 -0
  76. data/resources/kernel-2.1/include/datacite-resourceType-v1.1.xsd +22 -0
  77. data/resources/kernel-2.1/include/datacite-resourceType-v2.xsd +3 -0
  78. data/resources/kernel-2.1/include/datacite-titleType-v1.1.xsd +11 -0
  79. data/resources/kernel-2.1/include/datacite-titleType-v2.xsd +3 -0
  80. data/resources/kernel-2.1/metadata.xsd +315 -0
  81. data/resources/kernel-2.2/include/datacite-contributorType-v2.xsd +29 -0
  82. data/resources/kernel-2.2/include/datacite-dateType-v2.xsd +21 -0
  83. data/resources/kernel-2.2/include/datacite-descriptionType-v2.xsd +15 -0
  84. data/resources/kernel-2.2/include/datacite-relatedIdentifierType-v2.xsd +25 -0
  85. data/resources/kernel-2.2/include/datacite-relationType-v2.xsd +29 -0
  86. data/resources/kernel-2.2/include/datacite-resourceType-v2.xsd +23 -0
  87. data/resources/kernel-2.2/include/datacite-titleType-v2.xsd +10 -0
  88. data/resources/kernel-2.2/metadata.xsd +316 -0
  89. data/resources/kernel-3/include/datacite-contributorType-v3.1.xsd +35 -0
  90. data/resources/kernel-3/include/datacite-dateType-v3.xsd +21 -0
  91. data/resources/kernel-3/include/datacite-descriptionType-v3.xsd +17 -0
  92. data/resources/kernel-3/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
  93. data/resources/kernel-3/include/datacite-relationType-v3.1.xsd +38 -0
  94. data/resources/kernel-3/include/datacite-resourceType-v3.xsd +26 -0
  95. data/resources/kernel-3/include/datacite-titleType-v3.xsd +12 -0
  96. data/resources/kernel-3/include/xml.xsd +286 -0
  97. data/resources/kernel-3/metadata.xsd +380 -0
  98. data/resources/kernel-3.0/include/datacite-contributorType-v3.xsd +33 -0
  99. data/resources/kernel-3.0/include/datacite-dateType-v3.xsd +21 -0
  100. data/resources/kernel-3.0/include/datacite-descriptionType-v3.xsd +17 -0
  101. data/resources/kernel-3.0/include/datacite-relatedIdentifierType-v3.xsd +27 -0
  102. data/resources/kernel-3.0/include/datacite-relationType-v3.xsd +33 -0
  103. data/resources/kernel-3.0/include/datacite-resourceType-v3.xsd +26 -0
  104. data/resources/kernel-3.0/include/datacite-titleType-v3.xsd +12 -0
  105. data/resources/kernel-3.0/include/xml.xsd +286 -0
  106. data/resources/kernel-3.0/metadata.xsd +377 -0
  107. data/resources/kernel-3.1/include/datacite-contributorType-v3.1.xsd +35 -0
  108. data/resources/kernel-3.1/include/datacite-dateType-v3.xsd +21 -0
  109. data/resources/kernel-3.1/include/datacite-descriptionType-v3.xsd +17 -0
  110. data/resources/kernel-3.1/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
  111. data/resources/kernel-3.1/include/datacite-relationType-v3.1.xsd +38 -0
  112. data/resources/kernel-3.1/include/datacite-resourceType-v3.xsd +26 -0
  113. data/resources/kernel-3.1/include/datacite-titleType-v3.xsd +12 -0
  114. data/resources/kernel-3.1/include/xml.xsd +286 -0
  115. data/resources/kernel-3.1/metadata.xsd +380 -0
  116. data/resources/kernel-4/include/datacite-contributorType-v4.xsd +37 -0
  117. data/resources/kernel-4/include/datacite-dateType-v4.xsd +27 -0
  118. data/resources/kernel-4/include/datacite-descriptionType-v4.xsd +19 -0
  119. data/resources/kernel-4/include/datacite-funderIdentifierType-v4.xsd +16 -0
  120. data/resources/kernel-4/include/datacite-nameType-v4.xsd +10 -0
  121. data/resources/kernel-4/include/datacite-numberType-v4.xsd +12 -0
  122. data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +39 -0
  123. data/resources/kernel-4/include/datacite-relationType-v4.xsd +59 -0
  124. data/resources/kernel-4/include/datacite-resourceType-v4.xsd +52 -0
  125. data/resources/kernel-4/include/datacite-titleType-v4.xsd +14 -0
  126. data/resources/kernel-4/include/xml.xsd +286 -0
  127. data/resources/kernel-4/metadata.xsd +715 -0
  128. data/resources/kernel-4.0/include/datacite-contributorType-v4.xsd +1 -1
  129. data/resources/kernel-4.0/include/datacite-dateType-v4.xsd +1 -1
  130. data/resources/kernel-4.0/include/datacite-descriptionType-v4.xsd +1 -1
  131. data/resources/kernel-4.0/include/datacite-funderIdentifierType-v4.xsd +1 -1
  132. data/resources/kernel-4.0/include/datacite-relatedIdentifierType-v4.xsd +1 -1
  133. data/resources/kernel-4.0/include/datacite-relationType-v4.xsd +1 -1
  134. data/resources/kernel-4.0/include/datacite-resourceType-v4.xsd +1 -1
  135. data/resources/kernel-4.0/include/datacite-titleType-v4.xsd +1 -1
  136. data/resources/kernel-4.0/include/xml.xsd +286 -0
  137. data/resources/kernel-4.0/metadata.xsd +2 -2
  138. data/resources/kernel-4.1/include/datacite-contributorType-v4.xsd +35 -0
  139. data/resources/kernel-4.1/include/datacite-dateType-v4.1.xsd +23 -0
  140. data/resources/kernel-4.1/include/datacite-descriptionType-v4.xsd +19 -0
  141. data/resources/kernel-4.1/include/datacite-funderIdentifierType-v4.xsd +15 -0
  142. data/resources/kernel-4.1/include/datacite-nameType-v4.1.xsd +10 -0
  143. data/resources/kernel-4.1/include/datacite-relatedIdentifierType-v4.xsd +32 -0
  144. data/resources/kernel-4.1/include/datacite-relationType-v4.1.xsd +46 -0
  145. data/resources/kernel-4.1/include/datacite-resourceType-v4.1.xsd +28 -0
  146. data/resources/kernel-4.1/include/datacite-titleType-v4.xsd +14 -0
  147. data/resources/kernel-4.1/include/xml.xsd +286 -0
  148. data/resources/kernel-4.1/metadata.xsd +483 -0
  149. data/resources/kernel-4.2/include/datacite-contributorType-v4.xsd +35 -0
  150. data/resources/kernel-4.2/include/datacite-dateType-v4.xsd +25 -0
  151. data/resources/kernel-4.2/include/datacite-descriptionType-v4.xsd +19 -0
  152. data/resources/kernel-4.2/include/datacite-funderIdentifierType-v4.xsd +15 -0
  153. data/resources/kernel-4.2/include/datacite-nameType-v4.xsd +10 -0
  154. data/resources/kernel-4.2/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  155. data/resources/kernel-4.2/include/datacite-relationType-v4.xsd +49 -0
  156. data/resources/kernel-4.2/include/datacite-resourceType-v4.xsd +28 -0
  157. data/resources/kernel-4.2/include/datacite-titleType-v4.xsd +14 -0
  158. data/resources/kernel-4.2/include/xml.xsd +286 -0
  159. data/resources/kernel-4.2/metadata.xsd +479 -0
  160. data/resources/kernel-4.3/include/datacite-contributorType-v4.xsd +35 -0
  161. data/resources/kernel-4.3/include/datacite-dateType-v4.xsd +25 -0
  162. data/resources/kernel-4.3/include/datacite-descriptionType-v4.xsd +19 -0
  163. data/resources/kernel-4.3/include/datacite-funderIdentifierType-v4.xsd +16 -0
  164. data/resources/kernel-4.3/include/datacite-nameType-v4.xsd +10 -0
  165. data/resources/kernel-4.3/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  166. data/resources/kernel-4.3/include/datacite-relationType-v4.xsd +49 -0
  167. data/resources/kernel-4.3/include/datacite-resourceType-v4.xsd +28 -0
  168. data/resources/kernel-4.3/include/datacite-titleType-v4.xsd +14 -0
  169. data/resources/kernel-4.3/include/xml.xsd +286 -0
  170. data/resources/kernel-4.3/metadata.xsd +515 -0
  171. data/resources/kernel-4.4/include/datacite-contributorType-v4.xsd +35 -0
  172. data/resources/kernel-4.4/include/datacite-dateType-v4.xsd +25 -0
  173. data/resources/kernel-4.4/include/datacite-descriptionType-v4.xsd +19 -0
  174. data/resources/kernel-4.4/include/datacite-funderIdentifierType-v4.xsd +16 -0
  175. data/resources/kernel-4.4/include/datacite-nameType-v4.xsd +10 -0
  176. data/resources/kernel-4.4/include/datacite-numberType-v4.xsd +12 -0
  177. data/resources/kernel-4.4/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  178. data/resources/kernel-4.4/include/datacite-relationType-v4.xsd +51 -0
  179. data/resources/kernel-4.4/include/datacite-resourceType-v4.xsd +43 -0
  180. data/resources/kernel-4.4/include/datacite-titleType-v4.xsd +14 -0
  181. data/resources/kernel-4.4/include/xml.xsd +286 -0
  182. data/resources/kernel-4.4/metadata.xsd +707 -0
  183. data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
  184. data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
  185. data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
  186. data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
  187. data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
  188. data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
  189. data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  190. data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
  191. data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
  192. data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
  193. data/resources/kernel-4.5/include/xml.xsd +286 -0
  194. data/resources/kernel-4.5/metadata.xsd +711 -0
  195. data/resources/kernel-4.6/include/datacite-contributorType-v4.xsd +37 -0
  196. data/resources/kernel-4.6/include/datacite-dateType-v4.xsd +27 -0
  197. data/resources/kernel-4.6/include/datacite-descriptionType-v4.xsd +19 -0
  198. data/resources/kernel-4.6/include/datacite-funderIdentifierType-v4.xsd +16 -0
  199. data/resources/kernel-4.6/include/datacite-nameType-v4.xsd +10 -0
  200. data/resources/kernel-4.6/include/datacite-numberType-v4.xsd +12 -0
  201. data/resources/kernel-4.6/include/datacite-relatedIdentifierType-v4.xsd +37 -0
  202. data/resources/kernel-4.6/include/datacite-relationType-v4.xsd +57 -0
  203. data/resources/kernel-4.6/include/datacite-resourceType-v4.xsd +49 -0
  204. data/resources/kernel-4.6/include/datacite-titleType-v4.xsd +14 -0
  205. data/resources/kernel-4.6/include/xml.xsd +286 -0
  206. data/resources/kernel-4.6/metadata.xsd +712 -0
  207. data/resources/kernel-4.7/include/datacite-contributorType-v4.xsd +37 -0
  208. data/resources/kernel-4.7/include/datacite-dateType-v4.xsd +27 -0
  209. data/resources/kernel-4.7/include/datacite-descriptionType-v4.xsd +19 -0
  210. data/resources/kernel-4.7/include/datacite-funderIdentifierType-v4.xsd +16 -0
  211. data/resources/kernel-4.7/include/datacite-nameType-v4.xsd +10 -0
  212. data/resources/kernel-4.7/include/datacite-numberType-v4.xsd +12 -0
  213. data/resources/kernel-4.7/include/datacite-relatedIdentifierType-v4.xsd +39 -0
  214. data/resources/kernel-4.7/include/datacite-relationType-v4.xsd +59 -0
  215. data/resources/kernel-4.7/include/datacite-resourceType-v4.xsd +52 -0
  216. data/resources/kernel-4.7/include/datacite-titleType-v4.xsd +14 -0
  217. data/resources/kernel-4.7/include/xml.xsd +286 -0
  218. data/resources/kernel-4.7/metadata.xsd +715 -0
  219. data/resources/oecd/dfg-mappings.json +1866 -0
  220. data/resources/oecd/for-mappings.json +1101 -0
  221. data/resources/oecd/fos-mappings.json +198 -0
  222. data/resources/schema_org/jsonldcontext.json +7477 -0
  223. data/resources/spdx/licenses.json +5297 -0
  224. data/resources/xml.xsd +286 -0
  225. metadata +466 -179
  226. data/.travis.yml +0 -23
  227. data/lib/bolognese/crossref.rb +0 -233
  228. data/lib/bolognese/datacite.rb +0 -176
  229. data/lib/bolognese/date_utils.rb +0 -48
  230. data/lib/bolognese/github.rb +0 -106
  231. data/lib/bolognese/orcid.rb +0 -24
  232. data/lib/bolognese/schema_org.rb +0 -170
  233. data/spec/cli_spec.rb +0 -56
  234. data/spec/crossref_spec.rb +0 -190
  235. data/spec/datacite_spec.rb +0 -93
  236. data/spec/datacite_utils_spec.rb +0 -159
  237. data/spec/doi_utils_spec.rb +0 -89
  238. data/spec/fixtures/crossref.xml +0 -742
  239. data/spec/fixtures/datacite.xml +0 -40
  240. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_crossref.yml +0 -760
  241. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_datacite.yml +0 -1476
  242. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_schema_org.yml +0 -1476
  243. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_datacite.yml +0 -214
  244. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_schema_org.yml +0 -384
  245. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/schema_org/as_datacite.yml +0 -653
  246. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/schema_org/as_schema_org.yml +0 -653
  247. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/crossref.yml +0 -971
  248. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/datacite.yml +0 -971
  249. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/medra.yml +0 -971
  250. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/not_found.yml +0 -971
  251. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_ORCID_ID.yml +0 -506
  252. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_SICI_DOI.yml +0 -214
  253. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_data_citation.yml +0 -901
  254. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/date_in_future.yml +0 -305
  255. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/journal_article.yml +0 -1080
  256. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/not_found_error.yml +0 -183
  257. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/posted_content.yml +0 -389
  258. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata_as_datacite_xml/DOI_with_ORCID_ID.yml +0 -506
  259. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata_as_datacite_xml/DOI_with_data_citation.yml +0 -901
  260. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata_as_datacite_xml/with_ORCID_ID.yml +0 -366
  261. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata_as_datacite_xml/with_data_citation.yml +0 -719
  262. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata_as_datacite_xml/with_editor.yml +0 -930
  263. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata_as_string/DOI_with_data_citation.yml +0 -719
  264. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_doi/SICI_doi.yml +0 -930
  265. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_doi/doi.yml +0 -930
  266. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_doi/doi_from_url_without_doi_proxy.yml +0 -930
  267. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_doi/doi_prefix_too_long.yml +0 -930
  268. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_doi/doi_prefix_with_string.yml +0 -930
  269. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_doi/doi_with_protocol.yml +0 -930
  270. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_doi/dx_doi_org_url.yml +0 -930
  271. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_doi/https_url.yml +0 -930
  272. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_doi/not_valid_doi_prefix.yml +0 -930
  273. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_ids/doi.yml +0 -930
  274. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_ids/url.yml +0 -930
  275. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_url/doi.yml +0 -930
  276. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/normalize_url/url.yml +0 -930
  277. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/parse_attribute/array.yml +0 -930
  278. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/parse_attribute/hash.yml +0 -930
  279. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/parse_attribute/nil.yml +0 -930
  280. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/parse_attribute/string.yml +0 -930
  281. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/parse_attributes/array.yml +0 -930
  282. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/parse_attributes/hash.yml +0 -930
  283. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/parse_attributes/nil.yml +0 -930
  284. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/parse_attributes/string.yml +0 -930
  285. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/validate_url/DOI.yml +0 -930
  286. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/validate_url/URL.yml +0 -930
  287. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/validate_url/string.yml +0 -930
  288. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/BlogPosting.yml +0 -477
  289. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/Dataset.yml +0 -1925
  290. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/Date.yml +0 -458
  291. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata_as_string/Dataset.yml +0 -173
  292. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_alternate_identifiers/insert.yml +0 -173
  293. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_contributors/insert.yml +0 -173
  294. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_contributors/none.yml +0 -173
  295. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_creators/insert.yml +0 -173
  296. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_dates/insert.yml +0 -173
  297. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_descriptions/insert.yml +0 -155
  298. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_identifier/doi.yml +0 -173
  299. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_person/creator.yml +0 -173
  300. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_person/creator_given_and_family_name.yml +0 -173
  301. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_person/creator_only_name.yml +0 -173
  302. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_publication_year/insert.yml +0 -173
  303. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_publisher/insert.yml +0 -173
  304. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_related_identifiers/get_related_identifier_type.yml +0 -173
  305. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_related_identifiers/get_related_identifier_type_DOI.yml +0 -173
  306. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_related_identifiers/get_related_identifier_type_URL.yml +0 -173
  307. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_related_identifiers/insert.yml +0 -173
  308. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_related_identifiers/rel_identifiers.yml +0 -173
  309. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_resource_type/insert.yml +0 -173
  310. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_rights_list/insert.yml +0 -173
  311. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_subjects/insert.yml +0 -173
  312. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_titles/doi.yml +0 -173
  313. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_titles/insert.yml +0 -173
  314. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/insert_version/insert.yml +0 -173
  315. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref.yml +0 -44
  316. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref_doi_not_url.yml +0 -44
  317. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite.yml +0 -44
  318. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite_doi_http.yml +0 -44
  319. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/orcid.yml +0 -44
  320. data/spec/fixtures/vcr_cassettes/Bolognese_SchemaOrg/get_metadata/BlogPosting.yml +0 -632
  321. data/spec/fixtures/vcr_cassettes/Bolognese_SchemaOrg/get_metadata/not_found_error.yml +0 -93
  322. data/spec/metadata_spec.rb +0 -41
  323. data/spec/orcid_spec.rb +0 -23
  324. data/spec/schema_org_spec.rb +0 -33
  325. data/spec/spec_helper.rb +0 -88
  326. data/spec/utils_spec.rb +0 -107
@@ -1,23 +1,634 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Bolognese
2
4
  module Utils
3
- def normalize_orcid(orcid)
4
- orcid = validate_orcid(orcid)
5
- return nil unless orcid.present?
5
+ class << self
6
+ include Utils
7
+ end
6
8
 
7
- # turn ORCID ID into URL
8
- "http://orcid.org/" + Addressable::URI.encode(orcid)
9
+ NORMALIZED_LICENSES = {
10
+ "https://creativecommons.org/licenses/by/1.0" => "https://creativecommons.org/licenses/by/1.0/legalcode",
11
+ "https://creativecommons.org/licenses/by/2.0" => "https://creativecommons.org/licenses/by/2.0/legalcode",
12
+ "https://creativecommons.org/licenses/by/2.5" => "https://creativecommons.org/licenses/by/2.5/legalcode",
13
+ "https://creativecommons.org/licenses/by/3.0" => "https://creativecommons.org/licenses/by/3.0/legalcode",
14
+ "https://creativecommons.org/licenses/by/3.0/us" => "https://creativecommons.org/licenses/by/3.0/legalcode",
15
+ "https://creativecommons.org/licenses/by/4.0" => "https://creativecommons.org/licenses/by/4.0/legalcode",
16
+ "https://creativecommons.org/licenses/by-nc/1.0" => "https://creativecommons.org/licenses/by-nc/1.0/legalcode",
17
+ "https://creativecommons.org/licenses/by-nc/2.0" => "https://creativecommons.org/licenses/by-nc/2.0/legalcode",
18
+ "https://creativecommons.org/licenses/by-nc/2.5" => "https://creativecommons.org/licenses/by-nc/2.5/legalcode",
19
+ "https://creativecommons.org/licenses/by-nc/3.0" => "https://creativecommons.org/licenses/by-nc/3.0/legalcode",
20
+ "https://creativecommons.org/licenses/by-nc/4.0" => "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
21
+ "https://creativecommons.org/licenses/by-nd-nc/1.0" => "https://creativecommons.org/licenses/by-nd-nc/1.0/legalcode",
22
+ "https://creativecommons.org/licenses/by-nd-nc/2.0" => "https://creativecommons.org/licenses/by-nd-nc/2.0/legalcode",
23
+ "https://creativecommons.org/licenses/by-nd-nc/2.5" => "https://creativecommons.org/licenses/by-nd-nc/2.5/legalcode",
24
+ "https://creativecommons.org/licenses/by-nd-nc/3.0" => "https://creativecommons.org/licenses/by-nd-nc/3.0/legalcode",
25
+ "https://creativecommons.org/licenses/by-nd-nc/4.0" => "https://creativecommons.org/licenses/by-nd-nc/4.0/legalcode",
26
+ "https://creativecommons.org/licenses/by-nc-sa/1.0" => "https://creativecommons.org/licenses/by-nc-sa/1.0/legalcode",
27
+ "https://creativecommons.org/licenses/by-nc-sa/2.0" => "https://creativecommons.org/licenses/by-nc-sa/2.0/legalcode",
28
+ "https://creativecommons.org/licenses/by-nc-sa/2.5" => "https://creativecommons.org/licenses/by-nc-sa/2.5/legalcode",
29
+ "https://creativecommons.org/licenses/by-nc-sa/3.0" => "https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode",
30
+ "https://creativecommons.org/licenses/by-nc-sa/4.0" => "https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode",
31
+ "https://creativecommons.org/licenses/by-nd/1.0" => "https://creativecommons.org/licenses/by-nd/1.0/legalcode",
32
+ "https://creativecommons.org/licenses/by-nd/2.0" => "https://creativecommons.org/licenses/by-nd/2.0/legalcode",
33
+ "https://creativecommons.org/licenses/by-nd/2.5" => "https://creativecommons.org/licenses/by-nd/2.5/legalcode",
34
+ "https://creativecommons.org/licenses/by-nd/3.0" => "https://creativecommons.org/licenses/by-nd/3.0/legalcode",
35
+ "https://creativecommons.org/licenses/by-nd/4.0" => "https://creativecommons.org/licenses/by-nd/4.0/legalcode",
36
+ "https://creativecommons.org/licenses/by-sa/1.0" => "https://creativecommons.org/licenses/by-sa/1.0/legalcode",
37
+ "https://creativecommons.org/licenses/by-sa/2.0" => "https://creativecommons.org/licenses/by-sa/2.0/legalcode",
38
+ "https://creativecommons.org/licenses/by-sa/2.5" => "https://creativecommons.org/licenses/by-sa/2.5/legalcode",
39
+ "https://creativecommons.org/licenses/by-sa/3.0" => "https://creativecommons.org/licenses/by-sa/3.0/legalcode",
40
+ "https://creativecommons.org/licenses/by-sa/4.0" => "https://creativecommons.org/licenses/by-sa/4.0/legalcode",
41
+ "https://creativecommons.org/licenses/by-nc-nd/1.0" => "https://creativecommons.org/licenses/by-nc-nd/1.0/legalcode",
42
+ "https://creativecommons.org/licenses/by-nc-nd/2.0" => "https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode",
43
+ "https://creativecommons.org/licenses/by-nc-nd/2.5" => "https://creativecommons.org/licenses/by-nc-nd/2.5/legalcode",
44
+ "https://creativecommons.org/licenses/by-nc-nd/3.0" => "https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode",
45
+ "https://creativecommons.org/licenses/by-nc-nd/4.0" => "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode",
46
+ "https://creativecommons.org/licenses/publicdomain" => "https://creativecommons.org/licenses/publicdomain/",
47
+ "https://creativecommons.org/publicdomain/zero/1.0" => "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
48
+ }
49
+
50
+ DC_TO_SO_TRANSLATIONS = {
51
+ "Audiovisual" => "MediaObject",
52
+ "Book" => "Book",
53
+ "BookChapter" => "Chapter",
54
+ "Collection" => "Collection",
55
+ "ComputationalNotebook" => "SoftwareSourceCode",
56
+ "ConferencePaper" => "Article",
57
+ "ConferenceProceeding" => "Periodical",
58
+ "DataPaper" => "Article",
59
+ "Dataset" => "Dataset",
60
+ "Dissertation" => "Thesis",
61
+ "Event" => "Event",
62
+ "Image" => "ImageObject",
63
+ "InteractiveResource" => nil,
64
+ "Journal" => "Periodical",
65
+ "JournalArticle" => "ScholarlyArticle",
66
+ "Model" => nil,
67
+ "OutputManagementPlan" => nil,
68
+ "PeerReview" => "Review",
69
+ "PhysicalObject" => nil,
70
+ "Poster" => "Poster",
71
+ "Preprint" => nil,
72
+ "Presentation" => "PresentationDigitalDocument",
73
+ "Report" => "Report",
74
+ "Service" => "Service",
75
+ "Software" => "SoftwareSourceCode",
76
+ "Sound" => "AudioObject",
77
+ "Standard" => nil,
78
+ "Text" => "ScholarlyArticle",
79
+ "Workflow" => nil,
80
+ "Other" => "CreativeWork",
81
+ # not part of DataCite schema, but used internally
82
+ "Periodical" => "Periodical",
83
+ "DataCatalog" => "DataCatalog",
84
+ "Award" => "Grant",
85
+ "Project" => "Project"
86
+ }
87
+
88
+ DC_TO_CP_TRANSLATIONS = {
89
+ "Audiovisual" => "motion_picture",
90
+ "Book" => "book",
91
+ "BookChapter" => "chapter",
92
+ "Collection" => nil,
93
+ "ComputationalNotebook" => nil,
94
+ "ConferencePaper" => "paper_conference",
95
+ "ConferenceProceeding" => nil,
96
+ "DataPaper" => "report",
97
+ "Dataset" => "dataset",
98
+ "Dissertation" => nil,
99
+ "Event" => nil,
100
+ "Image" => "graphic",
101
+ "InteractiveResource" => nil,
102
+ "Journal" => nil,
103
+ "JournalArticle" => "article-journal",
104
+ "Model" => nil,
105
+ "OutputManagementPlan" => nil,
106
+ "PeerReview" => "review",
107
+ "PhysicalObject" => nil,
108
+ "Poster" => "document",
109
+ "Preprint" => nil,
110
+ "Presentation" => "presentation",
111
+ "Report" => "report",
112
+ "Service" => nil,
113
+ "Sound" => "song",
114
+ "Standard" => nil,
115
+ "Text" => "report",
116
+ "Workflow" => nil,
117
+ "Other" => nil
118
+ }
119
+
120
+ CR_TO_CP_TRANSLATIONS = {
121
+ "Proceedings" => nil,
122
+ "ReferenceBook" => nil,
123
+ "JournalIssue" => nil,
124
+ "ProceedingsArticle" => "paper-conference",
125
+ "Other" => nil,
126
+ "Dissertation" => "thesis",
127
+ "Dataset" => "dataset",
128
+ "EditedBook" => "book",
129
+ "JournalArticle" => "article-journal",
130
+ "Journal" => nil,
131
+ "Report" => "report",
132
+ "BookSeries" => nil,
133
+ "ReportSeries" => nil,
134
+ "BookTrack" => nil,
135
+ "Standard" => nil,
136
+ "BookSection" => "chapter",
137
+ "BookPart" => nil,
138
+ "Book" => "book",
139
+ "BookChapter" => "chapter",
140
+ "StandardSeries" => nil,
141
+ "Monograph" => "book",
142
+ "Component" => nil,
143
+ "ReferenceEntry" => "entry-dictionary",
144
+ "JournalVolume" => nil,
145
+ "BookSet" => nil
146
+ }
147
+
148
+ CR_TO_SO_TRANSLATIONS = {
149
+ "Proceedings" => nil,
150
+ "ReferenceBook" => "Book",
151
+ "JournalIssue" => "PublicationIssue",
152
+ "ProceedingsArticle" => nil,
153
+ "Other" => "CreativeWork",
154
+ "Dissertation" => "Thesis",
155
+ "Dataset" => "Dataset",
156
+ "EditedBook" => "Book",
157
+ "JournalArticle" => "ScholarlyArticle",
158
+ "Journal" => nil,
159
+ "Report" => "Report",
160
+ "BookSeries" => nil,
161
+ "ReportSeries" => nil,
162
+ "BookTrack" => nil,
163
+ "Standard" => nil,
164
+ "BookSection" => nil,
165
+ "BookPart" => nil,
166
+ "Book" => "Book",
167
+ "BookChapter" => "Chapter",
168
+ "StandardSeries" => nil,
169
+ "Monograph" => "Book",
170
+ "Component" => "CreativeWork",
171
+ "ReferenceEntry" => nil,
172
+ "JournalVolume" => "PublicationVolume",
173
+ "BookSet" => nil,
174
+ "PostedContent" => "ScholarlyArticle",
175
+ "PeerReview" => "Review"
176
+ }
177
+
178
+ CR_TO_BIB_TRANSLATIONS = {
179
+ "Proceedings" => "proceedings",
180
+ "ReferenceBook" => "book",
181
+ "JournalIssue" => nil,
182
+ "ProceedingsArticle" => nil,
183
+ "Other" => nil,
184
+ "Dissertation" => "phdthesis",
185
+ "Dataset" => nil,
186
+ "EditedBook" => "book",
187
+ "JournalArticle" => "article",
188
+ "Journal" => nil,
189
+ "Report" => "techreport",
190
+ "BookSeries" => nil,
191
+ "ReportSeries" => nil,
192
+ "BookTrack" => nil,
193
+ "Standard" => nil,
194
+ "BookSection" => "inbook",
195
+ "BookPart" => nil,
196
+ "Book" => "book",
197
+ "BookChapter" => "inbook",
198
+ "StandardSeries" => nil,
199
+ "Monograph" => "book",
200
+ "Component" => nil,
201
+ "ReferenceEntry" => nil,
202
+ "JournalVolume" => nil,
203
+ "BookSet" => nil,
204
+ "PostedContent" => "article"
205
+ }
206
+
207
+ BIB_TO_CR_TRANSLATIONS = {
208
+ "proceedings" => "Proceedings",
209
+ "phdthesis" => "Dissertation",
210
+ "article" => "JournalArticle",
211
+ "book" => "Book",
212
+ "inbook" => "BookChapter"
213
+ }
214
+
215
+ CR_TO_JATS_TRANSLATIONS = {
216
+ "Proceedings" => "working-paper",
217
+ "ReferenceBook" => "book",
218
+ "JournalIssue" => "journal",
219
+ "ProceedingsArticle" => "working-paper",
220
+ "Other" => nil,
221
+ "Dissertation" => nil,
222
+ "Dataset" => "data",
223
+ "EditedBook" => "book",
224
+ "JournalArticle" => "journal",
225
+ "Journal" => "journal",
226
+ "Report" => "report",
227
+ "BookSeries" => "book",
228
+ "ReportSeries" => "report",
229
+ "BookTrack" => "book",
230
+ "Standard" => "standard",
231
+ "BookSection" => "chapter",
232
+ "BookPart" => "chapter",
233
+ "Book" => "book",
234
+ "BookChapter" => "chapter",
235
+ "StandardSeries" => "standard",
236
+ "Monograph" => "book",
237
+ "Component" => nil,
238
+ "ReferenceEntry" => nil,
239
+ "JournalVolume" => "journal",
240
+ "BookSet" => "book"
241
+ }
242
+
243
# Maps Crossref content types to DataCite resourceTypeGeneral values.
# A nil value means there is no matching DataCite type.
CR_TO_DC_TRANSLATIONS = {
  "Proceedings" => nil,
  "ReferenceBook" => nil,
  "JournalIssue" => "Text",
  "ProceedingsArticle" => "ConferencePaper",
  "Other" => "Other",
  "Dissertation" => "Dissertation",
  "Dataset" => "Dataset",
  "EditedBook" => "Book",
  "JournalArticle" => "JournalArticle",
  "Journal" => "Journal",
  "Report" => "Report",
  "BookSeries" => nil,
  "ReportSeries" => nil,
  "BookTrack" => nil,
  "Standard" => "Standard",
  "BookSection" => "BookChapter",
  "BookPart" => nil,
  "Book" => "Book",
  "BookChapter" => "BookChapter",
  "SaComponent" => "Text",
  "StandardSeries" => "Standard",
  # DataCite types are capitalised; the lowercase "book" was not a valid value
  "Monograph" => "Book",
  "Component" => nil,
  "ReferenceEntry" => nil,
  "JournalVolume" => nil,
  "BookSet" => nil,
  "PostedContent" => "JournalArticle",
  "PeerReview" => "PeerReview"
}
273
+
274
+ SO_TO_DC_TRANSLATIONS = {
275
+ "Article" => "Text",
276
+ "AudioObject" => "Sound",
277
+ "Blog" => "Text",
278
+ "BlogPosting" => "Text",
279
+ "Book" => "Book",
280
+ "Chapter" => "BookChapter",
281
+ "Collection" => "Collection",
282
+ "DataCatalog" => "Dataset",
283
+ "Dataset" => "Dataset",
284
+ "Event" => "Event",
285
+ "ImageObject" => "Image",
286
+ "Movie" => "Audiovisual",
287
+ "Poster" => "Poster",
288
+ "PresentationDigitalDocument" => "Presentation",
289
+ "PublicationIssue" => "Text",
290
+ "Report" => "Report",
291
+ "ScholarlyArticle" => "Text",
292
+ "Thesis" => "Text",
293
+ "Service" => "Service",
294
+ "Review" => "PeerReview",
295
+ "SoftwareSourceCode" => "Software",
296
+ "VideoObject" => "Audiovisual",
297
+ "WebPage" => "Text",
298
+ "WebSite" => "Text"
299
+ }
300
+
301
+ SO_TO_JATS_TRANSLATIONS = {
302
+ "Article" => "journal",
303
+ "AudioObject" => nil,
304
+ "Blog" => nil,
305
+ "BlogPosting" => nil,
306
+ "Book" => "book",
307
+ "Collection" => nil,
308
+ "CreativeWork" => nil,
309
+ "DataCatalog" => "data",
310
+ "Dataset" => "data",
311
+ "Event" => nil,
312
+ "ImageObject" => nil,
313
+ "Movie" => nil,
314
+ "PublicationIssue" => "journal",
315
+ "ScholarlyArticle" => "journal",
316
+ "Service" => nil,
317
+ "SoftwareSourceCode" => "software",
318
+ "VideoObject" => nil,
319
+ "WebPage" => nil,
320
+ "WebSite" => "website"
321
+ }
322
+
323
# Maps schema.org types to CSL (citeproc) item types.
# A nil value means there is no dedicated CSL type.
SO_TO_CP_TRANSLATIONS = {
  # was "" – an empty string is truthy in Ruby, so it would suppress any
  # `|| default` fallback and emit an empty CSL type
  "Article" => "article-newspaper",
  "AudioObject" => "song",
  "Blog" => "report",
  "BlogPosting" => "post-weblog",
  "Collection" => nil,
  "CreativeWork" => nil,
  "DataCatalog" => "dataset",
  "Dataset" => "dataset",
  "Event" => nil,
  "ImageObject" => "graphic",
  "Movie" => "motion_picture",
  "Poster" => "document",
  "PresentationDigitalDocument" => "presentation",
  "PublicationIssue" => nil,
  "Report" => "report",
  "ScholarlyArticle" => "article-journal",
  "Service" => nil,
  "Thesis" => "thesis",
  "VideoObject" => "broadcast",
  "WebPage" => "webpage",
  "WebSite" => "webpage"
}
346
+
347
+ SO_TO_RIS_TRANSLATIONS = {
348
+ "Article" => nil,
349
+ "AudioObject" => nil,
350
+ "Blog" => nil,
351
+ "BlogPosting" => "BLOG",
352
+ "Collection" => nil,
353
+ "CreativeWork" => "GEN",
354
+ "DataCatalog" => "CTLG",
355
+ "Dataset" => "DATA",
356
+ "Event" => nil,
357
+ "ImageObject" => "FIGURE",
358
+ "Movie" => "MPCT",
359
+ "Poster" => "GEN",
360
+ "PresentationDigitalDocument" => "SLIDE",
361
+ "PublicationIssue" => nil,
362
+ "Report" => "RPRT",
363
+ "ScholarlyArticle" => "JOUR",
364
+ "Service" => nil,
365
+ "SoftwareSourceCode" => "COMP",
366
+ "VideoObject" => "VIDEO",
367
+ "WebPage" => "ELEC",
368
+ "WebSite" => nil
369
+ }
370
+
371
+ CR_TO_RIS_TRANSLATIONS = {
372
+ "Proceedings" => "CONF",
373
+ "ReferenceBook" => "BOOK",
374
+ "JournalIssue" => nil,
375
+ "ProceedingsArticle" => "CPAPER",
376
+ "Other" => "GEN",
377
+ "Dissertation" => "THES",
378
+ "Dataset" => "DATA",
379
+ "EditedBook" => "BOOK",
380
+ "JournalArticle" => "JOUR",
381
+ "Journal" => nil,
382
+ "Report" => "RPRT",
383
+ "BookSeries" => nil,
384
+ "ReportSeries" => nil,
385
+ "BookTrack" => nil,
386
+ "Standard" => "STAND",
387
+ "BookSection" => "CHAP",
388
+ "BookPart" => "CHAP",
389
+ "Book" => "BOOK",
390
+ "BookChapter" => "CHAP",
391
+ "StandardSeries" => nil,
392
+ "Monograph" => "BOOK",
393
+ "Component" => nil,
394
+ "ReferenceEntry" => "DICT",
395
+ "JournalVolume" => nil,
396
+ "BookSet" => nil
397
+ }
398
+
399
+ DC_TO_RIS_TRANSLATIONS = {
400
+ "Audiovisual" => "MPCT",
401
+ "Book" => "BOOK",
402
+ "BookChapter" => "CHAP",
403
+ "Collection" => nil,
404
+ "ComputationalNotebook" => "COMP",
405
+ "ConferencePaper" => "CPAPER",
406
+ "ConferenceProceeding" => "CONF",
407
+ "DataPaper" => nil,
408
+ "Dataset" => "DATA",
409
+ "Dissertation" => "THES",
410
+ "Event" => nil,
411
+ "Image" => "FIGURE",
412
+ "InteractiveResource" => nil,
413
+ "Journal" => nil,
414
+ "JournalArticle" => "JOUR",
415
+ "Model" => nil,
416
+ "OutputManagementPlan" => nil,
417
+ "PeerReview" => nil,
418
+ "PhysicalObject" => nil,
419
+ "Poster" => "GEN",
420
+ "Preprint" => nil,
421
+ "Presentation" => "SLIDE",
422
+ "Report" => "RRPT",
423
+ "Service" => nil,
424
+ "Software" => "COMP",
425
+ "Sound" => "SOUND",
426
+ "Standard" => nil,
427
+ "Text" => "RPRT",
428
+ "Workflow" => nil,
429
+ "Other" => nil
430
+ }
431
+
432
+ RIS_TO_DC_TRANSLATIONS = {
433
+ "BLOG" => "Text",
434
+ "GEN" => "Poster",
435
+ "CTLG" => "Collection",
436
+ "DATA" => "Dataset",
437
+ "FIGURE" => "Image",
438
+ "THES" => "Dissertation",
439
+ "MPCT" => "Audiovisual",
440
+ "JOUR" => "JournalArticle",
441
+ "COMP" => "Software",
442
+ "VIDEO" => "Audiovisual",
443
+ "ELEC" => "Text",
444
+ "SLIDE" => "Presentation"
445
+ }
446
+
447
+ BIB_TO_DC_TRANSLATIONS = {
448
+ "article" => "JournalArticle",
449
+ "book" => "Book",
450
+ "inbook" => "BookChapter",
451
+ "inproceedings" => nil,
452
+ "manual" => nil,
453
+ "misc" => "Other",
454
+ "phdthesis" => "Dissertation",
455
+ "proceedings" => "ConferenceProceeding",
456
+ "techreport" => "Report",
457
+ "unpublished" => nil
458
+ }
459
+
460
+ CP_TO_DC_TRANSLATIONS = {
461
+ "song" => "Audiovisual",
462
+ "post-weblog" => "Text",
463
+ "dataset" => "Dataset",
464
+ "graphic" => "Image",
465
+ "motion_picture" => "Audiovisual",
466
+ "article-journal" => "JournalArticle",
467
+ "broadcast" => "Audiovisual",
468
+ "webpage" => "Text",
469
+ "document" => "Poster",
470
+ "presentation" => "Presentation"
471
+ }
472
+
473
+ SO_TO_BIB_TRANSLATIONS = {
474
+ "Article" => "article",
475
+ "AudioObject" => "misc",
476
+ "Thesis" => "phdthesis",
477
+ "Blog" => "misc",
478
+ "BlogPosting" => "article",
479
+ "Collection" => "misc",
480
+ "CreativeWork" => "misc",
481
+ "DataCatalog" => "misc",
482
+ "Dataset" => "misc",
483
+ "Event" => "misc",
484
+ "ImageObject" => "misc",
485
+ "Movie" => "misc",
486
+ "Poster" => "misc",
487
+ "PresentationDigitalDocument" => "misc",
488
+ "PublicationIssue" => "misc",
489
+ "ScholarlyArticle" => "article",
490
+ "Service" => "misc",
491
+ "SoftwareSourceCode" => "misc",
492
+ "VideoObject" => "misc",
493
+ "WebPage" => "misc",
494
+ "WebSite" => "misc"
495
+ }
496
+
497
+ UNKNOWN_INFORMATION = {
498
+ ":unac" => "temporarily inaccessible",
499
+ ":unal" => "unallowed, suppressed intentionally",
500
+ ":unap" => "not applicable, makes no sense",
501
+ ":unas" => "value unassigned (e.g., Untitled)",
502
+ ":unav" => "value unavailable, possibly unknown",
503
+ ":unkn" => "known to be unknown (e.g., Anonymous, Inconnue)",
504
+ ":none" => "never had a value, never will",
505
+ ":null" => "explicitly and meaningfully empty",
506
+ ":tba" => "to be assigned or announced later",
507
+ ":etal" => "too numerous to list (et alia)"
508
+ }
509
+
510
+ RESOURCE_PATHS = {
511
+ spdx: 'spdx/licenses.json',
512
+ fos: 'oecd/fos-mappings.json',
513
+ for: 'oecd/for-mappings.json',
514
+ dfg: 'oecd/dfg-mappings.json'
515
+ }
516
+
517
# Absolute path of the bundled `resources` directory (three levels above
# this file), always terminated with a slash.
def resources_dir_path
  base = File.expand_path('../../../resources', __FILE__)
  "#{base}/"
end
520
+
521
# Reads and returns the content of a file below the resources directory.
# extra_path is appended verbatim to resources_dir_path.
def resource_file( extra_path )
  full_path = "#{resources_dir_path}#{extra_path}"
  File.read(full_path)
end
524
+
525
# Loads the JSON resource registered under resource_symbol in
# RESOURCE_PATHS. Returns nil for symbols that are not registered.
def resource_json( resource_symbol )
  return nil unless RESOURCE_PATHS.key?(resource_symbol)

  relative_path = RESOURCE_PATHS[resource_symbol]
  JSON.load(resource_file(relative_path))
end
530
+
531
+
532
# Picks the input format name, trying the available hints in order:
# id, then file extension (with filename taking precedence), then the
# raw string. Falls back to "datacite" when no hint is given.
def find_from_format(id: nil, string: nil, ext: nil, filename: nil)
  return find_from_format_by_id(id) if id.present?

  if ext.present?
    return find_from_format_by_filename(filename) || find_from_format_by_ext(string, ext: ext)
  end

  return find_from_format_by_string(string) if string.present?

  "datacite"
end
543
+
544
# Determines the input format from an identifier.
#
# The id is normalized first, then matched against (in order): DOI
# (format is the lower-cased registration agency, or nil when the agency
# is not one of the supported ones), ORCID, a package.json on GitHub
# ("npm"), any other GitHub URL ("codemeta"). Everything else is treated
# as "schema_org".
def find_from_format_by_id(id)
  id = normalize_id(id)

  # dots in host names and file names are escaped so they only match a
  # literal "." (e.g. "doiXorg" is no longer accepted)
  if /\A(?:(http|https):\/(\/)?(dx\.)?(doi\.org|handle\.stage\.datacite\.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(id)
    ra = get_doi_ra(id)
    %w(DataCite Crossref mEDRA KISTI JaLC OP).include?(ra) ? ra.downcase : nil
  elsif /\A(?:(http|https):\/(\/)?orcid\.org\/)?(\d{4}-\d{4}-\d{4}-\d{3}[0-9X]+)\z/.match(id)
    "orcid"
  elsif /\A(http|https):\/(\/)?github\.com\/(.+)\/package\.json\z/.match(id)
    "npm"
  elsif /\A(http|https):\/(\/)?github\.com\/(.+)\z/.match(id)
    "codemeta"
  else
    "schema_org"
  end
end
560
+
561
# Returns "npm" for a file named exactly "package.json", nil otherwise.
def find_from_format_by_filename(filename)
  filename == "package.json" ? "npm" : nil
end
566
+
567
# Determines the input format from the file extension in options[:ext],
# inspecting the content in `string` where the extension alone is
# ambiguous (.xml and .json). Returns the format name or nil.
def find_from_format_by_ext(string, options={})
  case options[:ext]
  when ".bib"
    "bibtex"
  when ".ris"
    "ris"
  when ".xml"
    if Maremma.from_xml(string).to_h.dig("crossref_result", "query_result", "body", "query", "doi_record", "crossref")
      "crossref"
    elsif Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
      "datacite"
    end
  when ".json"
    # parse once instead of once per candidate format
    json = Maremma.from_json(string).to_h
    context = json["@context"]
    # accept both spellings of the key; the string-based detector in this
    # file looks for the dasherized "schema-version"
    schema_version = json["schemaVersion"] || json["schema-version"]

    if context.to_s.start_with?("http://schema.org", "https://schema.org")
      "schema_org"
    elsif context == "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld"
      "codemeta"
    elsif schema_version.to_s.start_with?("http://datacite.org/schema/kernel")
      "datacite_json"
    elsif json["types"] && json["publication_year"].present?
      "crosscite"
    elsif json.dig("issued", "date-parts").present?
      "citeproc"
    end
  end
end
588
+
589
# Determines the input format by inspecting the content of `string`.
# XML formats are tried first, then JSON formats, then RIS and BibTeX.
# Returns the format name, or nil when nothing matches or the BibTeX
# parser rejects the input.
def find_from_format_by_string(string)
  if Maremma.from_xml(string).to_h.dig("crossref_result", "query_result", "body", "query", "doi_record", "crossref").present?
    return "crossref"
  elsif Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
    return "datacite"
  end

  # parse the JSON once instead of once per candidate format
  json = Maremma.from_json(string).to_h
  context = json["@context"]
  # accept both spellings of the key; the extension-based detector in this
  # file looks for the camelCase "schemaVersion"
  schema_version = json["schema-version"] || json["schemaVersion"]

  if context.to_s.start_with?("http://schema.org", "https://schema.org")
    "schema_org"
  elsif context == "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld"
    "codemeta"
  elsif schema_version.to_s.start_with?("http://datacite.org/schema/kernel")
    "datacite_json"
  elsif json["types"].present? && json["publication_year"].present?
    "crosscite"
  elsif json.dig("issued", "date-parts").present?
    "citeproc"
  elsif string.start_with?("TY  - ", "TY - ")
    # RIS tags are written with two spaces before the dash; the
    # single-space variant is tolerated as well
    "ris"
  elsif BibTeX.parse(string).first
    "bibtex"
  end
rescue BibTeX::ParseError
  nil
end
10
612
 
11
613
# Extracts the ORCID iD from an http(s)://orcid.org/ URL.
# Returns nil when url is not such a URL.
def orcid_from_url(url)
  # the scheme group is non-capturing; a stray ":" directly after \A used
  # to make this pattern unmatchable for any real URL
  Array(/\A(?:http|https):\/\/orcid\.org\/(.+)/.match(url)).last
end
14
616
 
15
617
# Turns an ORCID iD into its https://orcid.org/ URL; nil for blank input.
def orcid_as_url(orcid)
  return nil unless orcid.present?

  "https://orcid.org/#{orcid}"
end
18
620
 
19
621
# Extracts the ORCID iD from a bare iD or an orcid.org URL (optionally
# with a www/sandbox subdomain and a trailing slash). Whitespace used as
# group separator is replaced by hyphens. Returns nil when nothing matches.
def validate_orcid(orcid)
  pattern = /\A(?:(?:http|https):\/\/(?:(?:www|sandbox)?\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\/{0,1}\z/
  matched = pattern.match(orcid)
  identifier = matched && matched[1]
  return nil unless identifier.present?

  identifier.gsub(/[[:space:]]/, "-")
end
625
+
626
# Extracts the ROR id from a bare id or a ror.org URL (scheme and a
# trailing slash are optional). Returns nil when the input does not match.
def validate_ror(ror)
  # \A / \z anchor the whole string; ^ / $ only anchor a line, so multi-line
  # input with one valid line used to pass validation
  Array(/\A(?:(?:(?:http|https):\/\/)?ror\.org\/)?(0\w{6}\d{2})\/{0,1}\z/.match(ror)).last
end
629
+
630
# Returns "orcid.org" when orcid_scheme is an http(s) URL on orcid.org
# (optionally www.orcid.org), nil otherwise.
def validate_orcid_scheme(orcid_scheme)
  matched = /\A(http|https):\/\/(www\.)?(orcid\.org)/.match(orcid_scheme)
  matched ? matched[3] : nil
end
22
633
 
23
634
  def validate_url(str)
@@ -25,41 +636,868 @@ module Bolognese
25
636
  "DOI"
26
637
  elsif /\A(http|https):\/\//.match(str)
27
638
  "URL"
639
+ elsif /\A(ISSN|eISSN) (\d{4}-\d{3}[0-9X]+)\z/.match(str)
640
+ "ISSN"
28
641
  end
29
642
  end
30
643
 
31
- def parse_attributes(element)
32
- if element.is_a?(String)
33
- element
644
# Extracts text content from a parsed element.
#
# element - String, Hash, or Array of Hashes/values.
# options - :content names the hash key holding the text (default
#           "__content__"); :first returns only the first array entry.
#
# Strings are HTML-unescaped, but only when no :content key was requested.
# Array results are de-duplicated. Anything else yields nil.
def parse_attributes(element, options={})
  content = options[:content] || "__content__"

  case element
  when String
    options[:content].nil? ? CGI.unescapeHTML(element) : nil
  when Hash
    element.fetch(CGI.unescapeHTML(content), nil)
  when Array
    values = element.map do |entry|
      entry.is_a?(Hash) ? entry.fetch(CGI.unescapeHTML(content), nil) : entry
    end.uniq
    options[:first] ? values.first : values.unwrap
  end
end
42
658
 
43
- def parse_attribute(element)
44
- if element.is_a?(String)
45
- element
46
- elsif element.is_a?(Hash)
47
- element.fetch("text", nil)
48
- elsif element.is_a?(Array)
49
- element.first.fetch("text", nil)
659
# Normalizes an identifier: a valid DOI is returned in normalized form,
# otherwise an http(s) URL is cleaned up. Returns nil for blank input,
# for anything that is neither, and for unparsable URIs.
def normalize_id(id, options={})
  return nil if id.blank?

  # a valid DOI wins
  normalized_doi = DoiUtils::normalize_doi(id, options)
  return normalized_doi if normalized_doi.present?

  # otherwise it has to be an http(s) URI with a host
  parsed = Addressable::URI.parse(id)
  is_http = parsed && parsed.host && %w(http https).include?(parsed.scheme)
  return nil unless is_http

  normalize_uri_with_path_cleanup(parsed)
rescue Addressable::URI::InvalidURIError
  nil
end
675
+
676
# Normalizes an http, https or ftp URL. Values starting with "info" are
# returned unchanged. With options[:https] the scheme is forced to https.
# Returns nil for blank input, unsupported schemes and unparsable URIs.
def normalize_url(id, options={})
  return nil if id.blank?

  # info URIs are passed through untouched
  return id if id.to_s.start_with?("info")

  parsed = Addressable::URI.parse(id)
  supported = parsed && parsed.host && %w(http https ftp).include?(parsed.scheme)
  return nil unless supported

  parsed.scheme = "https" if options[:https]

  normalize_uri_with_path_cleanup(parsed)
rescue Addressable::URI::InvalidURIError
  nil
end
695
+
696
# Normalizes a license URL to https and maps it through
# NORMALIZED_LICENSES; URLs without an entry are returned as normalized.
def normalize_cc_url(id)
  https_url = normalize_url(id, https: true)
  NORMALIZED_LICENSES.fetch(https_url, https_url)
end
700
+
701
# Validates an ORCID iD and returns it as https://orcid.org/ URL,
# or nil when the iD is not valid.
def normalize_orcid(orcid)
  valid_orcid = validate_orcid(orcid)
  return nil if valid_orcid.blank?

  "https://orcid.org/#{Addressable::URI.encode(valid_orcid)}"
end
708
+
709
# Validates a ROR id and returns it as https://ror.org/ URL,
# or nil when the id is not valid.
def normalize_ror(ror)
  valid_ror = validate_ror(ror)
  return nil if valid_ror.blank?

  "https://ror.org/#{Addressable::URI.encode(valid_ror)}"
end
716
+
717
# Converts schema.org style references (hashes with "@id"/"@type") into
# related-identifier hashes. Entries without "@id" are skipped; DOIs are
# stored as bare DOI with type "DOI", everything else as "URL".
def normalize_ids(ids: nil, relation_type: nil)
  with_id = Array.wrap(ids).select { |entry| entry["@id"].present? }

  related = with_id.map do |entry|
    normalized = normalize_id(entry["@id"])
    doi = DoiUtils::doi_from_url(normalized)

    {
      "relatedIdentifier" => doi || normalized,
      "relationType" => relation_type,
      "relatedIdentifierType" => doi.present? ? "DOI" : "URL",
      "resourceTypeGeneral" => Metadata::SO_TO_DC_TRANSLATIONS[entry["@type"]]
    }.compact
  end

  related.unwrap
end
729
+
730
+ # pick electronic issn if there are multiple
731
+ # format issn as xxxx-xxxx
732
# Picks one ISSN from the input and formats it as xxxx-xxxx.
#
# input   - String, Hash or Array of Hashes; for arrays the entry with
#           media_type "electronic" is preferred, else the first one.
# options - :content names the hash key holding the value
#           (default "__content__").
#
# Returns the ISSN, or nil when the value is neither 8 nor 9 characters.
def normalize_issn(input, options={})
  content = options[:content] || "__content__"

  issn =
    case
    when input.blank?
      nil
    when input.is_a?(String) && options[:content].nil?
      input
    when input.is_a?(Hash)
      input.fetch(content, nil)
    when input.is_a?(Array)
      chosen = input.find { |candidate| candidate["media_type"] == "electronic" } || input.first
      chosen.fetch(content, nil)
    end

  length = issn.to_s.length
  if length == 9
    # already hyphenated
    issn
  elsif length == 8
    issn[0..3] + "-" + issn[4..7]
  end
end
755
+
756
+ # find Creative Commons or OSI license in licenses array, normalize url and name
757
# Finds a Creative Commons or OSI license in the licenses array and
# returns its normalized URL as a String.
#
# licenses - Hash or Array of Hashes with a "url" key.
#
# Returns `licenses` unchanged when no Creative Commons / OSI URL is
# present, and nil when a URL cannot be parsed.
def normalize_licenses(licenses)
  standard_licenses = Array.wrap(licenses).map { |l| URI.parse(l["url"]) }.select { |li| li.host && li.host[/(creativecommons.org|opensource.org)$/] }
  return licenses unless standard_licenses.present?

  # The code below used to reference an undefined local `uri` and raised
  # NameError as soon as a standard license was found.
  # NOTE(review): normalizing the first standard license is the reading
  # that matches the single-URL return value – confirm against callers.
  uri = standard_licenses.first

  # use HTTPS
  uri.scheme = "https"

  # use host name without subdomain
  uri.host = Array(/(creativecommons.org|opensource.org)/.match uri.host).last

  # normalize URLs
  if uri.host == "creativecommons.org"
    uri.path = uri.path.split('/')[0..-2].join("/") if uri.path.split('/').last == "legalcode"
    uri.path = uri.path + '/' unless uri.path.end_with?('/')
  else
    uri.path = uri.path.gsub(/(-license|\.php|\.html)/, '')
    uri.path = uri.path.sub(/(mit|afl|apl|osl|gpl|ecl)/) { |match| match.upcase }
    uri.path = uri.path.sub(/(artistic|apache)/) { |match| match.titleize }
    uri.path = uri.path.sub(/([^0-9\-]+)(-)?([1-9])?(\.)?([0-9])?$/) do
      m = Regexp.last_match
      text = m[1]

      if m[3].present?
        # always emit major.minor, defaulting the minor version to 0
        version = [m[3], m[5].presence || "0"].join(".")
        [text, version].join("-")
      else
        text
      end
    end
  end

  uri.to_s
rescue URI::InvalidURIError
  nil
end
792
+
793
# Returns the publisher as a hash: hash-like input is passed through,
# string-like input is wrapped as { "name" => publisher }, anything else
# yields nil.
def normalize_publisher(publisher)
  return publisher if publisher.respond_to?(:to_hash)
  return { "name" => publisher } if publisher.respond_to?(:to_str)

  nil
end
800
+
801
# Returns copies of the given hash(es) with every key dasherized.
# With options[:first] the result is unwrapped, otherwise an array is
# returned (nil when empty).
def to_datacite_json(element, options={})
  converted = Array.wrap(element).map do |entry|
    entry.each_with_object({}) { |(key, value), out| out[key.dasherize] = value }
  end

  return converted.unwrap if options[:first]

  converted.presence
end
807
+
808
# Returns an array with copies of the given hash(es) in which every key
# has been underscored.
def from_datacite_json(element)
  Array.wrap(element).map do |entry|
    entry.each_with_object({}) { |(key, value), out| out[key.underscore] = value }
  end
end
813
+
814
# Renames the keys "type", "id" and "title" to their schema.org
# counterparts "@type", "@id" and "name".
def to_schema_org(element)
  map_hash_keys(element: element,
                mapping: { "type" => "@type", "id" => "@id", "title" => "name" })
end
819
+
820
# Converts creator hashes into schema.org Person/Organization hashes.
#
# Affiliations become Organization hashes, "nameType" is turned into
# "@type" (its last two characters are dropped), the first name identifier
# becomes "@id" and "name" is rebuilt from given and family name when a
# family name is present.
#
# The input hashes are not modified: like to_schema_org_contributors this
# method works on a shallow copy instead of writing into the caller's data.
def to_schema_org_creators(element)
  Array.wrap(element).map do |c|
    transformed_c = c.dup
    transformed_c["affiliation"] = Array.wrap(c["affiliation"]).map do |a|
      if a.is_a?(String)
        name = a
        affiliation_identifier = nil
      else
        name = a["name"]
        affiliation_identifier = a["affiliationIdentifier"]
      end

      {
        "@type" => "Organization",
        "@id" => affiliation_identifier,
        "name" => name }.compact
    end.unwrap
    # nameType with its last two characters removed, e.g. "Personal" -> "Person"
    transformed_c["@type"] = c["nameType"].present? ? c["nameType"][0..-3] : nil
    transformed_c["@id"] = Array.wrap(c["nameIdentifiers"]).first.to_h.fetch("nameIdentifier", nil)
    transformed_c["name"] = c["familyName"].present? ? [c["givenName"], c["familyName"]].join(" ") : c["name"]
    transformed_c.except("nameIdentifiers", "nameType").compact
  end.unwrap
end
842
+
843
# Converts contributor hashes into schema.org Person/Organization hashes.
# Works on a shallow copy of every contributor, so the input is untouched.
def to_schema_org_contributors(element)
  converted = Array.wrap(element).map do |c|
    organizations = Array.wrap(c["affiliation"]).map do |a|
      org_name, org_id = a.is_a?(String) ? [a, nil] : [a["name"], a["affiliationIdentifier"]]

      { "@type" => "Organization", "@id" => org_id, "name" => org_name }.compact
    end

    full_name = if c["familyName"].present?
      [c["givenName"], c["familyName"]].join(" ")
    else
      c["name"]
    end

    contributor = c.dup
    contributor["affiliation"] = organizations.unwrap
    # nameType with its last two characters removed, e.g. "Personal" -> "Person"
    contributor["@type"] = c["nameType"].present? ? c["nameType"][0..-3] : nil
    contributor["@id"] = Array.wrap(c["nameIdentifiers"]).first.to_h.fetch("nameIdentifier", nil)
    contributor["name"] = full_name
    contributor.except("nameIdentifiers", "nameType").compact
  end

  converted.unwrap
end
866
+
867
# Builds the schema.org container (Periodical, or DataCatalog for
# datasets) from a container hash.
#
# element - container Hash, or nil when only a title is known.
# options - :container_title fallback name, :type resource type.
#
# Returns nil unless element is a Hash, or is nil with a
# :container_title given.
def to_schema_org_container(element, options={})
  return nil unless (element.is_a?(Hash) || (element.nil? && options[:container_title].present?))

  # the guard lets a nil element through when a container title is given;
  # indexing nil used to raise NoMethodError in that case
  element ||= {}

  {
    "@id" => element["identifier"],
    "@type" => (options[:type] == "Dataset") ? "DataCatalog" : "Periodical",
    "name" => element["title"] || options[:container_title] }.compact
end
875
+
876
# Converts identifier hashes ("identifierType"/"identifier") into
# schema.org PropertyValue hashes.
def to_schema_org_identifiers(element, options={})
  property_values = Array.wrap(element).map do |identifier|
    {
      "@type" => "PropertyValue",
      "propertyID" => identifier["identifierType"],
      "value" => identifier["identifier"]
    }
  end

  property_values.unwrap
end
884
+
885
# Converts the related identifiers with the given relation type into
# schema.org hashes. "References" also selects "Cites" and "Documents".
# ISSNs with relation IsPartOf become a Periodical; everything else
# becomes a typed reference (CreativeWork when the type is unknown).
def to_schema_org_relation(related_identifiers: nil, relation_type: nil)
  return nil if related_identifiers.blank? || relation_type.blank?

  accepted_types = relation_type == "References" ? ["References", "Cites", "Documents"] : [relation_type]
  selected = Array.wrap(related_identifiers).select { |ri| accepted_types.include?(ri["relationType"]) }

  relations = selected.map do |r|
    issn_container = r["relatedIdentifierType"] == "ISSN" && r["relationType"] == "IsPartOf"

    if issn_container
      { "@type" => "Periodical", "issn" => r["relatedIdentifier"] }.compact
    else
      { "@id" => normalize_id(r["relatedIdentifier"]),
        "@type" => DC_TO_SO_TRANSLATIONS[r["resourceTypeGeneral"]] || "CreativeWork" }.compact
    end
  end

  relations.unwrap
end
902
+
903
# Converts funding references into schema.org Organization hashes.
# Returns nil when there are no funding references.
def to_schema_org_funder(funding_references)
  return nil if funding_references.blank?

  funders = Array.wrap(funding_references).map do |reference|
    { "@id" => reference["funderIdentifier"],
      "@type" => "Organization",
      "name" => reference["funderName"] }.compact
  end

  funders.unwrap
end
913
+
914
# Converts geoLocation hashes into schema.org Place hashes.
#
# Every geo location can contribute several places: one per point, box
# and polygon, or a single address-only place when just a place name is
# given. Returns nil when geo_location is blank.
def to_schema_org_spatial_coverage(geo_location)
  return nil if geo_location.blank?

  places = Array.wrap(geo_location).each_with_object([]) do |gl, collected|
    point = gl.fetch("geoLocationPoint", nil)
    box = gl.fetch("geoLocationBox", nil)
    polygon = gl.fetch("geoLocationPolygon", nil)
    place_name = gl.fetch("geoLocationPlace", nil)

    if point
      coordinates = {
        "@type" => "GeoCoordinates",
        "address" => gl["geoLocationPlace"],
        "latitude" => gl.dig("geoLocationPoint", "pointLatitude"),
        "longitude" => gl.dig("geoLocationPoint", "pointLongitude")
      }
      collected << { "@type" => "Place", "geo" => coordinates }.compact
    end

    if box
      # south west north east, separated by spaces
      bounds = %w(southBoundLatitude westBoundLongitude northBoundLatitude eastBoundLongitude).map do |edge|
        gl.dig("geoLocationBox", edge)
      end
      shape = {
        "@type" => "GeoShape",
        "address" => gl["geoLocationPlace"],
        "box" => bounds.compact.join(" ").presence
      }.compact
      collected << { "@type" => "Place", "geo" => shape }.compact
    end

    if polygon
      rings = Array.wrap(gl.dig("geoLocationPolygon")).map do |ring|
        vertices = Array.wrap(ring).map do |vertex|
          next nil unless vertex.dig("polygonPoint")

          # longitude first, then latitude
          [vertex.dig("polygonPoint", "pointLongitude"), vertex.dig("polygonPoint", "pointLatitude")].compact
        end
        vertices.compact.presence
      end
      collected << {
        "@type" => "Place",
        "geo" => {
          "@type" => "GeoShape",
          "address" => gl["geoLocationPlace"],
          "polygon" => rings.compact.presence
        }
      }
    end

    if place_name && !point && !box && !polygon
      collected << {
        "@type" => "Place",
        "geo" => { "@type" => "GeoCoordinates", "address" => gl["geoLocationPlace"] }
      }.compact
    end
  end

  places.unwrap
end
971
+
972
# Renames the schema.org keys "@type" and "@id" to "type" and "id".
def from_schema_org(element)
  map_hash_keys(element: element, mapping: { "@type" => "type", "@id" => "id" })
end
977
+
978
# Converts schema.org creators into hashes carrying "creatorName",
# "nameIdentifier" and "affiliation".
#
# A plain-string affiliation is wrapped as { "name" => ... }; the
# affiliation identifier scheme (ROR or ISNI) is derived from the prefix
# of the affiliation's "@id". The given hashes are modified in place.
def from_schema_org_creators(element)
  Array.wrap(element).map do |c|
    c["affiliation"] = { "name" => c["affiliation"] } if c["affiliation"].is_a?(String)

    affiliation_id = c.dig("affiliation", "@id").to_s
    affiliation_identifier_scheme, scheme_uri =
      if affiliation_id.starts_with?("https://ror.org")
        ["ROR", "https://ror.org/"]
      elsif affiliation_id.starts_with?("https://isni.org")
        ["ISNI", "https://isni.org/isni/"]
      else
        [nil, nil]
      end

    if normalize_orcid(c["@id"])
      c["nameIdentifier"] = [{ "__content__" => c["@id"], "nameIdentifierScheme" => "ORCID", "schemeUri" => "https://orcid.org" }]
    end
    # several types: keep the first that is Person or Organization
    c["@type"] = c["@type"].find { |t| %w(Person Organization).include?(t) } if c["@type"].is_a?(Array)

    name_type = c["@type"].present? ? c["@type"].titleize + "al" : nil
    c["creatorName"] = { "nameType" => name_type, "__content__" => c["name"] }.compact
    c["affiliation"] = {
      "__content__" => c.dig("affiliation", "name"),
      "affiliationIdentifier" => c.dig("affiliation", "@id"),
      "affiliationIdentifierScheme" => affiliation_identifier_scheme,
      "schemeUri" => scheme_uri
    }.compact.presence
    c.except("@id", "@type", "name").compact
  end
end
1002
+
1003
# Converts schema.org contributors into hashes carrying "contributorName",
# "nameIdentifier" and "affiliation".
#
# A plain-string affiliation is wrapped as { "name" => ... }; the
# affiliation identifier scheme (ROR or ISNI) is derived from the prefix
# of the affiliation's "@id". The given hashes are modified in place.
def from_schema_org_contributors(element)
  element = Array.wrap(element).map do |c|
    if c["affiliation"].is_a?(String)
      c["affiliation"] = { "name" => c["affiliation"] }
      affiliation_identifier_scheme = nil
      scheme_uri = nil
    elsif c.dig("affiliation", "@id").to_s.starts_with?("https://ror.org")
      affiliation_identifier_scheme = "ROR"
      scheme_uri = "https://ror.org/"
    elsif c.dig("affiliation", "@id").to_s.starts_with?("https://isni.org")
      affiliation_identifier_scheme = "ISNI"
      scheme_uri = "https://isni.org/isni/"
    else
      affiliation_identifier_scheme = nil
      scheme_uri = nil
    end

    c["nameIdentifier"] = [{ "__content__" => c["@id"], "nameIdentifierScheme" => "ORCID", "schemeUri" => "https://orcid.org" }] if normalize_orcid(c["@id"])
    # "@type" may be an array of types; pick Person/Organization exactly as
    # from_schema_org_creators does, otherwise titleize below raises on Array
    c["@type"] = c["@type"].find { |t| %w(Person Organization).include?(t) } if c["@type"].is_a?(Array)
    c["contributorName"] = { "nameType" => c["@type"].present? ? c["@type"].titleize + "al" : nil, "__content__" => c["name"] }.compact
    c["affiliation"] = { "__content__" => c.dig("affiliation", "name"), "affiliationIdentifier" => c.dig("affiliation", "@id"), "affiliationIdentifierScheme" => affiliation_identifier_scheme, "schemeUri" => scheme_uri }.compact.presence
    c.except("@id", "@type", "name").compact
  end
end
1026
+
1027
# Returns copies of the given hash(es) with keys renamed according to
# mapping; keys without an entry are kept. Nested hashes are converted
# recursively with the same mapping.
#
# element - Hash or Array of Hashes.
# mapping - Hash of old key => new key.
def map_hash_keys(element: nil, mapping: nil)
  Array.wrap(element).map do |a|
    a.map {|k, v| [mapping.fetch(k, k), v] }.reduce({}) do |hsh, (k, v)|
      # recurse with the caller's mapping; this used to call to_schema_org
      # unconditionally, which applied the wrong mapping to nested hashes
      # when converting from schema.org
      hsh[k] = v.is_a?(Hash) ? map_hash_keys(element: v, mapping: mapping) : v
      hsh
    end
  end.unwrap
end
1040
+
1041
# Converts a related-identifier hash into a schema.org PropertyValue hash.
def to_identifier(identifier)
  property_id = identifier["relatedIdentifierType"]
  value = identifier["relatedIdentifier"]

  { "@type" => "PropertyValue", "propertyID" => property_id, "value" => value }
end
1047
+
1048
# Converts citeproc name hashes (given/family/literal) into
# Person/Organization hashes. An entry with "literal" is treated as an
# organization, everything else as a person. The given hashes are
# modified in place.
def from_citeproc(element)
  converted = Array.wrap(element).map do |author|
    organization = author["literal"].present?

    author["@type"] = organization ? "Organization" : "Person"
    if organization
      author["creatorName"] = author["literal"]
    else
      author["name"] = [author["given"], author["family"]].compact.join(" ")
    end

    author["givenName"] = author["given"]
    author["familyName"] = author["family"]
    author.except("given", "family", "literal").compact
  end

  converted.unwrap
end
1062
+
1063
# Converts creator/contributor hashes into citeproc name hashes
# (family/given, or literal when there is no family name). Returns nil
# for empty input. The given hashes are modified in place.
def to_citeproc(element)
  dropped_keys = %w(nameType type @type id @id name familyName givenName affiliation nameIdentifiers contributorType)

  names = Array.wrap(element).map do |author|
    author["family"] = author["familyName"]
    author["given"] = author["givenName"]
    author["literal"] = author["name"] if author["familyName"].blank?
    author.except(*dropped_keys).compact
  end

  names.presence
end
1071
+
1072
# Formats names for RIS: "Family, Given" when a family name is present,
# otherwise the plain name.
def to_ris(element)
  names = Array.wrap(element).map do |author|
    author["familyName"].present? ? [author["familyName"], author["givenName"]].join(", ") : author["name"]
  end

  names.unwrap
end
1081
+
1082
# Removes all HTML except a whitelist of tags from text.
#
# text    - String, Hash (text is read from options[:content], default
#           "__content__") or Array of Strings/Hashes.
# options - :tags     Set of allowed tags (defaults to basic inline markup)
#           :content  hash key that holds the text
#           :new_line only collapse runs of spaces/tabs and strip the ends,
#                     instead of squishing the whole string
#           :first    return only the first entry of an array
#
# Returns the sanitized String (or Array/first entry for arrays), nil for
# any other input.
def sanitize(text, options={})
  options[:tags] ||= Set.new(%w(strong em b i code pre sub sup br))
  content = options[:content] || "__content__"

  # Options handed down when recursing into Hash/Array values. Only
  # :new_line used to be forwarded, so a caller-supplied tag whitelist (and
  # any other scrubber option) was silently replaced by the defaults.
  nested_options = options.reject { |key, _| [:content, :first].include?(key) }

  if text.is_a?(String)
    custom_scrubber = Bolognese::WhitelistScrubber.new(options)

    if options[:new_line]
      # Remove multiple spaces, tabs, and other whitespace characters while preserving single spaces and new lines
      Loofah.scrub_fragment(text, custom_scrubber).to_s.gsub(/[ \t]+/, ' ').strip
    else
      Loofah.scrub_fragment(text, custom_scrubber).to_s.squish
    end
  elsif text.is_a?(Hash)
    sanitize(text.fetch(content, nil), nested_options.dup)
  elsif text.is_a?(Array)
    a = text.map { |e| e.is_a?(Hash) ? sanitize(e.fetch(content, nil), nested_options.dup) : sanitize(e, nested_options.dup) }.uniq
    options[:first] ? a.first : a.unwrap
  else
    nil
  end
end
54
1103
 
55
- def normalize_url(url)
56
- return nil unless url.present?
1104
# Split a https://github.com/... URL into its parts.
#
# Returns a hash with whichever of :owner, :repo, :release and :path are
# present in the URL path (segments 0, 1, 3 and 4.. respectively; segment 2
# -- e.g. "tree" or "blob" -- is skipped). Returns {} when +url+ is not a
# https://github.com URL.
#
# :path is only set when there is at least one segment after the release.
# The previous guard (+words.length > 3+) produced an empty-string path for
# URLs that end at the release, e.g. .../owner/repo/tree/master.
def github_from_url(url)
  return {} unless /\Ahttps:\/\/github\.com\/(.+)(?:\/)?(.+)?(?:\/tree\/)?(.*)\z/.match(url)

  words = URI.parse(url).path[1..-1].split('/')
  path = words.length > 4 ? words[4..-1].join("/") : nil

  { owner: words[0],
    repo: words[1],
    release: words[3],
    path: path }.compact
end
1114
+
1115
# Repository name parsed from a GitHub URL by github_from_url, or nil.
def github_repo_from_url(url)
  parts = github_from_url(url)
  parts[:repo]
end
1118
+
1119
# Release segment parsed from a GitHub URL by github_from_url, or nil.
def github_release_from_url(url)
  parts = github_from_url(url)
  parts[:release]
end
1122
+
1123
# Owner segment parsed from a GitHub URL by github_from_url, or nil.
def github_owner_from_url(url)
  parts = github_from_url(url)
  parts[:owner]
end
1126
+
1127
# Canonical https://github.com/<owner> URL for +url+, or nil when
# github_from_url finds no owner.
def github_as_owner_url(url)
  owner = github_from_url(url)[:owner]
  return nil unless owner.present?

  "https://github.com/#{owner}"
end
1131
+
1132
# Canonical https://github.com/<owner>/<repo> URL for +url+, or nil when
# github_from_url finds no repository.
def github_as_repo_url(url)
  parts = github_from_url(url)
  return nil unless parts[:repo].present?

  "https://github.com/#{parts[:owner]}/#{parts[:repo]}"
end
1136
+
1137
# Canonical https://github.com/<owner>/<repo>/tree/<release> URL for +url+,
# or nil when github_from_url finds no release.
def github_as_release_url(url)
  parts = github_from_url(url)
  return nil unless parts[:release].present?

  "https://github.com/#{parts[:owner]}/#{parts[:repo]}/tree/#{parts[:release]}"
end
1141
+
1142
# raw.githubusercontent.com URL of the codemeta.json for a GitHub URL.
#
# When the URL already points at a codemeta.json file, that file is used
# (at its own release and path). Otherwise, when an owner could be parsed,
# the codemeta.json on the master branch is assumed. Returns nil when
# neither applies.
def github_as_codemeta_url(url)
  parts = github_from_url(url)
  points_at_codemeta = parts[:path].to_s.end_with?("codemeta.json")

  return "https://raw.githubusercontent.com/#{parts[:owner]}/#{parts[:repo]}/#{parts[:release]}/#{parts[:path]}" if points_at_codemeta
  return nil unless parts[:owner].present?

  "https://raw.githubusercontent.com/#{parts[:owner]}/#{parts[:repo]}/master/codemeta.json"
end
1151
+
1152
# Turn the leading "YYYY-MM-DD" of +iso8601_time+ into a citeproc-style
# { 'date-parts' => [[year, month, day]] } hash. Components that are
# missing or zero are left out; nil input yields an empty inner array.
# Returns nil if a TypeError is raised while slicing the input.
def get_date_parts(iso8601_time)
  return { 'date-parts' => [[]] } if iso8601_time.nil?

  components = [0..3, 5..6, 8..9].map { |span| iso8601_time[span].to_i }
  { 'date-parts' => [components.reject(&:zero?)] }
rescue TypeError
  nil
end
1162
+
1163
# Convert a citeproc-style { "date-parts" => [[year, month, day]] } hash
# into a date string via get_date_from_parts.
#
# Returns nil when +date_as_parts+ is nil or has no (or an empty/nil)
# "date-parts" list; previously these inputs raised NoMethodError on nil.
def get_date_from_date_parts(date_as_parts)
  date_parts = Array(date_as_parts.to_h["date-parts"]).first
  return nil if date_parts.nil?

  year, month, day = date_parts[0], date_parts[1], date_parts[2]
  get_date_from_parts(year, month, day)
end
1168
+
1169
# Join year, month and day into a zero-padded "YYYY-MM-DD" string.
# Components whose padded form is "00" (e.g. a nil month or day) are
# dropped, so a bare year gives "YYYY" and year plus month "YYYY-MM".
def get_date_from_parts(year, month = nil, day = nil)
  padded = [[year, 4], [month, 2], [day, 2]].map do |value, width|
    value.to_s.rjust(width, '0')
  end

  padded.reject { |part| part == "00" }.join("-")
end
1172
+
1173
# Build a { 'date-parts' => [[year, month, day]] } hash from separate
# components. Each component is converted with to_i; zero values (which
# includes nil) are left out.
def get_date_parts_from_parts(year, month = nil, day = nil)
  numbers = [year, month, day].map(&:to_i)
  { 'date-parts' => [numbers.reject(&:zero?)] }
end
1176
+
1177
# First ten characters of +iso8601_time+ (the "YYYY-MM-DD" part of a full
# timestamp), or nil for nil input.
def get_iso8601_date(iso8601_time)
  iso8601_time.nil? ? nil : iso8601_time[0..9]
end
1182
+
1183
# [year, month] as integers taken from the leading "YYYY-MM" of
# +iso8601_time+; zero/missing components are left out. nil gives [].
def get_year_month(iso8601_time)
  return [] if iso8601_time.nil?

  [0..3, 5..6].map { |span| iso8601_time[span].to_i }.reject(&:zero?)
end
1191
+
1192
# [year, month, day] as integers taken from the leading "YYYY-MM-DD" of
# +iso8601_time+; zero/missing components are left out. nil gives [].
def get_year_month_day(iso8601_time)
  return [] if iso8601_time.nil?

  [0..3, 5..6, 8..9].map { |span| iso8601_time[span].to_i }.reject(&:zero?)
end
1201
+
1202
+ # parsing of incomplete iso8601 timestamps such as 2015-04 is broken
1203
+ # in standard library
1204
+ # return nil if invalid iso8601 timestamp
1205
# Parse +iso8601_time+ with ISO8601::DateTime and return it as a UTC Time.
# Any StandardError raised while parsing or converting results in nil.
def get_datetime_from_iso8601(iso8601_time)
  begin
    parsed = ISO8601::DateTime.new(iso8601_time)
    parsed.to_time.utc
  rescue StandardError
    nil
  end
end
1210
+
1211
+ # iso8601 datetime without hyphens and colons, used by Crossref
1212
+ # return nil if invalid
1213
# Reformat a compact "YYYYMMDDHHMMSS" timestamp as
# "YYYY-MM-DDTHH:MM:SSZ". Returns nil when the input (after to_s) does not
# parse in that format.
def get_datetime_from_time(time)
  parsed = DateTime.strptime(time.to_s, "%Y%m%d%H%M%S")
  parsed.strftime('%Y-%m-%dT%H:%M:%SZ')
rescue ArgumentError
  nil
end
1218
+
1219
# "date" value of the first entry in +dates+ whose "dateType" equals
# +date_type+, or nil when there is no such entry.
def get_date(dates, date_type)
  match = Array.wrap(dates).find { |entry| entry["dateType"] == date_type }
  match.nil? ? nil : match.fetch("date", nil)
end
1223
+
1224
# All contributors whose "contributorType" equals +contributor_type+.
#
# +contributor+ is wrapped with Array.wrap (as get_date and get_identifier
# do with their inputs), so nil yields [] and a single contributor hash is
# treated as a one-element list instead of raising.
def get_contributor(contributor, contributor_type)
  Array.wrap(contributor).select { |c| c["contributorType"] == contributor_type }
end
1227
+
1228
# "identifier" value of the first entry in +identifiers+ whose
# "identifierType" equals +identifier_type+, or nil when none matches.
def get_identifier(identifiers, identifier_type)
  match = Array.wrap(identifiers).find { |entry| entry["identifierType"] == identifier_type }
  match.nil? ? nil : match.fetch("identifier", nil)
end
1232
+
1233
# Map an identifier type name to the spelling listed in the table below.
# The lookup is case-insensitive; names that are not in the table are
# returned unchanged, and blank input gives nil.
def get_identifier_type(identifier_type)
  return nil unless identifier_type.present?

  canonical_names = {
    "ark" => "ARK",
    "arxiv" => "arXiv",
    "bibcode" => "bibcode",
    "doi" => "DOI",
    "ean13" => "EAN13",
    "eissn" => "EISSN",
    "handle" => "Handle",
    "igsn" => "IGSN",
    "isbn" => "ISBN",
    "issn" => "ISSN",
    "istc" => "ISTC",
    "lissn" => "LISSN",
    "lsid" => "LSID",
    "pmid" => "PMID",
    "purl" => "PURL",
    "upc" => "UPC",
    "url" => "URL",
    "urn" => "URN",
    "md5" => "md5",
    "minid" => "minid",
    "dataguid" => "dataguid",
    "cstr" => "CSTR",
    "rrid" => "RRID"
  }

  canonical_names.fetch(identifier_type.downcase, identifier_type)
end
1264
+
1265
# Parse a comma-separated series-information string into its parts.
#
# The first segment is the title. With more than two segments, the second
# one is read as "volume(issue)". With more than one segment, the last one
# is read as "firstPage-lastPage". Keys without a value are omitted; blank
# input gives {}.
def get_series_information(str)
  return {} unless str.present?

  segments = str.split(",").map(&:strip)

  volume = nil
  issue = nil
  if segments.length > 2
    # rpartition splits around the last "(...)" group: [before, match, after]
    before, parenthesized, after = segments[1].rpartition(/\(([^)]+)\)/)
    volume = before.presence || after.presence
    issue = parenthesized[1...-1].presence
  end

  pages = segments.length > 1 ? segments.last : nil
  page_bounds = pages.present? ? pages.split("-").map(&:strip) : []

  {
    "title" => segments.first,
    "volume" => volume,
    "issue" => issue,
    "firstPage" => page_bounds[0],
    "lastPage" => page_bounds[1] }.compact
end
1285
+
1286
# Run JsonLint's data check on +json+ and return the collected errors as
# an array. Blank input returns a single "No JSON provided" message.
def jsonlint(json)
  return ["No JSON provided"] unless json.present?

  [].tap do |errors|
    # check_data is not public on the linter, hence the send
    JsonLint::Linter.new.send(:check_data, json, errors)
  end
end
1294
+
1295
# Look up a license in the SPDX list by name, by licenseId, or by URL
# (+name+ passed through normalize_cc_url and compared with the license's
# first "seeAlso" entry).
#
# Returns the rights hash for the matching SPDX license, or
# { "rights" => name } when nothing matches.
def name_to_spdx(name)
  spdx = resource_json(:spdx).fetch("licenses")

  # The normalized URL does not depend on the license being inspected, so
  # compute it once instead of once per SPDX entry.
  normalized_url = normalize_cc_url(name)
  license = spdx.find { |l| l["name"] == name || l["licenseId"] == name || l["seeAlso"].first == normalized_url }

  if license
    {
      "rights" => license["name"],
      "rightsUri" => license["seeAlso"].first,
      "rightsIdentifier" => license["licenseId"].downcase,
      "rightsIdentifierScheme" => "SPDX",
      "schemeUri" => "https://spdx.org/licenses/" }.compact
  else
    { "rights" => name }
  end
end
1310
+
1311
# Normalize a rights hash against the SPDX license list.
#
# A license matches when its licenseId equals hsh["rightsIdentifier"]
# (case-insensitive), its first "seeAlso" URL equals the normalized
# hsh["rightsURI"] or hsh["rights"], or its name equals hsh["rights"].
#
# Returns the SPDX-based rights hash (keeping hsh["lang"]) on a match;
# otherwise the values from +hsh+ itself, with the identifier downcased.
# nil values are dropped in both cases.
def hsh_to_spdx(hsh)
  spdx = resource_json(:spdx).fetch("licenses")

  # These normalized URLs are the same for every license in the list, so
  # compute them once instead of once per SPDX entry.
  normalized_rights_uri = normalize_cc_url(hsh["rightsURI"])
  normalized_rights = normalize_cc_url(hsh["rights"])

  license = spdx.find do |l|
    l["licenseId"].casecmp?(hsh["rightsIdentifier"]) ||
      l["seeAlso"].first == normalized_rights_uri ||
      l["name"] == hsh["rights"] ||
      l["seeAlso"].first == normalized_rights
  end

  if license
    {
      "rights" => license["name"],
      "rightsUri" => license["seeAlso"].first,
      "rightsIdentifier" => license["licenseId"].downcase,
      "rightsIdentifierScheme" => "SPDX",
      "schemeUri" => "https://spdx.org/licenses/",
      "lang" => hsh["lang"] }.compact
  else
    {
      "rights" => hsh["__content__"] || hsh["rights"],
      "rightsUri" => hsh["rightsURI"] || hsh["rightsUri"],
      "rightsIdentifier" => hsh["rightsIdentifier"].present? ? hsh["rightsIdentifier"].downcase : nil,
      "rightsIdentifierScheme" => hsh["rightsIdentifierScheme"],
      "schemeUri" => hsh["schemeUri"],
      "lang" => hsh["lang"] }.compact
  end
end
60
1333
 
61
- def normalize_ids(list)
62
- Array.wrap(list).map { |url| url.merge("@id" => normalize_url(url["@id"])) }
1334
# Map a subject name to Fields of Science and Technology (FOS).
#
# The name is looked up first in the FOS list (with or without a "FOS: "
# prefix) and, failing that, by label in the Fields of Research fields and
# disciplines, whose entries carry the corresponding "fosLabel".
#
# Returns [{ "subject" => name }] plus, on a match, a second entry with the
# "FOS: "-prefixed label and the FOS scheme information.
def name_to_fos(name)
  fos_fields = resource_json(:fos).fetch("fosFields")
  match = fos_fields.find do |field|
    field["fosLabel"] == name || "FOS: " + field["fosLabel"] == name
  end

  if match.nil?
    # Not a FOS label: fall back to the Fields of Research
    # (Australian and New Zealand Standard Research Classification).
    for_data = resource_json(:for)
    fields = for_data.fetch("forFields")
    disciplines = for_data.fetch("forDisciplines")

    match = fields.find { |field| field["forLabel"] == name } ||
            disciplines.find { |field| field["forLabel"] == name }
  end

  original_subject = { "subject" => sanitize(name) }
  return [original_subject] unless match

  [original_subject,
   {
     "subject" => "FOS: " + match["fosLabel"],
     "subjectScheme" => "Fields of Science and Technology (FOS)",
     "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
   }]
end
1371
+
1372
# Map a subject hash to Fields of Science and Technology (FOS).
#
# The subject text (hsh["__content__"] or hsh["subject"]) is looked up
# first in the FOS list. If that fails it is looked up in the Fields of
# Research (FOR) data: by forId when hsh["subjectScheme"] is "FOR",
# otherwise by label.
#
# Returns an array whose first element is the (compacted) subject built
# from +hsh+; on a match a second element with the "FOS: "-prefixed label
# and the FOS scheme information is appended.
def hsh_to_fos(hsh)
  # first find subject in Fields of Science (OECD)
  fos = resource_json(:fos).fetch("fosFields")
  subject = fos.find { |l| l["fosLabel"] == hsh["__content__"] || "FOS: " + l["fosLabel"] == hsh["__content__"] || l["fosLabel"] == hsh["subject"]}

  if subject
    return [{
      "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
      "subjectScheme" => hsh["subjectScheme"],
      "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
      "valueUri" => hsh["valueURI"] || hsh["valueUri"],
      "classificationCode" => hsh["classificationCode"],
      "lang" => hsh["lang"] }.compact,
    {
      "subject" => "FOS: " + subject["fosLabel"],
      "subjectScheme" => "Fields of Science and Technology (FOS)",
      "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
  end

  # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
  # and map to Fields of Science. Add an extra entry for the latter
  fores = resource_json(:for)
  for_fields = fores.fetch("forFields")
  for_disciplines = fores.fetch("forDisciplines")

  # try to extract forId
  if hsh["subjectScheme"] == "FOR"
    for_id = hsh["__content__"].to_s.split(" ").first || hsh["subject"].to_s.split(" ").first

    # Both subject texts can be blank, leaving no id to look up; in that
    # case subject stays nil and only the plain subject is returned below
    # (this used to raise NoMethodError on nil.rjust).
    if for_id
      for_id = for_id.rjust(6, "0")

      subject = for_fields.find { |l| l["forId"] == for_id } ||
                for_disciplines.find { |l| l["forId"] == for_id[0..3] }
    end
  else
    subject = for_fields.find { |l| l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"] } ||
              for_disciplines.find { |l| l["forLabel"] == hsh["__content__"] || l["forLabel"] == hsh["subject"] }
  end

  if subject
    [{
      "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
      "subjectScheme" => hsh["subjectScheme"],
      "classificationCode" => hsh["classificationCode"],
      "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
      "valueUri" => hsh["valueURI"] || hsh["valueUri"],
      "lang" => hsh["lang"] }.compact,
    {
      "subject" => "FOS: " + subject["fosLabel"],
      "subjectScheme" => "Fields of Science and Technology (FOS)",
      "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
    }]
  else
    [{
      "subject" => sanitize(hsh["__content__"] || hsh["subject"]),
      "subjectScheme" => hsh["subjectScheme"],
      "classificationCode" => hsh["classificationCode"],
      "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
      "valueUri" => hsh["valueURI"] || hsh["valueUri"],
      "lang" => hsh["lang"] }.compact]
  end
end
1432
+
1433
# Translate one or more DFG ids into FOS subject hashes, using the
# "fosId"/"fosLabel" stored on each matching entry of the DFG field list.
# Results follow the order of the DFG field list.
def dfg_ids_to_fos(dfg_ids)
  dfg_fields = resource_json(:dfg).fetch("dfgFields")
  wanted_ids = Array.wrap(dfg_ids)

  dfg_fields.each_with_object([]) do |field, subjects|
    next unless wanted_ids.include?(field["dfgId"])

    subjects << {
      "classificationCode" => field["fosId"],
      "subject" => field["fosLabel"],
      "subjectScheme" => "Fields of Science and Technology (FOS)",
      "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
    }
  end
end
1447
+
1448
# First entry of +descriptions+ whose "descriptionType" is "Abstract",
# or nil when there is none.
def abstract_description
  all_descriptions = Array.wrap(descriptions)
  all_descriptions.detect { |entry| entry["descriptionType"] == "Abstract" }
end
1452
+
1453
# Build the container hash (journal, series, repository ...) for a record.
#
# The container type is "DataRepository" when types["resourceTypeGeneral"]
# is "Dataset" and "Series" otherwise. Data comes from the first related
# item with relationType "IsPublishedIn"; if there is none, from the
# "SeriesInformation" description combined with the first "IsPartOf"
# related identifier. Returns nil when neither source provides a container.
def generate_container(types, related_items, related_identifiers, descriptions)
  resource_type_general = types.respond_to?(:dig) ? types.dig("resourceTypeGeneral") : nil
  container_type = resource_type_general == "Dataset" ? "DataRepository" : "Series"

  # relatedItem container
  published_in = Array.wrap(related_items).find { |item| item["relationType"] == "IsPublishedIn" }.to_h

  if published_in.present?
    first_title = published_in.dig("titles", 0)
    chapter_number = published_in["numberType"] == "Chapter" ? published_in["number"] : nil

    return {
      "type" => container_type,
      "identifier" => published_in.dig("relatedItemIdentifier", "relatedItemIdentifier"),
      "identifierType" => published_in.dig("relatedItemIdentifier", "relatedItemIdentifierType"),
      "title" => first_title ? parse_attributes(first_title, content: "title", first: true) : nil,
      "volume" => published_in["volume"],
      "issue" => published_in["issue"],
      "edition" => published_in["edition"],
      "number" => published_in["number"],
      "chapterNumber" => chapter_number,
      "firstPage" => published_in["firstPage"],
      "lastPage" => published_in["lastPage"]
    }.compact
  end

  # Legacy SeriesInformation/relatedIdentifier container fallback
  series_description = Array.wrap(descriptions).find { |d| d["descriptionType"] == "SeriesInformation" }.to_h
  series = get_series_information(series_description.fetch("description", nil))

  part_of = Array.wrap(related_identifiers).find { |ri| ri["relationType"] == "IsPartOf" }.to_h

  return nil unless series["title"].present?

  {
    "type" => container_type,
    "identifier" => part_of["relatedIdentifier"],
    "identifierType" => part_of["relatedIdentifierType"],
    "title" => series["title"],
    "volume" => series["volume"],
    "issue" => series["issue"],
    "firstPage" => series["firstPage"],
    "lastPage" => series["lastPage"]
  }.compact
end
1494
+
1495
+ private
1496
+
1497
# Normalize +uri+ and return it as a string, with a single trailing slash
# removed from the path when the path is not blank.
def normalize_uri_with_path_cleanup(uri)
  cleaned = uri.normalize

  if cleaned.path.present?
    cleaned.path = cleaned.path.sub(%r{/\z}, "")
  end

  cleaned.to_s
end
64
1502
  end
65
1503
  end