bolognese 0.2.2 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264)
  1. checksums.yaml +5 -5
  2. data/.github/workflows/build.yml +9 -0
  3. data/.github/workflows/changelog.yml +36 -0
  4. data/.github/workflows/ci.yml +22 -0
  5. data/.github/workflows/pull-request.yml +9 -0
  6. data/.github/workflows/release.yml +32 -0
  7. data/.gitignore +4 -0
  8. data/.rubocop.yml +658 -0
  9. data/CHANGELOG.md +1864 -0
  10. data/CITATION +17 -0
  11. data/Gemfile +1 -1
  12. data/Gemfile.lock +251 -99
  13. data/README.md +1026 -2
  14. data/Rakefile +1 -0
  15. data/bin/bolognese +5 -1
  16. data/bolognese.gemspec +33 -21
  17. data/lib/bolognese/array.rb +13 -0
  18. data/lib/bolognese/author_utils.rb +115 -39
  19. data/lib/bolognese/citeproc_extensions.rb +48 -0
  20. data/lib/bolognese/cli.rb +28 -15
  21. data/lib/bolognese/datacite_utils.rb +418 -0
  22. data/lib/bolognese/doi_utils.rb +45 -23
  23. data/lib/bolognese/metadata.rb +250 -18
  24. data/lib/bolognese/metadata_utils.rb +228 -0
  25. data/lib/bolognese/pubmed.rb +2 -0
  26. data/lib/bolognese/readers/bibtex_reader.rb +100 -0
  27. data/lib/bolognese/readers/citeproc_reader.rb +125 -0
  28. data/lib/bolognese/readers/codemeta_reader.rb +108 -0
  29. data/lib/bolognese/readers/crosscite_reader.rb +17 -0
  30. data/lib/bolognese/readers/crossref_reader.rb +413 -0
  31. data/lib/bolognese/readers/datacite_json_reader.rb +17 -0
  32. data/lib/bolognese/readers/datacite_reader.rb +338 -0
  33. data/lib/bolognese/readers/npm_reader.rb +115 -0
  34. data/lib/bolognese/readers/ris_reader.rb +114 -0
  35. data/lib/bolognese/readers/schema_org_reader.rb +264 -0
  36. data/lib/bolognese/string.rb +3 -1
  37. data/lib/bolognese/utils.rb +1403 -12
  38. data/lib/bolognese/version.rb +1 -1
  39. data/lib/bolognese/whitelist_scrubber.rb +47 -0
  40. data/lib/bolognese/writers/bibtex_writer.rb +32 -0
  41. data/lib/bolognese/writers/citation_writer.rb +14 -0
  42. data/lib/bolognese/writers/citeproc_writer.rb +11 -0
  43. data/lib/bolognese/writers/codemeta_writer.rb +29 -0
  44. data/lib/bolognese/writers/crosscite_writer.rb +11 -0
  45. data/lib/bolognese/writers/crossref_writer.rb +11 -0
  46. data/lib/bolognese/writers/csv_writer.rb +24 -0
  47. data/lib/bolognese/writers/datacite_json_writer.rb +13 -0
  48. data/lib/bolognese/writers/datacite_writer.rb +12 -0
  49. data/lib/bolognese/writers/jats_writer.rb +138 -0
  50. data/lib/bolognese/writers/rdf_xml_writer.rb +11 -0
  51. data/lib/bolognese/writers/ris_writer.rb +29 -0
  52. data/lib/bolognese/writers/schema_org_writer.rb +55 -0
  53. data/lib/bolognese/writers/turtle_writer.rb +11 -0
  54. data/lib/bolognese.rb +19 -4
  55. data/package.json +12 -0
  56. data/resources/2008/09/xsd.xsl +997 -0
  57. data/resources/datacite-contributorType-v4.xsd +35 -0
  58. data/resources/datacite-dateType-v4.xsd +25 -0
  59. data/resources/datacite-descriptionType-v4.xsd +19 -0
  60. data/resources/datacite-funderIdentifierType-v4.xsd +15 -0
  61. data/resources/datacite-nameType-v4.xsd +10 -0
  62. data/resources/datacite-relatedIdentifierType-v4.xsd +34 -0
  63. data/resources/datacite-relationType-v4.xsd +49 -0
  64. data/resources/datacite-resourceType-v4.xsd +28 -0
  65. data/resources/datacite-titleType-v4.xsd +14 -0
  66. data/resources/kernel-2.1/include/datacite-contributorType-v1.1.xsd +22 -0
  67. data/resources/kernel-2.1/include/datacite-contributorType-v2.xsd +3 -0
  68. data/resources/kernel-2.1/include/datacite-dateType-v1.1.xsd +31 -0
  69. data/resources/kernel-2.1/include/datacite-dateType-v2.xsd +3 -0
  70. data/resources/kernel-2.1/include/datacite-descriptionType-v1.1.xsd +14 -0
  71. data/resources/kernel-2.1/include/datacite-descriptionType-v2.xsd +3 -0
  72. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v1.1.xsd +24 -0
  73. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v2.xsd +3 -0
  74. data/resources/kernel-2.1/include/datacite-relationType-v1.1.xsd +29 -0
  75. data/resources/kernel-2.1/include/datacite-relationType-v2.xsd +3 -0
  76. data/resources/kernel-2.1/include/datacite-resourceType-v1.1.xsd +22 -0
  77. data/resources/kernel-2.1/include/datacite-resourceType-v2.xsd +3 -0
  78. data/resources/kernel-2.1/include/datacite-titleType-v1.1.xsd +11 -0
  79. data/resources/kernel-2.1/include/datacite-titleType-v2.xsd +3 -0
  80. data/resources/kernel-2.1/metadata.xsd +315 -0
  81. data/resources/kernel-2.2/include/datacite-contributorType-v2.xsd +29 -0
  82. data/resources/kernel-2.2/include/datacite-dateType-v2.xsd +21 -0
  83. data/resources/kernel-2.2/include/datacite-descriptionType-v2.xsd +15 -0
  84. data/resources/kernel-2.2/include/datacite-relatedIdentifierType-v2.xsd +25 -0
  85. data/resources/kernel-2.2/include/datacite-relationType-v2.xsd +29 -0
  86. data/resources/kernel-2.2/include/datacite-resourceType-v2.xsd +23 -0
  87. data/resources/kernel-2.2/include/datacite-titleType-v2.xsd +10 -0
  88. data/resources/kernel-2.2/metadata.xsd +316 -0
  89. data/resources/kernel-3/include/datacite-contributorType-v3.1.xsd +35 -0
  90. data/resources/kernel-3/include/datacite-dateType-v3.xsd +21 -0
  91. data/resources/kernel-3/include/datacite-descriptionType-v3.xsd +17 -0
  92. data/resources/kernel-3/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
  93. data/resources/kernel-3/include/datacite-relationType-v3.1.xsd +38 -0
  94. data/resources/kernel-3/include/datacite-resourceType-v3.xsd +26 -0
  95. data/resources/kernel-3/include/datacite-titleType-v3.xsd +12 -0
  96. data/resources/kernel-3/include/xml.xsd +286 -0
  97. data/resources/kernel-3/metadata.xsd +380 -0
  98. data/resources/kernel-3.0/include/datacite-contributorType-v3.xsd +33 -0
  99. data/resources/kernel-3.0/include/datacite-dateType-v3.xsd +21 -0
  100. data/resources/kernel-3.0/include/datacite-descriptionType-v3.xsd +17 -0
  101. data/resources/kernel-3.0/include/datacite-relatedIdentifierType-v3.xsd +27 -0
  102. data/resources/kernel-3.0/include/datacite-relationType-v3.xsd +33 -0
  103. data/resources/kernel-3.0/include/datacite-resourceType-v3.xsd +26 -0
  104. data/resources/kernel-3.0/include/datacite-titleType-v3.xsd +12 -0
  105. data/resources/kernel-3.0/include/xml.xsd +286 -0
  106. data/resources/kernel-3.0/metadata.xsd +377 -0
  107. data/resources/kernel-3.1/include/datacite-contributorType-v3.1.xsd +35 -0
  108. data/resources/kernel-3.1/include/datacite-dateType-v3.xsd +21 -0
  109. data/resources/kernel-3.1/include/datacite-descriptionType-v3.xsd +17 -0
  110. data/resources/kernel-3.1/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
  111. data/resources/kernel-3.1/include/datacite-relationType-v3.1.xsd +38 -0
  112. data/resources/kernel-3.1/include/datacite-resourceType-v3.xsd +26 -0
  113. data/resources/kernel-3.1/include/datacite-titleType-v3.xsd +12 -0
  114. data/resources/kernel-3.1/include/xml.xsd +286 -0
  115. data/resources/kernel-3.1/metadata.xsd +380 -0
  116. data/resources/kernel-4/include/datacite-contributorType-v4.xsd +37 -0
  117. data/resources/kernel-4/include/datacite-dateType-v4.xsd +27 -0
  118. data/resources/kernel-4/include/datacite-descriptionType-v4.xsd +19 -0
  119. data/resources/kernel-4/include/datacite-funderIdentifierType-v4.xsd +16 -0
  120. data/resources/kernel-4/include/datacite-nameType-v4.xsd +10 -0
  121. data/resources/kernel-4/include/datacite-numberType-v4.xsd +12 -0
  122. data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +39 -0
  123. data/resources/kernel-4/include/datacite-relationType-v4.xsd +59 -0
  124. data/resources/kernel-4/include/datacite-resourceType-v4.xsd +52 -0
  125. data/resources/kernel-4/include/datacite-titleType-v4.xsd +14 -0
  126. data/resources/kernel-4/include/xml.xsd +286 -0
  127. data/resources/kernel-4/metadata.xsd +715 -0
  128. data/resources/kernel-4.0/include/datacite-contributorType-v4.xsd +35 -0
  129. data/resources/kernel-4.0/include/datacite-dateType-v4.xsd +21 -0
  130. data/resources/kernel-4.0/include/datacite-descriptionType-v4.xsd +19 -0
  131. data/resources/kernel-4.0/include/datacite-funderIdentifierType-v4.xsd +15 -0
  132. data/resources/kernel-4.0/include/datacite-relatedIdentifierType-v4.xsd +32 -0
  133. data/resources/kernel-4.0/include/datacite-relationType-v4.xsd +39 -0
  134. data/resources/kernel-4.0/include/datacite-resourceType-v4.xsd +26 -0
  135. data/resources/kernel-4.0/include/datacite-titleType-v4.xsd +14 -0
  136. data/resources/kernel-4.0/include/xml.xsd +286 -0
  137. data/resources/kernel-4.0/metadata.xsd +470 -0
  138. data/resources/kernel-4.1/include/datacite-contributorType-v4.xsd +35 -0
  139. data/resources/kernel-4.1/include/datacite-dateType-v4.1.xsd +23 -0
  140. data/resources/kernel-4.1/include/datacite-descriptionType-v4.xsd +19 -0
  141. data/resources/kernel-4.1/include/datacite-funderIdentifierType-v4.xsd +15 -0
  142. data/resources/kernel-4.1/include/datacite-nameType-v4.1.xsd +10 -0
  143. data/resources/kernel-4.1/include/datacite-relatedIdentifierType-v4.xsd +32 -0
  144. data/resources/kernel-4.1/include/datacite-relationType-v4.1.xsd +46 -0
  145. data/resources/kernel-4.1/include/datacite-resourceType-v4.1.xsd +28 -0
  146. data/resources/kernel-4.1/include/datacite-titleType-v4.xsd +14 -0
  147. data/resources/kernel-4.1/include/xml.xsd +286 -0
  148. data/resources/kernel-4.1/metadata.xsd +483 -0
  149. data/resources/kernel-4.2/include/datacite-contributorType-v4.xsd +35 -0
  150. data/resources/kernel-4.2/include/datacite-dateType-v4.xsd +25 -0
  151. data/resources/kernel-4.2/include/datacite-descriptionType-v4.xsd +19 -0
  152. data/resources/kernel-4.2/include/datacite-funderIdentifierType-v4.xsd +15 -0
  153. data/resources/kernel-4.2/include/datacite-nameType-v4.xsd +10 -0
  154. data/resources/kernel-4.2/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  155. data/resources/kernel-4.2/include/datacite-relationType-v4.xsd +49 -0
  156. data/resources/kernel-4.2/include/datacite-resourceType-v4.xsd +28 -0
  157. data/resources/kernel-4.2/include/datacite-titleType-v4.xsd +14 -0
  158. data/resources/kernel-4.2/include/xml.xsd +286 -0
  159. data/resources/kernel-4.2/metadata.xsd +479 -0
  160. data/resources/kernel-4.3/include/datacite-contributorType-v4.xsd +35 -0
  161. data/resources/kernel-4.3/include/datacite-dateType-v4.xsd +25 -0
  162. data/resources/kernel-4.3/include/datacite-descriptionType-v4.xsd +19 -0
  163. data/resources/kernel-4.3/include/datacite-funderIdentifierType-v4.xsd +16 -0
  164. data/resources/kernel-4.3/include/datacite-nameType-v4.xsd +10 -0
  165. data/resources/kernel-4.3/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  166. data/resources/kernel-4.3/include/datacite-relationType-v4.xsd +49 -0
  167. data/resources/kernel-4.3/include/datacite-resourceType-v4.xsd +28 -0
  168. data/resources/kernel-4.3/include/datacite-titleType-v4.xsd +14 -0
  169. data/resources/kernel-4.3/include/xml.xsd +286 -0
  170. data/resources/kernel-4.3/metadata.xsd +515 -0
  171. data/resources/kernel-4.4/include/datacite-contributorType-v4.xsd +35 -0
  172. data/resources/kernel-4.4/include/datacite-dateType-v4.xsd +25 -0
  173. data/resources/kernel-4.4/include/datacite-descriptionType-v4.xsd +19 -0
  174. data/resources/kernel-4.4/include/datacite-funderIdentifierType-v4.xsd +16 -0
  175. data/resources/kernel-4.4/include/datacite-nameType-v4.xsd +10 -0
  176. data/resources/kernel-4.4/include/datacite-numberType-v4.xsd +12 -0
  177. data/resources/kernel-4.4/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  178. data/resources/kernel-4.4/include/datacite-relationType-v4.xsd +51 -0
  179. data/resources/kernel-4.4/include/datacite-resourceType-v4.xsd +43 -0
  180. data/resources/kernel-4.4/include/datacite-titleType-v4.xsd +14 -0
  181. data/resources/kernel-4.4/include/xml.xsd +286 -0
  182. data/resources/kernel-4.4/metadata.xsd +707 -0
  183. data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
  184. data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
  185. data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
  186. data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
  187. data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
  188. data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
  189. data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  190. data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
  191. data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
  192. data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
  193. data/resources/kernel-4.5/include/xml.xsd +286 -0
  194. data/resources/kernel-4.5/metadata.xsd +711 -0
  195. data/resources/kernel-4.6/include/datacite-contributorType-v4.xsd +37 -0
  196. data/resources/kernel-4.6/include/datacite-dateType-v4.xsd +27 -0
  197. data/resources/kernel-4.6/include/datacite-descriptionType-v4.xsd +19 -0
  198. data/resources/kernel-4.6/include/datacite-funderIdentifierType-v4.xsd +16 -0
  199. data/resources/kernel-4.6/include/datacite-nameType-v4.xsd +10 -0
  200. data/resources/kernel-4.6/include/datacite-numberType-v4.xsd +12 -0
  201. data/resources/kernel-4.6/include/datacite-relatedIdentifierType-v4.xsd +37 -0
  202. data/resources/kernel-4.6/include/datacite-relationType-v4.xsd +57 -0
  203. data/resources/kernel-4.6/include/datacite-resourceType-v4.xsd +49 -0
  204. data/resources/kernel-4.6/include/datacite-titleType-v4.xsd +14 -0
  205. data/resources/kernel-4.6/include/xml.xsd +286 -0
  206. data/resources/kernel-4.6/metadata.xsd +712 -0
  207. data/resources/kernel-4.7/include/datacite-contributorType-v4.xsd +37 -0
  208. data/resources/kernel-4.7/include/datacite-dateType-v4.xsd +27 -0
  209. data/resources/kernel-4.7/include/datacite-descriptionType-v4.xsd +19 -0
  210. data/resources/kernel-4.7/include/datacite-funderIdentifierType-v4.xsd +16 -0
  211. data/resources/kernel-4.7/include/datacite-nameType-v4.xsd +10 -0
  212. data/resources/kernel-4.7/include/datacite-numberType-v4.xsd +12 -0
  213. data/resources/kernel-4.7/include/datacite-relatedIdentifierType-v4.xsd +39 -0
  214. data/resources/kernel-4.7/include/datacite-relationType-v4.xsd +59 -0
  215. data/resources/kernel-4.7/include/datacite-resourceType-v4.xsd +52 -0
  216. data/resources/kernel-4.7/include/datacite-titleType-v4.xsd +14 -0
  217. data/resources/kernel-4.7/include/xml.xsd +286 -0
  218. data/resources/kernel-4.7/metadata.xsd +715 -0
  219. data/resources/oecd/dfg-mappings.json +1866 -0
  220. data/resources/oecd/for-mappings.json +1101 -0
  221. data/resources/oecd/fos-mappings.json +198 -0
  222. data/resources/schema_org/jsonldcontext.json +7477 -0
  223. data/resources/spdx/licenses.json +5297 -0
  224. data/resources/xml.xsd +286 -0
  225. metadata +478 -150
  226. data/.travis.yml +0 -23
  227. data/lib/bolognese/crossref.rb +0 -202
  228. data/lib/bolognese/datacite.rb +0 -157
  229. data/lib/bolognese/date_utils.rb +0 -48
  230. data/lib/bolognese/github.rb +0 -106
  231. data/lib/bolognese/orcid.rb +0 -24
  232. data/lib/bolognese/pid_utils.rb +0 -23
  233. data/spec/cli_spec.rb +0 -37
  234. data/spec/crossref_spec.rb +0 -113
  235. data/spec/datacite_spec.rb +0 -49
  236. data/spec/doi_spec.rb +0 -89
  237. data/spec/fixtures/crossref.xml +0 -742
  238. data/spec/fixtures/datacite.xml +0 -40
  239. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_crossref.yml +0 -760
  240. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_schema_org.yml +0 -1476
  241. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_datacite.yml +0 -214
  242. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_schema_org.yml +0 -384
  243. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/crossref.yml +0 -44
  244. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/datacite.yml +0 -44
  245. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/medra.yml +0 -44
  246. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/not_found.yml +0 -44
  247. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_test.yml +0 -843
  248. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_SICI_DOI.yml +0 -277
  249. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_data_citation.yml +0 -15755
  250. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/date_in_future.yml +0 -2691
  251. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/journal_article.yml +0 -1857
  252. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/not_found_error.yml +0 -93
  253. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/posted_content.yml +0 -5715
  254. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/BlogPosting.yml +0 -307
  255. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/Dataset.yml +0 -343
  256. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref.yml +0 -44
  257. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref_doi_not_url.yml +0 -44
  258. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite.yml +0 -44
  259. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite_doi_http.yml +0 -44
  260. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/orcid.yml +0 -44
  261. data/spec/metadata_spec.rb +0 -35
  262. data/spec/orcid_spec.rb +0 -23
  263. data/spec/spec_helper.rb +0 -88
  264. /data/{LICENSE → LICENSE.md} +0 -0
@@ -1,27 +1,1418 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Bolognese
2
4
  module Utils
3
- def parse_attributes(element)
4
- if element.is_a?(String)
5
- element
5
+ class << self
6
+ include Utils
7
+ end
8
+
9
+ NORMALIZED_LICENSES = {
10
+ "https://creativecommons.org/licenses/by/1.0" => "https://creativecommons.org/licenses/by/1.0/legalcode",
11
+ "https://creativecommons.org/licenses/by/2.0" => "https://creativecommons.org/licenses/by/2.0/legalcode",
12
+ "https://creativecommons.org/licenses/by/2.5" => "https://creativecommons.org/licenses/by/2.5/legalcode",
13
+ "https://creativecommons.org/licenses/by/3.0" => "https://creativecommons.org/licenses/by/3.0/legalcode",
14
+ "https://creativecommons.org/licenses/by/3.0/us" => "https://creativecommons.org/licenses/by/3.0/legalcode",
15
+ "https://creativecommons.org/licenses/by/4.0" => "https://creativecommons.org/licenses/by/4.0/legalcode",
16
+ "https://creativecommons.org/licenses/by-nc/1.0" => "https://creativecommons.org/licenses/by-nc/1.0/legalcode",
17
+ "https://creativecommons.org/licenses/by-nc/2.0" => "https://creativecommons.org/licenses/by-nc/2.0/legalcode",
18
+ "https://creativecommons.org/licenses/by-nc/2.5" => "https://creativecommons.org/licenses/by-nc/2.5/legalcode",
19
+ "https://creativecommons.org/licenses/by-nc/3.0" => "https://creativecommons.org/licenses/by-nc/3.0/legalcode",
20
+ "https://creativecommons.org/licenses/by-nc/4.0" => "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
21
+ "https://creativecommons.org/licenses/by-nd-nc/1.0" => "https://creativecommons.org/licenses/by-nd-nc/1.0/legalcode",
22
+ "https://creativecommons.org/licenses/by-nd-nc/2.0" => "https://creativecommons.org/licenses/by-nd-nc/2.0/legalcode",
23
+ "https://creativecommons.org/licenses/by-nd-nc/2.5" => "https://creativecommons.org/licenses/by-nd-nc/2.5/legalcode",
24
+ "https://creativecommons.org/licenses/by-nd-nc/3.0" => "https://creativecommons.org/licenses/by-nd-nc/3.0/legalcode",
25
+ "https://creativecommons.org/licenses/by-nd-nc/4.0" => "https://creativecommons.org/licenses/by-nd-nc/4.0/legalcode",
26
+ "https://creativecommons.org/licenses/by-nc-sa/1.0" => "https://creativecommons.org/licenses/by-nc-sa/1.0/legalcode",
27
+ "https://creativecommons.org/licenses/by-nc-sa/2.0" => "https://creativecommons.org/licenses/by-nc-sa/2.0/legalcode",
28
+ "https://creativecommons.org/licenses/by-nc-sa/2.5" => "https://creativecommons.org/licenses/by-nc-sa/2.5/legalcode",
29
+ "https://creativecommons.org/licenses/by-nc-sa/3.0" => "https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode",
30
+ "https://creativecommons.org/licenses/by-nc-sa/4.0" => "https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode",
31
+ "https://creativecommons.org/licenses/by-nd/1.0" => "https://creativecommons.org/licenses/by-nd/1.0/legalcode",
32
+ "https://creativecommons.org/licenses/by-nd/2.0" => "https://creativecommons.org/licenses/by-nd/2.0/legalcode",
33
+ "https://creativecommons.org/licenses/by-nd/2.5" => "https://creativecommons.org/licenses/by-nd/2.5/legalcode",
34
+ "https://creativecommons.org/licenses/by-nd/3.0" => "https://creativecommons.org/licenses/by-nd/3.0/legalcode",
35
+ "https://creativecommons.org/licenses/by-nd/4.0" => "https://creativecommons.org/licenses/by-nd/4.0/legalcode",
36
+ "https://creativecommons.org/licenses/by-sa/1.0" => "https://creativecommons.org/licenses/by-sa/1.0/legalcode",
37
+ "https://creativecommons.org/licenses/by-sa/2.0" => "https://creativecommons.org/licenses/by-sa/2.0/legalcode",
38
+ "https://creativecommons.org/licenses/by-sa/2.5" => "https://creativecommons.org/licenses/by-sa/2.5/legalcode",
39
+ "https://creativecommons.org/licenses/by-sa/3.0" => "https://creativecommons.org/licenses/by-sa/3.0/legalcode",
40
+ "https://creativecommons.org/licenses/by-sa/4.0" => "https://creativecommons.org/licenses/by-sa/4.0/legalcode",
41
+ "https://creativecommons.org/licenses/by-nc-nd/1.0" => "https://creativecommons.org/licenses/by-nc-nd/1.0/legalcode",
42
+ "https://creativecommons.org/licenses/by-nc-nd/2.0" => "https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode",
43
+ "https://creativecommons.org/licenses/by-nc-nd/2.5" => "https://creativecommons.org/licenses/by-nc-nd/2.5/legalcode",
44
+ "https://creativecommons.org/licenses/by-nc-nd/3.0" => "https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode",
45
+ "https://creativecommons.org/licenses/by-nc-nd/4.0" => "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode",
46
+ "https://creativecommons.org/licenses/publicdomain" => "https://creativecommons.org/licenses/publicdomain/",
47
+ "https://creativecommons.org/publicdomain/zero/1.0" => "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
48
+ }
49
+
50
+ DC_TO_SO_TRANSLATIONS = {
51
+ "Audiovisual" => "MediaObject",
52
+ "Book" => "Book",
53
+ "BookChapter" => "Chapter",
54
+ "Collection" => "Collection",
55
+ "ComputationalNotebook" => "SoftwareSourceCode",
56
+ "ConferencePaper" => "Article",
57
+ "ConferenceProceeding" => "Periodical",
58
+ "DataPaper" => "Article",
59
+ "Dataset" => "Dataset",
60
+ "Dissertation" => "Thesis",
61
+ "Event" => "Event",
62
+ "Image" => "ImageObject",
63
+ "InteractiveResource" => nil,
64
+ "Journal" => "Periodical",
65
+ "JournalArticle" => "ScholarlyArticle",
66
+ "Model" => nil,
67
+ "OutputManagementPlan" => nil,
68
+ "PeerReview" => "Review",
69
+ "PhysicalObject" => nil,
70
+ "Poster" => "Poster",
71
+ "Preprint" => nil,
72
+ "Presentation" => "PresentationDigitalDocument",
73
+ "Report" => "Report",
74
+ "Service" => "Service",
75
+ "Software" => "SoftwareSourceCode",
76
+ "Sound" => "AudioObject",
77
+ "Standard" => nil,
78
+ "Text" => "ScholarlyArticle",
79
+ "Workflow" => nil,
80
+ "Other" => "CreativeWork",
81
+ # not part of DataCite schema, but used internally
82
+ "Periodical" => "Periodical",
83
+ "DataCatalog" => "DataCatalog",
84
+ "Award" => "Grant",
85
+ "Project" => "Project"
86
+ }
87
+
88
+ DC_TO_CP_TRANSLATIONS = {
89
+ "Audiovisual" => "motion_picture",
90
+ "Book" => "book",
91
+ "BookChapter" => "chapter",
92
+ "Collection" => nil,
93
+ "ComputationalNotebook" => nil,
94
+ "ConferencePaper" => "paper-conference",
95
+ "ConferenceProceeding" => nil,
96
+ "DataPaper" => "report",
97
+ "Dataset" => "dataset",
98
+ "Dissertation" => nil,
99
+ "Event" => nil,
100
+ "Image" => "graphic",
101
+ "InteractiveResource" => nil,
102
+ "Journal" => nil,
103
+ "JournalArticle" => "article-journal",
104
+ "Model" => nil,
105
+ "OutputManagementPlan" => nil,
106
+ "PeerReview" => "review",
107
+ "PhysicalObject" => nil,
108
+ "Poster" => "document",
109
+ "Preprint" => nil,
110
+ "Presentation" => "presentation",
111
+ "Report" => "report",
112
+ "Service" => nil,
113
+ "Sound" => "song",
114
+ "Standard" => nil,
115
+ "Text" => "report",
116
+ "Workflow" => nil,
117
+ "Other" => nil
118
+ }
119
+
120
+ CR_TO_CP_TRANSLATIONS = {
121
+ "Proceedings" => nil,
122
+ "ReferenceBook" => nil,
123
+ "JournalIssue" => nil,
124
+ "ProceedingsArticle" => "paper-conference",
125
+ "Other" => nil,
126
+ "Dissertation" => "thesis",
127
+ "Dataset" => "dataset",
128
+ "EditedBook" => "book",
129
+ "JournalArticle" => "article-journal",
130
+ "Journal" => nil,
131
+ "Report" => "report",
132
+ "BookSeries" => nil,
133
+ "ReportSeries" => nil,
134
+ "BookTrack" => nil,
135
+ "Standard" => nil,
136
+ "BookSection" => "chapter",
137
+ "BookPart" => nil,
138
+ "Book" => "book",
139
+ "BookChapter" => "chapter",
140
+ "StandardSeries" => nil,
141
+ "Monograph" => "book",
142
+ "Component" => nil,
143
+ "ReferenceEntry" => "entry-dictionary",
144
+ "JournalVolume" => nil,
145
+ "BookSet" => nil
146
+ }
147
+
148
+ CR_TO_SO_TRANSLATIONS = {
149
+ "Proceedings" => nil,
150
+ "ReferenceBook" => "Book",
151
+ "JournalIssue" => "PublicationIssue",
152
+ "ProceedingsArticle" => nil,
153
+ "Other" => "CreativeWork",
154
+ "Dissertation" => "Thesis",
155
+ "Dataset" => "Dataset",
156
+ "EditedBook" => "Book",
157
+ "JournalArticle" => "ScholarlyArticle",
158
+ "Journal" => nil,
159
+ "Report" => "Report",
160
+ "BookSeries" => nil,
161
+ "ReportSeries" => nil,
162
+ "BookTrack" => nil,
163
+ "Standard" => nil,
164
+ "BookSection" => nil,
165
+ "BookPart" => nil,
166
+ "Book" => "Book",
167
+ "BookChapter" => "Chapter",
168
+ "StandardSeries" => nil,
169
+ "Monograph" => "Book",
170
+ "Component" => "CreativeWork",
171
+ "ReferenceEntry" => nil,
172
+ "JournalVolume" => "PublicationVolume",
173
+ "BookSet" => nil,
174
+ "PostedContent" => "ScholarlyArticle",
175
+ "PeerReview" => "Review"
176
+ }
177
+
178
+ CR_TO_BIB_TRANSLATIONS = {
179
+ "Proceedings" => "proceedings",
180
+ "ReferenceBook" => "book",
181
+ "JournalIssue" => nil,
182
+ "ProceedingsArticle" => nil,
183
+ "Other" => nil,
184
+ "Dissertation" => "phdthesis",
185
+ "Dataset" => nil,
186
+ "EditedBook" => "book",
187
+ "JournalArticle" => "article",
188
+ "Journal" => nil,
189
+ "Report" => "techreport",
190
+ "BookSeries" => nil,
191
+ "ReportSeries" => nil,
192
+ "BookTrack" => nil,
193
+ "Standard" => nil,
194
+ "BookSection" => "inbook",
195
+ "BookPart" => nil,
196
+ "Book" => "book",
197
+ "BookChapter" => "inbook",
198
+ "StandardSeries" => nil,
199
+ "Monograph" => "book",
200
+ "Component" => nil,
201
+ "ReferenceEntry" => nil,
202
+ "JournalVolume" => nil,
203
+ "BookSet" => nil,
204
+ "PostedContent" => "article"
205
+ }
206
+
207
+ BIB_TO_CR_TRANSLATIONS = {
208
+ "proceedings" => "Proceedings",
209
+ "phdthesis" => "Dissertation",
210
+ "article" => "JournalArticle",
211
+ "book" => "Book",
212
+ "inbook" => "BookChapter"
213
+ }
214
+
215
+ CR_TO_JATS_TRANSLATIONS = {
216
+ "Proceedings" => "working-paper",
217
+ "ReferenceBook" => "book",
218
+ "JournalIssue" => "journal",
219
+ "ProceedingsArticle" => "working-paper",
220
+ "Other" => nil,
221
+ "Dissertation" => nil,
222
+ "Dataset" => "data",
223
+ "EditedBook" => "book",
224
+ "JournalArticle" => "journal",
225
+ "Journal" => "journal",
226
+ "Report" => "report",
227
+ "BookSeries" => "book",
228
+ "ReportSeries" => "report",
229
+ "BookTrack" => "book",
230
+ "Standard" => "standard",
231
+ "BookSection" => "chapter",
232
+ "BookPart" => "chapter",
233
+ "Book" => "book",
234
+ "BookChapter" => "chapter",
235
+ "StandardSeries" => "standard",
236
+ "Monograph" => "book",
237
+ "Component" => nil,
238
+ "ReferenceEntry" => nil,
239
+ "JournalVolume" => "journal",
240
+ "BookSet" => "book"
241
+ }
242
+
243
+ CR_TO_DC_TRANSLATIONS = {
244
+ "Proceedings" => nil,
245
+ "ReferenceBook" => nil,
246
+ "JournalIssue" => "Text",
247
+ "ProceedingsArticle" => "ConferencePaper",
248
+ "Other" => "Other",
249
+ "Dissertation" => "Dissertation",
250
+ "Dataset" => "Dataset",
251
+ "EditedBook" => "Book",
252
+ "JournalArticle" => "JournalArticle",
253
+ "Journal" => "Journal",
254
+ "Report" => "Report",
255
+ "BookSeries" => nil,
256
+ "ReportSeries" => nil,
257
+ "BookTrack" => nil,
258
+ "Standard" => "Standard",
259
+ "BookSection" => "BookChapter",
260
+ "BookPart" => nil,
261
+ "Book" => "Book",
262
+ "BookChapter" => "BookChapter",
263
+ "SaComponent" => "Text",
264
+ "StandardSeries" => "Standard",
265
+ "Monograph" => "Book",
266
+ "Component" => nil,
267
+ "ReferenceEntry" => nil,
268
+ "JournalVolume" => nil,
269
+ "BookSet" => nil,
270
+ "PostedContent" => "JournalArticle",
271
+ "PeerReview" => "PeerReview"
272
+ }
273
+
274
+ SO_TO_DC_TRANSLATIONS = {
275
+ "Article" => "Text",
276
+ "AudioObject" => "Sound",
277
+ "Blog" => "Text",
278
+ "BlogPosting" => "Text",
279
+ "Book" => "Book",
280
+ "Chapter" => "BookChapter",
281
+ "Collection" => "Collection",
282
+ "DataCatalog" => "Dataset",
283
+ "Dataset" => "Dataset",
284
+ "Event" => "Event",
285
+ "ImageObject" => "Image",
286
+ "Movie" => "Audiovisual",
287
+ "Poster" => "Poster",
288
+ "PresentationDigitalDocument" => "Presentation",
289
+ "PublicationIssue" => "Text",
290
+ "Report" => "Report",
291
+ "ScholarlyArticle" => "Text",
292
+ "Thesis" => "Text",
293
+ "Service" => "Service",
294
+ "Review" => "PeerReview",
295
+ "SoftwareSourceCode" => "Software",
296
+ "VideoObject" => "Audiovisual",
297
+ "WebPage" => "Text",
298
+ "WebSite" => "Text"
299
+ }
300
+
301
+ SO_TO_JATS_TRANSLATIONS = {
302
+ "Article" => "journal",
303
+ "AudioObject" => nil,
304
+ "Blog" => nil,
305
+ "BlogPosting" => nil,
306
+ "Book" => "book",
307
+ "Collection" => nil,
308
+ "CreativeWork" => nil,
309
+ "DataCatalog" => "data",
310
+ "Dataset" => "data",
311
+ "Event" => nil,
312
+ "ImageObject" => nil,
313
+ "Movie" => nil,
314
+ "PublicationIssue" => "journal",
315
+ "ScholarlyArticle" => "journal",
316
+ "Service" => nil,
317
+ "SoftwareSourceCode" => "software",
318
+ "VideoObject" => nil,
319
+ "WebPage" => nil,
320
+ "WebSite" => "website"
321
+ }
322
+
323
+ SO_TO_CP_TRANSLATIONS = {
324
+ "Article" => nil,
325
+ "AudioObject" => "song",
326
+ "Blog" => "report",
327
+ "BlogPosting" => "post-weblog",
328
+ "Collection" => nil,
329
+ "CreativeWork" => nil,
330
+ "DataCatalog" => "dataset",
331
+ "Dataset" => "dataset",
332
+ "Event" => nil,
333
+ "ImageObject" => "graphic",
334
+ "Movie" => "motion_picture",
335
+ "Poster" => "document",
336
+ "PresentationDigitalDocument" => "presentation",
337
+ "PublicationIssue" => nil,
338
+ "Report" => "report",
339
+ "ScholarlyArticle" => "article-journal",
340
+ "Service" => nil,
341
+ "Thesis" => "thesis",
342
+ "VideoObject" => "broadcast",
343
+ "WebPage" => "webpage",
344
+ "WebSite" => "webpage"
345
+ }
346
+
347
# Maps schema.org types to RIS reference-type tags.
# A nil value marks a schema.org type with no RIS tag assigned here.
SO_TO_RIS_TRANSLATIONS = {
  "Article"                     => nil,
  "AudioObject"                 => nil,
  "Blog"                        => nil,
  "BlogPosting"                 => "BLOG",
  "Collection"                  => nil,
  "CreativeWork"                => "GEN",
  "DataCatalog"                 => "CTLG",
  "Dataset"                     => "DATA",
  "Event"                       => nil,
  "ImageObject"                 => "FIGURE",
  "Movie"                       => "MPCT",
  "Poster"                      => "GEN",
  "PresentationDigitalDocument" => "SLIDE",
  "PublicationIssue"            => nil,
  "Report"                      => "RPRT",
  "ScholarlyArticle"            => "JOUR",
  "Service"                     => nil,
  "SoftwareSourceCode"          => "COMP",
  "VideoObject"                 => "VIDEO",
  "WebPage"                     => "ELEC",
  "WebSite"                     => nil
}
370
+
371
# Maps Crossref work types to RIS reference-type tags.
# A nil value marks a Crossref type with no RIS tag assigned here.
CR_TO_RIS_TRANSLATIONS = {
  "Proceedings"        => "CONF",
  "ReferenceBook"      => "BOOK",
  "JournalIssue"       => nil,
  "ProceedingsArticle" => "CPAPER",
  "Other"              => "GEN",
  "Dissertation"       => "THES",
  "Dataset"            => "DATA",
  "EditedBook"         => "BOOK",
  "JournalArticle"     => "JOUR",
  "Journal"            => nil,
  "Report"             => "RPRT",
  "BookSeries"         => nil,
  "ReportSeries"       => nil,
  "BookTrack"          => nil,
  "Standard"           => "STAND",
  "BookSection"        => "CHAP",
  "BookPart"           => "CHAP",
  "Book"               => "BOOK",
  "BookChapter"        => "CHAP",
  "StandardSeries"     => nil,
  "Monograph"          => "BOOK",
  "Component"          => nil,
  "ReferenceEntry"     => "DICT",
  "JournalVolume"      => nil,
  "BookSet"            => nil
}
398
+
399
# Maps DataCite resourceTypeGeneral values to RIS reference-type tags.
# A nil value marks a DataCite type with no RIS tag assigned here.
#
# "Report" previously mapped to the misspelled tag "RRPT"; the RIS tag for a
# report is "RPRT", as used by the "Text" entry below and by the other
# *_TO_RIS tables in this file.
DC_TO_RIS_TRANSLATIONS = {
  "Audiovisual" => "MPCT",
  "Book" => "BOOK",
  "BookChapter" => "CHAP",
  "Collection" => nil,
  "ComputationalNotebook" => "COMP",
  "ConferencePaper" => "CPAPER",
  "ConferenceProceeding" => "CONF",
  "DataPaper" => nil,
  "Dataset" => "DATA",
  "Dissertation" => "THES",
  "Event" => nil,
  "Image" => "FIGURE",
  "InteractiveResource" => nil,
  "Journal" => nil,
  "JournalArticle" => "JOUR",
  "Model" => nil,
  "OutputManagementPlan" => nil,
  "PeerReview" => nil,
  "PhysicalObject" => nil,
  "Poster" => "GEN",
  "Preprint" => nil,
  "Presentation" => "SLIDE",
  "Report" => "RPRT",
  "Service" => nil,
  "Software" => "COMP",
  "Sound" => "SOUND",
  "Standard" => nil,
  "Text" => "RPRT",
  "Workflow" => nil,
  "Other" => nil
}
431
+
432
# Maps RIS reference-type tags to DataCite resourceTypeGeneral values.
RIS_TO_DC_TRANSLATIONS = {
  "BLOG"   => "Text",
  "GEN"    => "Poster",
  "CTLG"   => "Collection",
  "DATA"   => "Dataset",
  "FIGURE" => "Image",
  "THES"   => "Dissertation",
  "MPCT"   => "Audiovisual",
  "JOUR"   => "JournalArticle",
  "COMP"   => "Software",
  "VIDEO"  => "Audiovisual",
  "ELEC"   => "Text",
  "SLIDE"  => "Presentation"
}
446
+
447
# Maps BibTeX entry types to DataCite resourceTypeGeneral values.
# A nil value marks a BibTeX type with no DataCite value assigned here.
BIB_TO_DC_TRANSLATIONS = {
  "article"       => "JournalArticle",
  "book"          => "Book",
  "inbook"        => "BookChapter",
  "inproceedings" => nil,
  "manual"        => nil,
  "misc"          => "Other",
  "phdthesis"     => "Dissertation",
  "proceedings"   => "ConferenceProceeding",
  "techreport"    => "Report",
  "unpublished"   => nil
}
459
+
460
# Maps citeproc (CSL) item types to DataCite resourceTypeGeneral values.
CP_TO_DC_TRANSLATIONS = {
  "song"            => "Audiovisual",
  "post-weblog"     => "Text",
  "dataset"         => "Dataset",
  "graphic"         => "Image",
  "motion_picture"  => "Audiovisual",
  "article-journal" => "JournalArticle",
  "broadcast"       => "Audiovisual",
  "webpage"         => "Text",
  "document"        => "Poster",
  "presentation"    => "Presentation"
}
472
+
473
# Maps schema.org types to BibTeX entry types; most types use the generic "misc".
SO_TO_BIB_TRANSLATIONS = {
  "Article"                     => "article",
  "AudioObject"                 => "misc",
  "Thesis"                      => "phdthesis",
  "Blog"                        => "misc",
  "BlogPosting"                 => "article",
  "Collection"                  => "misc",
  "CreativeWork"                => "misc",
  "DataCatalog"                 => "misc",
  "Dataset"                     => "misc",
  "Event"                       => "misc",
  "ImageObject"                 => "misc",
  "Movie"                       => "misc",
  "Poster"                      => "misc",
  "PresentationDigitalDocument" => "misc",
  "PublicationIssue"            => "misc",
  "ScholarlyArticle"            => "article",
  "Service"                     => "misc",
  "SoftwareSourceCode"          => "misc",
  "VideoObject"                 => "misc",
  "WebPage"                     => "misc",
  "WebSite"                     => "misc"
}
496
+
497
# Codes that stand in for missing metadata values, each with its
# human-readable meaning.
UNKNOWN_INFORMATION = {
  ":unac" => "temporarily inaccessible",
  ":unal" => "unallowed, suppressed intentionally",
  ":unap" => "not applicable, makes no sense",
  ":unas" => "value unassigned (e.g., Untitled)",
  ":unav" => "value unavailable, possibly unknown",
  ":unkn" => "known to be unknown (e.g., Anonymous, Inconnue)",
  ":none" => "never had a value, never will",
  ":null" => "explicitly and meaningfully empty",
  ":tba"  => "to be assigned or announced later",
  ":etal" => "too numerous to list (et alia)"
}
509
+
510
# Bundled JSON resources, keyed by symbol; paths are relative to the
# directory returned by resources_dir_path.
RESOURCE_PATHS = {
  :spdx => 'spdx/licenses.json',
  :fos  => 'oecd/fos-mappings.json',
  :for  => 'oecd/for-mappings.json',
  :dfg  => 'oecd/dfg-mappings.json'
}
516
+
517
# Absolute path of the "resources" directory three levels above this file,
# with a trailing slash.
def resources_dir_path
  base = File.expand_path('../../../resources', __FILE__)
  "#{base}/"
end
520
+
521
# Read a file located under the resources directory.
# extra_path is appended verbatim to resources_dir_path.
def resource_file( extra_path )
  full_path = resources_dir_path + extra_path
  File.read(full_path)
end
524
+
525
# Load and parse one of the bundled JSON resources listed in RESOURCE_PATHS.
# Returns nil when resource_symbol is not a known key.
def resource_json( resource_symbol )
  return nil unless RESOURCE_PATHS.keys().include?(resource_symbol)

  relative_path = RESOURCE_PATHS[resource_symbol]
  JSON.load(resource_file(relative_path))
end
530
+
531
+
532
# Work out the input format, trying in order: identifier, file
# extension/filename, then the content string. Falls back to "datacite"
# when none of id, ext or string is given.
def find_from_format(id: nil, string: nil, ext: nil, filename: nil)
  return find_from_format_by_id(id) if id.present?

  if ext.present?
    # a recognised filename wins over the extension-based guess
    return find_from_format_by_filename(filename) || find_from_format_by_ext(string, ext: ext)
  end

  return find_from_format_by_string(string) if string.present?

  "datacite"
end
543
+
544
# Guess the metadata format from an identifier.
# DOIs resolve to their lower-cased registration agency (nil for agencies
# outside the supported list); ORCID iDs give "orcid"; GitHub URLs give
# "npm" (package.json) or "codemeta"; anything else gives "schema_org".
def find_from_format_by_id(id)
  id = normalize_id(id)

  doi_pattern = /\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.stage.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/
  orcid_pattern = /\A(?:(http|https):\/(\/)?orcid\.org\/)?(\d{4}-\d{4}-\d{4}-\d{3}[0-9X]+)\z/
  package_json_pattern = /\A(http|https):\/(\/)?github\.com\/(.+)\/package.json\z/
  github_pattern = /\A(http|https):\/(\/)?github\.com\/(.+)\z/

  if doi_pattern.match(id)
    agency = get_doi_ra(id)
    return %w(DataCite Crossref mEDRA KISTI JaLC OP).include?(agency) ? agency.downcase : nil
  end

  return "orcid" if orcid_pattern.match(id)
  # package.json must be tested before the generic GitHub pattern
  return "npm" if package_json_pattern.match(id)
  return "codemeta" if github_pattern.match(id)

  "schema_org"
end
560
+
561
# Recognise formats that are identified by their exact filename.
# Only "package.json" (npm) is known; everything else returns nil.
def find_from_format_by_filename(filename)
  "npm" if filename == "package.json"
end
566
+
567
# Guess the metadata format from a file extension (options[:ext]) plus the
# file content in +string+. Returns a format name, or nil when the
# extension is unknown or the content matches none of the known formats.
#
# The JSON document is parsed once and reused by every ".json" check
# (the previous version re-parsed it in each branch).
# DataCite JSON is detected via either the "schemaVersion" or the
# "schema-version" key, because the sibling find_from_format_by_string
# checks the dasherized spelling while this method checked the camelCase one.
def find_from_format_by_ext(string, options={})
  case options[:ext]
  when ".bib"
    "bibtex"
  when ".ris"
    "ris"
  when ".xml"
    if Maremma.from_xml(string).to_h.dig("crossref_result", "query_result", "body", "query", "doi_record", "crossref")
      "crossref"
    elsif Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
      "datacite"
    end
  when ".json"
    json = Maremma.from_json(string).to_h
    schema_version = json.dig("schemaVersion") || json.dig("schema-version")

    if json.dig("@context").to_s.start_with?("http://schema.org", "https://schema.org")
      "schema_org"
    elsif json.dig("@context") == ("https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld")
      "codemeta"
    elsif schema_version.to_s.start_with?("http://datacite.org/schema/kernel")
      "datacite_json"
    elsif json.dig("types") && json.dig("publication_year").present?
      "crosscite"
    elsif json.dig("issued", "date-parts").present?
      "citeproc"
    end
  end
end
588
+
589
# Guess the metadata format from the content string alone.
# Checks run in order: Crossref XML, DataCite XML, the JSON formats
# (schema.org, codemeta, DataCite JSON, crosscite, citeproc), RIS, BibTeX.
# Returns nil when nothing matches or when BibTeX parsing fails.
#
# The JSON document is parsed once, after the XML checks, instead of once
# per branch. DataCite JSON is detected via either the "schema-version" or
# the "schemaVersion" key, because the sibling find_from_format_by_ext
# checks the camelCase spelling while this method checked the dasherized one.
def find_from_format_by_string(string)
  if Maremma.from_xml(string).to_h.dig("crossref_result", "query_result", "body", "query", "doi_record", "crossref").present?
    return "crossref"
  elsif Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
    return "datacite"
  end

  json = Maremma.from_json(string).to_h
  schema_version = json.dig("schema-version") || json.dig("schemaVersion")

  if json.dig("@context").to_s.start_with?("http://schema.org", "https://schema.org")
    "schema_org"
  elsif json.dig("@context") == ("https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld")
    "codemeta"
  elsif schema_version.to_s.start_with?("http://datacite.org/schema/kernel")
    "datacite_json"
  elsif json.dig("types").present? && json.dig("publication_year").present?
    "crosscite"
  elsif json.dig("issued", "date-parts").present?
    "citeproc"
  # NOTE(review): RIS records conventionally start with "TY  - " (two spaces);
  # confirm this literal against the real source file.
  elsif string.start_with?("TY - ")
    "ris"
  elsif BibTeX.parse(string).first
    "bibtex"
  end
rescue BibTeX::ParseError
  nil
end
612
+
613
# Extract the ORCID iD from an http(s)://orcid.org/ URL.
# Returns everything after "orcid.org/", or nil when +url+ is not such a URL.
#
# The previous pattern began with "\A:" and so required a literal colon
# before the scheme, which meant no real ORCID URL ever matched.
def orcid_from_url(url)
  Array(/\A(?:http|https):\/\/orcid\.org\/(.+)/.match(url)).last
end
616
+
617
# Turn an ORCID iD into its https://orcid.org/ URL.
# Returns nil when +orcid+ is blank.
def orcid_as_url(orcid)
  return nil unless orcid.present?

  "https://orcid.org/#{orcid}"
end
620
+
621
# Validate an ORCID iD given bare or as an orcid.org URL (optionally with a
# www./sandbox. subdomain and a trailing slash).
# Returns the iD with whitespace separators replaced by hyphens, or nil
# when the input does not match.
def validate_orcid(orcid)
  pattern = /\A(?:(?:http|https):\/\/(?:(?:www|sandbox)?\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\/{0,1}\z/
  orcid = Array(pattern.match(orcid)).last
  return nil unless orcid.present?

  orcid.gsub(/[[:space:]]/, "-")
end
625
+
626
# Validate a ROR identifier given bare, as "ror.org/<id>", or as a full
# http(s) URL, with an optional trailing slash.
# Returns the bare identifier, or nil when the input does not match.
def validate_ror(ror)
  found = /^(?:(?:(?:http|https):\/\/)?ror\.org\/)?(0\w{6}\d{2})\/{0,1}$/.match(ror)
  found ? found[1] : nil
end
629
+
630
# Check that a scheme URI points at orcid.org (http or https, optional www.).
# Returns "orcid.org" on a match and nil otherwise.
def validate_orcid_scheme(orcid_scheme)
  found = /\A(http|https):\/\/(www\.)?(orcid\.org)/.match(orcid_scheme)
  # the host is the third (and last) capture group
  found ? found[3] : nil
end
633
+
634
# Classify an identifier string as "DOI", "URL" or "ISSN".
# Returns nil when the string matches none of the three patterns.
def validate_url(str)
  return "DOI" if /\A(?:(http|https):\/\/(dx\.)?doi.org\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(str)
  return "URL" if /\A(http|https):\/\//.match(str)
  return "ISSN" if /\A(ISSN|eISSN) (\d{4}-\d{3}[0-9X]+)\z/.match(str)

  nil
end
643
+
644
+ def parse_attributes(element, options={})
645
+ content = options[:content] || "__content__"
646
+
647
+ if element.is_a?(String) && options[:content].nil?
648
+ CGI.unescapeHTML(element)
6
649
  elsif element.is_a?(Hash)
7
- element.fetch("text", nil)
650
+ element.fetch( CGI.unescapeHTML(content), nil)
8
651
  elsif element.is_a?(Array)
9
- element.map { |e| e.fetch("text", nil) }
652
+ a = element.map { |e| e.is_a?(Hash) ? e.fetch(CGI.unescapeHTML(content), nil) : e }.uniq
653
+ a = options[:first] ? a.first : a.unwrap
10
654
  else
11
655
  nil
12
656
  end
13
657
  end
14
658
 
15
- def parse_attribute(element)
16
- if element.is_a?(String)
17
- element
18
- elsif element.is_a?(Hash)
19
- element.fetch("text", nil)
20
- elsif element.is_a?(Array)
21
- element.first.fetch("text", nil)
659
# Normalize an identifier: a valid DOI is returned in the form produced by
# DoiUtils::normalize_doi; otherwise the value must be an http(s) URI with
# a host, which is cleaned up via normalize_uri_with_path_cleanup.
# Returns nil for blank input, non-http(s) values and unparseable URIs.
def normalize_id(id, options={})
  return nil unless id.present?

  # a valid DOI takes precedence over generic URL handling
  normalized_doi = DoiUtils::normalize_doi(id, options)
  return normalized_doi if normalized_doi.present?

  parsed = Addressable::URI.parse(id)
  is_http_uri = parsed && parsed.host && %w(http https).include?(parsed.scheme)
  return nil unless is_http_uri

  normalize_uri_with_path_cleanup(parsed)
rescue Addressable::URI::InvalidURIError
  nil
end
675
+
676
# Normalize a URL. Values starting with "info" are returned unchanged;
# otherwise the value must be an http, https or ftp URI with a host.
# With options[:https] the scheme is forced to https before cleanup.
# Returns nil for blank input, other schemes and unparseable URIs.
def normalize_url(id, options={})
  return nil unless id.present?

  # info URIs pass through untouched
  return id if id.to_s.start_with?("info")

  parsed = Addressable::URI.parse(id)
  has_allowed_scheme = parsed && parsed.host && %w(http https ftp).include?(parsed.scheme)
  return nil unless has_allowed_scheme

  parsed.scheme = "https" if options[:https]

  normalize_uri_with_path_cleanup(parsed)
rescue Addressable::URI::InvalidURIError
  nil
end
695
+
696
# Normalize a license URL to https and, when NORMALIZED_LICENSES has an
# entry for it, replace it with that canonical form.
def normalize_cc_url(id)
  https_url = normalize_url(id, https: true)
  NORMALIZED_LICENSES.fetch(https_url, https_url)
end
700
+
701
# Turn a valid ORCID iD (bare or URL) into its https://orcid.org/ URL.
# Input that fails validate_orcid is returned unchanged.
def normalize_orcid(orcid)
  valid_id = validate_orcid(orcid)

  if valid_id.present?
    "https://orcid.org/" + Addressable::URI.encode(valid_id)
  else
    orcid
  end
end
708
+
709
# Turn a valid ROR identifier (bare or URL) into its https://ror.org/ URL.
# Input that fails validate_ror is returned unchanged.
def normalize_ror(ror)
  valid_id = validate_ror(ror)

  if valid_id.present?
    "https://ror.org/" + Addressable::URI.encode(valid_id)
  else
    ror
  end
end
716
+
717
# Convert schema.org references (hashes carrying "@id" and optionally
# "@type") into DataCite relatedIdentifier hashes with the given
# relation_type. Entries without an "@id" are dropped. The identifier type
# is "DOI" when a DOI can be extracted from the normalized id, else "URL".
def normalize_ids(ids: nil, relation_type: nil)
  referenced = Array.wrap(ids).select { |entry| entry["@id"].present? }

  related = referenced.map do |entry|
    normalized = normalize_id(entry["@id"])
    doi = DoiUtils::doi_from_url(normalized)

    {
      "relatedIdentifier" => doi || normalized,
      "relationType" => relation_type,
      "relatedIdentifierType" => doi.present? ? "DOI" : "URL",
      "resourceTypeGeneral" => Metadata::SO_TO_DC_TRANSLATIONS[entry["@type"]]
    }.compact
  end

  related.unwrap
end
729
+
730
+ # pick electronic issn if there are multiple
731
+ # format issn as xxxx-xxxx
732
# Extract an ISSN from a String, a Hash, or an Array of Hashes, and format
# it as xxxx-xxxx. For arrays the entry with media_type "electronic" is
# preferred, falling back to the first entry. options[:content] names the
# hash key holding the value (default "__content__").
# Returns nil unless the value is 8 or 9 characters long.
def normalize_issn(input, options={})
  content = options[:content] || "__content__"

  issn =
    if input.blank?
      nil
    elsif input.is_a?(String) && options[:content].nil?
      input
    elsif input.is_a?(Hash)
      input.fetch(content, nil)
    elsif input.is_a?(Array)
      preferred = input.find { |candidate| candidate["media_type"] == "electronic" } || input.first
      preferred.fetch(content, nil)
    end

  length = issn.to_s.length
  # 9 characters: already hyphenated
  return issn if length == 9
  # 8 characters: insert the hyphen
  return issn[0..3] + "-" + issn[4..7] if length == 8

  nil
end
755
+
756
+ # find Creative Commons or OSI license in licenses array, normalize url and name
757
# Find the first Creative Commons or OSI license in +licenses+ (hashes with
# a "url" key) and return its normalized URL as a String.
# Returns +licenses+ unchanged when no such license is present, and nil
# when a license URL cannot be parsed.
#
# The previous version never assigned +uri+, so it raised NameError as soon
# as a standard license was found; +uri+ is now the first matching license.
def normalize_licenses(licenses)
  standard_licenses = Array.wrap(licenses).map { |l| URI.parse(l["url"]) }.select { |li| li.host && li.host[/(creativecommons.org|opensource.org)$/] }
  return licenses unless standard_licenses.present?

  uri = standard_licenses.first

  # use HTTPS
  uri.scheme = "https"

  # use host name without subdomain
  uri.host = Array(/(creativecommons.org|opensource.org)/.match uri.host).last

  # normalize URLs
  if uri.host == "creativecommons.org"
    # drop a trailing "legalcode" segment and make sure the path ends in "/"
    uri.path = uri.path.split('/')[0..-2].join("/") if uri.path.split('/').last == "legalcode"
    uri.path = uri.path + '/' unless uri.path.end_with?('/')
  else
    uri.path = uri.path.gsub(/(-license|\.php|\.html)/, '')
    uri.path = uri.path.sub(/(mit|afl|apl|osl|gpl|ecl)/) { |match| match.upcase }
    uri.path = uri.path.sub(/(artistic|apache)/) { |match| match.titleize }
    # rewrite a trailing version as "<name>-<major>.<minor>", minor defaulting to 0
    uri.path = uri.path.sub(/([^0-9\-]+)(-)?([1-9])?(\.)?([0-9])?$/) do
      m = Regexp.last_match
      text = m[1]

      if m[3].present?
        version = [m[3], m[5].presence || "0"].join(".")
        [text, version].join("-")
      else
        text
      end
    end
  end

  uri.to_s
rescue URI::InvalidURIError
  nil
end
792
+
793
# Coerce a publisher into hash form: hash-like values are returned as-is,
# string-like values are wrapped as { "name" => value }, and anything else
# yields nil.
def normalize_publisher(publisher)
  return publisher if publisher.respond_to?(:to_hash)

  { "name" => publisher } if publisher.respond_to?(:to_str)
end
800
+
801
# Dasherize the keys of one element or a list of elements.
# With options[:first] the result is unwrapped; otherwise the array is
# returned, or nil when it is empty.
def to_datacite_json(element, options={})
  dasherized = Array.wrap(element).map do |entry|
    entry.each_with_object({}) { |(key, value), out| out[key.dasherize] = value }
  end

  if options[:first]
    dasherized.unwrap
  else
    dasherized.presence
  end
end
807
+
808
# Underscore the keys of one element or a list of elements; always returns
# an array.
def from_datacite_json(element)
  Array.wrap(element).map do |entry|
    entry.each_with_object({}) { |(key, value), out| out[key.underscore] = value }
  end
end
813
+
814
# Rename "type", "id" and "title" keys to their schema.org equivalents
# ("@type", "@id", "name") via map_hash_keys.
def to_schema_org(element)
  schema_org_keys = { "type" => "@type", "id" => "@id", "title" => "name" }
  map_hash_keys(element: element, mapping: schema_org_keys)
end
819
+
820
# Convert DataCite creators into schema.org Person/Organization hashes.
# Affiliations become "Organization" hashes, "@type" is derived from
# nameType by dropping its last two characters ("Personal" -> "Person"),
# "@id" is the first nameIdentifier, and "name" is "given family" when a
# familyName is present.
#
# Each creator is duplicated before being modified so the caller's hashes
# are left untouched, matching to_schema_org_contributors; the previous
# version wrote the schema.org keys into the caller's own hashes.
def to_schema_org_creators(element)
  element = Array.wrap(element).map do |c|
    transformed_c = c.dup
    transformed_c["affiliation"] = Array.wrap(c["affiliation"]).map do |a|
      if a.is_a?(String)
        name = a
        affiliation_identifier = nil
      else
        name = a["name"]
        affiliation_identifier = a["affiliationIdentifier"]
      end

      {
        "@type" => "Organization",
        "@id" => affiliation_identifier,
        "name" => name }.compact
    end.unwrap
    transformed_c["@type"] = c["nameType"].present? ? c["nameType"][0..-3] : nil
    transformed_c["@id"] = Array.wrap(c["nameIdentifiers"]).first.to_h.fetch("nameIdentifier", nil)
    transformed_c["name"] = c["familyName"].present? ? [c["givenName"], c["familyName"]].join(" ") : c["name"]
    transformed_c.except("nameIdentifiers", "nameType").compact
  end.unwrap
end
842
+
843
# Convert DataCite contributors into schema.org Person/Organization hashes.
# Works on a copy of each contributor so the input hashes are not modified.
def to_schema_org_contributors(element)
  converted = Array.wrap(element).map do |c|
    result = c.dup

    organizations = Array.wrap(c["affiliation"]).map do |a|
      org_name, org_id = a.is_a?(String) ? [a, nil] : [a["name"], a["affiliationIdentifier"]]

      { "@type" => "Organization", "@id" => org_id, "name" => org_name }.compact
    end
    result["affiliation"] = organizations.unwrap

    # "Personal" -> "Person", "Organizational" -> "Organization"
    result["@type"] = c["nameType"].present? ? c["nameType"][0..-3] : nil
    result["@id"] = Array.wrap(c["nameIdentifiers"]).first.to_h.fetch("nameIdentifier", nil)
    result["name"] =
      if c["familyName"].present?
        [c["givenName"], c["familyName"]].join(" ")
      else
        c["name"]
      end

    result.except("nameIdentifiers", "nameType").compact
  end

  converted.unwrap
end
866
+
867
# Build the schema.org container (DataCatalog for datasets, Periodical
# otherwise) from a container hash and/or options[:container_title].
# Returns nil unless +element+ is a Hash, or is nil with a container_title
# supplied in options.
#
# The guard deliberately lets a nil +element+ through when a
# container_title is given, but the body then indexed nil and raised
# NoMethodError. A nil element is now treated as an empty hash.
def to_schema_org_container(element, options={})
  return nil unless (element.is_a?(Hash) || (element.nil? && options[:container_title].present?))

  element ||= {}

  {
    "@id" => element["identifier"],
    "@type" => (options[:type] == "Dataset") ? "DataCatalog" : "Periodical",
    "name" => element["title"] || options[:container_title] }.compact
end
875
+
876
# Convert identifier hashes (identifierType / identifier) into schema.org
# PropertyValue hashes. The options argument is accepted but not used.
def to_schema_org_identifiers(element, options={})
  property_values = Array.wrap(element).map do |identifier|
    {
      "@type" => "PropertyValue",
      "propertyID" => identifier["identifierType"],
      "value" => identifier["identifier"]
    }
  end

  property_values.unwrap
end
884
+
885
# Select the related identifiers with the given relation type and convert
# them to schema.org. "References" also matches "Cites" and "Documents".
# An ISSN with relation IsPartOf becomes a Periodical; everything else
# becomes an "@id"/"@type" pair, with "CreativeWork" as the default type.
# Returns nil when either argument is blank.
def to_schema_org_relation(related_identifiers: nil, relation_type: nil)
  return nil unless related_identifiers.present? && relation_type.present?

  accepted_types = relation_type == "References" ? ["References", "Cites", "Documents"] : [relation_type]
  matching = Array.wrap(related_identifiers).select { |ri| accepted_types.include?(ri["relationType"]) }

  relations = matching.map do |r|
    is_issn_container = r["relatedIdentifierType"] == "ISSN" && r["relationType"] == "IsPartOf"

    if is_issn_container
      { "@type" => "Periodical", "issn" => r["relatedIdentifier"] }.compact
    else
      schema_type = DC_TO_SO_TRANSLATIONS[r["resourceTypeGeneral"]] || "CreativeWork"
      { "@id" => normalize_id(r["relatedIdentifier"]), "@type" => schema_type }.compact
    end
  end

  relations.unwrap
end
902
+
903
# Convert DataCite funding references into schema.org Organization hashes.
# Returns nil when there are no funding references.
def to_schema_org_funder(funding_references)
  return nil unless funding_references.present?

  funders = Array.wrap(funding_references).map do |reference|
    {
      "@id" => reference["funderIdentifier"],
      "@type" => "Organization",
      "name" => reference["funderName"]
    }.compact
  end

  funders.unwrap
end
913
+
914
# Convert DataCite geoLocation entries into schema.org Place hashes.
# One entry may yield several places: one each for a point, a box and a
# polygon. An entry with only a place name yields a single GeoCoordinates
# place carrying the address. Returns nil when geo_location is blank.
def to_schema_org_spatial_coverage(geo_location)
  return nil unless geo_location.present?

  places = Array.wrap(geo_location).each_with_object([]) do |gl, collected|
    has_point = gl.fetch("geoLocationPoint", nil)
    has_box = gl.fetch("geoLocationBox", nil)
    has_polygon = gl.fetch("geoLocationPolygon", nil)

    if has_point
      point_geo = {
        "@type" => "GeoCoordinates",
        "address" => gl["geoLocationPlace"],
        "latitude" => gl.dig("geoLocationPoint", "pointLatitude"),
        "longitude" => gl.dig("geoLocationPoint", "pointLongitude")
      }
      collected << { "@type" => "Place", "geo" => point_geo }.compact
    end

    if has_box
      # box corners in south, west, north, east order
      corners = [
        gl.dig("geoLocationBox", "southBoundLatitude"),
        gl.dig("geoLocationBox", "westBoundLongitude"),
        gl.dig("geoLocationBox", "northBoundLatitude"),
        gl.dig("geoLocationBox", "eastBoundLongitude")
      ]
      box_geo = {
        "@type" => "GeoShape",
        "address" => gl["geoLocationPlace"],
        "box" => corners.compact.join(" ").presence
      }.compact
      collected << { "@type" => "Place", "geo" => box_geo }.compact
    end

    if has_polygon
      rings = Array.wrap(gl.dig("geoLocationPolygon")).map { |glp|
        points = Array.wrap(glp).map { |glpp|
          if glpp.dig("polygonPoint")
            # each point is [longitude, latitude]
            [glpp.dig("polygonPoint", "pointLongitude"), glpp.dig("polygonPoint", "pointLatitude")].compact
          end
        }
        points.compact.presence
      }
      polygon_geo = {
        "@type" => "GeoShape",
        "address" => gl["geoLocationPlace"],
        "polygon" => rings.compact.presence
      }
      collected << { "@type" => "Place", "geo" => polygon_geo }
    end

    place_only = gl.fetch("geoLocationPlace", nil) && !has_point && !has_box && !has_polygon
    if place_only
      place_geo = {
        "@type" => "GeoCoordinates",
        "address" => gl["geoLocationPlace"]
      }
      collected << { "@type" => "Place", "geo" => place_geo }.compact
    end
  end

  places.unwrap
end
971
+
972
# Rename schema.org "@type" and "@id" keys to "type" and "id" via
# map_hash_keys.
def from_schema_org(element)
  plain_keys = { "@type" => "type", "@id" => "id" }
  map_hash_keys(element: element, mapping: plain_keys)
end
977
+
978
# Convert schema.org creators into DataCite-style creator hashes with
# creatorName, nameIdentifier and affiliation. Modifies the hashes passed in.
# NOTE(review): normalize_orcid returns its input when validation fails, so
# any non-nil "@id" is tagged with the ORCID scheme here — confirm intent.
def from_schema_org_creators(element)
  Array.wrap(element).map do |c|
    # a plain-string affiliation carries no identifier
    c["affiliation"] = { "name" => c["affiliation"] } if c["affiliation"].is_a?(String)

    affiliation_id = c.dig("affiliation", "@id").to_s
    affiliation_identifier_scheme, scheme_uri =
      if affiliation_id.starts_with?("https://ror.org")
        ["ROR", "https://ror.org/"]
      elsif affiliation_id.starts_with?("https://isni.org")
        ["ISNI", "https://isni.org/isni/"]
      else
        [nil, nil]
      end

    if normalize_orcid(c["@id"])
      c["nameIdentifier"] = [{
        "__content__" => c["@id"],
        "nameIdentifierScheme" => "ORCID",
        "schemeUri" => "https://orcid.org" }]
    end

    # when several types are given, keep the first Person/Organization one
    c["@type"] = c["@type"].find { |t| %w(Person Organization).include?(t) } if c["@type"].is_a?(Array)

    name_type = c["@type"].present? ? c["@type"].titleize + "al" : nil
    c["creatorName"] = { "nameType" => name_type, "__content__" => c["name"] }.compact

    c["affiliation"] = {
      "__content__" => c.dig("affiliation", "name"),
      "affiliationIdentifier" => c.dig("affiliation", "@id"),
      "affiliationIdentifierScheme" => affiliation_identifier_scheme,
      "schemeUri" => scheme_uri }.compact.presence

    c.except("@id", "@type", "name").compact
  end
end
1002
+
1003
# Convert schema.org contributors into DataCite-style contributor hashes
# with contributorName, nameIdentifier and affiliation. Modifies the hashes
# passed in.
# NOTE(review): normalize_orcid returns its input when validation fails, so
# any non-nil "@id" is tagged with the ORCID scheme here — confirm intent.
def from_schema_org_contributors(element)
  Array.wrap(element).map do |c|
    # a plain-string affiliation carries no identifier
    c["affiliation"] = { "name" => c["affiliation"] } if c["affiliation"].is_a?(String)

    affiliation_id = c.dig("affiliation", "@id").to_s
    affiliation_identifier_scheme, scheme_uri =
      if affiliation_id.starts_with?("https://ror.org")
        ["ROR", "https://ror.org/"]
      elsif affiliation_id.starts_with?("https://isni.org")
        ["ISNI", "https://isni.org/isni/"]
      else
        [nil, nil]
      end

    if normalize_orcid(c["@id"])
      c["nameIdentifier"] = [{
        "__content__" => c["@id"],
        "nameIdentifierScheme" => "ORCID",
        "schemeUri" => "https://orcid.org" }]
    end

    name_type = c["@type"].present? ? c["@type"].titleize + "al" : nil
    c["contributorName"] = { "nameType" => name_type, "__content__" => c["name"] }.compact

    c["affiliation"] = {
      "__content__" => c.dig("affiliation", "name"),
      "affiliationIdentifier" => c.dig("affiliation", "@id"),
      "affiliationIdentifierScheme" => affiliation_identifier_scheme,
      "schemeUri" => scheme_uri }.compact.presence

    c.except("@id", "@type", "name").compact
  end
end
1026
+
1027
# Rename the keys of a hash (or of each hash in an array) according to
# +mapping+; keys without an entry in +mapping+ are kept. Nested hash
# values are processed recursively with the same mapping.
#
# The previous version always passed nested hashes to to_schema_org, so
# from_schema_org applied the opposite mapping to nested values. Results
# for to_schema_org callers are unchanged.
def map_hash_keys(element: nil, mapping: nil)
  Array.wrap(element).map do |a|
    a.each_with_object({}) do |(k, v), hsh|
      new_key = mapping.fetch(k, k)
      hsh[new_key] = v.is_a?(Hash) ? map_hash_keys(element: v, mapping: mapping) : v
    end
  end.unwrap
end
1040
+
1041
# Convert a related-identifier hash into a schema.org PropertyValue hash.
def to_identifier(identifier)
  property_id = identifier["relatedIdentifierType"]
  value = identifier["relatedIdentifier"]

  { "@type" => "PropertyValue", "propertyID" => property_id, "value" => value }
end
1047
+
1048
# Convert citeproc (CSL) name hashes into the internal author form.
# A name with "literal" becomes an Organization whose "name" is the
# literal; any other name becomes a Person whose "name" is "given family".
# "given"/"family" are copied to "givenName"/"familyName" and the CSL keys
# are removed from the returned hash.
#
# The organization branch previously stored the literal under
# "creatorName", while the person branch, to_citeproc and to_ris all use
# "name", so organization names were lost downstream.
def from_citeproc(element)
  Array.wrap(element).map do |a|
    if a["literal"].present?
      a["@type"] = "Organization"
      a["name"] = a["literal"]
    else
      a["@type"] = "Person"
      a["name"] = [a["given"], a["family"]].compact.join(" ")
    end
    a["givenName"] = a["given"]
    a["familyName"] = a["family"]
    a.except("given", "family", "literal").compact
  end.unwrap
end
1062
+
1063
# Convert internal author hashes into citeproc (CSL) names: family/given
# are copied from familyName/givenName, and "literal" is set from "name"
# when no familyName is present. Modifies the hashes passed in.
# Returns nil when there are no names.
def to_citeproc(element)
  internal_keys = %w(nameType type @type id @id name familyName givenName affiliation nameIdentifiers contributorType)

  csl_names = Array.wrap(element).map do |a|
    a["family"] = a["familyName"]
    a["given"] = a["givenName"]
    a["literal"] = a["name"] unless a["familyName"].present?
    a.except(*internal_keys).compact
  end

  csl_names.presence
end
1071
+
1072
# Format authors for RIS: "family, given" when a familyName is present,
# otherwise the plain "name".
def to_ris(element)
  formatted = Array.wrap(element).map do |author|
    next author["name"] unless author["familyName"].present?

    [author["familyName"], author["givenName"]].join(", ")
  end

  formatted.unwrap
end
1081
+
1082
# Scrub HTML from text with Bolognese::WhitelistScrubber, using the tags in
# options[:tags] (defaulting to a small inline set). Accepts a String, a
# Hash (value under options[:content], default "__content__") or an Array
# of either. With options[:new_line] only runs of spaces/tabs are
# collapsed; otherwise all whitespace is squished. Recursive calls pass on
# only the :new_line option. Returns nil for any other input type.
def sanitize(text, options={})
  options[:tags] ||= Set.new(%w(strong em b i code pre sub sup br))
  content = options[:content] || "__content__"
  custom_scrubber = Bolognese::WhitelistScrubber.new(options)

  case text
  when String
    scrubbed = Loofah.scrub_fragment(text, custom_scrubber).to_s
    if options[:new_line]
      # collapse runs of spaces and tabs, leaving line breaks alone
      scrubbed.gsub(/[ \t]+/, ' ').strip
    else
      scrubbed.squish
    end
  when Hash
    sanitize(text.fetch(content, nil), new_line: options[:new_line])
  when Array
    cleaned = text.map do |e|
      value = e.is_a?(Hash) ? e.fetch(content, nil) : e
      sanitize(value, new_line: options[:new_line])
    end.uniq
    options[:first] ? cleaned.first : cleaned.unwrap
  else
    nil
  end
end
1103
+
1104
# Split an https://github.com/ URL into :owner, :repo, :release and :path,
# taken from path segments 0, 1, 3 and 4 onwards. Keys whose value is nil
# are omitted. Returns {} for anything that is not such a URL.
def github_from_url(url)
  return {} unless /\Ahttps:\/\/github\.com\/(.+)(?:\/)?(.+)?(?:\/tree\/)?(.*)\z/.match(url)

  segments = URI.parse(url).path[1..-1].split('/')
  owner, repo, _kind, release = segments
  path = segments.length > 3 ? segments.drop(4).join("/") : nil

  { owner: owner, repo: repo, release: release, path: path }.compact
end
1114
+
1115
# Repository name from a GitHub URL, or nil when it cannot be determined.
def github_repo_from_url(url)
  parts = github_from_url(url)
  parts.fetch(:repo, nil)
end
1118
+
1119
# Release segment from a GitHub URL, or nil when the URL has none.
def github_release_from_url(url)
  parts = github_from_url(url)
  parts.fetch(:release, nil)
end
1122
+
1123
# Owner (user or organization) from a GitHub URL, or nil when absent.
def github_owner_from_url(url)
  parts = github_from_url(url)
  parts.fetch(:owner, nil)
end
1126
+
1127
# URL of the owner's GitHub page, or nil when the URL has no owner.
def github_as_owner_url(url)
  parts = github_from_url(url)
  return nil unless parts[:owner].present?

  "https://github.com/#{parts[:owner]}"
end
1131
+
1132
# URL of the repository root, or nil when the URL names no repository.
def github_as_repo_url(url)
  parts = github_from_url(url)
  return nil unless parts[:repo].present?

  "https://github.com/#{parts[:owner]}/#{parts[:repo]}"
end
1136
+
1137
# URL of the repository tree at the release, or nil when the URL has no
# release segment.
def github_as_release_url(url)
  parts = github_from_url(url)
  return nil unless parts[:release].present?

  "https://github.com/#{parts[:owner]}/#{parts[:repo]}/tree/#{parts[:release]}"
end
1141
+
1142
# raw.githubusercontent.com URL of the repository's codemeta.json.
# When the URL already points at a codemeta.json file, its release and path
# are reused; otherwise the file at the root of "master" is assumed.
# Returns nil when the URL has no owner.
def github_as_codemeta_url(url)
  parts = github_from_url(url)
  raw_base = "https://raw.githubusercontent.com/#{parts[:owner]}/#{parts[:repo]}"

  if parts[:path].to_s.end_with?("codemeta.json")
    "#{raw_base}/#{parts[:release]}/#{parts[:path]}"
  elsif parts[:owner].present?
    "#{raw_base}/master/codemeta.json"
  end
end
1151
+
1152
# Convert an ISO 8601 date/time string into a citeproc "date-parts" hash.
# Year, month and day are read from fixed character positions; parts that
# come out as 0 are dropped. nil input yields an empty date-parts list,
# and a TypeError while slicing yields nil.
def get_date_parts(iso8601_time)
  return { 'date-parts' => [[]] } if iso8601_time.nil?

  # character ranges of year, month and day in "YYYY-MM-DD"
  numbers = [0..3, 5..6, 8..9].map { |range| iso8601_time[range].to_i }
  { 'date-parts' => [numbers.reject { |number| number == 0 }] }
rescue TypeError
  nil
end
1162
+
1163
# Convert a citeproc "date-parts" hash into a date string, using the first
# date-parts entry and delegating to get_date_from_parts.
def get_date_from_date_parts(date_as_parts)
  first_entry = date_as_parts.fetch("date-parts", []).first
  get_date_from_parts(first_entry[0], first_entry[1], first_entry[2])
end
1168
+
1169
# Build a "YYYY-MM-DD" style string from year, month and day, zero-padding
# each part. Parts that pad out to "00" (missing or zero month/day) are
# left out, so partial dates such as "2015-04" are possible.
def get_date_from_parts(year, month = nil, day = nil)
  padded = [[year, 4], [month, 2], [day, 2]].map { |value, width| value.to_s.rjust(width, '0') }
  padded.reject { |part| part == "00" }.join("-")
end
1172
+
1173
# Build a citeproc "date-parts" hash from year, month and day.
# Each part is converted with to_i and parts equal to 0 are dropped.
def get_date_parts_from_parts(year, month = nil, day = nil)
  numbers = [year, month, day].map(&:to_i)
  { 'date-parts' => [numbers.reject { |number| number == 0 }] }
end
1176
+
1177
# Return the first ten characters (the date part) of an ISO 8601 string,
# or nil for nil input.
def get_iso8601_date(iso8601_time)
  iso8601_time[0..9] unless iso8601_time.nil?
end
1182
+
1183
# Return [year, month] as integers from an ISO 8601 string, dropping parts
# that come out as 0. Returns [] for nil input.
def get_year_month(iso8601_time)
  return [] if iso8601_time.nil?

  numbers = [iso8601_time[0..3], iso8601_time[5..6]].map(&:to_i)
  numbers.reject { |number| number == 0 }
end
1191
+
1192
# Return [year, month, day] as integers from an ISO 8601 string, dropping
# parts that come out as 0. Returns [] for nil input.
def get_year_month_day(iso8601_time)
  return [] if iso8601_time.nil?

  slices = [iso8601_time[0..3], iso8601_time[5..6], iso8601_time[8..9]]
  slices.map(&:to_i).reject { |number| number == 0 }
end
1201
+
1202
+ # parsing of incomplete iso8601 timestamps such as 2015-04 is broken
1203
+ # in standard library
1204
+ # return nil if invalid iso8601 timestamp
1205
# Parse an ISO 8601 timestamp with ISO8601::DateTime and return it as a
# UTC Time. Any StandardError raised along the way yields nil.
def get_datetime_from_iso8601(iso8601_time)
  begin
    parsed = ISO8601::DateTime.new(iso8601_time)
    parsed.to_time.utc
  rescue
    nil
  end
end
1210
+
1211
+ # iso8601 datetime without hyphens and colons, used by Crossref
1212
+ # return nil if invalid
1213
# Convert a compact timestamp ("YYYYMMDDHHMMSS", no hyphens or colons) into
# "YYYY-MM-DDTHH:MM:SSZ". Returns nil when the value cannot be parsed.
def get_datetime_from_time(time)
  begin
    parsed = DateTime.strptime(time.to_s, "%Y%m%d%H%M%S")
    parsed.strftime('%Y-%m-%dT%H:%M:%SZ')
  rescue ArgumentError
    nil
  end
end
1218
+
1219
# Return the "date" of the first entry in +dates+ whose dateType equals
# date_type, or nil when there is none.
def get_date(dates, date_type)
  match = Array.wrap(dates).find { |entry| entry["dateType"] == date_type }
  (match || {}).fetch("date", nil)
end
1223
+
1224
# Return the contributors whose contributorType equals contributor_type,
# always as an array.
#
# The input is wrapped with Array.wrap, as get_date and get_identifier do,
# so nil yields [] and a single contributor hash is treated as a
# one-element list. The previous version called select on the raw
# argument, which raised on nil and iterated key/value pairs on a Hash.
def get_contributor(contributor, contributor_type)
  Array.wrap(contributor).select { |c| c["contributorType"] == contributor_type }
end
1227
+
1228
# Return the "identifier" of the first entry in +identifiers+ whose
# identifierType equals identifier_type, or nil when there is none.
def get_identifier(identifiers, identifier_type)
  match = Array.wrap(identifiers).find { |entry| entry["identifierType"] == identifier_type }
  (match || {}).fetch("identifier", nil)
end
1232
+
1233
# Return the canonical spelling of an identifier type (for example
# "doi" -> "DOI", "arxiv" -> "arXiv"), matched case-insensitively.
# Unknown types are returned unchanged; blank input yields nil.
def get_identifier_type(identifier_type)
  return nil unless identifier_type.present?

  canonical_names = {
    "ark"      => "ARK",
    "arxiv"    => "arXiv",
    "bibcode"  => "bibcode",
    "doi"      => "DOI",
    "ean13"    => "EAN13",
    "eissn"    => "EISSN",
    "handle"   => "Handle",
    "igsn"     => "IGSN",
    "isbn"     => "ISBN",
    "issn"     => "ISSN",
    "istc"     => "ISTC",
    "lissn"    => "LISSN",
    "lsid"     => "LSID",
    "pmid"     => "PMID",
    "purl"     => "PURL",
    "upc"      => "UPC",
    "url"      => "URL",
    "urn"      => "URN",
    "md5"      => "md5",
    "minid"    => "minid",
    "dataguid" => "dataguid",
    "cstr"     => "CSTR",
    "rrid"     => "RRID"
  }

  canonical_names[identifier_type.downcase] || identifier_type
end
1264
+
1265
# Parse a comma-separated series-information string ("Title, 12(3), 1-10")
# into title, volume, issue, firstPage and lastPage. Volume and issue are
# only read when there are more than two segments; pages come from the last
# segment when there is more than one. Missing parts are left out of the
# result. Returns {} for blank input.
def get_series_information(str)
  return {} unless str.present?

  segments = str.split(",").map(&:strip)

  volume = nil
  issue = nil
  if segments.length > 2
    # split the second segment around the last parenthesised group
    before, parenthesised, after = segments[1].rpartition(/\(([^)]+)\)/)
    volume = before.presence || after.presence
    issue = parenthesised[1...-1].presence
  end

  pages = segments.length > 1 ? segments.last : nil
  first_page = nil
  last_page = nil
  if pages.present?
    page_range = pages.split("-").map(&:strip)
    first_page = page_range[0]
    last_page = page_range[1]
  end

  {
    "title" => segments.first,
    "volume" => volume,
    "issue" => issue,
    "firstPage" => first_page,
    "lastPage" => last_page
  }.compact
end
1285
+
1286
# Runs the JsonLint linter over +json+ and collects its findings.
#
# json - the JSON to check.
#
# Returns an Array of errors as filled in by the linter, or
# ["No JSON provided"] when +json+ is not present.
def jsonlint(json)
  if json.present?
    errors = []
    # check_data is called through send; presumably it is not part of the
    # linter's public interface.
    JsonLint::Linter.new.send(:check_data, json, errors)
    errors
  else
    ["No JSON provided"]
  end
end
1294
+
1295
# Resolves a license name, SPDX license id or license URL to a rights hash.
#
# A license from the bundled SPDX list matches when its "name" or "licenseId"
# equals +name+, or when its first "seeAlso" URL equals the normalized form
# of +name+.
#
# name - the license name, SPDX id or URL.
#
# Returns a Hash with "rights", "rightsUri", "rightsIdentifier" (downcased
# SPDX id), "rightsIdentifierScheme" and "schemeUri" when a license matches
# (nil values are left out), otherwise { "rights" => name }.
def name_to_spdx(name)
  licenses = resource_json(:spdx).fetch("licenses")

  # Loop-invariant, so computed once instead of once per SPDX entry.
  url = normalize_cc_url(name)

  license = licenses.find do |l|
    l["name"] == name ||
      l["licenseId"] == name ||
      # Only compare URLs when normalization produced one; otherwise a nil
      # result would equal the nil "first URL" of a license whose seeAlso
      # list is empty and produce a false match.
      (url && Array.wrap(l["seeAlso"]).first == url)
  end

  if license
    {
      "rights" => license["name"],
      "rightsUri" => Array.wrap(license["seeAlso"]).first,
      "rightsIdentifier" => license["licenseId"].downcase,
      "rightsIdentifierScheme" => "SPDX",
      "schemeUri" => "https://spdx.org/licenses/" }.compact
  else
    { "rights" => name }
  end
end
1310
+
1311
# Resolves a rights hash to its SPDX form where possible.
#
# A license from the bundled SPDX list matches when
#   * its "licenseId" equals hsh["rightsIdentifier"] ignoring case, or
#   * its first "seeAlso" URL equals the normalized rights URI
#     ("rightsURI" or "rightsUri") or the normalized hsh["rights"], or
#   * its "name" equals hsh["rights"].
#
# hsh - a Hash describing the rights; keys read here are "rights",
#       "__content__", "rightsURI"/"rightsUri", "rightsIdentifier",
#       "rightsIdentifierScheme", "schemeUri" and "lang".
#
# Returns a Hash with "rights", "rightsUri", "rightsIdentifier",
# "rightsIdentifierScheme", "schemeUri" and "lang"; nil values are left out.
# Values come from the SPDX entry when one matches, otherwise from +hsh+.
def hsh_to_spdx(hsh)
  licenses = resource_json(:spdx).fetch("licenses")

  identifier = hsh["rightsIdentifier"]
  # Accept both spellings of the URI key, as the fallback branch below does.
  rights_uri = hsh["rightsURI"] || hsh["rightsUri"]

  # Normalize once up front instead of twice per SPDX entry. nil results are
  # dropped so they can never equal the nil "first URL" of a license whose
  # seeAlso list is empty.
  candidate_urls = [normalize_cc_url(rights_uri), normalize_cc_url(hsh["rights"])].compact

  license = licenses.find do |l|
    (identifier.is_a?(String) && l["licenseId"].casecmp?(identifier)) ||
      candidate_urls.include?(Array.wrap(l["seeAlso"]).first) ||
      l["name"] == hsh["rights"]
  end

  if license
    {
      "rights" => license["name"],
      "rightsUri" => Array.wrap(license["seeAlso"]).first,
      "rightsIdentifier" => license["licenseId"].downcase,
      "rightsIdentifierScheme" => "SPDX",
      "schemeUri" => "https://spdx.org/licenses/",
      "lang" => hsh["lang"] }.compact
  else
    {
      "rights" => hsh["__content__"] || hsh["rights"],
      "rightsUri" => rights_uri,
      "rightsIdentifier" => identifier.present? ? identifier.downcase : nil,
      "rightsIdentifierScheme" => hsh["rightsIdentifierScheme"],
      "schemeUri" => hsh["schemeUri"],
      "lang" => hsh["lang"] }.compact
  end
end
1333
+
1334
# Wraps a plain subject name in the list-of-hashes subject form.
#
# name - the subject text; it is passed through sanitize.
#
# Returns a one-element Array holding a Hash with the single key "subject".
def name_to_subject(name)
  entry = { "subject" => sanitize(name) }
  [entry]
end
1337
+
1338
# Converts a subject hash into the list-of-hashes subject form.
#
# hsh - a Hash; the subject text is read from "__content__" or, failing
#       that, "subject" and passed through sanitize. "schemeURI"/"schemeUri"
#       and "valueURI"/"valueUri" are accepted in either spelling, with the
#       upper-case form taking precedence.
#
# Returns a one-element Array holding a Hash with the keys "subject",
# "subjectScheme", "classificationCode", "schemeUri", "valueUri" and "lang";
# keys whose value is nil are left out.
def hsh_to_subject(hsh)
  subject = {}
  subject["subject"] = sanitize(hsh["__content__"] || hsh["subject"])
  subject["subjectScheme"] = hsh["subjectScheme"]
  subject["classificationCode"] = hsh["classificationCode"]
  subject["schemeUri"] = hsh["schemeURI"] || hsh["schemeUri"]
  subject["valueUri"] = hsh["valueURI"] || hsh["valueUri"]
  subject["lang"] = hsh["lang"]

  [subject.compact]
end
1347
+
1348
# Translates DFG field ids into Fields of Science and Technology subjects.
#
# dfg_ids - a single DFG id, an Array of ids, or nil.
#
# Returns an Array with one subject Hash ("classificationCode", "subject",
# "subjectScheme", "schemeUri") per entry of the bundled DFG mapping whose
# "dfgId" is among +dfg_ids+, in the order of the mapping.
def dfg_ids_to_fos(dfg_ids)
  wanted = Array.wrap(dfg_ids)
  fields = resource_json(:dfg).fetch("dfgFields")

  fields.each_with_object([]) do |field, subjects|
    next unless wanted.include?(field["dfgId"])

    subjects << {
      "classificationCode" => field["fosId"],
      "subject" => field["fosLabel"],
      "subjectScheme" => "Fields of Science and Technology (FOS)",
      "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
    }
  end
end
1362
+
1363
# Returns the first entry of +descriptions+ whose "descriptionType" is
# "Abstract", or nil when there is none. +descriptions+ is read from the
# receiver and may be a single entry, a list, or nil.
def abstract_description
  Array.wrap(descriptions).detect do |description|
    description["descriptionType"] == "Abstract"
  end
end
1367
+
1368
# Builds the container description (the series, journal or repository a
# resource belongs to).
#
# Two sources are tried in order:
#   1. the first related item with relationType "IsPublishedIn";
#   2. otherwise the description of type "SeriesInformation", parsed with
#      get_series_information, combined with the first related identifier
#      with relationType "IsPartOf".
#
# types               - object answering #dig; "resourceTypeGeneral" decides
#                       the container type ("DataRepository" for "Dataset",
#                       "Series" for everything else).
# related_items       - a related item hash, an Array of them, or nil.
# related_identifiers - a related identifier hash, an Array of them, or nil.
# descriptions        - a description hash, an Array of them, or nil.
#
# Returns a Hash with nil values left out, or nil when neither source yields
# a container.
def generate_container(types, related_items, related_identifiers, descriptions)
  resource_type_general = types.respond_to?(:dig) ? types.dig("resourceTypeGeneral") : nil
  container_type = resource_type_general == "Dataset" ? "DataRepository" : "Series"

  # relatedItem container
  published_in = Array.wrap(related_items).find { |item| item["relationType"] == "IsPublishedIn" }.to_h

  unless published_in.empty?
    container = { "type" => container_type }
    container["identifier"] = published_in.dig("relatedItemIdentifier", "relatedItemIdentifier")
    container["identifierType"] = published_in.dig("relatedItemIdentifier", "relatedItemIdentifierType")
    first_title = published_in.dig("titles", 0)
    container["title"] = first_title ? parse_attributes(first_title, content: "title", first: true) : nil
    container["volume"] = published_in["volume"]
    container["issue"] = published_in["issue"]
    container["edition"] = published_in["edition"]
    container["number"] = published_in["number"]
    # the number doubles as chapter number when it is typed as a chapter
    container["chapterNumber"] = published_in["numberType"] == "Chapter" ? published_in["number"] : nil
    container["firstPage"] = published_in["firstPage"]
    container["lastPage"] = published_in["lastPage"]
    return container.compact
  end

  # Legacy SeriesInformation/relatedIdentifier container fallback
  series_description = Array.wrap(descriptions).find { |d| d["descriptionType"] == "SeriesInformation" }
  series = get_series_information(series_description.to_h.fetch("description", nil))

  part_of = Array.wrap(related_identifiers).find { |ri| ri["relationType"] == "IsPartOf" }.to_h

  return nil unless series["title"].present?

  {
    "type" => container_type,
    "identifier" => part_of["relatedIdentifier"],
    "identifierType" => part_of["relatedIdentifierType"],
    "title" => series["title"],
    "volume" => series["volume"],
    "issue" => series["issue"],
    "firstPage" => series["firstPage"],
    "lastPage" => series["lastPage"]
  }.compact
end
1409
+
1410
+ private
1411
+
1412
# Normalizes a URI and removes one trailing slash from its path.
#
# uri - a URI object answering #normalize; the result of #normalize must
#       answer #path, #path= and #to_s.
#
# Returns the normalized URI as a String.
def normalize_uri_with_path_cleanup(uri)
  cleaned = uri.normalize
  path = cleaned.path
  # \z anchors at the very end, so only a single final "/" is dropped
  cleaned.path = path.sub(%r{/\z}, "") if path.present?
  cleaned.to_s
end
26
1417
  end
27
1418
  end