bolognese 0.2.2 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/build.yml +9 -0
  3. data/.github/workflows/changelog.yml +36 -0
  4. data/.github/workflows/ci.yml +22 -0
  5. data/.github/workflows/pull-request.yml +9 -0
  6. data/.github/workflows/release.yml +32 -0
  7. data/.gitignore +4 -0
  8. data/.rubocop.yml +658 -0
  9. data/CHANGELOG.md +1864 -0
  10. data/CITATION +17 -0
  11. data/Gemfile +1 -1
  12. data/Gemfile.lock +251 -99
  13. data/README.md +1026 -2
  14. data/Rakefile +1 -0
  15. data/bin/bolognese +5 -1
  16. data/bolognese.gemspec +33 -21
  17. data/lib/bolognese/array.rb +13 -0
  18. data/lib/bolognese/author_utils.rb +115 -39
  19. data/lib/bolognese/citeproc_extensions.rb +48 -0
  20. data/lib/bolognese/cli.rb +28 -15
  21. data/lib/bolognese/datacite_utils.rb +418 -0
  22. data/lib/bolognese/doi_utils.rb +45 -23
  23. data/lib/bolognese/metadata.rb +250 -18
  24. data/lib/bolognese/metadata_utils.rb +228 -0
  25. data/lib/bolognese/pubmed.rb +2 -0
  26. data/lib/bolognese/readers/bibtex_reader.rb +100 -0
  27. data/lib/bolognese/readers/citeproc_reader.rb +125 -0
  28. data/lib/bolognese/readers/codemeta_reader.rb +108 -0
  29. data/lib/bolognese/readers/crosscite_reader.rb +17 -0
  30. data/lib/bolognese/readers/crossref_reader.rb +413 -0
  31. data/lib/bolognese/readers/datacite_json_reader.rb +17 -0
  32. data/lib/bolognese/readers/datacite_reader.rb +338 -0
  33. data/lib/bolognese/readers/npm_reader.rb +115 -0
  34. data/lib/bolognese/readers/ris_reader.rb +114 -0
  35. data/lib/bolognese/readers/schema_org_reader.rb +264 -0
  36. data/lib/bolognese/string.rb +3 -1
  37. data/lib/bolognese/utils.rb +1403 -12
  38. data/lib/bolognese/version.rb +1 -1
  39. data/lib/bolognese/whitelist_scrubber.rb +47 -0
  40. data/lib/bolognese/writers/bibtex_writer.rb +32 -0
  41. data/lib/bolognese/writers/citation_writer.rb +14 -0
  42. data/lib/bolognese/writers/citeproc_writer.rb +11 -0
  43. data/lib/bolognese/writers/codemeta_writer.rb +29 -0
  44. data/lib/bolognese/writers/crosscite_writer.rb +11 -0
  45. data/lib/bolognese/writers/crossref_writer.rb +11 -0
  46. data/lib/bolognese/writers/csv_writer.rb +24 -0
  47. data/lib/bolognese/writers/datacite_json_writer.rb +13 -0
  48. data/lib/bolognese/writers/datacite_writer.rb +12 -0
  49. data/lib/bolognese/writers/jats_writer.rb +138 -0
  50. data/lib/bolognese/writers/rdf_xml_writer.rb +11 -0
  51. data/lib/bolognese/writers/ris_writer.rb +29 -0
  52. data/lib/bolognese/writers/schema_org_writer.rb +55 -0
  53. data/lib/bolognese/writers/turtle_writer.rb +11 -0
  54. data/lib/bolognese.rb +19 -4
  55. data/package.json +12 -0
  56. data/resources/2008/09/xsd.xsl +997 -0
  57. data/resources/datacite-contributorType-v4.xsd +35 -0
  58. data/resources/datacite-dateType-v4.xsd +25 -0
  59. data/resources/datacite-descriptionType-v4.xsd +19 -0
  60. data/resources/datacite-funderIdentifierType-v4.xsd +15 -0
  61. data/resources/datacite-nameType-v4.xsd +10 -0
  62. data/resources/datacite-relatedIdentifierType-v4.xsd +34 -0
  63. data/resources/datacite-relationType-v4.xsd +49 -0
  64. data/resources/datacite-resourceType-v4.xsd +28 -0
  65. data/resources/datacite-titleType-v4.xsd +14 -0
  66. data/resources/kernel-2.1/include/datacite-contributorType-v1.1.xsd +22 -0
  67. data/resources/kernel-2.1/include/datacite-contributorType-v2.xsd +3 -0
  68. data/resources/kernel-2.1/include/datacite-dateType-v1.1.xsd +31 -0
  69. data/resources/kernel-2.1/include/datacite-dateType-v2.xsd +3 -0
  70. data/resources/kernel-2.1/include/datacite-descriptionType-v1.1.xsd +14 -0
  71. data/resources/kernel-2.1/include/datacite-descriptionType-v2.xsd +3 -0
  72. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v1.1.xsd +24 -0
  73. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v2.xsd +3 -0
  74. data/resources/kernel-2.1/include/datacite-relationType-v1.1.xsd +29 -0
  75. data/resources/kernel-2.1/include/datacite-relationType-v2.xsd +3 -0
  76. data/resources/kernel-2.1/include/datacite-resourceType-v1.1.xsd +22 -0
  77. data/resources/kernel-2.1/include/datacite-resourceType-v2.xsd +3 -0
  78. data/resources/kernel-2.1/include/datacite-titleType-v1.1.xsd +11 -0
  79. data/resources/kernel-2.1/include/datacite-titleType-v2.xsd +3 -0
  80. data/resources/kernel-2.1/metadata.xsd +315 -0
  81. data/resources/kernel-2.2/include/datacite-contributorType-v2.xsd +29 -0
  82. data/resources/kernel-2.2/include/datacite-dateType-v2.xsd +21 -0
  83. data/resources/kernel-2.2/include/datacite-descriptionType-v2.xsd +15 -0
  84. data/resources/kernel-2.2/include/datacite-relatedIdentifierType-v2.xsd +25 -0
  85. data/resources/kernel-2.2/include/datacite-relationType-v2.xsd +29 -0
  86. data/resources/kernel-2.2/include/datacite-resourceType-v2.xsd +23 -0
  87. data/resources/kernel-2.2/include/datacite-titleType-v2.xsd +10 -0
  88. data/resources/kernel-2.2/metadata.xsd +316 -0
  89. data/resources/kernel-3/include/datacite-contributorType-v3.1.xsd +35 -0
  90. data/resources/kernel-3/include/datacite-dateType-v3.xsd +21 -0
  91. data/resources/kernel-3/include/datacite-descriptionType-v3.xsd +17 -0
  92. data/resources/kernel-3/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
  93. data/resources/kernel-3/include/datacite-relationType-v3.1.xsd +38 -0
  94. data/resources/kernel-3/include/datacite-resourceType-v3.xsd +26 -0
  95. data/resources/kernel-3/include/datacite-titleType-v3.xsd +12 -0
  96. data/resources/kernel-3/include/xml.xsd +286 -0
  97. data/resources/kernel-3/metadata.xsd +380 -0
  98. data/resources/kernel-3.0/include/datacite-contributorType-v3.xsd +33 -0
  99. data/resources/kernel-3.0/include/datacite-dateType-v3.xsd +21 -0
  100. data/resources/kernel-3.0/include/datacite-descriptionType-v3.xsd +17 -0
  101. data/resources/kernel-3.0/include/datacite-relatedIdentifierType-v3.xsd +27 -0
  102. data/resources/kernel-3.0/include/datacite-relationType-v3.xsd +33 -0
  103. data/resources/kernel-3.0/include/datacite-resourceType-v3.xsd +26 -0
  104. data/resources/kernel-3.0/include/datacite-titleType-v3.xsd +12 -0
  105. data/resources/kernel-3.0/include/xml.xsd +286 -0
  106. data/resources/kernel-3.0/metadata.xsd +377 -0
  107. data/resources/kernel-3.1/include/datacite-contributorType-v3.1.xsd +35 -0
  108. data/resources/kernel-3.1/include/datacite-dateType-v3.xsd +21 -0
  109. data/resources/kernel-3.1/include/datacite-descriptionType-v3.xsd +17 -0
  110. data/resources/kernel-3.1/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
  111. data/resources/kernel-3.1/include/datacite-relationType-v3.1.xsd +38 -0
  112. data/resources/kernel-3.1/include/datacite-resourceType-v3.xsd +26 -0
  113. data/resources/kernel-3.1/include/datacite-titleType-v3.xsd +12 -0
  114. data/resources/kernel-3.1/include/xml.xsd +286 -0
  115. data/resources/kernel-3.1/metadata.xsd +380 -0
  116. data/resources/kernel-4/include/datacite-contributorType-v4.xsd +37 -0
  117. data/resources/kernel-4/include/datacite-dateType-v4.xsd +27 -0
  118. data/resources/kernel-4/include/datacite-descriptionType-v4.xsd +19 -0
  119. data/resources/kernel-4/include/datacite-funderIdentifierType-v4.xsd +16 -0
  120. data/resources/kernel-4/include/datacite-nameType-v4.xsd +10 -0
  121. data/resources/kernel-4/include/datacite-numberType-v4.xsd +12 -0
  122. data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +39 -0
  123. data/resources/kernel-4/include/datacite-relationType-v4.xsd +59 -0
  124. data/resources/kernel-4/include/datacite-resourceType-v4.xsd +52 -0
  125. data/resources/kernel-4/include/datacite-titleType-v4.xsd +14 -0
  126. data/resources/kernel-4/include/xml.xsd +286 -0
  127. data/resources/kernel-4/metadata.xsd +715 -0
  128. data/resources/kernel-4.0/include/datacite-contributorType-v4.xsd +35 -0
  129. data/resources/kernel-4.0/include/datacite-dateType-v4.xsd +21 -0
  130. data/resources/kernel-4.0/include/datacite-descriptionType-v4.xsd +19 -0
  131. data/resources/kernel-4.0/include/datacite-funderIdentifierType-v4.xsd +15 -0
  132. data/resources/kernel-4.0/include/datacite-relatedIdentifierType-v4.xsd +32 -0
  133. data/resources/kernel-4.0/include/datacite-relationType-v4.xsd +39 -0
  134. data/resources/kernel-4.0/include/datacite-resourceType-v4.xsd +26 -0
  135. data/resources/kernel-4.0/include/datacite-titleType-v4.xsd +14 -0
  136. data/resources/kernel-4.0/include/xml.xsd +286 -0
  137. data/resources/kernel-4.0/metadata.xsd +470 -0
  138. data/resources/kernel-4.1/include/datacite-contributorType-v4.xsd +35 -0
  139. data/resources/kernel-4.1/include/datacite-dateType-v4.1.xsd +23 -0
  140. data/resources/kernel-4.1/include/datacite-descriptionType-v4.xsd +19 -0
  141. data/resources/kernel-4.1/include/datacite-funderIdentifierType-v4.xsd +15 -0
  142. data/resources/kernel-4.1/include/datacite-nameType-v4.1.xsd +10 -0
  143. data/resources/kernel-4.1/include/datacite-relatedIdentifierType-v4.xsd +32 -0
  144. data/resources/kernel-4.1/include/datacite-relationType-v4.1.xsd +46 -0
  145. data/resources/kernel-4.1/include/datacite-resourceType-v4.1.xsd +28 -0
  146. data/resources/kernel-4.1/include/datacite-titleType-v4.xsd +14 -0
  147. data/resources/kernel-4.1/include/xml.xsd +286 -0
  148. data/resources/kernel-4.1/metadata.xsd +483 -0
  149. data/resources/kernel-4.2/include/datacite-contributorType-v4.xsd +35 -0
  150. data/resources/kernel-4.2/include/datacite-dateType-v4.xsd +25 -0
  151. data/resources/kernel-4.2/include/datacite-descriptionType-v4.xsd +19 -0
  152. data/resources/kernel-4.2/include/datacite-funderIdentifierType-v4.xsd +15 -0
  153. data/resources/kernel-4.2/include/datacite-nameType-v4.xsd +10 -0
  154. data/resources/kernel-4.2/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  155. data/resources/kernel-4.2/include/datacite-relationType-v4.xsd +49 -0
  156. data/resources/kernel-4.2/include/datacite-resourceType-v4.xsd +28 -0
  157. data/resources/kernel-4.2/include/datacite-titleType-v4.xsd +14 -0
  158. data/resources/kernel-4.2/include/xml.xsd +286 -0
  159. data/resources/kernel-4.2/metadata.xsd +479 -0
  160. data/resources/kernel-4.3/include/datacite-contributorType-v4.xsd +35 -0
  161. data/resources/kernel-4.3/include/datacite-dateType-v4.xsd +25 -0
  162. data/resources/kernel-4.3/include/datacite-descriptionType-v4.xsd +19 -0
  163. data/resources/kernel-4.3/include/datacite-funderIdentifierType-v4.xsd +16 -0
  164. data/resources/kernel-4.3/include/datacite-nameType-v4.xsd +10 -0
  165. data/resources/kernel-4.3/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  166. data/resources/kernel-4.3/include/datacite-relationType-v4.xsd +49 -0
  167. data/resources/kernel-4.3/include/datacite-resourceType-v4.xsd +28 -0
  168. data/resources/kernel-4.3/include/datacite-titleType-v4.xsd +14 -0
  169. data/resources/kernel-4.3/include/xml.xsd +286 -0
  170. data/resources/kernel-4.3/metadata.xsd +515 -0
  171. data/resources/kernel-4.4/include/datacite-contributorType-v4.xsd +35 -0
  172. data/resources/kernel-4.4/include/datacite-dateType-v4.xsd +25 -0
  173. data/resources/kernel-4.4/include/datacite-descriptionType-v4.xsd +19 -0
  174. data/resources/kernel-4.4/include/datacite-funderIdentifierType-v4.xsd +16 -0
  175. data/resources/kernel-4.4/include/datacite-nameType-v4.xsd +10 -0
  176. data/resources/kernel-4.4/include/datacite-numberType-v4.xsd +12 -0
  177. data/resources/kernel-4.4/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  178. data/resources/kernel-4.4/include/datacite-relationType-v4.xsd +51 -0
  179. data/resources/kernel-4.4/include/datacite-resourceType-v4.xsd +43 -0
  180. data/resources/kernel-4.4/include/datacite-titleType-v4.xsd +14 -0
  181. data/resources/kernel-4.4/include/xml.xsd +286 -0
  182. data/resources/kernel-4.4/metadata.xsd +707 -0
  183. data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
  184. data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
  185. data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
  186. data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
  187. data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
  188. data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
  189. data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  190. data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
  191. data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
  192. data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
  193. data/resources/kernel-4.5/include/xml.xsd +286 -0
  194. data/resources/kernel-4.5/metadata.xsd +711 -0
  195. data/resources/kernel-4.6/include/datacite-contributorType-v4.xsd +37 -0
  196. data/resources/kernel-4.6/include/datacite-dateType-v4.xsd +27 -0
  197. data/resources/kernel-4.6/include/datacite-descriptionType-v4.xsd +19 -0
  198. data/resources/kernel-4.6/include/datacite-funderIdentifierType-v4.xsd +16 -0
  199. data/resources/kernel-4.6/include/datacite-nameType-v4.xsd +10 -0
  200. data/resources/kernel-4.6/include/datacite-numberType-v4.xsd +12 -0
  201. data/resources/kernel-4.6/include/datacite-relatedIdentifierType-v4.xsd +37 -0
  202. data/resources/kernel-4.6/include/datacite-relationType-v4.xsd +57 -0
  203. data/resources/kernel-4.6/include/datacite-resourceType-v4.xsd +49 -0
  204. data/resources/kernel-4.6/include/datacite-titleType-v4.xsd +14 -0
  205. data/resources/kernel-4.6/include/xml.xsd +286 -0
  206. data/resources/kernel-4.6/metadata.xsd +712 -0
  207. data/resources/kernel-4.7/include/datacite-contributorType-v4.xsd +37 -0
  208. data/resources/kernel-4.7/include/datacite-dateType-v4.xsd +27 -0
  209. data/resources/kernel-4.7/include/datacite-descriptionType-v4.xsd +19 -0
  210. data/resources/kernel-4.7/include/datacite-funderIdentifierType-v4.xsd +16 -0
  211. data/resources/kernel-4.7/include/datacite-nameType-v4.xsd +10 -0
  212. data/resources/kernel-4.7/include/datacite-numberType-v4.xsd +12 -0
  213. data/resources/kernel-4.7/include/datacite-relatedIdentifierType-v4.xsd +39 -0
  214. data/resources/kernel-4.7/include/datacite-relationType-v4.xsd +59 -0
  215. data/resources/kernel-4.7/include/datacite-resourceType-v4.xsd +52 -0
  216. data/resources/kernel-4.7/include/datacite-titleType-v4.xsd +14 -0
  217. data/resources/kernel-4.7/include/xml.xsd +286 -0
  218. data/resources/kernel-4.7/metadata.xsd +715 -0
  219. data/resources/oecd/dfg-mappings.json +1866 -0
  220. data/resources/oecd/for-mappings.json +1101 -0
  221. data/resources/oecd/fos-mappings.json +198 -0
  222. data/resources/schema_org/jsonldcontext.json +7477 -0
  223. data/resources/spdx/licenses.json +5297 -0
  224. data/resources/xml.xsd +286 -0
  225. metadata +478 -150
  226. data/.travis.yml +0 -23
  227. data/lib/bolognese/crossref.rb +0 -202
  228. data/lib/bolognese/datacite.rb +0 -157
  229. data/lib/bolognese/date_utils.rb +0 -48
  230. data/lib/bolognese/github.rb +0 -106
  231. data/lib/bolognese/orcid.rb +0 -24
  232. data/lib/bolognese/pid_utils.rb +0 -23
  233. data/spec/cli_spec.rb +0 -37
  234. data/spec/crossref_spec.rb +0 -113
  235. data/spec/datacite_spec.rb +0 -49
  236. data/spec/doi_spec.rb +0 -89
  237. data/spec/fixtures/crossref.xml +0 -742
  238. data/spec/fixtures/datacite.xml +0 -40
  239. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_crossref.yml +0 -760
  240. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_schema_org.yml +0 -1476
  241. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_datacite.yml +0 -214
  242. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_schema_org.yml +0 -384
  243. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/crossref.yml +0 -44
  244. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/datacite.yml +0 -44
  245. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/medra.yml +0 -44
  246. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/not_found.yml +0 -44
  247. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_test.yml +0 -843
  248. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_SICI_DOI.yml +0 -277
  249. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_data_citation.yml +0 -15755
  250. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/date_in_future.yml +0 -2691
  251. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/journal_article.yml +0 -1857
  252. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/not_found_error.yml +0 -93
  253. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/posted_content.yml +0 -5715
  254. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/BlogPosting.yml +0 -307
  255. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/Dataset.yml +0 -343
  256. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref.yml +0 -44
  257. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref_doi_not_url.yml +0 -44
  258. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite.yml +0 -44
  259. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite_doi_http.yml +0 -44
  260. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/orcid.yml +0 -44
  261. data/spec/metadata_spec.rb +0 -35
  262. data/spec/orcid_spec.rb +0 -23
  263. data/spec/spec_helper.rb +0 -88
  264. /data/{LICENSE → LICENSE.md} +0 -0
@@ -0,0 +1,264 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bolognese
4
+ module Readers
5
+ module SchemaOrgReader
6
# Lookup table used by schema_org_related_identifier: the key is the
# property name read from the parsed schema.org document, the value is the
# relation type handed to normalize_ids.
SO_TO_DC_RELATION_TYPES = {
  "citation" => "References",
  "isBasedOn" => "IsSupplementedBy",
  "sameAs" => "IsIdenticalTo",
  "isPartOf" => "IsPartOf",
  "hasPart" => "HasPart",
  "isPredecessor" => "IsPreviousVersionOf",
  "isSuccessor" => "IsNewVersionOf",
  # schema_org_is_previous_version_of / schema_org_is_new_version_of look
  # the relation up under these names; without the entries the lookup
  # yields nil and the relation type is lost.
  "PredecessorOf" => "IsPreviousVersionOf",
  "SuccessorOf" => "IsNewVersionOf",
  "workTranslation" => "HasTranslation",
  "translationOfWork" => "IsTranslationOf"
}.freeze

# Lookup table used by schema_org_reverse_related_identifier for properties
# found under the document's "@reverse" key: same keys as above, mapped to
# the relation type seen from the other side of the link.
SO_TO_DC_REVERSE_RELATION_TYPES = {
  "citation" => "IsReferencedBy",
  "isBasedOn" => "IsSupplementTo",
  "sameAs" => "IsIdenticalTo",
  "isPartOf" => "HasPart",
  "hasPart" => "IsPartOf",
  "isPredecessor" => "IsNewVersionOf",
  "isSuccessor" => "IsPreviousVersionOf"
}.freeze
27
+
28
# Retrieve the schema.org JSON-LD embedded in the page behind +id+.
#
# id      - identifier or URL of the page; passed through normalize_id.
# options - accepted for signature compatibility, not used here.
#
# Returns { "string" => nil, "state" => "not_found" } when no id is given,
# otherwise { "string" => <text of the first application/ld+json script> }
# ("string" is nil when the page contains no such script element).
def get_schema_org(id: nil, **options)
  return { "string" => nil, "state" => "not_found" } unless id.present?

  response = Maremma.get(normalize_id(id))
  html = Nokogiri::HTML(response.body.fetch("data", nil))

  # Only the first JSON-LD script element on the page is considered.
  script = html.at('script[type="application/ld+json"]')
  json_ld = script.present? ? script.text : script

  { "string" => json_ld }
end
41
+
42
# Convert a schema.org JSON-LD document into the attribute hash returned by
# the readers in this library.
#
# string  - the JSON-LD document as a String; nil/blank yields an (almost)
#           empty result whose "state" is "not_found" unless options carry
#           extra attributes.
# options - :doi overrides the identifier found in the document. Every option
#           except :doi, :id, :url, :sandbox, :validate and :ra is merged
#           over the parsed attributes.
#
# Returns { "errors" => ... } when jsonlint reports a problem with +string+,
# otherwise a Hash with string keys ("id", "types", "doi", "titles", ...).
def read_schema_org(string: nil, **options)
  if string.present?
    errors = jsonlint(string)
    return { "errors" => errors } if errors.present?
  end

  # Caller-supplied attributes that win over whatever the document says.
  read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))

  meta = string.present? ? Maremma.from_json(string) : {}

  # "identifier" may hold plain strings and/or PropertyValue-like hashes.
  # Strings that normalize to a doi.org URL are left out of this list.
  identifiers = Array.wrap(meta.fetch("identifier", nil)).map do |r|
    r = normalize_id(r) if r.is_a?(String)
    if r.is_a?(String) && !r.start_with?("https://doi.org")
      { "identifierType" => "URL", "identifier" => r }
    elsif r.is_a?(Hash)
      { "identifierType" => get_identifier_type(r["propertyID"]), "identifier" => r["value"] }
    end
  end.compact.uniq

  id = normalize_id(options[:doi] || meta.fetch("@id", nil) || meta.fetch("identifier", nil))

  schema_org = meta.fetch("@type", nil) && meta.fetch("@type").camelcase
  resource_type_general = Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org]
  types = {
    "resourceTypeGeneral" => resource_type_general,
    "resourceType" => meta.fetch("additionalType", nil),
    "schemaOrg" => schema_org,
    "citeproc" => Bolognese::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || "article-journal",
    "bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
    "ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
  }.compact

  authors = meta.fetch("author", nil) || meta.fetch("creator", nil)
  # Authors should be an object, if it's just a plain string don't try and
  # parse it (creators then stays nil).
  unless authors.is_a?(String)
    creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
  end
  contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
  translators = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("translator", nil))))
  translators.map! do |translator|
    translator["contributorType"] = "Translator"
    translator
  end
  contributors += translators

  publisher = {
    "name" => parse_attributes(meta.fetch("publisher", nil), content: "name", first: true),
    "publisherIdentifier" => parse_attributes(meta.fetch("publisher", nil), content: "@id", first: true),
  }.compact if meta.fetch("publisher", nil).present?

  # Datasets name their container under "includedInDataCatalog", everything
  # else under "Periodical".
  ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
  container = if meta.fetch(ct, nil).present?
    container_url = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "url", first: true)

    {
      "type" => (schema_org == "Dataset") ? "DataRepository" : "Periodical",
      "title" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "name", first: true),
      "identifier" => container_url,
      "identifierType" => container_url.present? ? "URL" : nil,
      "volume" => meta.fetch("volumeNumber", nil),
      "issue" => meta.fetch("issueNumber", nil),
      "firstPage" => meta.fetch("pageStart", nil),
      "lastPage" => meta.fetch("pageEnd", nil)
    }.compact
  else
    {}
  end

  related_identifiers = Array.wrap(schema_org_is_identical_to(meta)) +
    Array.wrap(schema_org_is_part_of(meta)) +
    Array.wrap(schema_org_has_part(meta)) +
    Array.wrap(schema_org_is_previous_version_of(meta)) +
    Array.wrap(schema_org_is_new_version_of(meta)) +
    Array.wrap(schema_org_references(meta)) +
    Array.wrap(schema_org_is_referenced_by(meta)) +
    Array.wrap(schema_org_is_supplement_to(meta)) +
    Array.wrap(schema_org_is_supplemented_by(meta)) +
    Array.wrap(schema_org_has_translation(meta)) +
    Array.wrap(schema_org_is_translation_of(meta))

  # NOTE(review): this indexes each license with "name"/"id", which assumes
  # hash entries; a license given as a bare URL string is not handled
  # specially here - confirm against expected input.
  rights_list = Array.wrap(meta.fetch("license", nil)).compact.map do |rl|
    hsh_to_spdx("__content__" => rl["name"], "rightsURI" => rl["id"])
  end

  funding_references = Array.wrap(meta.fetch("funder", nil)).compact.map do |fr|
    if fr["@id"].present?
      {
        "funderName" => fr["name"],
        "funderIdentifier" => fr["@id"],
        "funderIdentifierType" => fr["@id"].to_s.start_with?("https://doi.org/10.13039") ? "Crossref Funder ID" : "Other"
      }.compact
    else
      { "funderName" => fr["name"] }.compact
    end
  end

  # A date is only recorded when Date.edtf accepts it.
  dates = []
  dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if Date.edtf(meta.fetch("datePublished", nil)).present?
  dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if Date.edtf(meta.fetch("dateCreated", nil)).present?
  dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if Date.edtf(meta.fetch("dateModified", nil)).present?
  dates << { "date" => meta.fetch("temporalCoverage"), "dateType" => "Coverage" } if Date.edtf(meta.fetch("temporalCoverage", nil)).present?
  publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?

  # inLanguage is either a plain string or a Language-like hash. Any other
  # shape (array, number, ...) is ignored instead of being passed to
  # Hash#dig, which raises TypeError for values that cannot be dug into.
  in_language = meta.fetch("inLanguage", nil)
  language = case in_language
             when String then in_language
             when Hash then in_language["alternateName"] || in_language["name"]
             end

  state = meta.present? || read_options.present? ? "findable" : "not_found"

  geo_locations = Array.wrap(meta.fetch("spatialCoverage", nil)).map do |gl|
    if gl.dig("geo", "box")
      # The box string is split into four space-separated values, taken in
      # the order south, west, north, east.
      s, w, n, e = gl.dig("geo", "box").split(" ", 4)
      geo_location_box = {
        "westBoundLongitude" => w,
        "eastBoundLongitude" => e,
        "southBoundLatitude" => s,
        "northBoundLatitude" => n,
      }.compact.presence
    else
      geo_location_box = nil
    end
    geo_location_point = { "pointLongitude" => gl.dig("geo", "longitude"), "pointLatitude" => gl.dig("geo", "latitude") }.compact.presence

    {
      "geoLocationPlace" => gl.dig("geo", "address"),
      "geoLocationPoint" => geo_location_point,
      "geoLocationBox" => geo_location_box
    }.compact
  end

  # handle keywords as array and as comma-separated string
  subjects = meta.fetch("keywords", nil)
  subjects = subjects.to_s.split(", ") if subjects.is_a?(String)
  subjects = Array.wrap(subjects).reduce([]) do |sum, subject|
    sum + name_to_subject(subject)
  end

  # schema.org spells the property "contentSize"; the misspelled
  # "contenSize" is still honoured so input relying on it keeps working.
  content_size = meta.fetch("contentSize", nil) || meta.fetch("contenSize", nil)

  { "id" => id,
    "types" => types,
    "doi" => validate_doi(id),
    "identifiers" => identifiers,
    "url" => normalize_id(meta.fetch("url", nil)),
    "content_url" => Array.wrap(meta.fetch("contentUrl", nil)),
    "sizes" => Array.wrap(content_size).presence,
    "formats" => Array.wrap(meta.fetch("encodingFormat", nil) || meta.fetch("fileFormat", nil)),
    "titles" => meta.fetch("name", nil).present? ? [{ "title" => meta.fetch("name", nil) }] : nil,
    "creators" => creators,
    "contributors" => contributors,
    "publisher" => publisher,
    "agency" => parse_attributes(meta.fetch("provider", nil), content: "name", first: true),
    "container" => container,
    "related_identifiers" => related_identifiers,
    "publication_year" => publication_year,
    "dates" => dates,
    "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : nil,
    "rights_list" => rights_list,
    "version_info" => meta.fetch("version", nil).to_s.presence,
    "subjects" => subjects,
    "language" => language,
    "state" => state,
    "schema_version" => meta.fetch("schemaVersion", nil).to_s.presence,
    "funding_references" => funding_references,
    "geo_locations" => geo_locations
  }.merge(read_options)
end
209
+
210
# Build related identifiers from a top-level property of the document.
#
# meta          - parsed schema.org document (Hash).
# relation_type - name of the property to read; also the key used to look
#                 up the relation type in SO_TO_DC_RELATION_TYPES.
#
# Returns whatever normalize_ids produces for the property's value.
def schema_org_related_identifier(meta, relation_type: nil)
  linked = meta.fetch(relation_type, nil)
  mapped_type = SO_TO_DC_RELATION_TYPES[relation_type]

  normalize_ids(ids: linked, relation_type: mapped_type)
end
213
+
214
# Build related identifiers from a property nested under the document's
# "@reverse" key.
#
# meta          - parsed schema.org document (Hash).
# relation_type - name of the property to read below "@reverse"; also the
#                 key used to look up the relation type in
#                 SO_TO_DC_REVERSE_RELATION_TYPES.
#
# Returns whatever normalize_ids produces for the property's value.
def schema_org_reverse_related_identifier(meta, relation_type: nil)
  linked = meta.dig("@reverse", relation_type)
  mapped_type = SO_TO_DC_REVERSE_RELATION_TYPES[relation_type]

  normalize_ids(ids: linked, relation_type: mapped_type)
end
217
+
218
# The methods below are thin delegators: each names the schema.org property
# it reads and hands off to schema_org_related_identifier (top-level
# property) or schema_org_reverse_related_identifier ("@reverse" property).

# Related identifiers read from "sameAs".
def schema_org_is_identical_to(meta)
  schema_org_related_identifier(meta, relation_type: "sameAs")
end

# Related identifiers read from "isPartOf".
def schema_org_is_part_of(meta)
  schema_org_related_identifier(meta, relation_type: "isPartOf")
end

# Related identifiers read from "hasPart".
def schema_org_has_part(meta)
  schema_org_related_identifier(meta, relation_type: "hasPart")
end

# Related identifiers read from "PredecessorOf".
def schema_org_is_previous_version_of(meta)
  schema_org_related_identifier(meta, relation_type: "PredecessorOf")
end

# Related identifiers read from "SuccessorOf".
def schema_org_is_new_version_of(meta)
  schema_org_related_identifier(meta, relation_type: "SuccessorOf")
end

# Related identifiers read from "citation".
def schema_org_references(meta)
  schema_org_related_identifier(meta, relation_type: "citation")
end

# Related identifiers read from "citation" below "@reverse".
def schema_org_is_referenced_by(meta)
  schema_org_reverse_related_identifier(meta, relation_type: "citation")
end

# Related identifiers read from "isBasedOn" below "@reverse".
def schema_org_is_supplement_to(meta)
  schema_org_reverse_related_identifier(meta, relation_type: "isBasedOn")
end

# Related identifiers read from "isBasedOn".
def schema_org_is_supplemented_by(meta)
  schema_org_related_identifier(meta, relation_type: "isBasedOn")
end

# Related identifiers read from "workTranslation".
def schema_org_has_translation(meta)
  schema_org_related_identifier(meta, relation_type: "workTranslation")
end

# Related identifiers read from "translationOfWork".
def schema_org_is_translation_of(meta)
  schema_org_related_identifier(meta, relation_type: "translationOfWork")
end
261
+
262
+ end
263
+ end
264
+ end
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class String
2
4
  def my_titleize
3
- self.gsub(/(\b|_)(.)/) { "#{$1}#{$2.upcase}" }
5
+ self.gsub(/\b(['’]?[a-z])/) { "#{$1.capitalize}" }
4
6
  end
5
7
  end