bolognese 0.2.2 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/build.yml +9 -0
  3. data/.github/workflows/changelog.yml +36 -0
  4. data/.github/workflows/ci.yml +22 -0
  5. data/.github/workflows/pull-request.yml +9 -0
  6. data/.github/workflows/release.yml +32 -0
  7. data/.gitignore +4 -0
  8. data/.rubocop.yml +658 -0
  9. data/CHANGELOG.md +1864 -0
  10. data/CITATION +17 -0
  11. data/Gemfile +1 -1
  12. data/Gemfile.lock +251 -99
  13. data/README.md +1026 -2
  14. data/Rakefile +1 -0
  15. data/bin/bolognese +5 -1
  16. data/bolognese.gemspec +33 -21
  17. data/lib/bolognese/array.rb +13 -0
  18. data/lib/bolognese/author_utils.rb +115 -39
  19. data/lib/bolognese/citeproc_extensions.rb +48 -0
  20. data/lib/bolognese/cli.rb +28 -15
  21. data/lib/bolognese/datacite_utils.rb +418 -0
  22. data/lib/bolognese/doi_utils.rb +45 -23
  23. data/lib/bolognese/metadata.rb +250 -18
  24. data/lib/bolognese/metadata_utils.rb +228 -0
  25. data/lib/bolognese/pubmed.rb +2 -0
  26. data/lib/bolognese/readers/bibtex_reader.rb +100 -0
  27. data/lib/bolognese/readers/citeproc_reader.rb +125 -0
  28. data/lib/bolognese/readers/codemeta_reader.rb +108 -0
  29. data/lib/bolognese/readers/crosscite_reader.rb +17 -0
  30. data/lib/bolognese/readers/crossref_reader.rb +413 -0
  31. data/lib/bolognese/readers/datacite_json_reader.rb +17 -0
  32. data/lib/bolognese/readers/datacite_reader.rb +338 -0
  33. data/lib/bolognese/readers/npm_reader.rb +115 -0
  34. data/lib/bolognese/readers/ris_reader.rb +114 -0
  35. data/lib/bolognese/readers/schema_org_reader.rb +264 -0
  36. data/lib/bolognese/string.rb +3 -1
  37. data/lib/bolognese/utils.rb +1403 -12
  38. data/lib/bolognese/version.rb +1 -1
  39. data/lib/bolognese/whitelist_scrubber.rb +47 -0
  40. data/lib/bolognese/writers/bibtex_writer.rb +32 -0
  41. data/lib/bolognese/writers/citation_writer.rb +14 -0
  42. data/lib/bolognese/writers/citeproc_writer.rb +11 -0
  43. data/lib/bolognese/writers/codemeta_writer.rb +29 -0
  44. data/lib/bolognese/writers/crosscite_writer.rb +11 -0
  45. data/lib/bolognese/writers/crossref_writer.rb +11 -0
  46. data/lib/bolognese/writers/csv_writer.rb +24 -0
  47. data/lib/bolognese/writers/datacite_json_writer.rb +13 -0
  48. data/lib/bolognese/writers/datacite_writer.rb +12 -0
  49. data/lib/bolognese/writers/jats_writer.rb +138 -0
  50. data/lib/bolognese/writers/rdf_xml_writer.rb +11 -0
  51. data/lib/bolognese/writers/ris_writer.rb +29 -0
  52. data/lib/bolognese/writers/schema_org_writer.rb +55 -0
  53. data/lib/bolognese/writers/turtle_writer.rb +11 -0
  54. data/lib/bolognese.rb +19 -4
  55. data/package.json +12 -0
  56. data/resources/2008/09/xsd.xsl +997 -0
  57. data/resources/datacite-contributorType-v4.xsd +35 -0
  58. data/resources/datacite-dateType-v4.xsd +25 -0
  59. data/resources/datacite-descriptionType-v4.xsd +19 -0
  60. data/resources/datacite-funderIdentifierType-v4.xsd +15 -0
  61. data/resources/datacite-nameType-v4.xsd +10 -0
  62. data/resources/datacite-relatedIdentifierType-v4.xsd +34 -0
  63. data/resources/datacite-relationType-v4.xsd +49 -0
  64. data/resources/datacite-resourceType-v4.xsd +28 -0
  65. data/resources/datacite-titleType-v4.xsd +14 -0
  66. data/resources/kernel-2.1/include/datacite-contributorType-v1.1.xsd +22 -0
  67. data/resources/kernel-2.1/include/datacite-contributorType-v2.xsd +3 -0
  68. data/resources/kernel-2.1/include/datacite-dateType-v1.1.xsd +31 -0
  69. data/resources/kernel-2.1/include/datacite-dateType-v2.xsd +3 -0
  70. data/resources/kernel-2.1/include/datacite-descriptionType-v1.1.xsd +14 -0
  71. data/resources/kernel-2.1/include/datacite-descriptionType-v2.xsd +3 -0
  72. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v1.1.xsd +24 -0
  73. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v2.xsd +3 -0
  74. data/resources/kernel-2.1/include/datacite-relationType-v1.1.xsd +29 -0
  75. data/resources/kernel-2.1/include/datacite-relationType-v2.xsd +3 -0
  76. data/resources/kernel-2.1/include/datacite-resourceType-v1.1.xsd +22 -0
  77. data/resources/kernel-2.1/include/datacite-resourceType-v2.xsd +3 -0
  78. data/resources/kernel-2.1/include/datacite-titleType-v1.1.xsd +11 -0
  79. data/resources/kernel-2.1/include/datacite-titleType-v2.xsd +3 -0
  80. data/resources/kernel-2.1/metadata.xsd +315 -0
  81. data/resources/kernel-2.2/include/datacite-contributorType-v2.xsd +29 -0
  82. data/resources/kernel-2.2/include/datacite-dateType-v2.xsd +21 -0
  83. data/resources/kernel-2.2/include/datacite-descriptionType-v2.xsd +15 -0
  84. data/resources/kernel-2.2/include/datacite-relatedIdentifierType-v2.xsd +25 -0
  85. data/resources/kernel-2.2/include/datacite-relationType-v2.xsd +29 -0
  86. data/resources/kernel-2.2/include/datacite-resourceType-v2.xsd +23 -0
  87. data/resources/kernel-2.2/include/datacite-titleType-v2.xsd +10 -0
  88. data/resources/kernel-2.2/metadata.xsd +316 -0
  89. data/resources/kernel-3/include/datacite-contributorType-v3.1.xsd +35 -0
  90. data/resources/kernel-3/include/datacite-dateType-v3.xsd +21 -0
  91. data/resources/kernel-3/include/datacite-descriptionType-v3.xsd +17 -0
  92. data/resources/kernel-3/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
  93. data/resources/kernel-3/include/datacite-relationType-v3.1.xsd +38 -0
  94. data/resources/kernel-3/include/datacite-resourceType-v3.xsd +26 -0
  95. data/resources/kernel-3/include/datacite-titleType-v3.xsd +12 -0
  96. data/resources/kernel-3/include/xml.xsd +286 -0
  97. data/resources/kernel-3/metadata.xsd +380 -0
  98. data/resources/kernel-3.0/include/datacite-contributorType-v3.xsd +33 -0
  99. data/resources/kernel-3.0/include/datacite-dateType-v3.xsd +21 -0
  100. data/resources/kernel-3.0/include/datacite-descriptionType-v3.xsd +17 -0
  101. data/resources/kernel-3.0/include/datacite-relatedIdentifierType-v3.xsd +27 -0
  102. data/resources/kernel-3.0/include/datacite-relationType-v3.xsd +33 -0
  103. data/resources/kernel-3.0/include/datacite-resourceType-v3.xsd +26 -0
  104. data/resources/kernel-3.0/include/datacite-titleType-v3.xsd +12 -0
  105. data/resources/kernel-3.0/include/xml.xsd +286 -0
  106. data/resources/kernel-3.0/metadata.xsd +377 -0
  107. data/resources/kernel-3.1/include/datacite-contributorType-v3.1.xsd +35 -0
  108. data/resources/kernel-3.1/include/datacite-dateType-v3.xsd +21 -0
  109. data/resources/kernel-3.1/include/datacite-descriptionType-v3.xsd +17 -0
  110. data/resources/kernel-3.1/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
  111. data/resources/kernel-3.1/include/datacite-relationType-v3.1.xsd +38 -0
  112. data/resources/kernel-3.1/include/datacite-resourceType-v3.xsd +26 -0
  113. data/resources/kernel-3.1/include/datacite-titleType-v3.xsd +12 -0
  114. data/resources/kernel-3.1/include/xml.xsd +286 -0
  115. data/resources/kernel-3.1/metadata.xsd +380 -0
  116. data/resources/kernel-4/include/datacite-contributorType-v4.xsd +37 -0
  117. data/resources/kernel-4/include/datacite-dateType-v4.xsd +27 -0
  118. data/resources/kernel-4/include/datacite-descriptionType-v4.xsd +19 -0
  119. data/resources/kernel-4/include/datacite-funderIdentifierType-v4.xsd +16 -0
  120. data/resources/kernel-4/include/datacite-nameType-v4.xsd +10 -0
  121. data/resources/kernel-4/include/datacite-numberType-v4.xsd +12 -0
  122. data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +39 -0
  123. data/resources/kernel-4/include/datacite-relationType-v4.xsd +59 -0
  124. data/resources/kernel-4/include/datacite-resourceType-v4.xsd +52 -0
  125. data/resources/kernel-4/include/datacite-titleType-v4.xsd +14 -0
  126. data/resources/kernel-4/include/xml.xsd +286 -0
  127. data/resources/kernel-4/metadata.xsd +715 -0
  128. data/resources/kernel-4.0/include/datacite-contributorType-v4.xsd +35 -0
  129. data/resources/kernel-4.0/include/datacite-dateType-v4.xsd +21 -0
  130. data/resources/kernel-4.0/include/datacite-descriptionType-v4.xsd +19 -0
  131. data/resources/kernel-4.0/include/datacite-funderIdentifierType-v4.xsd +15 -0
  132. data/resources/kernel-4.0/include/datacite-relatedIdentifierType-v4.xsd +32 -0
  133. data/resources/kernel-4.0/include/datacite-relationType-v4.xsd +39 -0
  134. data/resources/kernel-4.0/include/datacite-resourceType-v4.xsd +26 -0
  135. data/resources/kernel-4.0/include/datacite-titleType-v4.xsd +14 -0
  136. data/resources/kernel-4.0/include/xml.xsd +286 -0
  137. data/resources/kernel-4.0/metadata.xsd +470 -0
  138. data/resources/kernel-4.1/include/datacite-contributorType-v4.xsd +35 -0
  139. data/resources/kernel-4.1/include/datacite-dateType-v4.1.xsd +23 -0
  140. data/resources/kernel-4.1/include/datacite-descriptionType-v4.xsd +19 -0
  141. data/resources/kernel-4.1/include/datacite-funderIdentifierType-v4.xsd +15 -0
  142. data/resources/kernel-4.1/include/datacite-nameType-v4.1.xsd +10 -0
  143. data/resources/kernel-4.1/include/datacite-relatedIdentifierType-v4.xsd +32 -0
  144. data/resources/kernel-4.1/include/datacite-relationType-v4.1.xsd +46 -0
  145. data/resources/kernel-4.1/include/datacite-resourceType-v4.1.xsd +28 -0
  146. data/resources/kernel-4.1/include/datacite-titleType-v4.xsd +14 -0
  147. data/resources/kernel-4.1/include/xml.xsd +286 -0
  148. data/resources/kernel-4.1/metadata.xsd +483 -0
  149. data/resources/kernel-4.2/include/datacite-contributorType-v4.xsd +35 -0
  150. data/resources/kernel-4.2/include/datacite-dateType-v4.xsd +25 -0
  151. data/resources/kernel-4.2/include/datacite-descriptionType-v4.xsd +19 -0
  152. data/resources/kernel-4.2/include/datacite-funderIdentifierType-v4.xsd +15 -0
  153. data/resources/kernel-4.2/include/datacite-nameType-v4.xsd +10 -0
  154. data/resources/kernel-4.2/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  155. data/resources/kernel-4.2/include/datacite-relationType-v4.xsd +49 -0
  156. data/resources/kernel-4.2/include/datacite-resourceType-v4.xsd +28 -0
  157. data/resources/kernel-4.2/include/datacite-titleType-v4.xsd +14 -0
  158. data/resources/kernel-4.2/include/xml.xsd +286 -0
  159. data/resources/kernel-4.2/metadata.xsd +479 -0
  160. data/resources/kernel-4.3/include/datacite-contributorType-v4.xsd +35 -0
  161. data/resources/kernel-4.3/include/datacite-dateType-v4.xsd +25 -0
  162. data/resources/kernel-4.3/include/datacite-descriptionType-v4.xsd +19 -0
  163. data/resources/kernel-4.3/include/datacite-funderIdentifierType-v4.xsd +16 -0
  164. data/resources/kernel-4.3/include/datacite-nameType-v4.xsd +10 -0
  165. data/resources/kernel-4.3/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  166. data/resources/kernel-4.3/include/datacite-relationType-v4.xsd +49 -0
  167. data/resources/kernel-4.3/include/datacite-resourceType-v4.xsd +28 -0
  168. data/resources/kernel-4.3/include/datacite-titleType-v4.xsd +14 -0
  169. data/resources/kernel-4.3/include/xml.xsd +286 -0
  170. data/resources/kernel-4.3/metadata.xsd +515 -0
  171. data/resources/kernel-4.4/include/datacite-contributorType-v4.xsd +35 -0
  172. data/resources/kernel-4.4/include/datacite-dateType-v4.xsd +25 -0
  173. data/resources/kernel-4.4/include/datacite-descriptionType-v4.xsd +19 -0
  174. data/resources/kernel-4.4/include/datacite-funderIdentifierType-v4.xsd +16 -0
  175. data/resources/kernel-4.4/include/datacite-nameType-v4.xsd +10 -0
  176. data/resources/kernel-4.4/include/datacite-numberType-v4.xsd +12 -0
  177. data/resources/kernel-4.4/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  178. data/resources/kernel-4.4/include/datacite-relationType-v4.xsd +51 -0
  179. data/resources/kernel-4.4/include/datacite-resourceType-v4.xsd +43 -0
  180. data/resources/kernel-4.4/include/datacite-titleType-v4.xsd +14 -0
  181. data/resources/kernel-4.4/include/xml.xsd +286 -0
  182. data/resources/kernel-4.4/metadata.xsd +707 -0
  183. data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
  184. data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
  185. data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
  186. data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
  187. data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
  188. data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
  189. data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  190. data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
  191. data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
  192. data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
  193. data/resources/kernel-4.5/include/xml.xsd +286 -0
  194. data/resources/kernel-4.5/metadata.xsd +711 -0
  195. data/resources/kernel-4.6/include/datacite-contributorType-v4.xsd +37 -0
  196. data/resources/kernel-4.6/include/datacite-dateType-v4.xsd +27 -0
  197. data/resources/kernel-4.6/include/datacite-descriptionType-v4.xsd +19 -0
  198. data/resources/kernel-4.6/include/datacite-funderIdentifierType-v4.xsd +16 -0
  199. data/resources/kernel-4.6/include/datacite-nameType-v4.xsd +10 -0
  200. data/resources/kernel-4.6/include/datacite-numberType-v4.xsd +12 -0
  201. data/resources/kernel-4.6/include/datacite-relatedIdentifierType-v4.xsd +37 -0
  202. data/resources/kernel-4.6/include/datacite-relationType-v4.xsd +57 -0
  203. data/resources/kernel-4.6/include/datacite-resourceType-v4.xsd +49 -0
  204. data/resources/kernel-4.6/include/datacite-titleType-v4.xsd +14 -0
  205. data/resources/kernel-4.6/include/xml.xsd +286 -0
  206. data/resources/kernel-4.6/metadata.xsd +712 -0
  207. data/resources/kernel-4.7/include/datacite-contributorType-v4.xsd +37 -0
  208. data/resources/kernel-4.7/include/datacite-dateType-v4.xsd +27 -0
  209. data/resources/kernel-4.7/include/datacite-descriptionType-v4.xsd +19 -0
  210. data/resources/kernel-4.7/include/datacite-funderIdentifierType-v4.xsd +16 -0
  211. data/resources/kernel-4.7/include/datacite-nameType-v4.xsd +10 -0
  212. data/resources/kernel-4.7/include/datacite-numberType-v4.xsd +12 -0
  213. data/resources/kernel-4.7/include/datacite-relatedIdentifierType-v4.xsd +39 -0
  214. data/resources/kernel-4.7/include/datacite-relationType-v4.xsd +59 -0
  215. data/resources/kernel-4.7/include/datacite-resourceType-v4.xsd +52 -0
  216. data/resources/kernel-4.7/include/datacite-titleType-v4.xsd +14 -0
  217. data/resources/kernel-4.7/include/xml.xsd +286 -0
  218. data/resources/kernel-4.7/metadata.xsd +715 -0
  219. data/resources/oecd/dfg-mappings.json +1866 -0
  220. data/resources/oecd/for-mappings.json +1101 -0
  221. data/resources/oecd/fos-mappings.json +198 -0
  222. data/resources/schema_org/jsonldcontext.json +7477 -0
  223. data/resources/spdx/licenses.json +5297 -0
  224. data/resources/xml.xsd +286 -0
  225. metadata +478 -150
  226. data/.travis.yml +0 -23
  227. data/lib/bolognese/crossref.rb +0 -202
  228. data/lib/bolognese/datacite.rb +0 -157
  229. data/lib/bolognese/date_utils.rb +0 -48
  230. data/lib/bolognese/github.rb +0 -106
  231. data/lib/bolognese/orcid.rb +0 -24
  232. data/lib/bolognese/pid_utils.rb +0 -23
  233. data/spec/cli_spec.rb +0 -37
  234. data/spec/crossref_spec.rb +0 -113
  235. data/spec/datacite_spec.rb +0 -49
  236. data/spec/doi_spec.rb +0 -89
  237. data/spec/fixtures/crossref.xml +0 -742
  238. data/spec/fixtures/datacite.xml +0 -40
  239. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_crossref.yml +0 -760
  240. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_schema_org.yml +0 -1476
  241. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_datacite.yml +0 -214
  242. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_schema_org.yml +0 -384
  243. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/crossref.yml +0 -44
  244. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/datacite.yml +0 -44
  245. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/medra.yml +0 -44
  246. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/not_found.yml +0 -44
  247. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_test.yml +0 -843
  248. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_SICI_DOI.yml +0 -277
  249. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_data_citation.yml +0 -15755
  250. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/date_in_future.yml +0 -2691
  251. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/journal_article.yml +0 -1857
  252. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/not_found_error.yml +0 -93
  253. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/posted_content.yml +0 -5715
  254. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/BlogPosting.yml +0 -307
  255. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/Dataset.yml +0 -343
  256. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref.yml +0 -44
  257. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref_doi_not_url.yml +0 -44
  258. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite.yml +0 -44
  259. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite_doi_http.yml +0 -44
  260. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/orcid.yml +0 -44
  261. data/spec/metadata_spec.rb +0 -35
  262. data/spec/orcid_spec.rb +0 -23
  263. data/spec/spec_helper.rb +0 -88
  264. /data/{LICENSE → LICENSE.md} +0 -0
@@ -0,0 +1,418 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bolognese
4
+ module DataciteUtils
5
# Returns the DataCite XML serialization of this record as a String.
# The document is built on first call and memoized in @datacite_xml.
def datacite_xml
  @datacite_xml ||= begin
    builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |doc|
      # root element carries the namespace/schemaLocation attributes
      doc.resource(root_attributes) { insert_work(doc) }
    end
    builder.to_xml
  end
end
12
+
13
# Validates +xml+ against one of the DataCite metadata.xsd files under
# resources/<kernel>/.
#
# xml            - XML String to validate. When present, the schema version is
#                  read from the root element's default namespace, falling back
#                  to the "ns0" prefixed namespace.
# schema_version - Only consulted when +xml+ is blank. Any value that does not
#                  start with "http://datacite.org/schema/kernel" is replaced by
#                  "http://datacite.org/schema/kernel-4".
#
# Returns the validation error messages passed through Array#unwrap (a
# project-defined helper), or the message String of a
# Nokogiri::XML::SyntaxError raised along the way.
def datacite_errors(xml: nil, schema_version: nil)
  # Parse once and reuse the document for namespace detection and validation.
  document = Nokogiri::XML(xml, nil, 'UTF-8')

  if xml.present?
    namespaces = document.root.namespaces
    schema_version = namespaces.fetch('xmlns', nil).presence || namespaces.fetch('xmlns:ns0', nil).presence
  elsif !schema_version.to_s.start_with?("http://datacite.org/schema/kernel")
    schema_version = "http://datacite.org/schema/kernel-4"
  end

  # the last path segment of the namespace URI names the resources directory
  kernel = schema_version.to_s.split("/").last
  filepath = File.expand_path("../../../resources/#{kernel}/metadata.xsd", __FILE__)

  # Block form closes the file handle as soon as the schema is loaded;
  # a bare Kernel#open left it open until garbage collection.
  schema = File.open(filepath) { |file| Nokogiri::XML::Schema(file) }

  schema.validate(document).map(&:to_s).unwrap
rescue Nokogiri::XML::SyntaxError => e
  e.message
end
29
+
30
# Appends every top-level DataCite section to +xml+ by calling the matching
# insert_<section> method in the order listed below.
# Returns the value of the last call (insert_funding_references).
def insert_work(xml)
  sections = %i[
    identifier creators titles publisher publication_year resource_type
    subjects contributors dates language alternate_identifiers
    related_identifiers related_items sizes formats version rights_list
    descriptions geo_locations funding_references
  ]

  sections.map { |section| send(:"insert_#{section}", xml) }.last
end
52
+
53
# Writes the <identifier> element holding +doi+, typed as "DOI".
def insert_identifier(xml)
  attributes = { 'identifierType' => "DOI" }
  xml.identifier(doi, attributes)
end
56
+
57
# Writes a <creators> wrapper with one <creator> per entry of +creators+.
# The wrapper is emitted even when there are no creators.
def insert_creators(xml)
  xml.creators do
    Array.wrap(creators).each do |creator|
      xml.creator { insert_person(xml, creator, "creator") }
    end
  end
end
66
+
67
# Writes a <contributors> wrapper with one <contributor> per entry of
# +contributors+; returns +xml+ untouched when there are none.
def insert_contributors(xml)
  return xml if contributors.blank?

  xml.contributors do
    Array.wrap(contributors).each do |contributor|
      type_attribute = { "contributorType" => contributor["contributorType"] }
      xml.contributor(type_attribute) { insert_person(xml, contributor, "contributor") }
    end
  end
end
78
+
79
# Writes the name, name parts, name identifiers and affiliations of +person+
# into the element currently open on +xml+.
#
# type - "creator" or "contributor"; selects the <creatorName> or
#        <contributorName> element.
def insert_person(xml, person, type)
  # "Family, Given" when a family name is known, otherwise the plain name
  display_name =
    if person["familyName"].present?
      [person["familyName"], person["givenName"]].compact.join(", ")
    else
      person["name"]
    end

  name_attributes = { "nameType" => person["nameType"] }.compact
  xml.send(type + "Name", display_name, name_attributes)
  xml.givenName(person["givenName"]) if person["givenName"].present?
  xml.familyName(person["familyName"]) if person["familyName"].present?

  # NOTE(review): unlike the affiliation attributes below, these are not
  # compacted, so a missing scheme/schemeUri is passed on as nil — confirm
  # that this is intended.
  Array.wrap(person["nameIdentifiers"]).each do |name_identifier|
    xml.nameIdentifier(
      name_identifier["nameIdentifier"],
      'nameIdentifierScheme' => name_identifier["nameIdentifierScheme"],
      'schemeURI' => name_identifier["schemeUri"]
    )
  end

  Array.wrap(person["affiliation"]).each do |affiliation|
    affiliation_attributes = {
      "affiliationIdentifier" => affiliation["affiliationIdentifier"],
      "affiliationIdentifierScheme" => affiliation["affiliationIdentifierScheme"],
      "schemeURI" => affiliation["schemeUri"]
    }.compact
    xml.affiliation(affiliation["name"], affiliation_attributes)
  end
end
93
+
94
# Writes a <titles> wrapper with one <title> per entry of +titles+.
# Entries may be Hashes ("title", "titleType", "lang") or bare values, which
# are treated as the title text.
def insert_titles(xml)
  xml.titles do
    Array.wrap(titles).each do |title|
      entry = title.is_a?(Hash) ? title : { "title" => title }
      title_attributes = { 'titleType' => entry["titleType"], 'xml:lang' => entry["lang"] }.compact

      xml.title(entry["title"], title_attributes)
    end
  end
end
109
+
110
# Writes the <publisher> element. A Hash publisher contributes its "name" plus
# identifier/lang attributes; any other value is used as the element text.
# When no publisher name is available the container title is used instead.
def insert_publisher(xml)
  unless publisher.is_a?(Hash)
    return xml.publisher(publisher || (container && container["title"]))
  end

  publisher_attributes = {
    'publisherIdentifier' => publisher["publisherIdentifier"],
    'publisherIdentifierScheme' => publisher["publisherIdentifierScheme"],
    'schemeURI' => publisher["schemeUri"],
    "xml:lang" => publisher["lang"]
  }.compact

  xml.publisher(publisher["name"] || (container && container["title"]), publisher_attributes)
end
123
+
124
# Writes the <publicationYear> element from +publication_year+.
def insert_publication_year(xml)
  year = publication_year
  xml.publicationYear(year)
end
127
+
128
# Writes the <resourceType> element, or returns +xml+ untouched unless +types+
# is a Hash carrying a "schemaOrg" or "resourceTypeGeneral" entry.
# resourceTypeGeneral falls back to the schema.org translation table and
# finally to "Other".
def insert_resource_type(xml)
  return xml unless types.is_a?(Hash)
  return xml unless types["schemaOrg"].present? || types["resourceTypeGeneral"]

  general_type = types["resourceTypeGeneral"] ||
                 Metadata::SO_TO_DC_TRANSLATIONS[types["schemaOrg"]] ||
                 "Other"

  xml.resourceType(types["resourceType"], 'resourceTypeGeneral' => general_type)
end
134
+
135
# Writes an <alternateIdentifiers> wrapper for every entry of +identifiers+
# whose identifierType is not "DOI"; returns +xml+ untouched when none remain.
def insert_alternate_identifiers(xml)
  non_doi_identifiers = Array.wrap(identifiers).reject { |entry| entry["identifierType"] == "DOI" }
  return xml if non_doi_identifiers.empty?

  xml.alternateIdentifiers do
    non_doi_identifiers.each do |entry|
      xml.alternateIdentifier(entry["identifier"], 'alternateIdentifierType' => entry["identifierType"])
    end
  end
end
145
+
146
# Writes a <dates> wrapper with one <date> per entry of +dates+; returns +xml+
# untouched when there are none. A missing dateType defaults to "Issued".
def insert_dates(xml)
  return xml if Array.wrap(dates).blank?

  xml.dates do
    Array.wrap(dates).each do |entry|
      date_attributes = {
        'dateType' => entry["dateType"] || "Issued",
        'dateInformation' => entry["dateInformation"]
      }.compact

      xml.date(entry["date"], date_attributes)
    end
  end
end
156
+
157
# Writes a <fundingReferences> wrapper with one <fundingReference> per entry
# of +funding_references+; returns +xml+ untouched when there are none.
def insert_funding_references(xml)
  return xml if Array.wrap(funding_references).blank?

  xml.fundingReferences do
    Array.wrap(funding_references).each do |funding|
      xml.fundingReference do
        # funderName is always written, even when nil
        xml.funderName(funding["funderName"])

        if funding["funderIdentifier"].present?
          identifier_attributes = { "funderIdentifierType" => funding["funderIdentifierType"] }.compact
          xml.funderIdentifier(funding["funderIdentifier"], identifier_attributes)
        end

        # an award URI alone is enough to emit <awardNumber>
        if funding["awardNumber"].present? || funding["awardUri"].present?
          award_attributes = { "awardURI" => funding["awardUri"] }.compact
          xml.awardNumber(funding["awardNumber"], award_attributes)
        end

        xml.awardTitle(funding["awardTitle"]) if funding["awardTitle"].present?
      end
    end
  end
end
171
+
172
# Writes a <subjects> wrapper with one <subject> per entry of +subjects+;
# returns +xml+ untouched when there are none.
#
# Entries may be Hashes ("subject" plus optional scheme/URI/code/lang keys) or
# bare values, which are used as the subject text. +subjects+ is passed
# through Array.wrap, like the other inserters in this module, so a single
# Hash or String is treated as one subject instead of being iterated
# element-wise.
def insert_subjects(xml)
  return xml unless subjects.present?

  xml.subjects do
    Array.wrap(subjects).each do |subject|
      entry = subject.is_a?(Hash) ? subject : { "subject" => subject }

      attributes = {
        "subjectScheme" => entry["subjectScheme"],
        "schemeURI" => entry["schemeUri"],
        "valueURI" => entry["valueUri"],
        "classificationCode" => entry["classificationCode"],
        "xml:lang" => entry["lang"]
      }.compact

      xml.subject(entry["subject"], attributes)
    end
  end
end
190
+
191
# Writes the <version> element from +version_info+, or returns +xml+
# untouched when no version is set.
def insert_version(xml)
  version_info.present? ? xml.version(version_info) : xml
end
196
+
197
+
198
# Writes the <language> element from +language+, or returns +xml+ untouched
# when no language is set.
def insert_language(xml)
  language.present? ? xml.language(language) : xml
end
203
+
204
# Writes a <relatedIdentifiers> wrapper with one <relatedIdentifier> per entry
# of +related_identifiers+; returns +xml+ untouched when there are none.
#
# For the HasMetadata / IsMetadataFor relation types the metadata-scheme
# attributes (relatedMetadataScheme, schemeURI, schemeType) are added as well.
# These were previously computed with a non-mutating Hash#merge whose result
# was discarded, so they never reached the output.
def insert_related_identifiers(xml)
  return xml unless related_identifiers.present?

  xml.relatedIdentifiers do
    Array.wrap(related_identifiers).each do |related_identifier|
      attributes = {
        'relatedIdentifierType' => related_identifier["relatedIdentifierType"],
        'relationType' => related_identifier["relationType"],
        'relationTypeInformation' => related_identifier["relationTypeInformation"],
        'resourceTypeGeneral' => related_identifier["resourceTypeGeneral"]
      }

      if %w(HasMetadata IsMetadataFor).include?(related_identifier["relationType"])
        # "relatedMetadataScheme" is the key insert_related_items reads; the
        # "relatedMetadataSchema" spelling used here before is still accepted.
        # NOTE(review): confirm which key the readers actually produce.
        attributes.merge!(
          'relatedMetadataScheme' => related_identifier["relatedMetadataScheme"] || related_identifier["relatedMetadataSchema"],
          'schemeURI' => related_identifier["schemeUri"],
          'schemeType' => related_identifier["schemeType"]
        )
      end

      xml.relatedIdentifier(related_identifier["relatedIdentifier"], attributes.compact)
    end
  end
end
224
+
225
# Writes a <relatedItems> wrapper with one <relatedItem> per entry of
# +related_items+; returns +xml+ untouched when there are none.
#
# Each item may carry an identifier, creators, titles, bibliographic details
# (year, volume, issue, number, pages, publisher, edition) and contributors.
# <titles> is always written; every other child only when its value is present.
def insert_related_items(xml)
  return xml if related_items.blank?

  xml.relatedItems do
    related_items.each do |item|
      item_attributes = {
        'relatedItemType' => item["relatedItemType"],
        'relationType' => item["relationType"],
        'relationTypeInformation' => item["relationTypeInformation"]
      }.compact

      xml.relatedItem(item["relatedItem"], item_attributes) do
        if item["relatedItemIdentifier"].present?
          identifier = item["relatedItemIdentifier"]
          identifier_attributes = {
            'relatedItemIdentifierType' => identifier["relatedItemIdentifierType"],
            'relatedMetadataScheme' => identifier["relatedMetadataScheme"],
            'schemeURI' => identifier["schemeURI"],
            'schemeType' => identifier["schemeType"]
          }.compact

          xml.relatedItemIdentifier(identifier['relatedItemIdentifier'], identifier_attributes)
        end

        if item["creators"].present?
          xml.creators do
            Array.wrap(item['creators']).each do |creator|
              xml.creator { insert_person(xml, creator, "creator") }
            end
          end
        end

        xml.titles do
          Array.wrap(item['titles']).each do |title|
            entry = title.is_a?(Hash) ? title : { "title" => title }
            title_attributes = { 'titleType' => entry["titleType"], 'xml:lang' => entry["lang"] }.compact

            xml.title(entry["title"], title_attributes)
          end
        end

        xml.publicationYear(item['publicationYear']) if item["publicationYear"].present?
        xml.volume(item['volume']) if item["volume"].present?
        xml.issue(item['issue']) if item["issue"].present?
        if item["number"].present?
          xml.number(item['number'], { 'numberType' => item['numberType'] }.compact)
        end
        xml.firstPage(item['firstPage']) if item["firstPage"].present?
        xml.lastPage(item['lastPage']) if item["lastPage"].present?
        xml.publisher(item['publisher']) if item["publisher"].present?
        xml.edition(item['edition']) if item["edition"].present?

        if item["contributors"].present?
          xml.contributors do
            Array.wrap(item["contributors"]).each do |contributor|
              # contributorType falls back to "Other" here
              type_attribute = { "contributorType" => contributor["contributorType"] || "Other" }
              xml.contributor(type_attribute) { insert_person(xml, contributor, "contributor") }
            end
          end
        end
      end
    end
  end
end
296
+
297
# Writes a <sizes> wrapper with one <size> per entry of +sizes+.
# The wrapper is emitted even when there are no sizes.
def insert_sizes(xml)
  xml.sizes do
    Array.wrap(sizes).each { |size| xml.size(size) }
  end
end
304
+
305
# Writes a <formats> wrapper with one <format> per entry of +formats+.
# The wrapper is emitted even when there are no formats.
def insert_formats(xml)
  xml.formats do
    Array.wrap(formats).each { |format_name| xml.format(format_name) }
  end
end
312
+
313
# Writes a <rightsList> wrapper with one <rights> per entry of +rights_list+;
# returns +xml+ untouched when there are none.
# A non-Hash entry is used as the rights text, and its rightsURI is whatever
# normalize_id returns for it.
def insert_rights_list(xml)
  return xml if rights_list.blank?

  xml.rightsList do
    Array.wrap(rights_list).each do |rights|
      entry =
        if rights.is_a?(Hash)
          rights
        else
          { "rights" => rights, "rightsUri" => normalize_id(rights) }
        end

      rights_attributes = {
        "rightsURI" => entry["rightsUri"],
        "rightsIdentifier" => entry["rightsIdentifier"],
        "rightsIdentifierScheme" => entry["rightsIdentifierScheme"],
        "schemeURI" => entry["schemeUri"],
        "xml:lang" => entry["lang"]
      }.compact

      xml.rights(entry["rights"], rights_attributes)
    end
  end
end
338
+
339
# Writes a <descriptions> wrapper; returns +xml+ untouched when there are
# neither descriptions nor a container title.
#
# When the record responds to +from+, its value does not include "datacite",
# and a container title exists, a SeriesInformation description of the form
# "Title, volume(issue), first-last" is written first. Each entry of
# +descriptions+ follows; bare values and entries without a descriptionType
# are written as "Abstract".
def insert_descriptions(xml)
  return xml unless descriptions.present? || container && container["title"].present?

  xml.descriptions do
    not_datacite_source = respond_to?(:from) && !from.to_s.include?("datacite")

    if not_datacite_source && container && container["title"].present?
      issue = "(#{container["issue"]})" if container["issue"].present?
      volume_issue = [container["volume"], issue].join("") if container["volume"].present?
      pages = [container["firstPage"], container["lastPage"]].compact.join("-") if container["firstPage"].present?

      series_parts = [container["title"], volume_issue, pages].compact
      xml.description(series_parts.join(", "), 'descriptionType' => "SeriesInformation")
    end

    Array.wrap(descriptions).each do |description|
      entry =
        if description.is_a?(Hash)
          description
        else
          { "description" => description, "descriptionType" => "Abstract" }
        end

      description_attributes = {
        'xml:lang' => entry["lang"],
        'descriptionType' => entry["descriptionType"] || "Abstract"
      }.compact

      xml.description(entry["description"], description_attributes)
    end
  end
end
366
+
367
# Writes a <geoLocations> wrapper with one <geoLocation> per entry of
# +geo_locations+; returns +xml+ untouched when there are none.
# Each location may contain a place name, a point, a bounding box and a
# polygon, written in that order.
def insert_geo_locations(xml)
  return xml if geo_locations.blank?

  xml.geoLocations do
    geo_locations.each do |location|
      xml.geoLocation do
        xml.geoLocationPlace(location["geoLocationPlace"]) if location["geoLocationPlace"]

        if location["geoLocationPoint"]
          xml.geoLocationPoint do
            xml.pointLatitude(location.dig("geoLocationPoint", "pointLatitude"))
            xml.pointLongitude(location.dig("geoLocationPoint", "pointLongitude"))
          end
        end

        if location["geoLocationBox"]
          xml.geoLocationBox do
            xml.westBoundLongitude(location.dig("geoLocationBox", "westBoundLongitude"))
            xml.eastBoundLongitude(location.dig("geoLocationBox", "eastBoundLongitude"))
            xml.southBoundLatitude(location.dig("geoLocationBox", "southBoundLatitude"))
            xml.northBoundLatitude(location.dig("geoLocationBox", "northBoundLatitude"))
          end
        end

        if location["geoLocationPolygon"]
          xml.geoLocationPolygon do
            Array.wrap(location["geoLocationPolygon"]).each do |vertex|
              # anything that is not a polygonPoint is written as inPolygonPoint
              element_name = vertex.key?("polygonPoint") ? "polygonPoint" : "inPolygonPoint"

              xml.send(element_name) do
                xml.pointLatitude(vertex.dig(element_name, "pointLatitude"))
                xml.pointLongitude(vertex.dig(element_name, "pointLongitude"))
              end
            end
          end
        end
      end
    end
  end
end
411
+
412
# Attributes for the root <resource> element: the XML Schema instance
# namespace, the kernel-4 schema location and the kernel-4 default namespace.
def root_attributes
  {
    'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
    'xsi:schemaLocation': 'http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd',
    xmlns: 'http://datacite.org/schema/kernel-4'
  }
end
417
+ end
418
+ end
@@ -1,48 +1,70 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Bolognese
2
4
  module DoiUtils
5
+ class << self
6
+ include DoiUtils
7
+ end
8
+
3
9
  def validate_doi(doi)
4
- Array(/\A(?:(http|https):\/\/(dx\.)?doi.org\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
10
+ doi = Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.stage.datacite.org|handle.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
11
+ # remove non-printing whitespace and downcase
12
+ doi.delete("\u200B").downcase if doi.present?
5
13
  end
6
14
 
7
- def normalize_doi(doi)
8
- doi = validate_doi(doi)
9
- return nil unless doi.present?
15
+ def validate_funder_doi(doi)
16
+ match = Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.stage.datacite.org|handle.test.datacite.org)\/)?(doi:)?(10\.13039\/)?([1-9]\d+)\z/.match(doi)).last
17
+ return doi unless match
10
18
 
11
19
  # remove non-printing whitespace and downcase
12
- doi = doi.gsub(/\u200B/, '').downcase
20
+ if match.present?
21
+ doi = match.delete("\u200B").downcase
22
+ "https://doi.org/10.13039/#{doi}"
23
+ end
24
+ end
25
+
26
+ def validate_prefix(doi)
27
+ Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.stage.datacite.org|handle.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}).*\z/.match(doi)).last
28
+ end
29
+
30
+ def doi_resolver(doi, options = {})
31
+ sandbox = Array(/handle.stage.datacite.org/.match(doi)).last
32
+ sandbox.present? || options[:sandbox] ? "https://handle.stage.datacite.org/" : "https://doi.org/"
33
+ end
34
+
35
+ def doi_api_url(doi, options = {})
36
+ sandbox = Array(/handle.stage.datacite.org/.match(doi)).last
37
+ sandbox.present? || options[:sandbox] ? "https://api.stage.datacite.org/dois/#{doi_from_url(doi)}?include=media,client" : "https://api.datacite.org/dois/#{doi_from_url(doi)}?include=media,client"
38
+ end
39
+
40
+ def normalize_doi(doi, options = {})
41
+ doi_str = validate_doi(doi)
42
+ return nil unless doi_str.present?
13
43
 
14
44
  # turn DOI into URL, escape unsafe characters
15
- "https://doi.org/" + Addressable::URI.encode(doi)
45
+ doi_resolver(doi, options) + Addressable::URI.encode(doi_str)
16
46
  end
17
47
 
18
48
  def doi_from_url(url)
19
- if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(url)
49
+ if /\A(?:(http|https):\/\/(dx\.)?(doi.org|handle.stage.datacite.org|handle.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(url)
20
50
  uri = Addressable::URI.parse(url)
21
- uri.path[1..-1].upcase
22
- elsif url.is_a?(String) && url.starts_with?("doi:")
23
- url[4..-1].upcase
51
+ uri.path.gsub(/^\//, '').downcase
24
52
  end
25
53
  end
26
54
 
27
55
  def doi_as_url(doi)
28
- "https://doi.org/#{clean_doi(doi)}" if doi.present?
56
+ "https://doi.org/#{doi}" if doi.present?
29
57
  end
30
58
 
31
- # get DOI registration agency, assume a normalized DOI
59
+ # get DOI registration agency
32
60
  def get_doi_ra(doi)
33
- return {} if doi.blank?
61
+ prefix = validate_prefix(doi)
62
+ return nil if prefix.blank?
34
63
 
35
- url = "https://doi.crossref.org/doiRA/#{doi_from_url(doi)}"
36
- response = Maremma.get(url, host: true, timeout: 120)
64
+ url = "https://doi.org/ra/#{prefix}"
65
+ result = Maremma.get(url)
37
66
 
38
- ra = response.body.fetch("data", {}).first.fetch("RA", nil)
39
- if ra.present?
40
- { "id" => ra.downcase,
41
- "name" => ra }
42
- else
43
- { "errors" => response.body.fetch("errors", nil) || response.body.fetch("data", nil) }
44
- end
67
+ result.body.dig("data", 0, "RA")
45
68
  end
46
-
47
69
  end
48
70
  end