bolognese 0.2.2 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/build.yml +9 -0
  3. data/.github/workflows/changelog.yml +36 -0
  4. data/.github/workflows/ci.yml +22 -0
  5. data/.github/workflows/pull-request.yml +9 -0
  6. data/.github/workflows/release.yml +32 -0
  7. data/.gitignore +4 -0
  8. data/.rubocop.yml +658 -0
  9. data/CHANGELOG.md +1864 -0
  10. data/CITATION +17 -0
  11. data/Gemfile +1 -1
  12. data/Gemfile.lock +251 -99
  13. data/README.md +1026 -2
  14. data/Rakefile +1 -0
  15. data/bin/bolognese +5 -1
  16. data/bolognese.gemspec +33 -21
  17. data/lib/bolognese/array.rb +13 -0
  18. data/lib/bolognese/author_utils.rb +115 -39
  19. data/lib/bolognese/citeproc_extensions.rb +48 -0
  20. data/lib/bolognese/cli.rb +28 -15
  21. data/lib/bolognese/datacite_utils.rb +418 -0
  22. data/lib/bolognese/doi_utils.rb +45 -23
  23. data/lib/bolognese/metadata.rb +250 -18
  24. data/lib/bolognese/metadata_utils.rb +228 -0
  25. data/lib/bolognese/pubmed.rb +2 -0
  26. data/lib/bolognese/readers/bibtex_reader.rb +100 -0
  27. data/lib/bolognese/readers/citeproc_reader.rb +125 -0
  28. data/lib/bolognese/readers/codemeta_reader.rb +108 -0
  29. data/lib/bolognese/readers/crosscite_reader.rb +17 -0
  30. data/lib/bolognese/readers/crossref_reader.rb +413 -0
  31. data/lib/bolognese/readers/datacite_json_reader.rb +17 -0
  32. data/lib/bolognese/readers/datacite_reader.rb +338 -0
  33. data/lib/bolognese/readers/npm_reader.rb +115 -0
  34. data/lib/bolognese/readers/ris_reader.rb +114 -0
  35. data/lib/bolognese/readers/schema_org_reader.rb +264 -0
  36. data/lib/bolognese/string.rb +3 -1
  37. data/lib/bolognese/utils.rb +1403 -12
  38. data/lib/bolognese/version.rb +1 -1
  39. data/lib/bolognese/whitelist_scrubber.rb +47 -0
  40. data/lib/bolognese/writers/bibtex_writer.rb +32 -0
  41. data/lib/bolognese/writers/citation_writer.rb +14 -0
  42. data/lib/bolognese/writers/citeproc_writer.rb +11 -0
  43. data/lib/bolognese/writers/codemeta_writer.rb +29 -0
  44. data/lib/bolognese/writers/crosscite_writer.rb +11 -0
  45. data/lib/bolognese/writers/crossref_writer.rb +11 -0
  46. data/lib/bolognese/writers/csv_writer.rb +24 -0
  47. data/lib/bolognese/writers/datacite_json_writer.rb +13 -0
  48. data/lib/bolognese/writers/datacite_writer.rb +12 -0
  49. data/lib/bolognese/writers/jats_writer.rb +138 -0
  50. data/lib/bolognese/writers/rdf_xml_writer.rb +11 -0
  51. data/lib/bolognese/writers/ris_writer.rb +29 -0
  52. data/lib/bolognese/writers/schema_org_writer.rb +55 -0
  53. data/lib/bolognese/writers/turtle_writer.rb +11 -0
  54. data/lib/bolognese.rb +19 -4
  55. data/package.json +12 -0
  56. data/resources/2008/09/xsd.xsl +997 -0
  57. data/resources/datacite-contributorType-v4.xsd +35 -0
  58. data/resources/datacite-dateType-v4.xsd +25 -0
  59. data/resources/datacite-descriptionType-v4.xsd +19 -0
  60. data/resources/datacite-funderIdentifierType-v4.xsd +15 -0
  61. data/resources/datacite-nameType-v4.xsd +10 -0
  62. data/resources/datacite-relatedIdentifierType-v4.xsd +34 -0
  63. data/resources/datacite-relationType-v4.xsd +49 -0
  64. data/resources/datacite-resourceType-v4.xsd +28 -0
  65. data/resources/datacite-titleType-v4.xsd +14 -0
  66. data/resources/kernel-2.1/include/datacite-contributorType-v1.1.xsd +22 -0
  67. data/resources/kernel-2.1/include/datacite-contributorType-v2.xsd +3 -0
  68. data/resources/kernel-2.1/include/datacite-dateType-v1.1.xsd +31 -0
  69. data/resources/kernel-2.1/include/datacite-dateType-v2.xsd +3 -0
  70. data/resources/kernel-2.1/include/datacite-descriptionType-v1.1.xsd +14 -0
  71. data/resources/kernel-2.1/include/datacite-descriptionType-v2.xsd +3 -0
  72. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v1.1.xsd +24 -0
  73. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v2.xsd +3 -0
  74. data/resources/kernel-2.1/include/datacite-relationType-v1.1.xsd +29 -0
  75. data/resources/kernel-2.1/include/datacite-relationType-v2.xsd +3 -0
  76. data/resources/kernel-2.1/include/datacite-resourceType-v1.1.xsd +22 -0
  77. data/resources/kernel-2.1/include/datacite-resourceType-v2.xsd +3 -0
  78. data/resources/kernel-2.1/include/datacite-titleType-v1.1.xsd +11 -0
  79. data/resources/kernel-2.1/include/datacite-titleType-v2.xsd +3 -0
  80. data/resources/kernel-2.1/metadata.xsd +315 -0
  81. data/resources/kernel-2.2/include/datacite-contributorType-v2.xsd +29 -0
  82. data/resources/kernel-2.2/include/datacite-dateType-v2.xsd +21 -0
  83. data/resources/kernel-2.2/include/datacite-descriptionType-v2.xsd +15 -0
  84. data/resources/kernel-2.2/include/datacite-relatedIdentifierType-v2.xsd +25 -0
  85. data/resources/kernel-2.2/include/datacite-relationType-v2.xsd +29 -0
  86. data/resources/kernel-2.2/include/datacite-resourceType-v2.xsd +23 -0
  87. data/resources/kernel-2.2/include/datacite-titleType-v2.xsd +10 -0
  88. data/resources/kernel-2.2/metadata.xsd +316 -0
  89. data/resources/kernel-3/include/datacite-contributorType-v3.1.xsd +35 -0
  90. data/resources/kernel-3/include/datacite-dateType-v3.xsd +21 -0
  91. data/resources/kernel-3/include/datacite-descriptionType-v3.xsd +17 -0
  92. data/resources/kernel-3/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
  93. data/resources/kernel-3/include/datacite-relationType-v3.1.xsd +38 -0
  94. data/resources/kernel-3/include/datacite-resourceType-v3.xsd +26 -0
  95. data/resources/kernel-3/include/datacite-titleType-v3.xsd +12 -0
  96. data/resources/kernel-3/include/xml.xsd +286 -0
  97. data/resources/kernel-3/metadata.xsd +380 -0
  98. data/resources/kernel-3.0/include/datacite-contributorType-v3.xsd +33 -0
  99. data/resources/kernel-3.0/include/datacite-dateType-v3.xsd +21 -0
  100. data/resources/kernel-3.0/include/datacite-descriptionType-v3.xsd +17 -0
  101. data/resources/kernel-3.0/include/datacite-relatedIdentifierType-v3.xsd +27 -0
  102. data/resources/kernel-3.0/include/datacite-relationType-v3.xsd +33 -0
  103. data/resources/kernel-3.0/include/datacite-resourceType-v3.xsd +26 -0
  104. data/resources/kernel-3.0/include/datacite-titleType-v3.xsd +12 -0
  105. data/resources/kernel-3.0/include/xml.xsd +286 -0
  106. data/resources/kernel-3.0/metadata.xsd +377 -0
  107. data/resources/kernel-3.1/include/datacite-contributorType-v3.1.xsd +35 -0
  108. data/resources/kernel-3.1/include/datacite-dateType-v3.xsd +21 -0
  109. data/resources/kernel-3.1/include/datacite-descriptionType-v3.xsd +17 -0
  110. data/resources/kernel-3.1/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
  111. data/resources/kernel-3.1/include/datacite-relationType-v3.1.xsd +38 -0
  112. data/resources/kernel-3.1/include/datacite-resourceType-v3.xsd +26 -0
  113. data/resources/kernel-3.1/include/datacite-titleType-v3.xsd +12 -0
  114. data/resources/kernel-3.1/include/xml.xsd +286 -0
  115. data/resources/kernel-3.1/metadata.xsd +380 -0
  116. data/resources/kernel-4/include/datacite-contributorType-v4.xsd +37 -0
  117. data/resources/kernel-4/include/datacite-dateType-v4.xsd +27 -0
  118. data/resources/kernel-4/include/datacite-descriptionType-v4.xsd +19 -0
  119. data/resources/kernel-4/include/datacite-funderIdentifierType-v4.xsd +16 -0
  120. data/resources/kernel-4/include/datacite-nameType-v4.xsd +10 -0
  121. data/resources/kernel-4/include/datacite-numberType-v4.xsd +12 -0
  122. data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +39 -0
  123. data/resources/kernel-4/include/datacite-relationType-v4.xsd +59 -0
  124. data/resources/kernel-4/include/datacite-resourceType-v4.xsd +52 -0
  125. data/resources/kernel-4/include/datacite-titleType-v4.xsd +14 -0
  126. data/resources/kernel-4/include/xml.xsd +286 -0
  127. data/resources/kernel-4/metadata.xsd +715 -0
  128. data/resources/kernel-4.0/include/datacite-contributorType-v4.xsd +35 -0
  129. data/resources/kernel-4.0/include/datacite-dateType-v4.xsd +21 -0
  130. data/resources/kernel-4.0/include/datacite-descriptionType-v4.xsd +19 -0
  131. data/resources/kernel-4.0/include/datacite-funderIdentifierType-v4.xsd +15 -0
  132. data/resources/kernel-4.0/include/datacite-relatedIdentifierType-v4.xsd +32 -0
  133. data/resources/kernel-4.0/include/datacite-relationType-v4.xsd +39 -0
  134. data/resources/kernel-4.0/include/datacite-resourceType-v4.xsd +26 -0
  135. data/resources/kernel-4.0/include/datacite-titleType-v4.xsd +14 -0
  136. data/resources/kernel-4.0/include/xml.xsd +286 -0
  137. data/resources/kernel-4.0/metadata.xsd +470 -0
  138. data/resources/kernel-4.1/include/datacite-contributorType-v4.xsd +35 -0
  139. data/resources/kernel-4.1/include/datacite-dateType-v4.1.xsd +23 -0
  140. data/resources/kernel-4.1/include/datacite-descriptionType-v4.xsd +19 -0
  141. data/resources/kernel-4.1/include/datacite-funderIdentifierType-v4.xsd +15 -0
  142. data/resources/kernel-4.1/include/datacite-nameType-v4.1.xsd +10 -0
  143. data/resources/kernel-4.1/include/datacite-relatedIdentifierType-v4.xsd +32 -0
  144. data/resources/kernel-4.1/include/datacite-relationType-v4.1.xsd +46 -0
  145. data/resources/kernel-4.1/include/datacite-resourceType-v4.1.xsd +28 -0
  146. data/resources/kernel-4.1/include/datacite-titleType-v4.xsd +14 -0
  147. data/resources/kernel-4.1/include/xml.xsd +286 -0
  148. data/resources/kernel-4.1/metadata.xsd +483 -0
  149. data/resources/kernel-4.2/include/datacite-contributorType-v4.xsd +35 -0
  150. data/resources/kernel-4.2/include/datacite-dateType-v4.xsd +25 -0
  151. data/resources/kernel-4.2/include/datacite-descriptionType-v4.xsd +19 -0
  152. data/resources/kernel-4.2/include/datacite-funderIdentifierType-v4.xsd +15 -0
  153. data/resources/kernel-4.2/include/datacite-nameType-v4.xsd +10 -0
  154. data/resources/kernel-4.2/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  155. data/resources/kernel-4.2/include/datacite-relationType-v4.xsd +49 -0
  156. data/resources/kernel-4.2/include/datacite-resourceType-v4.xsd +28 -0
  157. data/resources/kernel-4.2/include/datacite-titleType-v4.xsd +14 -0
  158. data/resources/kernel-4.2/include/xml.xsd +286 -0
  159. data/resources/kernel-4.2/metadata.xsd +479 -0
  160. data/resources/kernel-4.3/include/datacite-contributorType-v4.xsd +35 -0
  161. data/resources/kernel-4.3/include/datacite-dateType-v4.xsd +25 -0
  162. data/resources/kernel-4.3/include/datacite-descriptionType-v4.xsd +19 -0
  163. data/resources/kernel-4.3/include/datacite-funderIdentifierType-v4.xsd +16 -0
  164. data/resources/kernel-4.3/include/datacite-nameType-v4.xsd +10 -0
  165. data/resources/kernel-4.3/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  166. data/resources/kernel-4.3/include/datacite-relationType-v4.xsd +49 -0
  167. data/resources/kernel-4.3/include/datacite-resourceType-v4.xsd +28 -0
  168. data/resources/kernel-4.3/include/datacite-titleType-v4.xsd +14 -0
  169. data/resources/kernel-4.3/include/xml.xsd +286 -0
  170. data/resources/kernel-4.3/metadata.xsd +515 -0
  171. data/resources/kernel-4.4/include/datacite-contributorType-v4.xsd +35 -0
  172. data/resources/kernel-4.4/include/datacite-dateType-v4.xsd +25 -0
  173. data/resources/kernel-4.4/include/datacite-descriptionType-v4.xsd +19 -0
  174. data/resources/kernel-4.4/include/datacite-funderIdentifierType-v4.xsd +16 -0
  175. data/resources/kernel-4.4/include/datacite-nameType-v4.xsd +10 -0
  176. data/resources/kernel-4.4/include/datacite-numberType-v4.xsd +12 -0
  177. data/resources/kernel-4.4/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  178. data/resources/kernel-4.4/include/datacite-relationType-v4.xsd +51 -0
  179. data/resources/kernel-4.4/include/datacite-resourceType-v4.xsd +43 -0
  180. data/resources/kernel-4.4/include/datacite-titleType-v4.xsd +14 -0
  181. data/resources/kernel-4.4/include/xml.xsd +286 -0
  182. data/resources/kernel-4.4/metadata.xsd +707 -0
  183. data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
  184. data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
  185. data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
  186. data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
  187. data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
  188. data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
  189. data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  190. data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
  191. data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
  192. data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
  193. data/resources/kernel-4.5/include/xml.xsd +286 -0
  194. data/resources/kernel-4.5/metadata.xsd +711 -0
  195. data/resources/kernel-4.6/include/datacite-contributorType-v4.xsd +37 -0
  196. data/resources/kernel-4.6/include/datacite-dateType-v4.xsd +27 -0
  197. data/resources/kernel-4.6/include/datacite-descriptionType-v4.xsd +19 -0
  198. data/resources/kernel-4.6/include/datacite-funderIdentifierType-v4.xsd +16 -0
  199. data/resources/kernel-4.6/include/datacite-nameType-v4.xsd +10 -0
  200. data/resources/kernel-4.6/include/datacite-numberType-v4.xsd +12 -0
  201. data/resources/kernel-4.6/include/datacite-relatedIdentifierType-v4.xsd +37 -0
  202. data/resources/kernel-4.6/include/datacite-relationType-v4.xsd +57 -0
  203. data/resources/kernel-4.6/include/datacite-resourceType-v4.xsd +49 -0
  204. data/resources/kernel-4.6/include/datacite-titleType-v4.xsd +14 -0
  205. data/resources/kernel-4.6/include/xml.xsd +286 -0
  206. data/resources/kernel-4.6/metadata.xsd +712 -0
  207. data/resources/kernel-4.7/include/datacite-contributorType-v4.xsd +37 -0
  208. data/resources/kernel-4.7/include/datacite-dateType-v4.xsd +27 -0
  209. data/resources/kernel-4.7/include/datacite-descriptionType-v4.xsd +19 -0
  210. data/resources/kernel-4.7/include/datacite-funderIdentifierType-v4.xsd +16 -0
  211. data/resources/kernel-4.7/include/datacite-nameType-v4.xsd +10 -0
  212. data/resources/kernel-4.7/include/datacite-numberType-v4.xsd +12 -0
  213. data/resources/kernel-4.7/include/datacite-relatedIdentifierType-v4.xsd +39 -0
  214. data/resources/kernel-4.7/include/datacite-relationType-v4.xsd +59 -0
  215. data/resources/kernel-4.7/include/datacite-resourceType-v4.xsd +52 -0
  216. data/resources/kernel-4.7/include/datacite-titleType-v4.xsd +14 -0
  217. data/resources/kernel-4.7/include/xml.xsd +286 -0
  218. data/resources/kernel-4.7/metadata.xsd +715 -0
  219. data/resources/oecd/dfg-mappings.json +1866 -0
  220. data/resources/oecd/for-mappings.json +1101 -0
  221. data/resources/oecd/fos-mappings.json +198 -0
  222. data/resources/schema_org/jsonldcontext.json +7477 -0
  223. data/resources/spdx/licenses.json +5297 -0
  224. data/resources/xml.xsd +286 -0
  225. metadata +478 -150
  226. data/.travis.yml +0 -23
  227. data/lib/bolognese/crossref.rb +0 -202
  228. data/lib/bolognese/datacite.rb +0 -157
  229. data/lib/bolognese/date_utils.rb +0 -48
  230. data/lib/bolognese/github.rb +0 -106
  231. data/lib/bolognese/orcid.rb +0 -24
  232. data/lib/bolognese/pid_utils.rb +0 -23
  233. data/spec/cli_spec.rb +0 -37
  234. data/spec/crossref_spec.rb +0 -113
  235. data/spec/datacite_spec.rb +0 -49
  236. data/spec/doi_spec.rb +0 -89
  237. data/spec/fixtures/crossref.xml +0 -742
  238. data/spec/fixtures/datacite.xml +0 -40
  239. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_crossref.yml +0 -760
  240. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_schema_org.yml +0 -1476
  241. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_datacite.yml +0 -214
  242. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_schema_org.yml +0 -384
  243. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/crossref.yml +0 -44
  244. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/datacite.yml +0 -44
  245. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/medra.yml +0 -44
  246. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/not_found.yml +0 -44
  247. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_test.yml +0 -843
  248. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_SICI_DOI.yml +0 -277
  249. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_data_citation.yml +0 -15755
  250. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/date_in_future.yml +0 -2691
  251. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/journal_article.yml +0 -1857
  252. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/not_found_error.yml +0 -93
  253. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/posted_content.yml +0 -5715
  254. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/BlogPosting.yml +0 -307
  255. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/Dataset.yml +0 -343
  256. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref.yml +0 -44
  257. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref_doi_not_url.yml +0 -44
  258. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite.yml +0 -44
  259. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite_doi_http.yml +0 -44
  260. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/orcid.yml +0 -44
  261. data/spec/metadata_spec.rb +0 -35
  262. data/spec/orcid_spec.rb +0 -23
  263. data/spec/spec_helper.rb +0 -88
  264. /data/{LICENSE → LICENSE.md} +0 -0
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "0.2.2"
2
+ VERSION = "2.7.0"
3
3
  end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ # modified from https://gist.github.com/ivan-kolmychek/ee2fdc53f3e2c637271d
4
+
5
module Bolognese
  # Loofah scrubber driven by optional allow-lists of tag names and
  # attribute names. When a list is not supplied (or is blank), the
  # corresponding decision is delegated to Loofah::HTML5::Scrub.
  class WhitelistScrubber < Loofah::Scrubber
    # @param options [Hash] :tags and :attributes hold the allow-lists;
    #   each is used only when present and responding to #include?.
    def initialize(options={})
      @direction = :bottom_up
      @tags = options[:tags]
      @attributes = options[:attributes]
    end

    # Keeps an allowed node (after scrubbing its attributes); otherwise
    # moves the node's children in front of it and removes the node itself.
    #
    # @param node [Nokogiri::XML::Node]
    # @return CONTINUE for allowed nodes, otherwise the result of node.remove
    def scrub(node)
      if node_allowed?(node)
        # Do not let the return value of the attribute scrubbing decide
        # whether an allowed node is kept.
        scrub_node_attributes(node)
        return CONTINUE
      end

      node.before node.children
      node.remove
    end

    private

    # Removes every attribute not named in @attributes. Without a usable
    # allow-list the work is delegated to Loofah and nothing else is done,
    # regardless of what Loofah returns.
    #
    # @return [true]
    def scrub_node_attributes(node)
      unless @attributes.present? && @attributes.respond_to?(:include?)
        fallback_scrub_node_attributes(node)
        return true
      end

      node.attribute_nodes.each do |attr_node|
        attr_node.remove unless @attributes.include?(attr_node.name)
      end
      true
    end

    # Node types that are kept even though they are not elements.
    def allowed_not_element_node_types
      [ Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE ]
    end

    def fallback_scrub_node_attributes(node)
      Loofah::HTML5::Scrub.scrub_attributes(node)
    end

    def fallback_allowed_element_detection(node)
      Loofah::HTML5::Scrub.allowed_element?(node.name)
    end

    # Decides whether a node survives: Loofah's own element check when no
    # usable tag list was given, otherwise text/CDATA nodes plus elements
    # whose name is in @tags.
    def node_allowed?(node)
      return fallback_allowed_element_detection(node) unless @tags.present? && @tags.respond_to?(:include?)
      return true if allowed_not_element_node_types.include?(node.type)
      return false unless node.type == Nokogiri::XML::Node::ELEMENT_NODE
      @tags.include? node.name
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
module Bolognese
  module Writers
    # Mixin that serialises the including object's metadata as BibTeX.
    # All readers used here (container, types, doi, ...) and the helpers
    # (normalize_doi, authors_as_string, parse_attributes) are expected to
    # come from the including class.
    module BibtexWriter
      # @return [String, nil] the BibTeX entry, or nil when the record is
      #   not valid
      def bibtex
        return nil unless valid?

        # A page range is only emitted when a first page is known.
        page_range =
          if container.to_h["firstPage"].present?
            [container["firstPage"], container["lastPage"]].compact.join("-")
          end

        fields = {
          bibtex_type: types["bibtex"].presence || "misc",
          bibtex_key: normalize_doi(doi),
          doi: doi,
          url: url,
          author: authors_as_string(creators),
          keywords: if subjects.present?
                      Array.wrap(subjects)
                           .map { |subject| parse_attributes(subject, content: "subject", first: true) }
                           .join(", ")
                    end,
          language: language,
          title: parse_attributes(titles, content: "title", first: true),
          journal: container && container["title"],
          volume: container.to_h["volume"],
          issue: container.to_h["issue"],
          pages: page_range,
          publisher: publisher["name"],
          year: publication_year,
          copyright: Array.wrap(rights_list).map { |entry| entry["rights"] }.first,
        }

        # nil-valued fields are dropped before the entry is built.
        BibTeX::Entry.new(fields.compact).to_s
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module Bolognese
  module Writers
    # Mixin that renders a formatted citation through CiteProc.
    module CitationWriter
      # Feeds the record's citeproc hash to a CiteProc processor configured
      # with the host's style and locale (HTML output) and returns the first
      # rendered bibliography item for the normalised DOI.
      def citation
        processor = CiteProc::Processor.new(style: style, locale: locale, format: 'html')
        processor.import(Array.wrap(citeproc_hsh))
        processor.render(:bibliography, id: normalize_doi(doi)).first
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Bolognese
  module Writers
    # Mixin that serialises the record's citeproc hash as JSON.
    module CiteprocWriter
      # @return [String] pretty-printed JSON; a blank hash is passed on as
      #   nil (via #presence) before generation
      def citeproc
        payload = citeproc_hsh.presence
        JSON.pretty_generate(payload)
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module Bolognese
  module Writers
    # Mixin that serialises the including object's metadata as a codemeta
    # JSON document. Readers and helpers used here are expected to be
    # provided by the including class.
    module CodemetaWriter
      # @return [String, nil] pretty-printed JSON, or nil when the record is
      #   invalid and show_errors is not set
      def codemeta
        return nil unless valid? || show_errors

        hsh = {
          "@context" => id.present? ? "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld" : nil,
          "@type" => types.present? ? types["schemaOrg"] : nil,
          "@id" => normalize_doi(doi),
          "identifier" => to_schema_org_identifiers(identifiers),
          "codeRepository" => url,
          "name" => parse_attributes(titles, content: "title", first: true),
          "authors" => creators,
          "description" => parse_attributes(descriptions, content: "description", first: true),
          "version" => version_info,
          "tags" => subjects.present? ? Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) } : nil,
          "datePublished" => get_date(dates, "Issued") || publication_year,
          "dateModified" => get_date(dates, "Updated"),
          # show_errors lets invalid records through, so the publisher may
          # be missing; guard it the same way types is guarded above.
          "publisher" => publisher.present? ? publisher["name"] : nil,
          "license" => Array.wrap(rights_list).map { |l| l["rightsUri"] }.compact.unwrap,
        }.compact
        JSON.pretty_generate hsh.presence
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Bolognese
  module Writers
    # Mixin that serialises the record's crosscite hash as JSON.
    module CrossciteWriter
      # @return [String] pretty-printed JSON; a blank hash is passed on as
      #   nil (via #presence) before generation
      def crosscite
        payload = crosscite_hsh.presence
        JSON.pretty_generate(payload)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Bolognese
  module Writers
    # Mixin exposing the record as Crossref output.
    module CrossrefWriter
      # Returns the raw input unchanged, but only when the record was read
      # from "crossref"; any other source yields nil.
      def crossref
        return nil unless from == "crossref"

        raw
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
# frozen_string_literal: true

require "csv"

module Bolognese
  module Writers
    # Mixin that serialises a fixed set of the record's fields as one CSV
    # row (no header line). Readers and helpers used here are expected to
    # be provided by the including class.
    module CsvWriter
      # @return [String] a single CSV line with the columns: doi, url,
      #   registered, state, resource_type_general, resource_type, title,
      #   author, publisher, publication_year
      def csv
        columns = {
          doi: doi,
          url: url,
          registered: get_iso8601_date(date_registered),
          state: state,
          # types and publisher are only dereferenced when they can be
          # converted to a hash; otherwise the column stays empty.
          resource_type_general: types.respond_to?(:to_h) ? types.to_h["resourceTypeGeneral"] : nil,
          resource_type: types.respond_to?(:to_h) ? types.to_h["resourceType"] : nil,
          title: parse_attributes(titles, content: "title", first: true),
          author: authors_as_string(creators),
          publisher: publisher.respond_to?(:to_h) ? publisher.to_h["name"] : nil,
          publication_year: publication_year
        }

        # The block parameter is deliberately not called `csv`, which would
        # shadow this method's own name.
        CSV.generate { |out| out << columns.values }
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
# frozen_string_literal: true

module Bolognese
  module Writers
    # Mixin that serialises the record's crosscite hash as JSON with
    # lower-camel-cased top-level keys.
    module DataciteJsonWriter
      # @return [String, nil] pretty-printed JSON, or nil when the crosscite
      #   hash is blank
      def datacite_json
        hsh = crosscite_hsh
        return nil unless hsh.present?

        # Non-destructive transform_keys: the hash handed out by
        # crosscite_hsh keeps its original keys for any other consumer.
        JSON.pretty_generate(hsh.transform_keys { |key| key.camelcase(:lower) })
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
module Bolognese
  module Writers
    # Mixin exposing the record as DataCite XML.
    module DataciteWriter
      # Passes the raw input through when should_passthru is set; otherwise
      # (the "regenerate" case) returns freshly generated DataCite XML from
      # datacite_xml.
      def datacite
        return raw if should_passthru

        datacite_xml
      end
    end
  end
end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
module Bolognese
  module Writers
    # Mixin that renders the including object's metadata as a JATS
    # <element-citation> document built with Nokogiri::XML::Builder.
    # Readers (creators, container, publisher, ...) and helpers (get_date,
    # get_date_parts, parse_attributes) are expected to be provided by the
    # including class.
    module JatsWriter
      # @return [String] the citation XML; the result is memoised in @jats
      def jats
        @jats ||= Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
          xml.send("element-citation", publication_type) do
            insert_citation(xml)
          end
        end.to_xml
      end

      # Emits every child element of the citation in a fixed order.
      def insert_citation(xml)
        insert_authors(xml)
        insert_editors(xml)
        insert_citation_title(xml) if is_article? || is_data? || is_chapter?
        insert_source(xml)
        insert_publisher_name(xml) if publisher.present? && !is_data?
        insert_publication_date(xml)
        insert_volume(xml) if container.to_h["volume"].present?
        insert_issue(xml) if container.to_h["issue"].present?
        insert_fpage(xml) if container.to_h["firstPage"].present?
        insert_lpage(xml) if container.to_h["lastPage"].present?
        insert_version(xml) if version_info.present?
        insert_pub_id(xml)
      end

      def is_article?
        publication_type.fetch('publication-type', nil) == "journal"
      end

      def is_data?
        publication_type.fetch('publication-type', nil) == "data"
      end

      def is_chapter?
        publication_type.fetch('publication-type', nil) == "chapter"
      end

      # Writes one <name> per creator inside an author person-group.
      def insert_authors(xml)
        return unless creators.present?

        xml.send("person-group", "person-group-type" => "author") do
          Array.wrap(creators).each do |au|
            xml.name do
              insert_contributor(xml, au)
            end
          end
        end
      end

      # Writes one <name> per contributor inside an editor person-group.
      def insert_editors(xml)
        return unless contributors.present?

        xml.send("person-group", "person-group-type" => "editor") do
          Array.wrap(contributors).each do |con|
            xml.name do
              insert_contributor(xml, con)
            end
          end
        end
      end

      # Writes the surname / given-names children for one person hash.
      def insert_contributor(xml, person)
        xml.surname(person["familyName"]) if person["familyName"].present?
        xml.send("given-names", person["givenName"]) if person["givenName"].present?
      end

      # Writes the title element whose name depends on the publication type.
      def insert_citation_title(xml)
        case publication_type.fetch('publication-type', nil)
        when "data" then xml.send("data-title", parse_attributes(titles, content: "title", first: true))
        when "journal" then xml.send("article-title", parse_attributes(titles, content: "title", first: true))
        when "chapter" then xml.send("chapter-title", parse_attributes(titles, content: "title", first: true))
        end
      end

      # Writes <source>: the publisher name for chapters, the container
      # title (falling back to the publisher name) for articles and data,
      # and the record's own title otherwise.
      def insert_source(xml)
        # insert_citation treats the publisher as optional, so it must not
        # be dereferenced unconditionally here.
        publisher_name = publisher.present? ? publisher["name"] : nil

        if is_chapter?
          xml.source(publisher_name)
        elsif is_article? || is_data?
          xml.source(container && container["title"] || publisher_name)
        else
          xml.source(parse_attributes(titles, content: "title", first: true))
        end
      end

      # Writes <publisher-name>, carrying xml:lang when the publisher has a
      # "lang" entry. Callers are expected to check publisher.present? first.
      def insert_publisher_name(xml)
        attributes = {
          "xml:lang" => publisher["lang"]
        }.compact
        xml.send("publisher-name", attributes, publisher["name"])
      end

      # Writes <year> (always), plus zero-padded <month> and <day> when the
      # date parts contain them.
      def insert_publication_date(xml)
        issued_on = get_date(dates, "Issued") || publication_year
        year, month, day = get_date_parts(issued_on).to_h.fetch("date-parts", []).first

        xml.year(year, "iso-8601-date" => issued_on)
        xml.month(month.to_s.rjust(2, '0')) if month.present?
        xml.day(day.to_s.rjust(2, '0')) if day.present?
      end

      def insert_volume(xml)
        xml.volume(container["volume"])
      end

      def insert_issue(xml)
        xml.issue(container["issue"])
      end

      def insert_fpage(xml)
        xml.fpage(container["firstPage"])
      end

      def insert_lpage(xml)
        xml.lpage(container["lastPage"])
      end

      def insert_version(xml)
        xml.version(version_info)
      end

      def insert_pub_id(xml)
        return nil unless doi.present?
        xml.send("pub-id", doi, "pub-id-type" => "doi")
      end

      # Date parts of the "Issued" date, falling back to the publication year.
      def date
        get_date_parts(get_date(dates, "Issued") || publication_year)
      end

      # Attribute hash for <element-citation>: maps the record's resource
      # type (or, failing that, its schema.org type) to a JATS
      # publication-type; empty when neither translates.
      def publication_type
        # A record without types yields an untyped citation instead of
        # raising on nil.
        type_info = types.present? ? types : {}
        { 'publication-type' => Bolognese::Utils::CR_TO_JATS_TRANSLATIONS[type_info["resourceType"]] || Bolognese::Utils::SO_TO_JATS_TRANSLATIONS[type_info["schemaOrg"]] }.compact
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Bolognese
  module Writers
    # Mixin that serialises the record's graph as RDF/XML.
    module RdfXmlWriter
      # Dumps the host's graph in :rdfxml format, declaring the "schema"
      # prefix for http://schema.org/.
      def rdf_xml
        schema_prefixes = { schema: "http://schema.org/" }
        graph.dump(:rdfxml, prefixes: schema_prefixes)
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module Bolognese
  module Writers
    # Mixin that serialises the including object's metadata in RIS format.
    # Readers and helpers used here (to_ris, parse_attributes, ...) are
    # expected to be provided by the including class.
    module RisWriter
      # @return [String] RIS tag lines joined with CRLF, always terminated
      #   by the "ER" end-of-record tag; nil-valued tags are omitted and an
      #   Array value produces one line per element
      def ris
        # RIS tag lines are "TAG", two spaces, a hyphen, one space, value.
        tag_line = ->(tag, value) { "#{tag}  - #{value}" }

        {
          # This writer has no valid? gate, so types and publisher may be
          # missing; guard them instead of raising on nil.
          "TY" => types.present? ? types["ris"] : nil,
          "T1" => parse_attributes(titles, content: "title", first: true),
          "T2" => container && container["title"],
          "AU" => to_ris(creators),
          "DO" => doi,
          "UR" => url,
          "AB" => parse_attributes(abstract_description, content: "description", first: true),
          "KW" => Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.presence,
          "PY" => publication_year,
          "PB" => publisher.present? ? publisher["name"] : nil,
          "LA" => language,
          "VL" => container.to_h["volume"],
          "IS" => container.to_h["issue"],
          "SP" => container.to_h["firstPage"],
          "EP" => container.to_h["lastPage"],
          # Identifier of the first related item this record is part of.
          "SN" => Array.wrap(related_identifiers).find { |ri| ri["relationType"] == "IsPartOf" }.to_h.fetch("relatedIdentifier", nil),
          "ER" => ""
        }.compact.map { |k, v| v.is_a?(Array) ? v.map { |vi| tag_line.call(k, vi) }.join("\r\n") : tag_line.call(k, v) }.join("\r\n")
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
module Bolognese
  module Writers
    # Mixin that serialises the including object's metadata as schema.org
    # JSON-LD. Readers and the to_schema_org_* helpers are expected to be
    # provided by the including class.
    module SchemaOrgWriter
      # Builds the schema.org hash. nil-valued entries are dropped and a
      # blank result becomes nil.
      #
      # @return [Hash, nil]
      def schema_hsh
        has_types = types.present?
        schema_type = has_types ? types["schemaOrg"] : nil
        # Shorthand for the related-identifier lookups, which differ only
        # in the relation type.
        related = lambda do |relation_type|
          to_schema_org_relation(related_identifiers: related_identifiers, relation_type: relation_type)
        end

        document = {
          "@context" => "http://schema.org",
          "@type" => schema_type,
          "@id" => normalize_doi(doi),
          "identifier" => to_schema_org_identifiers(identifiers),
          "url" => url,
          "additionalType" => has_types ? types["resourceType"] : nil,
          "name" => parse_attributes(titles, content: "title", first: true),
          "author" => to_schema_org_creators(creators),
          "editor" => to_schema_org_contributors(contributors),
          "translator" => if contributors
                            to_schema_org_contributors(contributors.select { |c| c["contributorType"] == "Translator" })
                          end,
          "description" => parse_attributes(abstract_description, content: "description", first: true),
          "license" => Array.wrap(rights_list).map { |l| l["rightsUri"] }.compact.unwrap,
          "version" => version_info,
          "keywords" => if subjects.present?
                          Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.join(", ")
                        end,
          "inLanguage" => language,
          "contentSize" => Array.wrap(sizes).unwrap,
          "encodingFormat" => Array.wrap(formats).unwrap,
          "dateCreated" => get_date(dates, "Created"),
          "datePublished" => get_date(dates, "Issued") || publication_year,
          "dateModified" => get_date(dates, "Updated"),
          "temporalCoverage" => get_date(dates, "Coverage"),
          "pageStart" => container.to_h["firstPage"],
          "pageEnd" => container.to_h["lastPage"],
          "spatialCoverage" => to_schema_org_spatial_coverage(geo_locations),
          "sameAs" => related.call("IsIdenticalTo"),
          "isPartOf" => related.call("IsPartOf"),
          "hasPart" => related.call("HasPart"),
          "predecessor_of" => related.call("IsPreviousVersionOf"),
          "successor_of" => related.call("IsNewVersionOf"),
          "citation" => related.call("References"),
          "workTranslation" => related.call("HasTranslation"),
          "translationOfWork" => related.call("IsTranslationOf"),
          "@reverse" => reverse.presence,
          "contentUrl" => Array.wrap(content_url).unwrap,
          "schemaVersion" => schema_version,
          # The container becomes a periodical for everything except
          # datasets, and a data catalog for datasets.
          "periodical" => has_types && schema_type != "Dataset" && container.present? ? to_schema_org(container) : nil,
          "includedInDataCatalog" => has_types && schema_type == "Dataset" && container.present? ? to_schema_org_container(container, type: "Dataset") : nil,
          "publisher" => if publisher.present?
                           { "@type" => "Organization", "@id" => publisher["publisherIdentifier"], "name" => publisher["name"] }.compact
                         end,
          "funder" => to_schema_org_funder(funding_references),
          "provider" => agency.present? ? { "@type" => "Organization", "name" => agency } : nil
        }

        document.compact.presence
      end

      # @return [String] schema_hsh as pretty-printed JSON
      def schema_org
        JSON.pretty_generate(schema_hsh)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Bolognese
  module Writers
    # Mixin that serialises the record's graph as Turtle.
    module TurtleWriter
      # Dumps the host's graph in :ttl format, declaring the "schema"
      # prefix for http://schema.org/.
      def turtle
        schema_prefixes = { schema: "http://schema.org/" }
        graph.dump(:ttl, prefixes: schema_prefixes)
      end
    end
  end
end
data/lib/bolognese.rb CHANGED
@@ -1,12 +1,27 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'active_support/all'
2
4
  require 'nokogiri'
3
5
  require 'maremma'
4
- require 'postrank-uri'
6
+ require 'bibtex'
7
+ require 'loofah'
8
+ require 'json/ld'
9
+ require 'rdf/turtle'
10
+ require 'rdf/rdfxml'
11
+ require 'logger'
12
+ require 'iso8601'
13
+ require 'jsonlint'
14
+ require 'gender_detector'
15
+ require 'citeproc'
16
+ require 'csl/styles'
17
+ require 'edtf'
5
18
 
19
+ require "bolognese/citeproc_extensions"
6
20
  require "bolognese/version"
7
21
  require "bolognese/metadata"
8
- require "bolognese/crossref"
9
- require "bolognese/datacite"
10
- require "bolognese/orcid"
11
22
  require "bolognese/cli"
12
23
  require "bolognese/string"
24
+ require "bolognese/array"
25
+ require "bolognese/whitelist_scrubber"
26
+
27
+ ENV['USER_AGENT'] ||= "Mozilla/5.0 (compatible; Maremma/#{Maremma::VERSION}; mailto:info@datacite.org)"
data/package.json ADDED
@@ -0,0 +1,12 @@
1
+ {
2
+ "name": "edam-browser",
3
+ "description": "The EDAM Browser is a client-side web-based visualization javascript widget. Its goals are to help describing bio-related resources and service with EDAM, and to facilitate and foster community contributions to EDAM.",
4
+ "author": "Bryan Brancotte",
5
+ "version": "1.0.0",
6
+ "scripts": {
7
+ "test": "jshint js/*js"
8
+ },
9
+ "devDependencies": {
10
+ "jshint": "^2.9.5"
11
+ }
12
+ }