bolognese 0.2.2 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/build.yml +9 -0
  3. data/.github/workflows/changelog.yml +36 -0
  4. data/.github/workflows/ci.yml +22 -0
  5. data/.github/workflows/pull-request.yml +9 -0
  6. data/.github/workflows/release.yml +32 -0
  7. data/.gitignore +4 -0
  8. data/.rubocop.yml +658 -0
  9. data/CHANGELOG.md +1864 -0
  10. data/CITATION +17 -0
  11. data/Gemfile +1 -1
  12. data/Gemfile.lock +251 -99
  13. data/README.md +1026 -2
  14. data/Rakefile +1 -0
  15. data/bin/bolognese +5 -1
  16. data/bolognese.gemspec +33 -21
  17. data/lib/bolognese/array.rb +13 -0
  18. data/lib/bolognese/author_utils.rb +115 -39
  19. data/lib/bolognese/citeproc_extensions.rb +48 -0
  20. data/lib/bolognese/cli.rb +28 -15
  21. data/lib/bolognese/datacite_utils.rb +418 -0
  22. data/lib/bolognese/doi_utils.rb +45 -23
  23. data/lib/bolognese/metadata.rb +250 -18
  24. data/lib/bolognese/metadata_utils.rb +228 -0
  25. data/lib/bolognese/pubmed.rb +2 -0
  26. data/lib/bolognese/readers/bibtex_reader.rb +100 -0
  27. data/lib/bolognese/readers/citeproc_reader.rb +125 -0
  28. data/lib/bolognese/readers/codemeta_reader.rb +108 -0
  29. data/lib/bolognese/readers/crosscite_reader.rb +17 -0
  30. data/lib/bolognese/readers/crossref_reader.rb +413 -0
  31. data/lib/bolognese/readers/datacite_json_reader.rb +17 -0
  32. data/lib/bolognese/readers/datacite_reader.rb +338 -0
  33. data/lib/bolognese/readers/npm_reader.rb +115 -0
  34. data/lib/bolognese/readers/ris_reader.rb +114 -0
  35. data/lib/bolognese/readers/schema_org_reader.rb +264 -0
  36. data/lib/bolognese/string.rb +3 -1
  37. data/lib/bolognese/utils.rb +1403 -12
  38. data/lib/bolognese/version.rb +1 -1
  39. data/lib/bolognese/whitelist_scrubber.rb +47 -0
  40. data/lib/bolognese/writers/bibtex_writer.rb +32 -0
  41. data/lib/bolognese/writers/citation_writer.rb +14 -0
  42. data/lib/bolognese/writers/citeproc_writer.rb +11 -0
  43. data/lib/bolognese/writers/codemeta_writer.rb +29 -0
  44. data/lib/bolognese/writers/crosscite_writer.rb +11 -0
  45. data/lib/bolognese/writers/crossref_writer.rb +11 -0
  46. data/lib/bolognese/writers/csv_writer.rb +24 -0
  47. data/lib/bolognese/writers/datacite_json_writer.rb +13 -0
  48. data/lib/bolognese/writers/datacite_writer.rb +12 -0
  49. data/lib/bolognese/writers/jats_writer.rb +138 -0
  50. data/lib/bolognese/writers/rdf_xml_writer.rb +11 -0
  51. data/lib/bolognese/writers/ris_writer.rb +29 -0
  52. data/lib/bolognese/writers/schema_org_writer.rb +55 -0
  53. data/lib/bolognese/writers/turtle_writer.rb +11 -0
  54. data/lib/bolognese.rb +19 -4
  55. data/package.json +12 -0
  56. data/resources/2008/09/xsd.xsl +997 -0
  57. data/resources/datacite-contributorType-v4.xsd +35 -0
  58. data/resources/datacite-dateType-v4.xsd +25 -0
  59. data/resources/datacite-descriptionType-v4.xsd +19 -0
  60. data/resources/datacite-funderIdentifierType-v4.xsd +15 -0
  61. data/resources/datacite-nameType-v4.xsd +10 -0
  62. data/resources/datacite-relatedIdentifierType-v4.xsd +34 -0
  63. data/resources/datacite-relationType-v4.xsd +49 -0
  64. data/resources/datacite-resourceType-v4.xsd +28 -0
  65. data/resources/datacite-titleType-v4.xsd +14 -0
  66. data/resources/kernel-2.1/include/datacite-contributorType-v1.1.xsd +22 -0
  67. data/resources/kernel-2.1/include/datacite-contributorType-v2.xsd +3 -0
  68. data/resources/kernel-2.1/include/datacite-dateType-v1.1.xsd +31 -0
  69. data/resources/kernel-2.1/include/datacite-dateType-v2.xsd +3 -0
  70. data/resources/kernel-2.1/include/datacite-descriptionType-v1.1.xsd +14 -0
  71. data/resources/kernel-2.1/include/datacite-descriptionType-v2.xsd +3 -0
  72. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v1.1.xsd +24 -0
  73. data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v2.xsd +3 -0
  74. data/resources/kernel-2.1/include/datacite-relationType-v1.1.xsd +29 -0
  75. data/resources/kernel-2.1/include/datacite-relationType-v2.xsd +3 -0
  76. data/resources/kernel-2.1/include/datacite-resourceType-v1.1.xsd +22 -0
  77. data/resources/kernel-2.1/include/datacite-resourceType-v2.xsd +3 -0
  78. data/resources/kernel-2.1/include/datacite-titleType-v1.1.xsd +11 -0
  79. data/resources/kernel-2.1/include/datacite-titleType-v2.xsd +3 -0
  80. data/resources/kernel-2.1/metadata.xsd +315 -0
  81. data/resources/kernel-2.2/include/datacite-contributorType-v2.xsd +29 -0
  82. data/resources/kernel-2.2/include/datacite-dateType-v2.xsd +21 -0
  83. data/resources/kernel-2.2/include/datacite-descriptionType-v2.xsd +15 -0
  84. data/resources/kernel-2.2/include/datacite-relatedIdentifierType-v2.xsd +25 -0
  85. data/resources/kernel-2.2/include/datacite-relationType-v2.xsd +29 -0
  86. data/resources/kernel-2.2/include/datacite-resourceType-v2.xsd +23 -0
  87. data/resources/kernel-2.2/include/datacite-titleType-v2.xsd +10 -0
  88. data/resources/kernel-2.2/metadata.xsd +316 -0
  89. data/resources/kernel-3/include/datacite-contributorType-v3.1.xsd +35 -0
  90. data/resources/kernel-3/include/datacite-dateType-v3.xsd +21 -0
  91. data/resources/kernel-3/include/datacite-descriptionType-v3.xsd +17 -0
  92. data/resources/kernel-3/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
  93. data/resources/kernel-3/include/datacite-relationType-v3.1.xsd +38 -0
  94. data/resources/kernel-3/include/datacite-resourceType-v3.xsd +26 -0
  95. data/resources/kernel-3/include/datacite-titleType-v3.xsd +12 -0
  96. data/resources/kernel-3/include/xml.xsd +286 -0
  97. data/resources/kernel-3/metadata.xsd +380 -0
  98. data/resources/kernel-3.0/include/datacite-contributorType-v3.xsd +33 -0
  99. data/resources/kernel-3.0/include/datacite-dateType-v3.xsd +21 -0
  100. data/resources/kernel-3.0/include/datacite-descriptionType-v3.xsd +17 -0
  101. data/resources/kernel-3.0/include/datacite-relatedIdentifierType-v3.xsd +27 -0
  102. data/resources/kernel-3.0/include/datacite-relationType-v3.xsd +33 -0
  103. data/resources/kernel-3.0/include/datacite-resourceType-v3.xsd +26 -0
  104. data/resources/kernel-3.0/include/datacite-titleType-v3.xsd +12 -0
  105. data/resources/kernel-3.0/include/xml.xsd +286 -0
  106. data/resources/kernel-3.0/metadata.xsd +377 -0
  107. data/resources/kernel-3.1/include/datacite-contributorType-v3.1.xsd +35 -0
  108. data/resources/kernel-3.1/include/datacite-dateType-v3.xsd +21 -0
  109. data/resources/kernel-3.1/include/datacite-descriptionType-v3.xsd +17 -0
  110. data/resources/kernel-3.1/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
  111. data/resources/kernel-3.1/include/datacite-relationType-v3.1.xsd +38 -0
  112. data/resources/kernel-3.1/include/datacite-resourceType-v3.xsd +26 -0
  113. data/resources/kernel-3.1/include/datacite-titleType-v3.xsd +12 -0
  114. data/resources/kernel-3.1/include/xml.xsd +286 -0
  115. data/resources/kernel-3.1/metadata.xsd +380 -0
  116. data/resources/kernel-4/include/datacite-contributorType-v4.xsd +37 -0
  117. data/resources/kernel-4/include/datacite-dateType-v4.xsd +27 -0
  118. data/resources/kernel-4/include/datacite-descriptionType-v4.xsd +19 -0
  119. data/resources/kernel-4/include/datacite-funderIdentifierType-v4.xsd +16 -0
  120. data/resources/kernel-4/include/datacite-nameType-v4.xsd +10 -0
  121. data/resources/kernel-4/include/datacite-numberType-v4.xsd +12 -0
  122. data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +39 -0
  123. data/resources/kernel-4/include/datacite-relationType-v4.xsd +59 -0
  124. data/resources/kernel-4/include/datacite-resourceType-v4.xsd +52 -0
  125. data/resources/kernel-4/include/datacite-titleType-v4.xsd +14 -0
  126. data/resources/kernel-4/include/xml.xsd +286 -0
  127. data/resources/kernel-4/metadata.xsd +715 -0
  128. data/resources/kernel-4.0/include/datacite-contributorType-v4.xsd +35 -0
  129. data/resources/kernel-4.0/include/datacite-dateType-v4.xsd +21 -0
  130. data/resources/kernel-4.0/include/datacite-descriptionType-v4.xsd +19 -0
  131. data/resources/kernel-4.0/include/datacite-funderIdentifierType-v4.xsd +15 -0
  132. data/resources/kernel-4.0/include/datacite-relatedIdentifierType-v4.xsd +32 -0
  133. data/resources/kernel-4.0/include/datacite-relationType-v4.xsd +39 -0
  134. data/resources/kernel-4.0/include/datacite-resourceType-v4.xsd +26 -0
  135. data/resources/kernel-4.0/include/datacite-titleType-v4.xsd +14 -0
  136. data/resources/kernel-4.0/include/xml.xsd +286 -0
  137. data/resources/kernel-4.0/metadata.xsd +470 -0
  138. data/resources/kernel-4.1/include/datacite-contributorType-v4.xsd +35 -0
  139. data/resources/kernel-4.1/include/datacite-dateType-v4.1.xsd +23 -0
  140. data/resources/kernel-4.1/include/datacite-descriptionType-v4.xsd +19 -0
  141. data/resources/kernel-4.1/include/datacite-funderIdentifierType-v4.xsd +15 -0
  142. data/resources/kernel-4.1/include/datacite-nameType-v4.1.xsd +10 -0
  143. data/resources/kernel-4.1/include/datacite-relatedIdentifierType-v4.xsd +32 -0
  144. data/resources/kernel-4.1/include/datacite-relationType-v4.1.xsd +46 -0
  145. data/resources/kernel-4.1/include/datacite-resourceType-v4.1.xsd +28 -0
  146. data/resources/kernel-4.1/include/datacite-titleType-v4.xsd +14 -0
  147. data/resources/kernel-4.1/include/xml.xsd +286 -0
  148. data/resources/kernel-4.1/metadata.xsd +483 -0
  149. data/resources/kernel-4.2/include/datacite-contributorType-v4.xsd +35 -0
  150. data/resources/kernel-4.2/include/datacite-dateType-v4.xsd +25 -0
  151. data/resources/kernel-4.2/include/datacite-descriptionType-v4.xsd +19 -0
  152. data/resources/kernel-4.2/include/datacite-funderIdentifierType-v4.xsd +15 -0
  153. data/resources/kernel-4.2/include/datacite-nameType-v4.xsd +10 -0
  154. data/resources/kernel-4.2/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  155. data/resources/kernel-4.2/include/datacite-relationType-v4.xsd +49 -0
  156. data/resources/kernel-4.2/include/datacite-resourceType-v4.xsd +28 -0
  157. data/resources/kernel-4.2/include/datacite-titleType-v4.xsd +14 -0
  158. data/resources/kernel-4.2/include/xml.xsd +286 -0
  159. data/resources/kernel-4.2/metadata.xsd +479 -0
  160. data/resources/kernel-4.3/include/datacite-contributorType-v4.xsd +35 -0
  161. data/resources/kernel-4.3/include/datacite-dateType-v4.xsd +25 -0
  162. data/resources/kernel-4.3/include/datacite-descriptionType-v4.xsd +19 -0
  163. data/resources/kernel-4.3/include/datacite-funderIdentifierType-v4.xsd +16 -0
  164. data/resources/kernel-4.3/include/datacite-nameType-v4.xsd +10 -0
  165. data/resources/kernel-4.3/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  166. data/resources/kernel-4.3/include/datacite-relationType-v4.xsd +49 -0
  167. data/resources/kernel-4.3/include/datacite-resourceType-v4.xsd +28 -0
  168. data/resources/kernel-4.3/include/datacite-titleType-v4.xsd +14 -0
  169. data/resources/kernel-4.3/include/xml.xsd +286 -0
  170. data/resources/kernel-4.3/metadata.xsd +515 -0
  171. data/resources/kernel-4.4/include/datacite-contributorType-v4.xsd +35 -0
  172. data/resources/kernel-4.4/include/datacite-dateType-v4.xsd +25 -0
  173. data/resources/kernel-4.4/include/datacite-descriptionType-v4.xsd +19 -0
  174. data/resources/kernel-4.4/include/datacite-funderIdentifierType-v4.xsd +16 -0
  175. data/resources/kernel-4.4/include/datacite-nameType-v4.xsd +10 -0
  176. data/resources/kernel-4.4/include/datacite-numberType-v4.xsd +12 -0
  177. data/resources/kernel-4.4/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  178. data/resources/kernel-4.4/include/datacite-relationType-v4.xsd +51 -0
  179. data/resources/kernel-4.4/include/datacite-resourceType-v4.xsd +43 -0
  180. data/resources/kernel-4.4/include/datacite-titleType-v4.xsd +14 -0
  181. data/resources/kernel-4.4/include/xml.xsd +286 -0
  182. data/resources/kernel-4.4/metadata.xsd +707 -0
  183. data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
  184. data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
  185. data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
  186. data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
  187. data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
  188. data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
  189. data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
  190. data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
  191. data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
  192. data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
  193. data/resources/kernel-4.5/include/xml.xsd +286 -0
  194. data/resources/kernel-4.5/metadata.xsd +711 -0
  195. data/resources/kernel-4.6/include/datacite-contributorType-v4.xsd +37 -0
  196. data/resources/kernel-4.6/include/datacite-dateType-v4.xsd +27 -0
  197. data/resources/kernel-4.6/include/datacite-descriptionType-v4.xsd +19 -0
  198. data/resources/kernel-4.6/include/datacite-funderIdentifierType-v4.xsd +16 -0
  199. data/resources/kernel-4.6/include/datacite-nameType-v4.xsd +10 -0
  200. data/resources/kernel-4.6/include/datacite-numberType-v4.xsd +12 -0
  201. data/resources/kernel-4.6/include/datacite-relatedIdentifierType-v4.xsd +37 -0
  202. data/resources/kernel-4.6/include/datacite-relationType-v4.xsd +57 -0
  203. data/resources/kernel-4.6/include/datacite-resourceType-v4.xsd +49 -0
  204. data/resources/kernel-4.6/include/datacite-titleType-v4.xsd +14 -0
  205. data/resources/kernel-4.6/include/xml.xsd +286 -0
  206. data/resources/kernel-4.6/metadata.xsd +712 -0
  207. data/resources/kernel-4.7/include/datacite-contributorType-v4.xsd +37 -0
  208. data/resources/kernel-4.7/include/datacite-dateType-v4.xsd +27 -0
  209. data/resources/kernel-4.7/include/datacite-descriptionType-v4.xsd +19 -0
  210. data/resources/kernel-4.7/include/datacite-funderIdentifierType-v4.xsd +16 -0
  211. data/resources/kernel-4.7/include/datacite-nameType-v4.xsd +10 -0
  212. data/resources/kernel-4.7/include/datacite-numberType-v4.xsd +12 -0
  213. data/resources/kernel-4.7/include/datacite-relatedIdentifierType-v4.xsd +39 -0
  214. data/resources/kernel-4.7/include/datacite-relationType-v4.xsd +59 -0
  215. data/resources/kernel-4.7/include/datacite-resourceType-v4.xsd +52 -0
  216. data/resources/kernel-4.7/include/datacite-titleType-v4.xsd +14 -0
  217. data/resources/kernel-4.7/include/xml.xsd +286 -0
  218. data/resources/kernel-4.7/metadata.xsd +715 -0
  219. data/resources/oecd/dfg-mappings.json +1866 -0
  220. data/resources/oecd/for-mappings.json +1101 -0
  221. data/resources/oecd/fos-mappings.json +198 -0
  222. data/resources/schema_org/jsonldcontext.json +7477 -0
  223. data/resources/spdx/licenses.json +5297 -0
  224. data/resources/xml.xsd +286 -0
  225. metadata +478 -150
  226. data/.travis.yml +0 -23
  227. data/lib/bolognese/crossref.rb +0 -202
  228. data/lib/bolognese/datacite.rb +0 -157
  229. data/lib/bolognese/date_utils.rb +0 -48
  230. data/lib/bolognese/github.rb +0 -106
  231. data/lib/bolognese/orcid.rb +0 -24
  232. data/lib/bolognese/pid_utils.rb +0 -23
  233. data/spec/cli_spec.rb +0 -37
  234. data/spec/crossref_spec.rb +0 -113
  235. data/spec/datacite_spec.rb +0 -49
  236. data/spec/doi_spec.rb +0 -89
  237. data/spec/fixtures/crossref.xml +0 -742
  238. data/spec/fixtures/datacite.xml +0 -40
  239. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_crossref.yml +0 -760
  240. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_schema_org.yml +0 -1476
  241. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_datacite.yml +0 -214
  242. data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_schema_org.yml +0 -384
  243. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/crossref.yml +0 -44
  244. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/datacite.yml +0 -44
  245. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/medra.yml +0 -44
  246. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/not_found.yml +0 -44
  247. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_test.yml +0 -843
  248. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_SICI_DOI.yml +0 -277
  249. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_data_citation.yml +0 -15755
  250. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/date_in_future.yml +0 -2691
  251. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/journal_article.yml +0 -1857
  252. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/not_found_error.yml +0 -93
  253. data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/posted_content.yml +0 -5715
  254. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/BlogPosting.yml +0 -307
  255. data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/Dataset.yml +0 -343
  256. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref.yml +0 -44
  257. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref_doi_not_url.yml +0 -44
  258. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite.yml +0 -44
  259. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite_doi_http.yml +0 -44
  260. data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/orcid.yml +0 -44
  261. data/spec/metadata_spec.rb +0 -35
  262. data/spec/orcid_spec.rb +0 -23
  263. data/spec/spec_helper.rb +0 -88
  264. /data/{LICENSE → LICENSE.md} +0 -0
@@ -1,30 +1,262 @@
1
- require_relative 'doi_utils'
2
- require_relative 'author_utils'
3
- require_relative 'date_utils'
4
- require_relative 'pid_utils'
5
- require_relative 'utils'
1
+ # frozen_string_literal: false
2
+
3
+ require_relative 'metadata_utils'
6
4
 
7
5
  module Bolognese
8
6
  class Metadata
9
- include Bolognese::DoiUtils
10
- include Bolognese::AuthorUtils
11
- include Bolognese::DateUtils
12
- include Bolognese::PidUtils
7
+ include Bolognese::MetadataUtils
13
8
  include Bolognese::Utils
14
9
 
15
- attr_reader :id, :provider
10
+ attr_accessor :string, :from, :sandbox, :meta, :regenerate, :issue, :show_errors
11
+ attr_reader :doc, :page_start, :page_end
12
+ attr_writer :id, :provider_id, :client_id, :doi, :identifiers, :creators, :contributors, :titles, :publisher,
13
+ :rights_list, :dates, :publication_year, :volume, :url, :version_info,
14
+ :subjects, :contributor, :descriptions, :language, :sizes,
15
+ :formats, :schema_version, :meta, :container, :agency,
16
+ :format, :funding_references, :state, :geo_locations,
17
+ :types, :content_url, :related_identifiers, :related_items, :style, :locale, :date_registered
18
+
19
+ def initialize(options={})
20
+ options.symbolize_keys!
21
+ id = normalize_id(options[:input], options)
22
+ ra = nil
23
+
24
+ if id.present?
25
+ @from = options[:from] || find_from_format(id: id)
26
+
27
+ # mEDRA, KISTI, JaLC and OP DOIs are found in the Crossref index
28
+ if @from == "medra"
29
+ ra = "mEDRA"
30
+ elsif @from == "kisti"
31
+ ra = "KISTI"
32
+ elsif @from == "jalc"
33
+ ra = "JaLC"
34
+ elsif @from == "op"
35
+ ra = "OP"
36
+ end
37
+
38
+ # generate name for method to call dynamically
39
+ hsh = @from.present? ? send("get_" + @from, id: id, sandbox: options[:sandbox]) : {}
40
+ string = hsh.fetch("string", nil)
41
+
42
+ elsif options[:input].present? && File.exist?(options[:input])
43
+ filename = File.basename(options[:input])
44
+ ext = File.extname(options[:input])
45
+ if %w(.bib .ris .xml .json).include?(ext)
46
+ hsh = {
47
+ "url" => options[:url],
48
+ "state" => options[:state],
49
+ "date_registered" => options[:date_registered],
50
+ "date_updated" => options[:date_updated],
51
+ "provider_id" => options[:provider_id],
52
+ "client_id" => options[:client_id],
53
+ "content_url" => options[:content_url] }
54
+ string = IO.read(options[:input])
55
+ @from = options[:from] || find_from_format(string: string, filename: filename, ext: ext)
56
+ else
57
+ $stderr.puts "File type #{ext} not supported"
58
+ exit 1
59
+ end
60
+ else
61
+ hsh = {
62
+ "url" => options[:url],
63
+ "state" => options[:state],
64
+ "date_registered" => options[:date_registered],
65
+ "date_updated" => options[:date_updated],
66
+ "provider_id" => options[:provider_id],
67
+ "client_id" => options[:client_id],
68
+ "content_url" => options[:content_url],
69
+ "creators" => options[:creators],
70
+ "contributors" => options[:contributors],
71
+ "titles" => options[:titles],
72
+ "publisher" => options[:publisher],
73
+ "publication_year" => options[:publication_year] }
74
+ string = options[:input]
75
+ @from = options[:from] || find_from_format(string: string)
76
+ end
77
+
78
+ # make sure input is encoded as utf8
79
+ string1 = string.dup.force_encoding("UTF-8") if string.present?
80
+ @string = string1
81
+
82
+ # input options for citation formatting
83
+ @style = options[:style]
84
+ @locale = options[:locale]
85
+
86
+ @sandbox = options[:sandbox]
87
+
88
+ # options that come from the datacite database
89
+ @url = hsh.to_h["url"].presence || options[:url].presence
90
+ @state = hsh.to_h["state"].presence
91
+ @date_registered = hsh.to_h["date_registered"].presence
92
+ @date_updated = hsh.to_h["date_updated"].presence
93
+ @provider_id = hsh.to_h["provider_id"].presence
94
+ @client_id = hsh.to_h["client_id"].presence
95
+ @content_url = hsh.to_h["content_url"].presence
96
+
97
+ # set attributes directly
98
+ read_options = options.slice(
99
+ :creators,
100
+ :contributors,
101
+ :titles,
102
+ :types,
103
+ :identifiers,
104
+ :container,
105
+ :publisher,
106
+ :funding_references,
107
+ :dates,
108
+ :publication_year,
109
+ :descriptions,
110
+ :rights_list,
111
+ :version_info,
112
+ :subjects,
113
+ :language,
114
+ :geo_locations,
115
+ :related_identifiers,
116
+ :related_items,
117
+ :formats,
118
+ :sizes
119
+ ).compact
120
+
121
+ @regenerate = options[:regenerate] || read_options.present?
122
+ # generate name for method to call dynamically
123
+ opts = { string: string1, sandbox: options[:sandbox], doi: options[:doi], id: id, ra: ra }.merge(read_options)
124
+ @meta = @from.present? ? send("read_" + @from, **opts) : {}
125
+ end
126
+
127
+ def id
128
+ @id ||= meta.fetch("id", nil)
129
+ end
130
+
131
+ def doi
132
+ @doi ||= meta.fetch("doi", nil)
133
+ end
134
+
135
+ def provider_id
136
+ @provider_id ||= meta.fetch("provider_id", nil)
137
+ end
138
+
139
+ def client_id
140
+ @client_id ||= meta.fetch("client_id", nil)
141
+ end
142
+
143
+ def exists?
144
+ (@state || meta.fetch("state", nil)) != "not_found"
145
+ end
146
+
147
+ def valid?
148
+ exists? && errors.nil?
149
+ end
150
+
151
+ # validate against DataCite schema, unless there are already errors in the reader
152
+ def errors
153
+ meta.fetch("errors", nil) || datacite_errors(xml: datacite, schema_version: schema_version)
154
+ end
155
+
156
+ def descriptions
157
+ @descriptions ||= meta.fetch("descriptions", nil)
158
+ end
159
+
160
+ def rights_list
161
+ @rights_list ||= meta.fetch("rights_list", nil)
162
+ end
163
+
164
+ def subjects
165
+ @subjects ||= meta.fetch("subjects", nil)
166
+ end
167
+
168
+ def language
169
+ @language ||= meta.fetch("language", nil)
170
+ end
171
+
172
+ def sizes
173
+ @sizes ||= meta.fetch("sizes", nil)
174
+ end
175
+
176
+ def formats
177
+ @formats ||= meta.fetch("formats", nil)
178
+ end
179
+
180
+ def schema_version
181
+ @schema_version ||= meta.fetch("schema_version", nil)
182
+ end
183
+
184
+ def funding_references
185
+ @funding_references ||= meta.fetch("funding_references", nil)
186
+ end
187
+
188
+ def related_identifiers
189
+ @related_identifiers ||= meta.fetch("related_identifiers", nil)
190
+ end
191
+
192
+ def related_items
193
+ @related_items ||= meta.fetch("related_items", nil)
194
+ end
195
+
196
+ def url
197
+ @url ||= meta.fetch("url", nil)
198
+ end
199
+
200
+ def version_info
201
+ @version_info ||= meta.fetch("version_info", nil) || meta.fetch("version", nil)
202
+ end
203
+
204
+ def publication_year
205
+ @publication_year ||= meta.fetch("publication_year", nil)
206
+ end
207
+
208
+ def container
209
+ @container ||= begin
210
+ generate_container(types, related_items, related_identifiers, descriptions) || meta.fetch("container", nil)
211
+ end
212
+ end
213
+
214
+ def geo_locations
215
+ @geo_locations ||= meta.fetch("geo_locations", nil)
216
+ end
217
+
218
+ def dates
219
+ @dates ||= meta.fetch("dates", nil)
220
+ end
221
+
222
+ def publisher
223
+ @publisher ||= normalize_publisher(meta["publisher"]) if meta.fetch("publisher", nil).present?
224
+ end
225
+
226
+ def identifiers
227
+ @identifiers ||= meta.fetch("identifiers", nil)
228
+ end
229
+
230
+ def content_url
231
+ @content_url ||= meta.fetch("content_url", nil)
232
+ end
233
+
234
+ def agency
235
+ @agency ||= meta.fetch("agency", nil)
236
+ end
237
+
238
+ def state
239
+ @state ||= meta.fetch("state", nil)
240
+ end
241
+
242
+ def date_registered
243
+ @date_registered ||= meta.fetch("date_registered", nil)
244
+ end
245
+
246
+ def types
247
+ @types ||= meta.fetch("types", nil)
248
+ end
16
249
 
17
- def initialize(id)
18
- @id = normalize_id(id)
19
- @provider = find_provider(@id)
250
+ def titles
251
+ @titles ||= meta.fetch("titles", nil)
20
252
  end
21
253
 
22
- def normalize_id(id)
23
- normalize_doi(id) || normalize_orcid(id)
254
+ def creators
255
+ @creators ||= meta.fetch("creators", nil)
24
256
  end
25
257
 
26
- def find_provider(id)
27
- get_doi_ra(id).fetch("id", nil) || "orcid"
258
+ def contributors
259
+ @contributors ||= meta.fetch("contributors", nil)
28
260
  end
29
261
  end
30
- end
262
+ end
@@ -0,0 +1,228 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'doi_utils'
4
+ require_relative 'author_utils'
5
+ require_relative 'datacite_utils'
6
+ require_relative 'utils'
7
+
8
+ require_relative 'readers/bibtex_reader'
9
+ require_relative 'readers/citeproc_reader'
10
+ require_relative 'readers/codemeta_reader'
11
+ require_relative 'readers/crosscite_reader'
12
+ require_relative 'readers/crossref_reader'
13
+ require_relative 'readers/datacite_json_reader'
14
+ require_relative 'readers/datacite_reader'
15
+ require_relative 'readers/npm_reader'
16
+ require_relative 'readers/ris_reader'
17
+ require_relative 'readers/schema_org_reader'
18
+
19
+ require_relative 'writers/bibtex_writer'
20
+ require_relative 'writers/citation_writer'
21
+ require_relative 'writers/citeproc_writer'
22
+ require_relative 'writers/codemeta_writer'
23
+ require_relative 'writers/crosscite_writer'
24
+ require_relative 'writers/crossref_writer'
25
+ require_relative 'writers/csv_writer'
26
+ require_relative 'writers/datacite_writer'
27
+ require_relative 'writers/datacite_json_writer'
28
+ require_relative 'writers/jats_writer'
29
+ require_relative 'writers/rdf_xml_writer'
30
+ require_relative 'writers/ris_writer'
31
+ require_relative 'writers/schema_org_writer'
32
+ require_relative 'writers/turtle_writer'
33
+
34
+ module Bolognese
35
+ module MetadataUtils
36
+ include Bolognese::DoiUtils
37
+ include Bolognese::AuthorUtils
38
+ include Bolognese::DataciteUtils
39
+ include Bolognese::Utils
40
+
41
+ include Bolognese::Readers::BibtexReader
42
+ include Bolognese::Readers::CiteprocReader
43
+ include Bolognese::Readers::CodemetaReader
44
+ include Bolognese::Readers::CrossciteReader
45
+ include Bolognese::Readers::CrossrefReader
46
+ include Bolognese::Readers::DataciteReader
47
+ include Bolognese::Readers::DataciteJsonReader
48
+ include Bolognese::Readers::NpmReader
49
+ include Bolognese::Readers::RisReader
50
+ include Bolognese::Readers::SchemaOrgReader
51
+
52
+ include Bolognese::Writers::BibtexWriter
53
+ include Bolognese::Writers::CitationWriter
54
+ include Bolognese::Writers::CiteprocWriter
55
+ include Bolognese::Writers::CodemetaWriter
56
+ include Bolognese::Writers::CrossciteWriter
57
+ include Bolognese::Writers::CrossrefWriter
58
+ include Bolognese::Writers::CsvWriter
59
+ include Bolognese::Writers::DataciteWriter
60
+ include Bolognese::Writers::DataciteJsonWriter
61
+ include Bolognese::Writers::JatsWriter
62
+ include Bolognese::Writers::RdfXmlWriter
63
+ include Bolognese::Writers::RisWriter
64
+ include Bolognese::Writers::SchemaOrgWriter
65
+ include Bolognese::Writers::TurtleWriter
66
+
67
+ attr_reader :name_detector, :reverse
68
+
69
+ # some dois in the Crossref index are from other registration agencies
70
+ alias get_medra get_crossref
71
+ alias read_medra read_crossref
72
+ alias get_kisti get_crossref
73
+ alias read_kisti read_crossref
74
+ alias get_jalc get_crossref
75
+ alias read_jalc read_crossref
76
+ alias get_op get_crossref
77
+ alias read_op read_crossref
78
+
79
+ # replace DOI in XML if provided in options
80
+ def raw
81
+ r = string.present? ? string.strip : nil
82
+ return r unless (from == "datacite" && r.present?)
83
+
84
+ doc = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)
85
+ node = doc.at_css("identifier")
86
+ node.content = doi.to_s.upcase if node.present? && doi.present?
87
+ doc.to_xml.strip
88
+ end
89
+
90
+ def should_passthru
91
+ (from == "datacite") && regenerate.blank? && raw.present?
92
+ end
93
+
94
+ def container_title
95
+ if container.present?
96
+ container["title"]
97
+ elsif types["citeproc"] == "article-journal"
98
+ publisher["name"] if publisher.present?
99
+ else
100
+ nil
101
+ end
102
+ end
103
+
104
+ # recognize given name. Can be loaded once as ::NameDetector, e.g. in a Rails initializer
105
+ def name_detector
106
+ @name_detector ||= defined?(::NameDetector) ? ::NameDetector : nil
107
+ end
108
+
109
+ def reverse
110
+ { "citation" => Array.wrap(related_identifiers).select { |ri| ri["relationType"] == "IsReferencedBy" }.map do |r|
111
+ { "@id" => normalize_doi(r["relatedIdentifier"]),
112
+ "@type" => r["resourceTypeGeneral"] || "ScholarlyArticle",
113
+ "identifier" => r["relatedIdentifierType"] == "DOI" ? nil : to_identifier(r) }.compact
114
+ end.unwrap,
115
+ "isBasedOn" => Array.wrap(related_identifiers).select { |ri| ri["relationType"] == "IsSupplementTo" }.map do |r|
116
+ { "@id" => normalize_doi(r["relatedIdentifier"]),
117
+ "@type" => r["resourceTypeGeneral"] || "ScholarlyArticle",
118
+ "identifier" => r["relatedIdentifierType"] == "DOI" ? nil : to_identifier(r) }.compact
119
+ end.unwrap }.compact
120
+ end
121
+
122
+ def graph
123
+ # preload schema_org context
124
+ JSON::LD::Context.add_preloaded(
125
+ 'http://schema.org/',
126
+ JSON::LD::Context.new.parse('resources/schema_org/jsonldcontext.json')
127
+ )
128
+
129
+ RDF::Graph.new << JSON::LD::API.toRdf(schema_hsh)
130
+ rescue NameError
131
+ nil
132
+ end
133
+
134
+ def citeproc_hsh
135
+ page = container.to_h["firstPage"].present? ? [container["firstPage"], container["lastPage"]].compact.join("-") : nil
136
+ if Array.wrap(creators).size == 1 && Array.wrap(creators).first.fetch("name", nil) == ":(unav)"
137
+ author = nil
138
+ else
139
+ author = to_citeproc(creators)
140
+ end
141
+
142
+ if types["resourceTypeGeneral"] == "Software"
143
+ type = "software"
144
+ else
145
+ type = types["citeproc"]
146
+ end
147
+
148
+ # Filter out contributors who are already creators, editors, or translators to avoid duplication
149
+ creator_names = Array.wrap(creators).map { |c| c["name"] || [c["givenName"], c["familyName"]].compact.join(" ") }.compact
150
+ unique_contributors = Array.wrap(contributors).reject do |c|
151
+ contributor_name = c["name"] || [c["givenName"], c["familyName"]].compact.join(" ")
152
+ creator_names.include?(contributor_name) ||
153
+ c["contributorType"] == "Editor" ||
154
+ c["contributorType"] == "Translator"
155
+ end
156
+
157
+ {
158
+ "type" => type,
159
+ "id" => normalize_doi(doi),
160
+ "categories" => Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.presence,
161
+ "language" => language,
162
+ "author" => author,
163
+ "contributor" => unique_contributors.presence ? to_citeproc(unique_contributors) : nil,
164
+ "editor" => contributors ? to_citeproc(contributors.select { |c| c["contributorType"] == "Editor" }) : nil,
165
+ "translator" => contributors ? to_citeproc(contributors.select { |c| c["contributorType"] == "Translator" }) : nil,
166
+ "issued" => get_date(dates, "Issued") ? get_date_parts(get_date(dates, "Issued")) : get_date_parts(publication_year.to_s),
167
+ "submitted" => Array.wrap(dates).find { |d| d["dateType"] == "Submitted" }.to_h.fetch("__content__", nil),
168
+ "available-date" => Array.wrap(dates).find { |d| d["dateType"] == "Available" }.to_h.fetch("__content__", nil),
169
+ "abstract" => parse_attributes(descriptions, content: "description", first: true),
170
+ "container-title" => container_title,
171
+ "DOI" => doi,
172
+ "volume" => container.to_h["volume"],
173
+ "issue" => container.to_h["issue"],
174
+ "number" => container.to_h["number"],
175
+ "chapter-number" => container.to_h["chapterNumber"],
176
+ "edition" => container.to_h["edition"],
177
+ "page" => page,
178
+ "page-first" => container.to_h["firstPage"],
179
+ "publisher" => publisher["name"],
180
+ "title" => parse_attributes(titles, content: "title", first: true),
181
+ "URL" => url,
182
+ "copyright" => Array.wrap(rights_list).map { |l| l["rights"] }.first,
183
+ "version" => version_info
184
+ }.compact.symbolize_keys
185
+ end
186
+
187
+ def crosscite_hsh
188
+ {
189
+ "id" => normalize_doi(doi),
190
+ "doi" => doi,
191
+ "url" => url,
192
+ "types" => types,
193
+ "creators" => creators,
194
+ "titles" => titles,
195
+ "publisher" => publisher,
196
+ "container" => container,
197
+ "subjects" => subjects,
198
+ "contributors" => contributors,
199
+ "dates" => dates,
200
+ "publication_year" => publication_year,
201
+ "language" => language,
202
+ "identifiers" => identifiers,
203
+ "sizes" => sizes,
204
+ "formats" => formats,
205
+ "version" => version_info,
206
+ "rights_list" => rights_list,
207
+ "descriptions" => descriptions,
208
+ "geo_locations" => geo_locations,
209
+ "funding_references" => funding_references,
210
+ "related_identifiers" => related_identifiers,
211
+ "related_items" => related_items,
212
+ "schema_version" => schema_version,
213
+ "provider_id" => provider_id,
214
+ "client_id" => client_id,
215
+ "agency" => agency,
216
+ "state" => state
217
+ }.compact
218
+ end
219
+
220
+ def style
221
+ @style ||= "apa"
222
+ end
223
+
224
+ def locale
225
+ @locale ||= "en-US"
226
+ end
227
+ end
228
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Bolognese
2
4
  module Pubmed
3
5
  # def get_pubmed_metadata(pmid, options = {})
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
module Bolognese
  module Readers
    # Reads a BibTeX record into the string-keyed metadata hash returned by
    # #read_bibtex.
    module BibtexReader
      # BibTeX entry type => citeproc type ("misc" is used when nothing matches).
      BIB_TO_CP_TRANSLATIONS = {
        "article" => "article-journal",
        "phdthesis" => "thesis"
      }.freeze

      # BibTeX entry type => RIS type ("GEN" is used when the lookup yields nil).
      BIB_TO_RIS_TRANSLATIONS = {
        "article" => "JOUR",
        "book" => "BOOK",
        "inbook" => "CHAP",
        "inproceedings" => "CPAPER",
        "manual" => nil,
        "misc" => "GEN",
        "phdthesis" => "THES",
        "proceedings" => "CONF",
        "techreport" => "RPRT",
        "unpublished" => "UNPD"
      }.freeze

      # BibTeX entry type => schema.org type ("ScholarlyArticle" is used when
      # nothing matches).
      BIB_TO_SO_TRANSLATIONS = {
        "article" => "ScholarlyArticle",
        "phdthesis" => "Thesis"
      }.freeze

      # Parses a BibTeX string and returns its metadata as a hash.
      #
      # Only the first entry returned by BibTeX.parse is used. When +string+
      # is blank an empty record is used instead, so the result contains only
      # defaults plus whatever +options+ contribute.
      #
      # @param string [String, nil] BibTeX source
      # @param options [Hash] extra attributes; :doi is the fallback DOI when
      #   the entry has none. All options except :doi, :id, :url, :sandbox,
      #   :validate and :ra are merged over the result.
      # @return [Hash] string-keyed metadata ("id", "types", "doi", "url",
      #   "titles", "creators", "container", "publisher",
      #   "related_identifiers", "dates", "publication_year", "descriptions",
      #   "rights_list", "state") merged with the remaining options
      def read_bibtex(string: nil, **options)
        read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))

        meta = string.present? ? BibTeX.parse(string).first : OpenStruct.new

        bibtex_type = meta.try(:type).to_s
        types = {
          "resourceTypeGeneral" => Metadata::BIB_TO_DC_TRANSLATIONS[bibtex_type],
          "resourceType" => Bolognese::Utils::BIB_TO_CR_TRANSLATIONS[bibtex_type] || bibtex_type,
          "schemaOrg" => BIB_TO_SO_TRANSLATIONS[bibtex_type] || "ScholarlyArticle",
          "bibtex" => bibtex_type,
          "citeproc" => BIB_TO_CP_TRANSLATIONS[bibtex_type] || "misc",
          "ris" => BIB_TO_RIS_TRANSLATIONS[bibtex_type] || "GEN"
        }.compact
        doi = meta.try(:doi).to_s.presence || options[:doi]

        creators = Array(meta.try(:author)).map do |a|
          { "nameType" => "Personal",
            "name" => [a.last, a.first].join(", "),
            "givenName" => a.first,
            "familyName" => a.last }.compact
        end

        journal = meta.try(:journal)
        # One source of truth for the ISSN, so "identifier" and
        # "identifierType" can never disagree.
        issn = meta.try(:issn).to_s.presence

        related_identifiers = if journal.present? && issn
          [{ "type" => "Periodical",
             "relationType" => "IsPartOf",
             "relatedIdentifierType" => "ISSN",
             "title" => journal.to_s,
             "relatedIdentifier" => issn }.compact]
        end

        container = if journal.present?
          # BibTeX page ranges are conventionally written "12--34". Splitting
          # on a run of hyphens keeps "lastPage" from becoming "".
          first_page, last_page = meta.try(:pages).to_s.split(/-+/).map(&:strip)

          { "type" => "Journal",
            "title" => journal.to_s,
            "identifier" => issn,
            "identifierType" => issn ? "ISSN" : nil,
            "volume" => meta.try(:volume).to_s.presence,
            "firstPage" => first_page,
            "lastPage" => last_page }.compact
        end

        # "findable" when the entry carries a DOI or any read option remains.
        state = (meta.try(:doi).to_s.present? || read_options.present?) ? "findable" : "not_found"
        dates = if meta.try(:date).present? && Date.edtf(meta.date.to_s).present?
          [{ "date" => meta.date.to_s,
             "dateType" => "Issued" }]
        end
        publication_year = meta.try(:date).present? ? meta.date.to_s[0..3] : nil
        rights_list = meta.try(:copyright).present? ? [hsh_to_spdx("rightsURI" => meta[:copyright])] : []

        titles = meta.try(:title).present? ? [{ "title" => meta.try(:title).to_s }] : []
        publisher = meta.try(:publisher).present? ? { "name" => meta.publisher.to_s } : nil
        descriptions = if meta.try(:abstract).present?
          [{ "description" => sanitize(meta.abstract.to_s, new_line: true).presence,
             "descriptionType" => "Abstract" }]
        else
          []
        end

        { "id" => normalize_doi(doi),
          "types" => types,
          "doi" => doi,
          "url" => meta.try(:url).to_s.presence,
          "titles" => titles,
          "creators" => creators,
          "container" => container,
          "publisher" => publisher,
          "related_identifiers" => related_identifiers,
          "dates" => dates,
          "publication_year" => publication_year,
          "descriptions" => descriptions,
          "rights_list" => rights_list,
          "state" => state
        }.merge(read_options)
      end
    end
  end
end