bolognese 0.2.2 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/build.yml +9 -0
- data/.github/workflows/changelog.yml +36 -0
- data/.github/workflows/ci.yml +22 -0
- data/.github/workflows/pull-request.yml +9 -0
- data/.github/workflows/release.yml +32 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +658 -0
- data/CHANGELOG.md +1864 -0
- data/CITATION +17 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +251 -99
- data/README.md +1026 -2
- data/Rakefile +1 -0
- data/bin/bolognese +5 -1
- data/bolognese.gemspec +33 -21
- data/lib/bolognese/array.rb +13 -0
- data/lib/bolognese/author_utils.rb +115 -39
- data/lib/bolognese/citeproc_extensions.rb +48 -0
- data/lib/bolognese/cli.rb +28 -15
- data/lib/bolognese/datacite_utils.rb +418 -0
- data/lib/bolognese/doi_utils.rb +45 -23
- data/lib/bolognese/metadata.rb +250 -18
- data/lib/bolognese/metadata_utils.rb +228 -0
- data/lib/bolognese/pubmed.rb +2 -0
- data/lib/bolognese/readers/bibtex_reader.rb +100 -0
- data/lib/bolognese/readers/citeproc_reader.rb +125 -0
- data/lib/bolognese/readers/codemeta_reader.rb +108 -0
- data/lib/bolognese/readers/crosscite_reader.rb +17 -0
- data/lib/bolognese/readers/crossref_reader.rb +413 -0
- data/lib/bolognese/readers/datacite_json_reader.rb +17 -0
- data/lib/bolognese/readers/datacite_reader.rb +338 -0
- data/lib/bolognese/readers/npm_reader.rb +115 -0
- data/lib/bolognese/readers/ris_reader.rb +114 -0
- data/lib/bolognese/readers/schema_org_reader.rb +264 -0
- data/lib/bolognese/string.rb +3 -1
- data/lib/bolognese/utils.rb +1403 -12
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/whitelist_scrubber.rb +47 -0
- data/lib/bolognese/writers/bibtex_writer.rb +32 -0
- data/lib/bolognese/writers/citation_writer.rb +14 -0
- data/lib/bolognese/writers/citeproc_writer.rb +11 -0
- data/lib/bolognese/writers/codemeta_writer.rb +29 -0
- data/lib/bolognese/writers/crosscite_writer.rb +11 -0
- data/lib/bolognese/writers/crossref_writer.rb +11 -0
- data/lib/bolognese/writers/csv_writer.rb +24 -0
- data/lib/bolognese/writers/datacite_json_writer.rb +13 -0
- data/lib/bolognese/writers/datacite_writer.rb +12 -0
- data/lib/bolognese/writers/jats_writer.rb +138 -0
- data/lib/bolognese/writers/rdf_xml_writer.rb +11 -0
- data/lib/bolognese/writers/ris_writer.rb +29 -0
- data/lib/bolognese/writers/schema_org_writer.rb +55 -0
- data/lib/bolognese/writers/turtle_writer.rb +11 -0
- data/lib/bolognese.rb +19 -4
- data/package.json +12 -0
- data/resources/2008/09/xsd.xsl +997 -0
- data/resources/datacite-contributorType-v4.xsd +35 -0
- data/resources/datacite-dateType-v4.xsd +25 -0
- data/resources/datacite-descriptionType-v4.xsd +19 -0
- data/resources/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/datacite-nameType-v4.xsd +10 -0
- data/resources/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/datacite-relationType-v4.xsd +49 -0
- data/resources/datacite-resourceType-v4.xsd +28 -0
- data/resources/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-2.1/include/datacite-contributorType-v1.1.xsd +22 -0
- data/resources/kernel-2.1/include/datacite-contributorType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-dateType-v1.1.xsd +31 -0
- data/resources/kernel-2.1/include/datacite-dateType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-descriptionType-v1.1.xsd +14 -0
- data/resources/kernel-2.1/include/datacite-descriptionType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v1.1.xsd +24 -0
- data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-relationType-v1.1.xsd +29 -0
- data/resources/kernel-2.1/include/datacite-relationType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-resourceType-v1.1.xsd +22 -0
- data/resources/kernel-2.1/include/datacite-resourceType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-titleType-v1.1.xsd +11 -0
- data/resources/kernel-2.1/include/datacite-titleType-v2.xsd +3 -0
- data/resources/kernel-2.1/metadata.xsd +315 -0
- data/resources/kernel-2.2/include/datacite-contributorType-v2.xsd +29 -0
- data/resources/kernel-2.2/include/datacite-dateType-v2.xsd +21 -0
- data/resources/kernel-2.2/include/datacite-descriptionType-v2.xsd +15 -0
- data/resources/kernel-2.2/include/datacite-relatedIdentifierType-v2.xsd +25 -0
- data/resources/kernel-2.2/include/datacite-relationType-v2.xsd +29 -0
- data/resources/kernel-2.2/include/datacite-resourceType-v2.xsd +23 -0
- data/resources/kernel-2.2/include/datacite-titleType-v2.xsd +10 -0
- data/resources/kernel-2.2/metadata.xsd +316 -0
- data/resources/kernel-3/include/datacite-contributorType-v3.1.xsd +35 -0
- data/resources/kernel-3/include/datacite-dateType-v3.xsd +21 -0
- data/resources/kernel-3/include/datacite-descriptionType-v3.xsd +17 -0
- data/resources/kernel-3/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
- data/resources/kernel-3/include/datacite-relationType-v3.1.xsd +38 -0
- data/resources/kernel-3/include/datacite-resourceType-v3.xsd +26 -0
- data/resources/kernel-3/include/datacite-titleType-v3.xsd +12 -0
- data/resources/kernel-3/include/xml.xsd +286 -0
- data/resources/kernel-3/metadata.xsd +380 -0
- data/resources/kernel-3.0/include/datacite-contributorType-v3.xsd +33 -0
- data/resources/kernel-3.0/include/datacite-dateType-v3.xsd +21 -0
- data/resources/kernel-3.0/include/datacite-descriptionType-v3.xsd +17 -0
- data/resources/kernel-3.0/include/datacite-relatedIdentifierType-v3.xsd +27 -0
- data/resources/kernel-3.0/include/datacite-relationType-v3.xsd +33 -0
- data/resources/kernel-3.0/include/datacite-resourceType-v3.xsd +26 -0
- data/resources/kernel-3.0/include/datacite-titleType-v3.xsd +12 -0
- data/resources/kernel-3.0/include/xml.xsd +286 -0
- data/resources/kernel-3.0/metadata.xsd +377 -0
- data/resources/kernel-3.1/include/datacite-contributorType-v3.1.xsd +35 -0
- data/resources/kernel-3.1/include/datacite-dateType-v3.xsd +21 -0
- data/resources/kernel-3.1/include/datacite-descriptionType-v3.xsd +17 -0
- data/resources/kernel-3.1/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
- data/resources/kernel-3.1/include/datacite-relationType-v3.1.xsd +38 -0
- data/resources/kernel-3.1/include/datacite-resourceType-v3.xsd +26 -0
- data/resources/kernel-3.1/include/datacite-titleType-v3.xsd +12 -0
- data/resources/kernel-3.1/include/xml.xsd +286 -0
- data/resources/kernel-3.1/metadata.xsd +380 -0
- data/resources/kernel-4/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +39 -0
- data/resources/kernel-4/include/datacite-relationType-v4.xsd +59 -0
- data/resources/kernel-4/include/datacite-resourceType-v4.xsd +52 -0
- data/resources/kernel-4/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4/include/xml.xsd +286 -0
- data/resources/kernel-4/metadata.xsd +715 -0
- data/resources/kernel-4.0/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.0/include/datacite-dateType-v4.xsd +21 -0
- data/resources/kernel-4.0/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.0/include/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/kernel-4.0/include/datacite-relatedIdentifierType-v4.xsd +32 -0
- data/resources/kernel-4.0/include/datacite-relationType-v4.xsd +39 -0
- data/resources/kernel-4.0/include/datacite-resourceType-v4.xsd +26 -0
- data/resources/kernel-4.0/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.0/include/xml.xsd +286 -0
- data/resources/kernel-4.0/metadata.xsd +470 -0
- data/resources/kernel-4.1/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.1/include/datacite-dateType-v4.1.xsd +23 -0
- data/resources/kernel-4.1/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.1/include/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/kernel-4.1/include/datacite-nameType-v4.1.xsd +10 -0
- data/resources/kernel-4.1/include/datacite-relatedIdentifierType-v4.xsd +32 -0
- data/resources/kernel-4.1/include/datacite-relationType-v4.1.xsd +46 -0
- data/resources/kernel-4.1/include/datacite-resourceType-v4.1.xsd +28 -0
- data/resources/kernel-4.1/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.1/include/xml.xsd +286 -0
- data/resources/kernel-4.1/metadata.xsd +483 -0
- data/resources/kernel-4.2/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.2/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.2/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.2/include/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/kernel-4.2/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.2/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.2/include/datacite-relationType-v4.xsd +49 -0
- data/resources/kernel-4.2/include/datacite-resourceType-v4.xsd +28 -0
- data/resources/kernel-4.2/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.2/include/xml.xsd +286 -0
- data/resources/kernel-4.2/metadata.xsd +479 -0
- data/resources/kernel-4.3/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.3/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.3/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.3/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.3/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.3/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.3/include/datacite-relationType-v4.xsd +49 -0
- data/resources/kernel-4.3/include/datacite-resourceType-v4.xsd +28 -0
- data/resources/kernel-4.3/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.3/include/xml.xsd +286 -0
- data/resources/kernel-4.3/metadata.xsd +515 -0
- data/resources/kernel-4.4/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.4/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.4/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.4/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.4/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.4/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.4/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.4/include/datacite-relationType-v4.xsd +51 -0
- data/resources/kernel-4.4/include/datacite-resourceType-v4.xsd +43 -0
- data/resources/kernel-4.4/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.4/include/xml.xsd +286 -0
- data/resources/kernel-4.4/metadata.xsd +707 -0
- data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
- data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
- data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.5/include/xml.xsd +286 -0
- data/resources/kernel-4.5/metadata.xsd +711 -0
- data/resources/kernel-4.6/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4.6/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4.6/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.6/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.6/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.6/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.6/include/datacite-relatedIdentifierType-v4.xsd +37 -0
- data/resources/kernel-4.6/include/datacite-relationType-v4.xsd +57 -0
- data/resources/kernel-4.6/include/datacite-resourceType-v4.xsd +49 -0
- data/resources/kernel-4.6/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.6/include/xml.xsd +286 -0
- data/resources/kernel-4.6/metadata.xsd +712 -0
- data/resources/kernel-4.7/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4.7/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4.7/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.7/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.7/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.7/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.7/include/datacite-relatedIdentifierType-v4.xsd +39 -0
- data/resources/kernel-4.7/include/datacite-relationType-v4.xsd +59 -0
- data/resources/kernel-4.7/include/datacite-resourceType-v4.xsd +52 -0
- data/resources/kernel-4.7/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.7/include/xml.xsd +286 -0
- data/resources/kernel-4.7/metadata.xsd +715 -0
- data/resources/oecd/dfg-mappings.json +1866 -0
- data/resources/oecd/for-mappings.json +1101 -0
- data/resources/oecd/fos-mappings.json +198 -0
- data/resources/schema_org/jsonldcontext.json +7477 -0
- data/resources/spdx/licenses.json +5297 -0
- data/resources/xml.xsd +286 -0
- metadata +478 -150
- data/.travis.yml +0 -23
- data/lib/bolognese/crossref.rb +0 -202
- data/lib/bolognese/datacite.rb +0 -157
- data/lib/bolognese/date_utils.rb +0 -48
- data/lib/bolognese/github.rb +0 -106
- data/lib/bolognese/orcid.rb +0 -24
- data/lib/bolognese/pid_utils.rb +0 -23
- data/spec/cli_spec.rb +0 -37
- data/spec/crossref_spec.rb +0 -113
- data/spec/datacite_spec.rb +0 -49
- data/spec/doi_spec.rb +0 -89
- data/spec/fixtures/crossref.xml +0 -742
- data/spec/fixtures/datacite.xml +0 -40
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_crossref.yml +0 -760
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_schema_org.yml +0 -1476
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_datacite.yml +0 -214
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_schema_org.yml +0 -384
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/crossref.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/datacite.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/medra.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/not_found.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_test.yml +0 -843
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_SICI_DOI.yml +0 -277
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_data_citation.yml +0 -15755
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/date_in_future.yml +0 -2691
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/journal_article.yml +0 -1857
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/not_found_error.yml +0 -93
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/posted_content.yml +0 -5715
- data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/BlogPosting.yml +0 -307
- data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/Dataset.yml +0 -343
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref_doi_not_url.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite_doi_http.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/orcid.yml +0 -44
- data/spec/metadata_spec.rb +0 -35
- data/spec/orcid_spec.rb +0 -23
- data/spec/spec_helper.rb +0 -88
- /data/{LICENSE → LICENSE.md} +0 -0
data/lib/bolognese/metadata.rb
CHANGED
|
@@ -1,30 +1,262 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
require_relative '
|
|
4
|
-
require_relative 'pid_utils'
|
|
5
|
-
require_relative 'utils'
|
|
1
|
+
# frozen_string_literal: false
|
|
2
|
+
|
|
3
|
+
require_relative 'metadata_utils'
|
|
6
4
|
|
|
7
5
|
module Bolognese
|
|
8
6
|
# Parsed representation of a single metadata record.
#
# The record is built from a resolvable identifier, a local file or a raw
# metadata string. The matching `read_<format>` method (mixed in through
# MetadataUtils) produces the `meta` hash; the readers below expose
# individual fields, preferring a value assigned through the corresponding
# writer over the value found in `meta`.
class Metadata
  include Bolognese::MetadataUtils
  include Bolognese::Utils

  attr_accessor :string, :from, :sandbox, :meta, :regenerate, :issue, :show_errors
  attr_reader :doc, :page_start, :page_end
  # `:meta` already has a writer through attr_accessor above, so it is not repeated here.
  attr_writer :id, :provider_id, :client_id, :doi, :identifiers, :creators, :contributors, :titles, :publisher,
              :rights_list, :dates, :publication_year, :volume, :url, :version_info,
              :subjects, :contributor, :descriptions, :language, :sizes,
              :formats, :schema_version, :container, :agency,
              :format, :funding_references, :state, :geo_locations,
              :types, :content_url, :related_identifiers, :related_items, :style, :locale, :date_registered

  # Builds the record and runs the reader for the detected input format.
  #
  # @param options [Hash] string or symbol keys. Keys read here:
  #   :input (identifier, file path or metadata string), :from (format name,
  #   overrides auto-detection), :sandbox, :doi, :style, :locale, :regenerate,
  #   the database attributes (:url, :state, :date_registered, :date_updated,
  #   :provider_id, :client_id, :content_url) and the metadata attributes in
  #   the `read_options` slice below, which are handed to the reader.
  def initialize(options = {})
    # Work on a symbol-keyed copy so the caller's hash is left untouched and
    # hash types without a usable `symbolize_keys!` are accepted as well.
    options = options.symbolize_keys
    id = normalize_id(options[:input], options)
    ra = nil

    # attributes supplied alongside file or string input
    database_attributes = {
      "url" => options[:url],
      "state" => options[:state],
      "date_registered" => options[:date_registered],
      "date_updated" => options[:date_updated],
      "provider_id" => options[:provider_id],
      "client_id" => options[:client_id],
      "content_url" => options[:content_url]
    }

    if id.present?
      @from = options[:from] || find_from_format(id: id)

      # mEDRA, KISTI, JaLC and OP DOIs are found in the Crossref index
      ra = case @from
           when "medra" then "mEDRA"
           when "kisti" then "KISTI"
           when "jalc" then "JaLC"
           when "op" then "OP"
           end

      # generate name for method to call dynamically
      # NOTE(review): `@from` can come straight from the caller via :from and
      # is used to build a method name for `send` -- confirm callers restrict it.
      hsh = @from.present? ? send("get_#{@from}", id: id, sandbox: options[:sandbox]) : {}
      string = hsh.fetch("string", nil)
    elsif options[:input].present? && File.exist?(options[:input])
      filename = File.basename(options[:input])
      ext = File.extname(options[:input])
      unless %w(.bib .ris .xml .json).include?(ext)
        # NOTE(review): terminates the whole process from a constructor;
        # kept because callers may depend on the exit status.
        $stderr.puts "File type #{ext} not supported"
        exit 1
      end

      hsh = database_attributes
      # File.read never interprets the path as a command, unlike IO.read
      string = File.read(options[:input])
      @from = options[:from] || find_from_format(string: string, filename: filename, ext: ext)
    else
      hsh = database_attributes
      string = options[:input]
      @from = options[:from] || find_from_format(string: string)
    end

    # make sure input is encoded as utf8
    utf8_string = string.present? ? string.dup.force_encoding("UTF-8") : nil
    @string = utf8_string

    # input options for citation formatting
    @style = options[:style]
    @locale = options[:locale]

    @sandbox = options[:sandbox]

    # options that come from the datacite database
    database = hsh.to_h
    @url = database["url"].presence || options[:url].presence
    @state = database["state"].presence
    @date_registered = database["date_registered"].presence
    @date_updated = database["date_updated"].presence
    @provider_id = database["provider_id"].presence
    @client_id = database["client_id"].presence
    @content_url = database["content_url"].presence

    # set attributes directly
    read_options = options.slice(
      :creators,
      :contributors,
      :titles,
      :types,
      :identifiers,
      :container,
      :publisher,
      :funding_references,
      :dates,
      :publication_year,
      :descriptions,
      :rights_list,
      :version_info,
      :subjects,
      :language,
      :geo_locations,
      :related_identifiers,
      :related_items,
      :formats,
      :sizes
    ).compact

    # any directly supplied attribute forces regeneration
    @regenerate = options[:regenerate] || read_options.present?

    # generate name for method to call dynamically
    opts = { string: utf8_string, sandbox: options[:sandbox], doi: options[:doi], id: id, ra: ra }.merge(read_options)
    @meta = @from.present? ? send("read_#{@from}", **opts) : {}
  end

  def id
    @id ||= meta.fetch("id", nil)
  end

  def doi
    @doi ||= meta.fetch("doi", nil)
  end

  def provider_id
    @provider_id ||= meta.fetch("provider_id", nil)
  end

  def client_id
    @client_id ||= meta.fetch("client_id", nil)
  end

  # false only when the state (assigned or parsed) is "not_found"
  def exists?
    (@state || meta.fetch("state", nil)) != "not_found"
  end

  # true when the record exists and `errors` reports nothing
  def valid?
    exists? && errors.nil?
  end

  # validate against DataCite schema, unless there are already errors in the reader
  def errors
    meta.fetch("errors", nil) || datacite_errors(xml: datacite, schema_version: schema_version)
  end

  def descriptions
    @descriptions ||= meta.fetch("descriptions", nil)
  end

  def rights_list
    @rights_list ||= meta.fetch("rights_list", nil)
  end

  def subjects
    @subjects ||= meta.fetch("subjects", nil)
  end

  def language
    @language ||= meta.fetch("language", nil)
  end

  def sizes
    @sizes ||= meta.fetch("sizes", nil)
  end

  def formats
    @formats ||= meta.fetch("formats", nil)
  end

  def schema_version
    @schema_version ||= meta.fetch("schema_version", nil)
  end

  def funding_references
    @funding_references ||= meta.fetch("funding_references", nil)
  end

  def related_identifiers
    @related_identifiers ||= meta.fetch("related_identifiers", nil)
  end

  def related_items
    @related_items ||= meta.fetch("related_items", nil)
  end

  def url
    @url ||= meta.fetch("url", nil)
  end

  # falls back to the "version" key when "version_info" is absent
  def version_info
    @version_info ||= meta.fetch("version_info", nil) || meta.fetch("version", nil)
  end

  def publication_year
    @publication_year ||= meta.fetch("publication_year", nil)
  end

  # a container generated from the related metadata wins over the parsed one
  def container
    @container ||= begin
      generate_container(types, related_items, related_identifiers, descriptions) || meta.fetch("container", nil)
    end
  end

  def geo_locations
    @geo_locations ||= meta.fetch("geo_locations", nil)
  end

  def dates
    @dates ||= meta.fetch("dates", nil)
  end

  # Returns the assigned publisher, otherwise the normalized publisher from
  # `meta`, otherwise nil. The conditional is scoped to the right-hand side
  # so that a value set through `publisher=` is returned even when `meta`
  # carries no publisher.
  def publisher
    @publisher ||= (normalize_publisher(meta["publisher"]) if meta.fetch("publisher", nil).present?)
  end

  def identifiers
    @identifiers ||= meta.fetch("identifiers", nil)
  end

  def content_url
    @content_url ||= meta.fetch("content_url", nil)
  end

  def agency
    @agency ||= meta.fetch("agency", nil)
  end

  def state
    @state ||= meta.fetch("state", nil)
  end

  def date_registered
    @date_registered ||= meta.fetch("date_registered", nil)
  end

  def types
    @types ||= meta.fetch("types", nil)
  end

  def titles
    @titles ||= meta.fetch("titles", nil)
  end

  def creators
    @creators ||= meta.fetch("creators", nil)
  end

  def contributors
    @contributors ||= meta.fetch("contributors", nil)
  end
end
|
|
30
|
-
end
|
|
262
|
+
end
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'doi_utils'
|
|
4
|
+
require_relative 'author_utils'
|
|
5
|
+
require_relative 'datacite_utils'
|
|
6
|
+
require_relative 'utils'
|
|
7
|
+
|
|
8
|
+
require_relative 'readers/bibtex_reader'
|
|
9
|
+
require_relative 'readers/citeproc_reader'
|
|
10
|
+
require_relative 'readers/codemeta_reader'
|
|
11
|
+
require_relative 'readers/crosscite_reader'
|
|
12
|
+
require_relative 'readers/crossref_reader'
|
|
13
|
+
require_relative 'readers/datacite_json_reader'
|
|
14
|
+
require_relative 'readers/datacite_reader'
|
|
15
|
+
require_relative 'readers/npm_reader'
|
|
16
|
+
require_relative 'readers/ris_reader'
|
|
17
|
+
require_relative 'readers/schema_org_reader'
|
|
18
|
+
|
|
19
|
+
require_relative 'writers/bibtex_writer'
|
|
20
|
+
require_relative 'writers/citation_writer'
|
|
21
|
+
require_relative 'writers/citeproc_writer'
|
|
22
|
+
require_relative 'writers/codemeta_writer'
|
|
23
|
+
require_relative 'writers/crosscite_writer'
|
|
24
|
+
require_relative 'writers/crossref_writer'
|
|
25
|
+
require_relative 'writers/csv_writer'
|
|
26
|
+
require_relative 'writers/datacite_writer'
|
|
27
|
+
require_relative 'writers/datacite_json_writer'
|
|
28
|
+
require_relative 'writers/jats_writer'
|
|
29
|
+
require_relative 'writers/rdf_xml_writer'
|
|
30
|
+
require_relative 'writers/ris_writer'
|
|
31
|
+
require_relative 'writers/schema_org_writer'
|
|
32
|
+
require_relative 'writers/turtle_writer'
|
|
33
|
+
|
|
34
|
+
module Bolognese
|
|
35
|
+
module MetadataUtils
|
|
36
|
+
include Bolognese::DoiUtils
|
|
37
|
+
include Bolognese::AuthorUtils
|
|
38
|
+
include Bolognese::DataciteUtils
|
|
39
|
+
include Bolognese::Utils
|
|
40
|
+
|
|
41
|
+
include Bolognese::Readers::BibtexReader
|
|
42
|
+
include Bolognese::Readers::CiteprocReader
|
|
43
|
+
include Bolognese::Readers::CodemetaReader
|
|
44
|
+
include Bolognese::Readers::CrossciteReader
|
|
45
|
+
include Bolognese::Readers::CrossrefReader
|
|
46
|
+
include Bolognese::Readers::DataciteReader
|
|
47
|
+
include Bolognese::Readers::DataciteJsonReader
|
|
48
|
+
include Bolognese::Readers::NpmReader
|
|
49
|
+
include Bolognese::Readers::RisReader
|
|
50
|
+
include Bolognese::Readers::SchemaOrgReader
|
|
51
|
+
|
|
52
|
+
include Bolognese::Writers::BibtexWriter
|
|
53
|
+
include Bolognese::Writers::CitationWriter
|
|
54
|
+
include Bolognese::Writers::CiteprocWriter
|
|
55
|
+
include Bolognese::Writers::CodemetaWriter
|
|
56
|
+
include Bolognese::Writers::CrossciteWriter
|
|
57
|
+
include Bolognese::Writers::CrossrefWriter
|
|
58
|
+
include Bolognese::Writers::CsvWriter
|
|
59
|
+
include Bolognese::Writers::DataciteWriter
|
|
60
|
+
include Bolognese::Writers::DataciteJsonWriter
|
|
61
|
+
include Bolognese::Writers::JatsWriter
|
|
62
|
+
include Bolognese::Writers::RdfXmlWriter
|
|
63
|
+
include Bolognese::Writers::RisWriter
|
|
64
|
+
include Bolognese::Writers::SchemaOrgWriter
|
|
65
|
+
include Bolognese::Writers::TurtleWriter
|
|
66
|
+
|
|
67
|
+
# `name_detector` and `reverse` are defined as explicit methods further down
# in this module, so no attr_reader is declared for them (it would only be
# overwritten and trigger method-redefinition warnings).

# some dois in the Crossref index are from other registration agencies;
# their get_/read_ entry points reuse the Crossref implementations
alias get_medra get_crossref
alias read_medra read_crossref
alias get_kisti get_crossref
alias read_kisti read_crossref
alias get_jalc get_crossref
alias read_jalc read_crossref
alias get_op get_crossref
alias read_op read_crossref
|
|
78
|
+
|
|
79
|
+
# replace DOI in XML if provided in options
|
|
80
|
+
# Returns the input string with surrounding whitespace removed, or nil when
# there is no input. For "datacite" input the XML is parsed and serialized
# again; when both an <identifier> node and a DOI are present, the node's
# content is replaced with the upper-cased DOI first.
def raw
  stripped = string.strip if string.present?
  return stripped if from != "datacite" || !stripped.present?

  xml = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)
  identifier_node = xml.at_css("identifier")
  if identifier_node.present? && doi.present?
    identifier_node.content = doi.to_s.upcase
  end
  xml.to_xml.strip
end
|
|
89
|
+
|
|
90
|
+
# True when the input is DataCite, no regeneration was requested and there is
# raw input to hand through.
def should_passthru
  return false unless from == "datacite"

  regenerate.blank? && raw.present?
end
|
|
93
|
+
|
|
94
|
+
# Title of the containing work.
#
# Returns the container's "title" when a container is present. Otherwise,
# for records whose citeproc type is "article-journal", returns the
# publisher's "name" (nil when there is no publisher). Returns nil in every
# other case, including when `types` is nil.
def container_title
  return container["title"] if container.present?
  # `to_h` turns a missing types hash into {} instead of raising on nil
  return nil unless types.to_h["citeproc"] == "article-journal"

  publisher["name"] if publisher.present?
end
|
|
103
|
+
|
|
104
|
+
# recognize given name. Can be loaded once as ::NameDetector, e.g. in a Rails initializer
|
|
105
|
+
# Returns the top-level ::NameDetector constant when it is defined, otherwise
# nil. A found detector is cached; a nil result is looked up again on the
# next call.
def name_detector
  return @name_detector if @name_detector

  @name_detector = (::NameDetector if defined?(::NameDetector))
end
|
|
108
|
+
|
|
109
|
+
# Builds a hash of reverse links from the related identifiers:
# "citation" from IsReferencedBy relations and "isBasedOn" from
# IsSupplementTo relations. Each entry carries "@id", "@type" (defaulting to
# "ScholarlyArticle") and, for non-DOI identifiers, "identifier". Keys whose
# value is nil are dropped.
def reverse
  related = Array.wrap(related_identifiers)

  linked_works = lambda do |relation_type|
    matching = related.select { |ri| ri["relationType"] == relation_type }
    entries = matching.map do |r|
      entry = {
        "@id" => normalize_doi(r["relatedIdentifier"]),
        "@type" => r["resourceTypeGeneral"] || "ScholarlyArticle",
        "identifier" => (to_identifier(r) unless r["relatedIdentifierType"] == "DOI")
      }
      entry.compact
    end
    entries.unwrap
  end

  links = {
    "citation" => linked_works.call("IsReferencedBy"),
    "isBasedOn" => linked_works.call("IsSupplementTo")
  }
  links.compact
end
|
|
121
|
+
|
|
122
|
+
# RDF graph built from the schema.org JSON-LD representation (schema_hsh).
# Returns nil when a NameError is raised, e.g. because the JSON-LD / RDF
# constants are not loaded.
def graph
  # preload the schema.org context before converting to RDF
  schema_org_context = JSON::LD::Context.new.parse('resources/schema_org/jsonldcontext.json')
  JSON::LD::Context.add_preloaded('http://schema.org/', schema_org_context)

  rdf_graph = RDF::Graph.new
  rdf_graph << JSON::LD::API.toRdf(schema_hsh)
rescue NameError
  nil
end
|
|
133
|
+
|
|
134
|
+
# Builds the citeproc (CSL-JSON style) hash for this record.
#
# Returns a Hash with symbol keys; entries whose value is nil are omitted.
# A missing publisher no longer raises, and a single contributor hash is
# handled the same way as a list of contributors.
def citeproc_hsh
  container_hsh = container.to_h
  page = container_hsh["firstPage"].present? ? [container_hsh["firstPage"], container_hsh["lastPage"]].compact.join("-") : nil

  # a lone ":(unav)" creator is treated as "no author"
  creator_list = Array.wrap(creators)
  author = if creator_list.size == 1 && creator_list.first.fetch("name", nil) == ":(unav)"
             nil
           else
             to_citeproc(creators)
           end

  type = types["resourceTypeGeneral"] == "Software" ? "software" : types["citeproc"]

  # Filter out contributors who are already creators, editors, or translators to avoid duplication
  display_name = ->(person) { person["name"] || [person["givenName"], person["familyName"]].compact.join(" ") }
  creator_names = creator_list.map(&display_name)
  contributor_list = Array.wrap(contributors)
  unique_contributors = contributor_list.reject do |c|
    creator_names.include?(display_name.call(c)) ||
      c["contributorType"] == "Editor" ||
      c["contributorType"] == "Translator"
  end
  editors = contributor_list.select { |c| c["contributorType"] == "Editor" }
  translators = contributor_list.select { |c| c["contributorType"] == "Translator" }

  # fall back to the publication year when there is no "Issued" date
  issued = get_date(dates, "Issued") || publication_year.to_s

  {
    "type" => type,
    "id" => normalize_doi(doi),
    "categories" => Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.presence,
    "language" => language,
    "author" => author,
    "contributor" => unique_contributors.presence ? to_citeproc(unique_contributors) : nil,
    "editor" => contributors ? to_citeproc(editors) : nil,
    "translator" => contributors ? to_citeproc(translators) : nil,
    "issued" => get_date_parts(issued),
    # NOTE(review): these two read the "__content__" key, while other code in
    # this project stores dates under "date" -- confirm this key is still populated
    "submitted" => Array.wrap(dates).find { |d| d["dateType"] == "Submitted" }.to_h.fetch("__content__", nil),
    "available-date" => Array.wrap(dates).find { |d| d["dateType"] == "Available" }.to_h.fetch("__content__", nil),
    "abstract" => parse_attributes(descriptions, content: "description", first: true),
    "container-title" => container_title,
    "DOI" => doi,
    "volume" => container_hsh["volume"],
    "issue" => container_hsh["issue"],
    "number" => container_hsh["number"],
    "chapter-number" => container_hsh["chapterNumber"],
    "edition" => container_hsh["edition"],
    "page" => page,
    "page-first" => container_hsh["firstPage"],
    # publisher may be nil (e.g. for records read without a publisher)
    "publisher" => publisher.present? ? publisher["name"] : nil,
    "title" => parse_attributes(titles, content: "title", first: true),
    "URL" => url,
    "copyright" => Array.wrap(rights_list).map { |l| l["rights"] }.first,
    "version" => version_info
  }.compact.symbolize_keys
end
|
|
186
|
+
|
|
187
|
+
# Builds the crosscite hash: the normalized DOI as "id" followed by one entry
# per metadata reader, in a fixed key order. Entries with nil values are
# dropped.
def crosscite_hsh
  # keys whose value comes from a reader with a different name
  renamed_readers = { "version" => "version_info" }
  keys = %w[doi url types creators titles publisher container subjects contributors dates
            publication_year language identifiers sizes formats version rights_list descriptions
            geo_locations funding_references related_identifiers related_items schema_version
            provider_id client_id agency state]

  record = { "id" => normalize_doi(doi) }
  keys.each { |key| record[key] = send(renamed_readers.fetch(key, key)) }
  record.compact
end
|
|
219
|
+
|
|
220
|
+
# Citation style; "apa" unless another style has been set.
def style
  return @style if @style

  @style = "apa"
end
|
|
223
|
+
|
|
224
|
+
# Citation locale; "en-US" unless another locale has been set.
def locale
  return @locale if @locale

  @locale = "en-US"
end
|
|
227
|
+
end
|
|
228
|
+
end
|
data/lib/bolognese/pubmed.rb
CHANGED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bolognese
  module Readers
    # Reads a BibTeX record into the crosscite metadata hash.
    module BibtexReader
      # BibTeX entry type => citeproc type
      BIB_TO_CP_TRANSLATIONS = {
        "article" => "article-journal",
        "phdthesis" => "thesis"
      }.freeze

      # BibTeX entry type => RIS type
      BIB_TO_RIS_TRANSLATIONS = {
        "article" => "JOUR",
        "book" => "BOOK",
        "inbook" => "CHAP",
        "inproceedings" => "CPAPER",
        "manual" => nil,
        "misc" => "GEN",
        "phdthesis" => "THES",
        "proceedings" => "CONF",
        "techreport" => "RPRT",
        "unpublished" => "UNPD"
      }.freeze

      # BibTeX entry type => schema.org type
      BIB_TO_SO_TRANSLATIONS = {
        "article" => "ScholarlyArticle",
        "phdthesis" => "Thesis"
      }.freeze

      # Parses the first entry of a BibTeX string.
      #
      # string  - BibTeX source; when blank an empty record is used.
      # options - :doi is used as fallback DOI; :doi, :id, :url, :sandbox,
      #           :validate and :ra are not copied into the result, every
      #           other option is merged into (and overrides) the result.
      #
      # Returns a Hash of crosscite metadata.
      def read_bibtex(string: nil, **options)
        read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))

        meta = string.present? ? BibTeX.parse(string).first : OpenStruct.new

        bibtex_type = meta.try(:type).to_s
        schema_org = BIB_TO_SO_TRANSLATIONS[bibtex_type] || "ScholarlyArticle"
        types = {
          "resourceTypeGeneral" => Metadata::BIB_TO_DC_TRANSLATIONS[bibtex_type],
          "resourceType" => Bolognese::Utils::BIB_TO_CR_TRANSLATIONS[bibtex_type] || bibtex_type,
          "schemaOrg" => schema_org,
          "bibtex" => bibtex_type,
          "citeproc" => BIB_TO_CP_TRANSLATIONS[bibtex_type] || "misc",
          "ris" => BIB_TO_RIS_TRANSLATIONS[bibtex_type] || "GEN"
        }.compact
        doi = meta.try(:doi).to_s.presence || options[:doi]

        creators = Array(meta.try(:author)).map do |a|
          { "nameType" => "Personal",
            # skip missing name parts so a single-part name gets no dangling ", "
            "name" => [a.last, a.first].select(&:present?).join(", "),
            "givenName" => a.first,
            "familyName" => a.last }.compact
        end

        journal = meta.try(:journal)
        issn = meta.try(:issn).to_s.presence

        related_identifiers = if journal.present? && issn
          [{ "type" => "Periodical",
             "relationType" => "IsPartOf",
             "relatedIdentifierType" => "ISSN",
             "title" => journal.to_s,
             "relatedIdentifier" => issn }.compact]
        end

        container = if journal.present?
          # BibTeX page ranges are commonly written with a double hyphen
          # ("12--34"), so split on any run of hyphens
          first_page, last_page = meta.try(:pages).to_s.split(/-+/).map(&:strip)

          { "type" => "Journal",
            "title" => journal.to_s,
            "identifier" => issn,
            "identifierType" => issn ? "ISSN" : nil,
            "volume" => meta.try(:volume).to_s.presence,
            "firstPage" => first_page,
            "lastPage" => last_page }.compact
        end

        state = meta.try(:doi).to_s.present? || read_options.present? ? "findable" : "not_found"
        dates = if meta.try(:date).present? && Date.edtf(meta.date.to_s).present?
          [{ "date" => meta.date.to_s,
             "dateType" => "Issued" }]
        end
        publication_year = meta.try(:date).present? ? meta.date.to_s[0..3] : nil
        rights_list = meta.try(:copyright).present? ? [hsh_to_spdx("rightsURI" => meta[:copyright])] : []

        descriptions = if meta.try(:abstract).present?
          [{ "description" => sanitize(meta.abstract.to_s, new_line: true).presence,
             "descriptionType" => "Abstract" }]
        else
          []
        end

        { "id" => normalize_doi(doi),
          "types" => types,
          "doi" => doi,
          "url" => meta.try(:url).to_s.presence,
          "titles" => meta.try(:title).present? ? [{ "title" => meta.try(:title).to_s }] : [],
          "creators" => creators,
          "container" => container,
          "publisher" => meta.try(:publisher).present? ? { "name" => meta.publisher.to_s } : nil,
          "related_identifiers" => related_identifiers,
          "dates" => dates,
          "publication_year" => publication_year,
          "descriptions" => descriptions,
          "rights_list" => rights_list,
          "state" => state
        }.merge(read_options)
      end
    end
  end
end
|