bolognese 0.2.2 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/build.yml +9 -0
- data/.github/workflows/changelog.yml +36 -0
- data/.github/workflows/ci.yml +22 -0
- data/.github/workflows/pull-request.yml +9 -0
- data/.github/workflows/release.yml +32 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +658 -0
- data/CHANGELOG.md +1864 -0
- data/CITATION +17 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +251 -99
- data/README.md +1026 -2
- data/Rakefile +1 -0
- data/bin/bolognese +5 -1
- data/bolognese.gemspec +33 -21
- data/lib/bolognese/array.rb +13 -0
- data/lib/bolognese/author_utils.rb +115 -39
- data/lib/bolognese/citeproc_extensions.rb +48 -0
- data/lib/bolognese/cli.rb +28 -15
- data/lib/bolognese/datacite_utils.rb +418 -0
- data/lib/bolognese/doi_utils.rb +45 -23
- data/lib/bolognese/metadata.rb +250 -18
- data/lib/bolognese/metadata_utils.rb +228 -0
- data/lib/bolognese/pubmed.rb +2 -0
- data/lib/bolognese/readers/bibtex_reader.rb +100 -0
- data/lib/bolognese/readers/citeproc_reader.rb +125 -0
- data/lib/bolognese/readers/codemeta_reader.rb +108 -0
- data/lib/bolognese/readers/crosscite_reader.rb +17 -0
- data/lib/bolognese/readers/crossref_reader.rb +413 -0
- data/lib/bolognese/readers/datacite_json_reader.rb +17 -0
- data/lib/bolognese/readers/datacite_reader.rb +338 -0
- data/lib/bolognese/readers/npm_reader.rb +115 -0
- data/lib/bolognese/readers/ris_reader.rb +114 -0
- data/lib/bolognese/readers/schema_org_reader.rb +264 -0
- data/lib/bolognese/string.rb +3 -1
- data/lib/bolognese/utils.rb +1403 -12
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/whitelist_scrubber.rb +47 -0
- data/lib/bolognese/writers/bibtex_writer.rb +32 -0
- data/lib/bolognese/writers/citation_writer.rb +14 -0
- data/lib/bolognese/writers/citeproc_writer.rb +11 -0
- data/lib/bolognese/writers/codemeta_writer.rb +29 -0
- data/lib/bolognese/writers/crosscite_writer.rb +11 -0
- data/lib/bolognese/writers/crossref_writer.rb +11 -0
- data/lib/bolognese/writers/csv_writer.rb +24 -0
- data/lib/bolognese/writers/datacite_json_writer.rb +13 -0
- data/lib/bolognese/writers/datacite_writer.rb +12 -0
- data/lib/bolognese/writers/jats_writer.rb +138 -0
- data/lib/bolognese/writers/rdf_xml_writer.rb +11 -0
- data/lib/bolognese/writers/ris_writer.rb +29 -0
- data/lib/bolognese/writers/schema_org_writer.rb +55 -0
- data/lib/bolognese/writers/turtle_writer.rb +11 -0
- data/lib/bolognese.rb +19 -4
- data/package.json +12 -0
- data/resources/2008/09/xsd.xsl +997 -0
- data/resources/datacite-contributorType-v4.xsd +35 -0
- data/resources/datacite-dateType-v4.xsd +25 -0
- data/resources/datacite-descriptionType-v4.xsd +19 -0
- data/resources/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/datacite-nameType-v4.xsd +10 -0
- data/resources/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/datacite-relationType-v4.xsd +49 -0
- data/resources/datacite-resourceType-v4.xsd +28 -0
- data/resources/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-2.1/include/datacite-contributorType-v1.1.xsd +22 -0
- data/resources/kernel-2.1/include/datacite-contributorType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-dateType-v1.1.xsd +31 -0
- data/resources/kernel-2.1/include/datacite-dateType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-descriptionType-v1.1.xsd +14 -0
- data/resources/kernel-2.1/include/datacite-descriptionType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v1.1.xsd +24 -0
- data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-relationType-v1.1.xsd +29 -0
- data/resources/kernel-2.1/include/datacite-relationType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-resourceType-v1.1.xsd +22 -0
- data/resources/kernel-2.1/include/datacite-resourceType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-titleType-v1.1.xsd +11 -0
- data/resources/kernel-2.1/include/datacite-titleType-v2.xsd +3 -0
- data/resources/kernel-2.1/metadata.xsd +315 -0
- data/resources/kernel-2.2/include/datacite-contributorType-v2.xsd +29 -0
- data/resources/kernel-2.2/include/datacite-dateType-v2.xsd +21 -0
- data/resources/kernel-2.2/include/datacite-descriptionType-v2.xsd +15 -0
- data/resources/kernel-2.2/include/datacite-relatedIdentifierType-v2.xsd +25 -0
- data/resources/kernel-2.2/include/datacite-relationType-v2.xsd +29 -0
- data/resources/kernel-2.2/include/datacite-resourceType-v2.xsd +23 -0
- data/resources/kernel-2.2/include/datacite-titleType-v2.xsd +10 -0
- data/resources/kernel-2.2/metadata.xsd +316 -0
- data/resources/kernel-3/include/datacite-contributorType-v3.1.xsd +35 -0
- data/resources/kernel-3/include/datacite-dateType-v3.xsd +21 -0
- data/resources/kernel-3/include/datacite-descriptionType-v3.xsd +17 -0
- data/resources/kernel-3/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
- data/resources/kernel-3/include/datacite-relationType-v3.1.xsd +38 -0
- data/resources/kernel-3/include/datacite-resourceType-v3.xsd +26 -0
- data/resources/kernel-3/include/datacite-titleType-v3.xsd +12 -0
- data/resources/kernel-3/include/xml.xsd +286 -0
- data/resources/kernel-3/metadata.xsd +380 -0
- data/resources/kernel-3.0/include/datacite-contributorType-v3.xsd +33 -0
- data/resources/kernel-3.0/include/datacite-dateType-v3.xsd +21 -0
- data/resources/kernel-3.0/include/datacite-descriptionType-v3.xsd +17 -0
- data/resources/kernel-3.0/include/datacite-relatedIdentifierType-v3.xsd +27 -0
- data/resources/kernel-3.0/include/datacite-relationType-v3.xsd +33 -0
- data/resources/kernel-3.0/include/datacite-resourceType-v3.xsd +26 -0
- data/resources/kernel-3.0/include/datacite-titleType-v3.xsd +12 -0
- data/resources/kernel-3.0/include/xml.xsd +286 -0
- data/resources/kernel-3.0/metadata.xsd +377 -0
- data/resources/kernel-3.1/include/datacite-contributorType-v3.1.xsd +35 -0
- data/resources/kernel-3.1/include/datacite-dateType-v3.xsd +21 -0
- data/resources/kernel-3.1/include/datacite-descriptionType-v3.xsd +17 -0
- data/resources/kernel-3.1/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
- data/resources/kernel-3.1/include/datacite-relationType-v3.1.xsd +38 -0
- data/resources/kernel-3.1/include/datacite-resourceType-v3.xsd +26 -0
- data/resources/kernel-3.1/include/datacite-titleType-v3.xsd +12 -0
- data/resources/kernel-3.1/include/xml.xsd +286 -0
- data/resources/kernel-3.1/metadata.xsd +380 -0
- data/resources/kernel-4/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +39 -0
- data/resources/kernel-4/include/datacite-relationType-v4.xsd +59 -0
- data/resources/kernel-4/include/datacite-resourceType-v4.xsd +52 -0
- data/resources/kernel-4/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4/include/xml.xsd +286 -0
- data/resources/kernel-4/metadata.xsd +715 -0
- data/resources/kernel-4.0/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.0/include/datacite-dateType-v4.xsd +21 -0
- data/resources/kernel-4.0/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.0/include/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/kernel-4.0/include/datacite-relatedIdentifierType-v4.xsd +32 -0
- data/resources/kernel-4.0/include/datacite-relationType-v4.xsd +39 -0
- data/resources/kernel-4.0/include/datacite-resourceType-v4.xsd +26 -0
- data/resources/kernel-4.0/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.0/include/xml.xsd +286 -0
- data/resources/kernel-4.0/metadata.xsd +470 -0
- data/resources/kernel-4.1/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.1/include/datacite-dateType-v4.1.xsd +23 -0
- data/resources/kernel-4.1/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.1/include/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/kernel-4.1/include/datacite-nameType-v4.1.xsd +10 -0
- data/resources/kernel-4.1/include/datacite-relatedIdentifierType-v4.xsd +32 -0
- data/resources/kernel-4.1/include/datacite-relationType-v4.1.xsd +46 -0
- data/resources/kernel-4.1/include/datacite-resourceType-v4.1.xsd +28 -0
- data/resources/kernel-4.1/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.1/include/xml.xsd +286 -0
- data/resources/kernel-4.1/metadata.xsd +483 -0
- data/resources/kernel-4.2/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.2/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.2/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.2/include/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/kernel-4.2/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.2/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.2/include/datacite-relationType-v4.xsd +49 -0
- data/resources/kernel-4.2/include/datacite-resourceType-v4.xsd +28 -0
- data/resources/kernel-4.2/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.2/include/xml.xsd +286 -0
- data/resources/kernel-4.2/metadata.xsd +479 -0
- data/resources/kernel-4.3/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.3/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.3/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.3/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.3/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.3/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.3/include/datacite-relationType-v4.xsd +49 -0
- data/resources/kernel-4.3/include/datacite-resourceType-v4.xsd +28 -0
- data/resources/kernel-4.3/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.3/include/xml.xsd +286 -0
- data/resources/kernel-4.3/metadata.xsd +515 -0
- data/resources/kernel-4.4/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.4/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.4/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.4/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.4/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.4/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.4/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.4/include/datacite-relationType-v4.xsd +51 -0
- data/resources/kernel-4.4/include/datacite-resourceType-v4.xsd +43 -0
- data/resources/kernel-4.4/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.4/include/xml.xsd +286 -0
- data/resources/kernel-4.4/metadata.xsd +707 -0
- data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
- data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
- data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.5/include/xml.xsd +286 -0
- data/resources/kernel-4.5/metadata.xsd +711 -0
- data/resources/kernel-4.6/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4.6/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4.6/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.6/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.6/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.6/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.6/include/datacite-relatedIdentifierType-v4.xsd +37 -0
- data/resources/kernel-4.6/include/datacite-relationType-v4.xsd +57 -0
- data/resources/kernel-4.6/include/datacite-resourceType-v4.xsd +49 -0
- data/resources/kernel-4.6/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.6/include/xml.xsd +286 -0
- data/resources/kernel-4.6/metadata.xsd +712 -0
- data/resources/kernel-4.7/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4.7/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4.7/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.7/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.7/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.7/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.7/include/datacite-relatedIdentifierType-v4.xsd +39 -0
- data/resources/kernel-4.7/include/datacite-relationType-v4.xsd +59 -0
- data/resources/kernel-4.7/include/datacite-resourceType-v4.xsd +52 -0
- data/resources/kernel-4.7/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.7/include/xml.xsd +286 -0
- data/resources/kernel-4.7/metadata.xsd +715 -0
- data/resources/oecd/dfg-mappings.json +1866 -0
- data/resources/oecd/for-mappings.json +1101 -0
- data/resources/oecd/fos-mappings.json +198 -0
- data/resources/schema_org/jsonldcontext.json +7477 -0
- data/resources/spdx/licenses.json +5297 -0
- data/resources/xml.xsd +286 -0
- metadata +478 -150
- data/.travis.yml +0 -23
- data/lib/bolognese/crossref.rb +0 -202
- data/lib/bolognese/datacite.rb +0 -157
- data/lib/bolognese/date_utils.rb +0 -48
- data/lib/bolognese/github.rb +0 -106
- data/lib/bolognese/orcid.rb +0 -24
- data/lib/bolognese/pid_utils.rb +0 -23
- data/spec/cli_spec.rb +0 -37
- data/spec/crossref_spec.rb +0 -113
- data/spec/datacite_spec.rb +0 -49
- data/spec/doi_spec.rb +0 -89
- data/spec/fixtures/crossref.xml +0 -742
- data/spec/fixtures/datacite.xml +0 -40
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_crossref.yml +0 -760
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_schema_org.yml +0 -1476
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_datacite.yml +0 -214
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_schema_org.yml +0 -384
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/crossref.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/datacite.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/medra.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/not_found.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_test.yml +0 -843
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_SICI_DOI.yml +0 -277
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_data_citation.yml +0 -15755
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/date_in_future.yml +0 -2691
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/journal_article.yml +0 -1857
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/not_found_error.yml +0 -93
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/posted_content.yml +0 -5715
- data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/BlogPosting.yml +0 -307
- data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/Dataset.yml +0 -343
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref_doi_not_url.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite_doi_http.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/orcid.yml +0 -44
- data/spec/metadata_spec.rb +0 -35
- data/spec/orcid_spec.rb +0 -23
- data/spec/spec_helper.rb +0 -88
- /data/{LICENSE → LICENSE.md} +0 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bolognese
  module Readers
    # Reader for citeproc (CSL-JSON) input.
    #
    # The methods here call helpers that are not defined in this module
    # (jsonlint, get_authors, from_citeproc, get_date_from_date_parts,
    # hsh_to_spdx, normalize_id, doi_from_url, name_to_subject, sanitize);
    # the including object is expected to provide them.
    module CiteprocReader
      # citeproc item type => schema.org type
      CP_TO_SO_TRANSLATIONS = {
        "song" => "AudioObject",
        "post-weblog" => "BlogPosting",
        "dataset" => "Dataset",
        "graphic" => "ImageObject",
        "motion_picture" => "Movie",
        "article-journal" => "ScholarlyArticle",
        "broadcast" => "VideoObject",
        "webpage" => "WebPage"
      }.freeze

      # citeproc item type => RIS reference type
      CP_TO_RIS_TRANSLATIONS = {
        "post-weblog" => "BLOG",
        "dataset" => "DATA",
        "graphic" => "FIGURE",
        "book" => "BOOK",
        "motion_picture" => "MPCT",
        "article-journal" => "JOUR",
        "broadcast" => "MPCT",
        "webpage" => "ELEC"
      }.freeze

      # Converts a citeproc JSON string into a metadata hash.
      #
      # string  - citeproc JSON. A blank string is treated as an empty record.
      # options - extra attributes. :doi, :id, :url, :sandbox, :validate and :ra
      #           are dropped; everything else is merged over the parsed values.
      #
      # Returns { "errors" => errors } when jsonlint reports errors for a
      # non-blank string, otherwise the metadata hash.
      def read_citeproc(string: nil, **options)
        if string.present?
          errors = jsonlint(string)
          return { "errors" => errors } if errors.present?
        end

        read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))

        meta = string.present? ? Maremma.from_json(string) : {}

        citeproc_type = meta.fetch("type", nil)
        schema_org = CP_TO_SO_TRANSLATIONS[citeproc_type] || "CreativeWork"
        types = {
          "resourceTypeGeneral" => Bolognese::Utils::CP_TO_DC_TRANSLATIONS[citeproc_type],
          # was misspelled "reourceType", so the value never reached "resourceType"
          "resourceType" => meta.fetch("additionalType", nil),
          "schemaOrg" => schema_org,
          "citeproc" => citeproc_type,
          "bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
          # CP_TO_RIS_TRANSLATIONS is keyed by citeproc type; indexing it with the
          # schema.org type could never match and always produced "GEN"
          "ris" => CP_TO_RIS_TRANSLATIONS[citeproc_type] || "GEN"
        }.compact

        creators = if meta.fetch("author", nil).present?
          get_authors(from_citeproc(Array.wrap(meta.fetch("author", nil))))
        else
          # placeholder creator used when the record has no author
          [{ "nameType" => "Organizational", "name" => ":(unav)" }]
        end
        contributors = get_authors(from_citeproc(Array.wrap(meta.fetch("editor", nil))))
        translators = get_authors(from_citeproc(Array.wrap(meta.fetch("translator", nil))))
        translators.each do |translator|
          translator["contributorType"] = "Translator"
        end
        contributors += translators

        # the issued date feeds both "dates" and "publication_year"; compute it once
        issued = get_date_from_date_parts(meta.fetch("issued", nil))
        dates = if issued && Date.edtf(issued).present?
          [{ "date" => issued, "dateType" => "Issued" }]
        end
        publication_year = issued.to_s[0..3]

        rights_list = if meta.fetch("copyright", nil)
          [hsh_to_spdx("rightsURI" => meta.fetch("copyright"))]
        end

        container_title = meta.fetch("container-title", nil)
        issn = meta.fetch("ISSN", nil)

        related_identifiers = if container_title.present? && issn.present?
          [{ "type" => "Periodical",
             "relationType" => "IsPartOf",
             "relatedIdentifierType" => "ISSN",
             "title" => container_title,
             "relatedIdentifier" => issn }.compact]
        end

        container = if container_title.present?
          # "page" is a range such as "12-34"; the parts become first/last page
          pages = meta.fetch("page", nil).present? ? meta.fetch("page").split("-").map(&:strip) : []

          { "type" => "Periodical",
            "title" => container_title,
            "identifier" => issn,
            "identifierType" => issn.present? ? "ISSN" : nil,
            "volume" => meta.fetch("volume", nil),
            "issue" => meta.fetch("issue", nil),
            "firstPage" => pages[0],
            "lastPage" => pages[1] }.compact
        end

        id = normalize_id(meta.fetch("id", nil) || meta.fetch("DOI", nil))

        state = id.present? || read_options.present? ? "findable" : "not_found"
        subjects = Array.wrap(meta.fetch("categories", nil)).flat_map do |subject|
          name_to_subject(subject)
        end

        { "id" => id,
          "types" => types,
          "doi" => doi_from_url(id),
          "url" => normalize_id(meta.fetch("URL", nil)),
          "titles" => [{ "title" => meta.fetch("title", nil) }],
          "creators" => creators,
          "contributors" => contributors,
          "container" => container,
          "publisher" => meta.fetch("publisher", nil),
          "related_identifiers" => related_identifiers,
          "dates" => dates,
          "publication_year" => publication_year,
          "descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract"), new_line: true), "descriptionType" => "Abstract" }] : [],
          "rights_list" => rights_list,
          "version_info" => meta.fetch("version", nil),
          "subjects" => subjects,
          "state" => state
        }.merge(read_options)
      end
    end
  end
end
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bolognese
  module Readers
    # Reader for codemeta JSON documents.
    #
    # Relies on helpers that are not defined in this module (jsonlint,
    # normalize_id, github_as_codemeta_url, get_identifier_type, get_authors,
    # from_schema_org_creators, from_schema_org_contributors, name_to_subject,
    # hsh_to_spdx, doi_from_url, sanitize).
    module CodemetaReader
      # Retrieves the raw codemeta document for the given id.
      #
      # Returns { "string" => nil, "state" => "not_found" } for a blank id,
      # otherwise { "string" => <"data" entry of the response body> }.
      def get_codemeta(id: nil, **options)
        return { "string" => nil, "state" => "not_found" } unless id.present?

        codemeta_url = github_as_codemeta_url(normalize_id(id))
        response = Maremma.get(codemeta_url, accept: "json", raw: true)

        { "string" => response.body.fetch("data", nil) }
      end

      # Converts a codemeta JSON string into a metadata hash.
      #
      # string  - codemeta JSON. A blank string is treated as an empty record.
      # options - :doi takes precedence when choosing the id; :doi, :id, :url,
      #           :sandbox, :validate and :ra are not merged into the result,
      #           all other keys are merged over the parsed values.
      #
      # Returns { "errors" => errors } when jsonlint reports errors for a
      # non-blank string, otherwise the metadata hash.
      def read_codemeta(string: nil, **options)
        lint_errors = string.present? ? jsonlint(string) : nil
        return { "errors" => lint_errors } if lint_errors.present?

        read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))

        meta = string.present? ? Maremma.from_json(string) : {}

        # String identifiers become URL entries unless they are DOI URLs;
        # hash identifiers are mapped through their propertyID/value pair.
        identifiers = Array.wrap(meta.fetch("identifier", nil)).map do |entry|
          entry = normalize_id(entry) if entry.is_a?(String)
          case entry
          when String
            { "identifierType" => "URL", "identifier" => entry } unless entry.start_with?("https://doi.org")
          when Hash
            { "identifierType" => get_identifier_type(entry["propertyID"]), "identifier" => entry["value"] }
          end
        end.compact.uniq

        id = normalize_id(options[:doi] || meta.fetch("@id", nil) || meta.fetch("identifier", nil))

        # "agents" wins over "authors" whenever it is not nil
        agents = meta.fetch("agents", nil)
        authors = agents.nil? ? meta.fetch("authors", nil) : agents
        creators = get_authors(from_schema_org_creators(Array.wrap(authors)))

        contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))

        date_types = {
          "datePublished" => "Issued",
          "dateCreated" => "Created",
          "dateModified" => "Updated"
        }
        dates = date_types.each_with_object([]) do |(field, date_type), list|
          value = meta.fetch(field, nil)
          list << { "date" => value, "dateType" => date_type } if value.present?
        end

        published = meta.fetch("datePublished", nil)
        publication_year = published.present? ? published[0..3] : nil

        publisher_name = meta.fetch("publisher", nil)
        publisher = publisher_name.present? ? { "name" => publisher_name } : nil

        state = if meta.present? || read_options.present?
          "findable"
        else
          "not_found"
        end

        schema_org = meta.fetch("@type", nil)
        types = {
          "resourceTypeGeneral" => Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org],
          "resourceType" => meta.fetch("additionalType", nil),
          "schemaOrg" => schema_org,
          "citeproc" => Bolognese::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || "article-journal",
          "bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
          "ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[schema_org] || "GEN"
        }.compact

        subjects = Array.wrap(meta.fetch("tags", nil)).reduce([]) do |collected, tag|
          collected + name_to_subject(tag)
        end

        # fall back to "name" only when "title" is nil
        title = meta.fetch("title", nil)
        title = meta.fetch("name", nil) if title.nil?
        titles = [{ "title" => title }]

        license_id = meta.fetch("licenseId", nil)
        rights_list = license_id.present? ? [hsh_to_spdx("rightsIdentifier" => meta.fetch("licenseId"))] : nil

        description = meta.fetch("description", nil)
        descriptions = if description.present?
          [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }]
        end

        result = {
          "id" => id,
          "types" => types,
          "identifiers" => identifiers,
          "doi" => doi_from_url(id),
          "url" => normalize_id(meta.fetch("codeRepository", nil)),
          "titles" => titles,
          "creators" => creators,
          "contributors" => contributors,
          "publisher" => publisher,
          # "is_part_of" => is_part_of,
          "dates" => dates,
          "publication_year" => publication_year,
          "descriptions" => descriptions,
          "rights_list" => rights_list,
          "version_info" => meta.fetch("version", nil),
          "subjects" => subjects,
          "state" => state
        }
        result.merge(read_options)
      end

      # Disabled related-identifier helpers, kept for reference:
      #
      # def related_identifiers(relation_type)
      #   normalize_ids(ids: metadata.fetch(relation_type, nil), relation_type: relation_type)
      # end
      #
      # def same_as
      #   related_identifiers("isIdenticalTo")
      # end
      #
      # def is_part_of
      #   related_identifiers("isPartOf")
      # end
      #
      # def has_part
      #   related_identifiers("hasPart")
      # end
      #
      # def citation
      #   related_identifiers("citation")
      # end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bolognese
  module Readers
    # Reader for crosscite JSON, which is returned essentially as parsed.
    module CrossciteReader
      # Parses a crosscite JSON string.
      #
      # string  - crosscite JSON.
      # options - accepted but not used by this method.
      #
      # Returns { "errors" => errors } when jsonlint reports errors, otherwise
      # the parsed hash with its "publisher" entry passed through
      # normalize_publisher when present.
      def read_crosscite(string: nil, **options)
        # NOTE(review): jsonlint runs even when string is blank; the blank case
        # below is only reached if jsonlint reports no errors for it - confirm.
        lint_errors = jsonlint(string)
        return { "errors" => lint_errors } if lint_errors.present?

        record = string.present? ? Maremma.from_json(string) : {}

        publisher = record.fetch("publisher", nil)
        record["publisher"] = normalize_publisher(record["publisher"]) if publisher.present?

        record
      end
    end
  end
end
|
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bolognese
|
|
4
|
+
module Readers
|
|
5
|
+
module CrossrefReader
|
|
6
|
+
# CrossRef types from https://api.crossref.org/types
|
|
7
|
+
|
|
8
|
+
# Crossref contributor role => contributor type label.
# Frozen so the shared lookup table cannot be mutated at runtime.
CR_TO_DC_CONTRIBUTOR_TYPES = {
  "editor" => "Editor",
  "translator" => "Translator"
}.freeze
|
|
12
|
+
|
|
13
|
+
# Fetches the Crossref UNIXSD XML record for the given id.
#
# id      - identifier that doi_from_url turns into a DOI.
# options - accepted but not used by this method.
#
# Returns { "string" => nil, "state" => "not_found" } for a blank id,
# otherwise { "string" => xml }, where xml is the "data" entry of the
# response body, re-serialised through Nokogiri (noblanks) when present.
def get_crossref(id: nil, **options)
  return { "string" => nil, "state" => "not_found" } unless id.present?

  endpoint = "https://api.crossref.org/works/#{doi_from_url(id)}/transform/application/vnd.crossref.unixsd+xml"
  payload = Maremma.get(endpoint, accept: "text/xml;charset=utf-8", raw: true).body.fetch("data", nil)

  xml = if payload.present?
    Nokogiri::XML(payload, nil, 'UTF-8', &:noblanks).to_s
  else
    payload
  end

  { "string" => xml }
end
|
|
24
|
+
|
|
25
|
+
# Builds the reader's metadata hash from a Crossref query-result XML string.
#
# @param string [String, nil] XML as produced by get_crossref; a blank value
#   yields empty metadata
# @param options [Hash] :doi or :id take precedence over the DOI found in the
#   XML and :ra over the default "crossref" agency; every key other than
#   :doi, :id, :url, :sandbox, :validate and :ra is merged over the result
# @return [Hash] metadata with string keys ("id", "types", "titles", ...)
def read_crossref(string: nil, **options)
  read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))

  meta = {}
  query = {}
  if string.present?
    # Parse the XML once. query contains information from outside the
    # metadata schema, e.g. publisher name.
    query = Maremma.from_xml(string).dig("crossref_result", "query_result", "body", "query") || {}
    record = query.dig("doi_record") || {}

    # record already is the doi_record, so the error element is looked up
    # directly below "crossref"; the nested "doi_record" lookup is kept for
    # backward compatibility.
    # NOTE(review): confirm the error layout against a Crossref error response.
    has_error = !record.dig("crossref", "error").nil? || !record.dig("doi_record", "crossref", "error").nil?
    meta = has_error ? {} : record
  end

  # crm_item entries are looked up by their "name" attribute
  crm_items = Array.wrap(query.to_h["crm_item"])
  find_crm_item = ->(name) { crm_items.find { |cr| cr["name"] == name } }

  # model should be one of book, conference, database, dissertation, journal, peer_review, posted_content,
  # report_paper, sa_component, standard
  model = meta.dig("crossref").to_h.keys.last

  resource_type = nil
  bibliographic_metadata = {}
  program_metadata = {}
  journal_metadata = nil
  journal_issue = {}
  journal_article = nil
  book_metadata = nil
  book_series_metadata = nil
  identifiers = nil

  first_crm_item = query.dig("crm_item", 0)
  publisher = first_crm_item.is_a?(String) ? { "name" => first_crm_item } : nil

  case model
  when "book"
    book_metadata = meta.dig("crossref", "book", "book_metadata")
    book_series_metadata = meta.dig("crossref", "book", "book_series_metadata")
    book_set_metadata = meta.dig("crossref", "book", "book_set_metadata")
    # fall back to an empty hash so the lookups below never run against nil
    bibliographic_metadata = meta.dig("crossref", "book", "content_item") || book_metadata || book_series_metadata || book_set_metadata || {}
    component_type = bibliographic_metadata.fetch("component_type", nil)
    resource_type = component_type ? "book-#{component_type}" : "book"
    # publisher = if book_metadata.present?
    #   book_metadata.dig("publisher", "publisher_name")
    # elsif book_series_metadata.present?
    #   book_series_metadata.dig("publisher", "publisher_name")
    # end
  when "conference"
    bibliographic_metadata = meta.dig("crossref", "conference", "conference_paper").to_h
  when "journal"
    journal_metadata = meta.dig("crossref", "journal", "journal_metadata") || {}
    journal_issue = meta.dig("crossref", "journal", "journal_issue") || {}
    journal_article = meta.dig("crossref", "journal", "journal_article") || {}
    bibliographic_metadata = journal_article.presence || journal_issue.presence || journal_metadata
    program_metadata = bibliographic_metadata.dig("crossmark", "custom_metadata", "program") || bibliographic_metadata.dig("program")
    resource_type = if journal_article.present?
                      "journal_article"
                    elsif journal_issue.present?
                      "journal_issue"
                    else
                      "journal"
                    end
  when "posted_content"
    bibliographic_metadata = meta.dig("crossref", "posted_content").to_h
    # NOTE(review): this fallback is a plain string while the crm_item
    # publisher above is a { "name" => ... } hash; confirm what callers expect.
    publisher ||= bibliographic_metadata.dig("institution", "institution_name")
  when "sa_component"
    bibliographic_metadata = meta.dig("crossref", "sa_component", "component_list", "component").to_h
    related_identifier = find_crm_item.call("relation")
    # the element text is stored under "__content__", as for the other crm_items
    journal_metadata = { "relatedIdentifier" => related_identifier.to_h.fetch("__content__", nil) }
  when "database"
    bibliographic_metadata = meta.dig("crossref", "database", "dataset").to_h
    resource_type = "dataset"
  when "report_paper"
    bibliographic_metadata = meta.dig("crossref", "report_paper", "report_paper_metadata").to_h
    resource_type = "report"
  when "peer_review"
    bibliographic_metadata = meta.dig("crossref", "peer_review").to_h
  when "dissertation"
    bibliographic_metadata = meta.dig("crossref", "dissertation").to_h
  end

  resource_type = (resource_type || model).to_s.underscore.camelcase.presence
  schema_org = Bolognese::Utils::CR_TO_SO_TRANSLATIONS[resource_type] || "ScholarlyArticle"
  types = {
    "resourceTypeGeneral" => Bolognese::Utils::CR_TO_DC_TRANSLATIONS[resource_type],
    "resourceType" => resource_type,
    "schemaOrg" => schema_org,
    "citeproc" => Bolognese::Utils::CR_TO_CP_TRANSLATIONS[resource_type] || "article-journal",
    "bibtex" => Bolognese::Utils::CR_TO_BIB_TRANSLATIONS[resource_type] || "misc",
    "ris" => Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[resource_type] || "JOUR"
  }.compact

  titles = if bibliographic_metadata.dig("titles").present?
             Array.wrap(bibliographic_metadata.dig("titles")).map do |r|
               if r.blank? || (r["title"].blank? && r["original_language_title"].blank?)
                 nil
               elsif r["title"].is_a?(String)
                 { "title" => sanitize(r["title"]) }
               elsif r["original_language_title"].present?
                 { "title" => sanitize(r.dig("original_language_title", "__content__")), "lang" => r.dig("original_language_title", "language") }
               else
                 { "title" => sanitize(r.dig("title", "__content__")) }.compact
               end
             end.compact
           else
             [{ "title" => ":(unav)" }]
           end

  date_published = crossref_date_published(bibliographic_metadata)
  if date_published.present?
    date_published = { "date" => date_published, "dateType" => "Issued" }
  else
    # fall back to the "created" crm_item, truncated to its first 10 characters
    created = find_crm_item.call("created")
    date_published = created.present? ? { "date" => created.fetch("__content__", "")[0..9], "dateType" => "Issued" } : nil
  end

  last_update = find_crm_item.call("last-update")
  date_updated = last_update.present? ? { "date" => last_update.fetch("__content__", nil), "dateType" => "Updated" } : nil

  deposited = find_crm_item.call("deposit-timestamp")
  date_registered = deposited.present? ? get_datetime_from_time(deposited.fetch("__content__", nil)) : nil

  # check that date is valid iso8601 date
  date_published = nil unless Date.edtf(date_published.to_h["date"]).present?
  date_updated = nil unless Date.edtf(date_updated.to_h["date"]).present?

  dates = [date_published, date_updated].compact
  publication_year = date_published.to_h.fetch("date", "")[0..3].presence

  state = meta.present? || read_options.present? ? "findable" : "not_found"

  related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) +
                        Array.wrap(crossref_references(bibliographic_metadata)) +
                        Array.wrap(crossref_has_translation(program_metadata)) +
                        Array.wrap(crossref_is_translation_of(program_metadata)) +
                        Array.wrap(crossref_is_related_material(program_metadata))

  container = if journal_metadata.present?
                issn = normalize_issn(journal_metadata.to_h.fetch("issn", nil))

                { "type" => "Journal",
                  "identifier" => issn,
                  "identifierType" => issn.present? ? "ISSN" : nil,
                  "title" => parse_attributes(journal_metadata.to_h["full_title"]),
                  "volume" => parse_attributes(journal_issue.dig("journal_volume", "volume")),
                  "issue" => parse_attributes(journal_issue.dig("issue")),
                  "firstPage" => bibliographic_metadata.dig("pages", "first_page") || parse_attributes(journal_article.to_h.dig("publisher_item", "item_number"), first: true),
                  "lastPage" => bibliographic_metadata.dig("pages", "last_page") }.compact

              # By using book_metadata, we can account for where resource_type is `BookChapter` and not assume it's a whole book
              elsif book_metadata.present?
                identifiers = crossref_alternate_identifiers(book_metadata)

                {
                  "type" => "Book",
                  "title" => book_metadata.dig("titles", "title"),
                  "firstPage" => bibliographic_metadata.dig("pages", "first_page"),
                  "lastPage" => bibliographic_metadata.dig("pages", "last_page"),
                  "identifiers" => identifiers,
                }.compact

              elsif book_series_metadata.to_h.fetch("series_metadata", nil).present?
                issn = normalize_issn(book_series_metadata.dig("series_metadata", "issn"))

                { "type" => "Book Series",
                  "identifier" => issn,
                  "identifierType" => issn.present? ? "ISSN" : nil,
                  "title" => book_series_metadata.dig("series_metadata", "titles", "title"),
                  "volume" => bibliographic_metadata.fetch("volume", nil) }.compact
              end

  id = normalize_doi(options[:doi] || options[:id] || bibliographic_metadata.dig("doi_data", "doi"))

  # Let sections override this in case of alternative metadata structures, such as book chapters, which
  # have their meta inside `content_item`, but the main book identifiers inside of `book_metadata`
  identifiers ||= crossref_alternate_identifiers(bibliographic_metadata)

  { "id" => id,
    "types" => types,
    "doi" => doi_from_url(id),
    "url" => parse_attributes(bibliographic_metadata.dig("doi_data", "resource"), first: true),
    "titles" => titles,
    "identifiers" => identifiers,
    "creators" => crossref_people(bibliographic_metadata, "author"),
    "contributors" => crossref_people(bibliographic_metadata, "editor") + crossref_people(bibliographic_metadata, "translator"),
    "funding_references" => crossref_funding_reference(program_metadata),
    "publisher" => publisher,
    "container" => container,
    "agency" => options[:ra] || "crossref",
    "related_identifiers" => related_identifiers,
    "dates" => dates,
    "publication_year" => publication_year,
    "descriptions" => crossref_description(bibliographic_metadata),
    "rights_list" => crossref_license(program_metadata),
    "version_info" => nil,
    "subjects" => nil,
    "language" => nil,
    "sizes" => nil,
    "schema_version" => nil,
    "state" => state,
    "date_registered" => date_registered
  }.merge(read_options)
end
|
|
215
|
+
|
|
216
|
+
# Collects alternate identifiers from a metadata hash.
#
# Sources are tried in order and the first one with content wins:
# publisher_item/item_number entries, a top-level item_number, then isbn.
# Returns an array of { "identifier", "identifierType" } hashes, or [].
def crossref_alternate_identifiers(bibliographic_metadata)
  publisher_items = bibliographic_metadata.dig("publisher_item", "item_number")
  if publisher_items.present?
    # plain strings carry no type attribute, so they default to "Publisher ID"
    to_identifier = lambda do |entry|
      case entry
      when String
        { "identifier" => entry,
          "identifierType" => "Publisher ID" }
      else
        { "identifier" => entry.fetch("__content__", nil),
          "identifierType" => entry.fetch("item_number_type", nil) || "Publisher ID" }
      end
    end
    return Array.wrap(publisher_items).map(&to_identifier)
  end

  item_number = bibliographic_metadata.fetch("item_number", nil)
  if parse_attributes(item_number).present?
    return [{ "identifier" => parse_attributes(item_number),
              "identifierType" => "Publisher ID" }]
  end

  isbn = bibliographic_metadata.fetch("isbn", nil)
  return [] unless parse_attributes(isbn).present?

  [{ "identifier" => parse_attributes(isbn, first: true),
     "identifierType" => "ISBN" }]
end
|
|
237
|
+
|
|
238
|
+
# Builds the descriptions list: every "abstract" entry becomes an "Abstract"
# description, followed by every "description" entry as an "Other" one.
# Text is run through parse_attributes and sanitize; nil values are dropped
# from each hash.
def crossref_description(bibliographic_metadata)
  abstracts = Array.wrap(bibliographic_metadata.dig("abstract")).map do |node|
    text = sanitize(parse_attributes(node, content: 'p'), new_line: true)
    { "descriptionType" => "Abstract", "description" => text }.compact
  end

  others = Array.wrap(bibliographic_metadata.dig("description")).map do |node|
    text = sanitize(parse_attributes(node), new_line: true)
    { "descriptionType" => "Other", "description" => text }.compact
  end

  abstracts + others
end
|
|
249
|
+
|
|
250
|
+
# Returns the unique rights entries for the program named "AccessIndicators".
# Each license_ref is converted with hsh_to_spdx; [] when no such program
# is present.
def crossref_license(program_metadata)
  indicators = Array.wrap(program_metadata).find { |program| program["name"] == "AccessIndicators" }
  return [] unless indicators.present?

  rights = Array.wrap(indicators["license_ref"]).map do |license_ref|
    hsh_to_spdx("rightsURI" => parse_attributes(license_ref))
  end
  rights.uniq
end
|
|
260
|
+
|
|
261
|
+
# Returns the people and organizations holding the given contributor role.
#
# @param bibliographic_metadata [Hash] metadata with "contributors" (or a
#   top-level "person_name")
# @param contributor_role [String] role to select, e.g. "author", "editor"
# @return [Array<Hash>] one hash per contributor. Entries with a surname,
#   given name or ORCID are "Personal", all others "Organizational". For
#   "author" a single ":(unav)" placeholder is returned when nobody matches.
def crossref_people(bibliographic_metadata, contributor_role)
  person = bibliographic_metadata.dig("contributors", "person_name") || bibliographic_metadata.dig("person_name")
  organization = Array.wrap(bibliographic_metadata.dig("contributors", "organization"))

  people = (Array.wrap(person) + organization).select { |c| c["contributor_role"] == contributor_role }
  people = [{ "name" => ":(unav)", "contributor_role" => "author" }] if contributor_role == "author" && people.empty?

  # authors become creators and therefore carry no contributorType
  contributor_type = contributor_role == "author" ? nil : CR_TO_DC_CONTRIBUTOR_TYPES[contributor_role]

  people.map do |contributor|
    orcid = normalize_orcid(parse_attributes(contributor["ORCID"]))
    name_identifiers = orcid.present? ? [{ "nameIdentifier" => orcid, "nameIdentifierScheme" => "ORCID", "schemeUri" => "https://orcid.org" }] : nil

    if contributor["surname"].present? || contributor["given_name"].present? || name_identifiers.present?
      given_name = parse_attributes(contributor["given_name"])
      family_name = parse_attributes(contributor["surname"])

      # Affiliations arrive as plain strings or as hashes; blank names are
      # dropped and values of any other type are ignored instead of raising.
      affiliation = Array.wrap(contributor["affiliation"]).map do |aff|
        case aff
        when Hash
          if aff.key?("__content__")
            aff["__content__"].to_s.strip.blank? ? nil : { "name" => aff["__content__"] }
          else
            aff
          end
        when String
          aff.strip.blank? ? nil : { "name" => aff }
        end
      end.compact

      { "nameType" => "Personal",
        "nameIdentifiers" => name_identifiers,
        "name" => [family_name, given_name].compact.join(", "),
        "givenName" => given_name,
        "familyName" => family_name,
        "affiliation" => affiliation.presence,
        "contributorType" => contributor_type }.compact
    else
      { "nameType" => "Organizational",
        "name" => contributor["name"] || contributor["__content__"],
        "contributorType" => contributor_type }.compact
    end
  end
end
|
|
299
|
+
|
|
300
|
+
# Extracts funding references from the program named "fundref".
#
# Every "fundgroup" assertion with nested assertions yields one hash holding
# funder name, funder identifier (validated by validate_funder_doi) and
# award number/URI. Groups without a funder name are skipped.
def crossref_funding_reference(program_metadata)
  fundref = Array.wrap(program_metadata).find { |program| program["name"] == "fundref" } || {}
  fund_groups = Array.wrap(fundref.fetch("assertion", [])).select do |assertion|
    assertion["name"] == "fundgroup" && assertion["assertion"].present?
  end

  references = fund_groups.map do |group|
    funder_identifier = nil
    funder_identifier_type = nil
    funder_name = nil
    award_title = nil
    award_number = nil
    award_uri = nil

    Array.wrap(group.fetch("assertion")).each do |assertion|
      case assertion.fetch("name")
      when "award_number"
        award_number = assertion.fetch("__content__", nil)
        award_uri = assertion.fetch("awardURI", nil)
      when "funder_name"
        funder_name = assertion.fetch("__content__", nil).to_s.squish.presence
        funder_identifier = validate_funder_doi(assertion.dig("assertion", "__content__"))
        funder_identifier_type = "Crossref Funder ID" if funder_identifier.present?
      end
    end

    # funder_name is required in DataCite
    next unless funder_name.present?

    { "funderIdentifier" => funder_identifier,
      "funderIdentifierType" => funder_identifier_type,
      "funderName" => funder_name,
      "awardTitle" => award_title,
      "awardNumber" => award_number,
      "awardUri" => award_uri }.compact
  end

  references.compact
end
|
|
334
|
+
|
|
335
|
+
# Returns the publication date built from the first "publication_date" entry,
# falling back to the first "acceptance_date" entry; nil when neither exists.
# Year, month and day are handed to get_date_from_parts.
def crossref_date_published(bibliographic_metadata)
  candidates = Array.wrap(bibliographic_metadata.fetch("publication_date", nil))
  candidates = Array.wrap(bibliographic_metadata.fetch("acceptance_date", nil)) if candidates.blank?
  return nil if candidates.blank?

  get_date_from_parts(candidates.first["year"], candidates.first["month"], candidates.first["day"])
end
|
|
344
|
+
|
|
345
|
+
# Builds an "IsPartOf" related identifier from container metadata.
#
# An "issn" entry yields an ISSN relation to a Collection; otherwise a
# "relatedIdentifier" entry yields a DOI relation. Returns nil when the
# metadata is blank or has neither entry.
def crossref_is_part_of(model_metadata)
  return nil unless model_metadata.present?

  issn = model_metadata.fetch("issn", nil)
  if issn.present?
    return { "relatedIdentifier" => normalize_issn(issn),
             "relationType" => "IsPartOf",
             "relatedIdentifierType" => "ISSN",
             "resourceTypeGeneral" => "Collection" }.compact
  end

  related = model_metadata.fetch("relatedIdentifier", nil)
  return nil unless related.present?

  { "relatedIdentifier" => related,
    "relationType" => "IsPartOf",
    "relatedIdentifierType" => "DOI" }.compact
end
|
|
359
|
+
|
|
360
|
+
# Turns the entries of citation_list/citation that carry a DOI into
# "References" related identifiers.
#
# Citations without a DOI, or whose DOI has no text after parse_attributes,
# are skipped. The collected list is passed through Array#unwrap.
def crossref_references(bibliographic_metadata)
  refs = bibliographic_metadata.dig("citation_list", "citation")
  Array.wrap(refs).map do |citation|
    next unless citation["doi"].present?

    doi = parse_attributes(citation["doi"])
    # skip the entry rather than calling downcase on nil
    next if doi.blank?

    { "relatedIdentifier" => doi.downcase,
      "relationType" => "References",
      "relatedIdentifierType" => "DOI" }
  end.compact.unwrap
end
|
|
372
|
+
|
|
373
|
+
# Collects "HasTranslation" related identifiers from the related_item entries
# whose intra_work_relation has relationship_type "hasTranslation".
# Only relations identified by DOI are kept; non-Hash program metadata
# contributes nothing. The list is passed through Array#unwrap.
def crossref_has_translation(program_metadata)
  related_items = program_metadata.is_a?(Hash) ? program_metadata.dig("related_item") : nil
  translations = Array.wrap(related_items).select do |item|
    item.dig("intra_work_relation", "relationship_type") == "hasTranslation"
  end

  relations = translations.map do |item|
    next unless item.dig("intra_work_relation", "identifier_type") == "doi"

    { "relatedIdentifier" => parse_attributes(item["intra_work_relation"]).downcase,
      "relationType" => "HasTranslation",
      "relatedIdentifierType" => "DOI" }.compact
  end

  relations.compact.unwrap
end
|
|
385
|
+
|
|
386
|
+
# Collects "IsTranslationOf" related identifiers from the related_item entries
# whose intra_work_relation has relationship_type "isTranslationOf".
# Only relations identified by DOI are kept; non-Hash program metadata
# contributes nothing. The list is passed through Array#unwrap.
def crossref_is_translation_of(program_metadata)
  related_items = program_metadata.is_a?(Hash) ? program_metadata.dig("related_item") : nil
  originals = Array.wrap(related_items).select do |item|
    item.dig("intra_work_relation", "relationship_type") == "isTranslationOf"
  end

  relations = originals.map do |item|
    next unless item.dig("intra_work_relation", "identifier_type") == "doi"

    { "relatedIdentifier" => parse_attributes(item["intra_work_relation"]).downcase,
      "relationType" => "IsTranslationOf",
      "relatedIdentifierType" => "DOI" }.compact
  end

  relations.compact.unwrap
end
|
|
398
|
+
|
|
399
|
+
# Collects related identifiers (relationType "Other") from the related_item
# entries whose inter_work_relation has relationship_type "isRelatedMaterial".
#
# The filter reads the same "inter_work_relation" key as the mapping below;
# the previous "interwork_relation" spelling never matched, so the method
# could not return any relation. Only relations identified by DOI are kept,
# and non-Hash program metadata contributes nothing. The list is passed
# through Array#unwrap.
def crossref_is_related_material(program_metadata)
  refs = program_metadata.dig("related_item") if program_metadata.is_a?(Hash)
  Array.wrap(refs).select { |a| a.dig("inter_work_relation", "relationship_type") == "isRelatedMaterial" }.map do |c|
    if c.dig("inter_work_relation", "identifier_type") == "doi"
      { "relatedIdentifier" => parse_attributes(c["inter_work_relation"]).downcase,
        "relationType" => "Other",
        "relatedIdentifierType" => "DOI" }.compact
    else
      nil
    end
  end.compact.unwrap
end
|
|
411
|
+
end
|
|
412
|
+
end
|
|
413
|
+
end
|