bolognese 0.2.2 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/build.yml +9 -0
- data/.github/workflows/changelog.yml +36 -0
- data/.github/workflows/ci.yml +22 -0
- data/.github/workflows/pull-request.yml +9 -0
- data/.github/workflows/release.yml +32 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +658 -0
- data/CHANGELOG.md +1864 -0
- data/CITATION +17 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +251 -99
- data/README.md +1026 -2
- data/Rakefile +1 -0
- data/bin/bolognese +5 -1
- data/bolognese.gemspec +33 -21
- data/lib/bolognese/array.rb +13 -0
- data/lib/bolognese/author_utils.rb +115 -39
- data/lib/bolognese/citeproc_extensions.rb +48 -0
- data/lib/bolognese/cli.rb +28 -15
- data/lib/bolognese/datacite_utils.rb +418 -0
- data/lib/bolognese/doi_utils.rb +45 -23
- data/lib/bolognese/metadata.rb +250 -18
- data/lib/bolognese/metadata_utils.rb +228 -0
- data/lib/bolognese/pubmed.rb +2 -0
- data/lib/bolognese/readers/bibtex_reader.rb +100 -0
- data/lib/bolognese/readers/citeproc_reader.rb +125 -0
- data/lib/bolognese/readers/codemeta_reader.rb +108 -0
- data/lib/bolognese/readers/crosscite_reader.rb +17 -0
- data/lib/bolognese/readers/crossref_reader.rb +413 -0
- data/lib/bolognese/readers/datacite_json_reader.rb +17 -0
- data/lib/bolognese/readers/datacite_reader.rb +338 -0
- data/lib/bolognese/readers/npm_reader.rb +115 -0
- data/lib/bolognese/readers/ris_reader.rb +114 -0
- data/lib/bolognese/readers/schema_org_reader.rb +264 -0
- data/lib/bolognese/string.rb +3 -1
- data/lib/bolognese/utils.rb +1403 -12
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/whitelist_scrubber.rb +47 -0
- data/lib/bolognese/writers/bibtex_writer.rb +32 -0
- data/lib/bolognese/writers/citation_writer.rb +14 -0
- data/lib/bolognese/writers/citeproc_writer.rb +11 -0
- data/lib/bolognese/writers/codemeta_writer.rb +29 -0
- data/lib/bolognese/writers/crosscite_writer.rb +11 -0
- data/lib/bolognese/writers/crossref_writer.rb +11 -0
- data/lib/bolognese/writers/csv_writer.rb +24 -0
- data/lib/bolognese/writers/datacite_json_writer.rb +13 -0
- data/lib/bolognese/writers/datacite_writer.rb +12 -0
- data/lib/bolognese/writers/jats_writer.rb +138 -0
- data/lib/bolognese/writers/rdf_xml_writer.rb +11 -0
- data/lib/bolognese/writers/ris_writer.rb +29 -0
- data/lib/bolognese/writers/schema_org_writer.rb +55 -0
- data/lib/bolognese/writers/turtle_writer.rb +11 -0
- data/lib/bolognese.rb +19 -4
- data/package.json +12 -0
- data/resources/2008/09/xsd.xsl +997 -0
- data/resources/datacite-contributorType-v4.xsd +35 -0
- data/resources/datacite-dateType-v4.xsd +25 -0
- data/resources/datacite-descriptionType-v4.xsd +19 -0
- data/resources/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/datacite-nameType-v4.xsd +10 -0
- data/resources/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/datacite-relationType-v4.xsd +49 -0
- data/resources/datacite-resourceType-v4.xsd +28 -0
- data/resources/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-2.1/include/datacite-contributorType-v1.1.xsd +22 -0
- data/resources/kernel-2.1/include/datacite-contributorType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-dateType-v1.1.xsd +31 -0
- data/resources/kernel-2.1/include/datacite-dateType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-descriptionType-v1.1.xsd +14 -0
- data/resources/kernel-2.1/include/datacite-descriptionType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v1.1.xsd +24 -0
- data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-relationType-v1.1.xsd +29 -0
- data/resources/kernel-2.1/include/datacite-relationType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-resourceType-v1.1.xsd +22 -0
- data/resources/kernel-2.1/include/datacite-resourceType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-titleType-v1.1.xsd +11 -0
- data/resources/kernel-2.1/include/datacite-titleType-v2.xsd +3 -0
- data/resources/kernel-2.1/metadata.xsd +315 -0
- data/resources/kernel-2.2/include/datacite-contributorType-v2.xsd +29 -0
- data/resources/kernel-2.2/include/datacite-dateType-v2.xsd +21 -0
- data/resources/kernel-2.2/include/datacite-descriptionType-v2.xsd +15 -0
- data/resources/kernel-2.2/include/datacite-relatedIdentifierType-v2.xsd +25 -0
- data/resources/kernel-2.2/include/datacite-relationType-v2.xsd +29 -0
- data/resources/kernel-2.2/include/datacite-resourceType-v2.xsd +23 -0
- data/resources/kernel-2.2/include/datacite-titleType-v2.xsd +10 -0
- data/resources/kernel-2.2/metadata.xsd +316 -0
- data/resources/kernel-3/include/datacite-contributorType-v3.1.xsd +35 -0
- data/resources/kernel-3/include/datacite-dateType-v3.xsd +21 -0
- data/resources/kernel-3/include/datacite-descriptionType-v3.xsd +17 -0
- data/resources/kernel-3/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
- data/resources/kernel-3/include/datacite-relationType-v3.1.xsd +38 -0
- data/resources/kernel-3/include/datacite-resourceType-v3.xsd +26 -0
- data/resources/kernel-3/include/datacite-titleType-v3.xsd +12 -0
- data/resources/kernel-3/include/xml.xsd +286 -0
- data/resources/kernel-3/metadata.xsd +380 -0
- data/resources/kernel-3.0/include/datacite-contributorType-v3.xsd +33 -0
- data/resources/kernel-3.0/include/datacite-dateType-v3.xsd +21 -0
- data/resources/kernel-3.0/include/datacite-descriptionType-v3.xsd +17 -0
- data/resources/kernel-3.0/include/datacite-relatedIdentifierType-v3.xsd +27 -0
- data/resources/kernel-3.0/include/datacite-relationType-v3.xsd +33 -0
- data/resources/kernel-3.0/include/datacite-resourceType-v3.xsd +26 -0
- data/resources/kernel-3.0/include/datacite-titleType-v3.xsd +12 -0
- data/resources/kernel-3.0/include/xml.xsd +286 -0
- data/resources/kernel-3.0/metadata.xsd +377 -0
- data/resources/kernel-3.1/include/datacite-contributorType-v3.1.xsd +35 -0
- data/resources/kernel-3.1/include/datacite-dateType-v3.xsd +21 -0
- data/resources/kernel-3.1/include/datacite-descriptionType-v3.xsd +17 -0
- data/resources/kernel-3.1/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
- data/resources/kernel-3.1/include/datacite-relationType-v3.1.xsd +38 -0
- data/resources/kernel-3.1/include/datacite-resourceType-v3.xsd +26 -0
- data/resources/kernel-3.1/include/datacite-titleType-v3.xsd +12 -0
- data/resources/kernel-3.1/include/xml.xsd +286 -0
- data/resources/kernel-3.1/metadata.xsd +380 -0
- data/resources/kernel-4/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +39 -0
- data/resources/kernel-4/include/datacite-relationType-v4.xsd +59 -0
- data/resources/kernel-4/include/datacite-resourceType-v4.xsd +52 -0
- data/resources/kernel-4/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4/include/xml.xsd +286 -0
- data/resources/kernel-4/metadata.xsd +715 -0
- data/resources/kernel-4.0/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.0/include/datacite-dateType-v4.xsd +21 -0
- data/resources/kernel-4.0/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.0/include/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/kernel-4.0/include/datacite-relatedIdentifierType-v4.xsd +32 -0
- data/resources/kernel-4.0/include/datacite-relationType-v4.xsd +39 -0
- data/resources/kernel-4.0/include/datacite-resourceType-v4.xsd +26 -0
- data/resources/kernel-4.0/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.0/include/xml.xsd +286 -0
- data/resources/kernel-4.0/metadata.xsd +470 -0
- data/resources/kernel-4.1/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.1/include/datacite-dateType-v4.1.xsd +23 -0
- data/resources/kernel-4.1/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.1/include/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/kernel-4.1/include/datacite-nameType-v4.1.xsd +10 -0
- data/resources/kernel-4.1/include/datacite-relatedIdentifierType-v4.xsd +32 -0
- data/resources/kernel-4.1/include/datacite-relationType-v4.1.xsd +46 -0
- data/resources/kernel-4.1/include/datacite-resourceType-v4.1.xsd +28 -0
- data/resources/kernel-4.1/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.1/include/xml.xsd +286 -0
- data/resources/kernel-4.1/metadata.xsd +483 -0
- data/resources/kernel-4.2/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.2/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.2/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.2/include/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/kernel-4.2/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.2/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.2/include/datacite-relationType-v4.xsd +49 -0
- data/resources/kernel-4.2/include/datacite-resourceType-v4.xsd +28 -0
- data/resources/kernel-4.2/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.2/include/xml.xsd +286 -0
- data/resources/kernel-4.2/metadata.xsd +479 -0
- data/resources/kernel-4.3/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.3/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.3/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.3/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.3/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.3/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.3/include/datacite-relationType-v4.xsd +49 -0
- data/resources/kernel-4.3/include/datacite-resourceType-v4.xsd +28 -0
- data/resources/kernel-4.3/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.3/include/xml.xsd +286 -0
- data/resources/kernel-4.3/metadata.xsd +515 -0
- data/resources/kernel-4.4/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.4/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.4/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.4/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.4/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.4/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.4/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.4/include/datacite-relationType-v4.xsd +51 -0
- data/resources/kernel-4.4/include/datacite-resourceType-v4.xsd +43 -0
- data/resources/kernel-4.4/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.4/include/xml.xsd +286 -0
- data/resources/kernel-4.4/metadata.xsd +707 -0
- data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
- data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
- data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.5/include/xml.xsd +286 -0
- data/resources/kernel-4.5/metadata.xsd +711 -0
- data/resources/kernel-4.6/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4.6/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4.6/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.6/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.6/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.6/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.6/include/datacite-relatedIdentifierType-v4.xsd +37 -0
- data/resources/kernel-4.6/include/datacite-relationType-v4.xsd +57 -0
- data/resources/kernel-4.6/include/datacite-resourceType-v4.xsd +49 -0
- data/resources/kernel-4.6/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.6/include/xml.xsd +286 -0
- data/resources/kernel-4.6/metadata.xsd +712 -0
- data/resources/kernel-4.7/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4.7/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4.7/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.7/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.7/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.7/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.7/include/datacite-relatedIdentifierType-v4.xsd +39 -0
- data/resources/kernel-4.7/include/datacite-relationType-v4.xsd +59 -0
- data/resources/kernel-4.7/include/datacite-resourceType-v4.xsd +52 -0
- data/resources/kernel-4.7/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.7/include/xml.xsd +286 -0
- data/resources/kernel-4.7/metadata.xsd +715 -0
- data/resources/oecd/dfg-mappings.json +1866 -0
- data/resources/oecd/for-mappings.json +1101 -0
- data/resources/oecd/fos-mappings.json +198 -0
- data/resources/schema_org/jsonldcontext.json +7477 -0
- data/resources/spdx/licenses.json +5297 -0
- data/resources/xml.xsd +286 -0
- metadata +478 -150
- data/.travis.yml +0 -23
- data/lib/bolognese/crossref.rb +0 -202
- data/lib/bolognese/datacite.rb +0 -157
- data/lib/bolognese/date_utils.rb +0 -48
- data/lib/bolognese/github.rb +0 -106
- data/lib/bolognese/orcid.rb +0 -24
- data/lib/bolognese/pid_utils.rb +0 -23
- data/spec/cli_spec.rb +0 -37
- data/spec/crossref_spec.rb +0 -113
- data/spec/datacite_spec.rb +0 -49
- data/spec/doi_spec.rb +0 -89
- data/spec/fixtures/crossref.xml +0 -742
- data/spec/fixtures/datacite.xml +0 -40
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_crossref.yml +0 -760
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_schema_org.yml +0 -1476
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_datacite.yml +0 -214
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_schema_org.yml +0 -384
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/crossref.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/datacite.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/medra.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/not_found.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_test.yml +0 -843
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_SICI_DOI.yml +0 -277
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_data_citation.yml +0 -15755
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/date_in_future.yml +0 -2691
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/journal_article.yml +0 -1857
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/not_found_error.yml +0 -93
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/posted_content.yml +0 -5715
- data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/BlogPosting.yml +0 -307
- data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/Dataset.yml +0 -343
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref_doi_not_url.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite_doi_http.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/orcid.yml +0 -44
- data/spec/metadata_spec.rb +0 -35
- data/spec/orcid_spec.rb +0 -23
- data/spec/spec_helper.rb +0 -88
- /data/{LICENSE → LICENSE.md} +0 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bolognese
|
|
4
|
+
module Readers
|
|
5
|
+
module SchemaOrgReader
|
|
6
|
+
# Lookup from a schema.org property name to the relation type that
# schema_org_related_identifier passes on to normalize_ids.
#
# "PredecessorOf" and "SuccessorOf" are the property names the
# schema_org_is_previous_version_of / schema_org_is_new_version_of readers
# actually pass in; without entries for them the relation type resolved
# to nil. The older "isPredecessor" / "isSuccessor" keys are kept so any
# existing lookups keep working.
SO_TO_DC_RELATION_TYPES = {
  "citation" => "References",
  "isBasedOn" => "IsSupplementedBy",
  "sameAs" => "IsIdenticalTo",
  "isPartOf" => "IsPartOf",
  "hasPart" => "HasPart",
  "isPredecessor" => "IsPreviousVersionOf",
  "PredecessorOf" => "IsPreviousVersionOf",
  "isSuccessor" => "IsNewVersionOf",
  "SuccessorOf" => "IsNewVersionOf",
  "workTranslation" => "HasTranslation",
  "translationOfWork" => "IsTranslationOf"
}.freeze
|
|
17
|
+
|
|
18
|
+
# Lookup from a schema.org property name found under "@reverse" to the
# relation type that schema_org_reverse_related_identifier passes on to
# normalize_ids. Frozen so the shared table cannot be mutated at runtime.
SO_TO_DC_REVERSE_RELATION_TYPES = {
  "citation" => "IsReferencedBy",
  "isBasedOn" => "IsSupplementTo",
  "sameAs" => "IsIdenticalTo",
  "isPartOf" => "HasPart",
  "hasPart" => "IsPartOf",
  "isPredecessor" => "IsNewVersionOf",
  "isSuccessor" => "IsPreviousVersionOf"
}.freeze
|
|
27
|
+
|
|
28
|
+
# Fetch the page behind +id+ and pull out its embedded schema.org JSON-LD.
#
# @param id [String, nil] identifier; it is passed through normalize_id and
#   the result is requested with Maremma.get.
# @param options [Hash] accepted for signature compatibility; not read here.
# @return [Hash] { "string" => <text of the first application/ld+json script
#   element, or nil when none is found> }; when +id+ is blank the hash is
#   { "string" => nil, "state" => "not_found" } and no request is made.
def get_schema_org(id: nil, **options)
  return { "string" => nil, "state" => "not_found" } if id.blank?

  response = Maremma.get(normalize_id(id))

  # The JSON-LD lives in a <script type="application/ld+json"> element.
  html = Nokogiri::HTML(response.body.fetch("data", nil))
  script = html.at('script[type="application/ld+json"]')

  { "string" => script.present? ? script.text : script }
end
|
|
41
|
+
|
|
42
|
+
# Parse a schema.org JSON-LD string into the reader's metadata hash.
#
# @param string [String, nil] JSON-LD document. When blank, the record is
#   built from an empty hash plus +options+.
# @param options [Hash] extra attributes. :doi takes precedence over the id
#   found in the document; every key except :doi, :id, :url, :sandbox,
#   :validate and :ra is merged over the parsed result.
# @return [Hash] the metadata hash ("id", "types", "creators", "titles", ...),
#   or { "errors" => errors } when jsonlint returns anything for +string+.
def read_schema_org(string: nil, **options)
  if string.present?
    errors = jsonlint(string)
    return { "errors" => errors } if errors.present?
  end

  read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))

  meta = string.present? ? Maremma.from_json(string) : {}

  # Plain-string identifiers are kept as URLs unless they are DOI URLs;
  # hash identifiers are read from their propertyID/value pair.
  identifiers = Array.wrap(meta.fetch("identifier", nil)).map do |r|
    r = normalize_id(r) if r.is_a?(String)
    if r.is_a?(String) && !r.start_with?("https://doi.org")
      { "identifierType" => "URL", "identifier" => r }
    elsif r.is_a?(Hash)
      { "identifierType" => get_identifier_type(r["propertyID"]), "identifier" => r["value"] }
    end
  end.compact.uniq

  id = normalize_id(options[:doi] || meta.fetch("@id", nil) || meta.fetch("identifier", nil))

  schema_org = meta.fetch("@type", nil) && meta.fetch("@type").camelcase
  resource_type_general = Bolognese::Utils::SO_TO_DC_TRANSLATIONS[schema_org]
  types = {
    "resourceTypeGeneral" => resource_type_general,
    "resourceType" => meta.fetch("additionalType", nil),
    "schemaOrg" => schema_org,
    "citeproc" => Bolognese::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || "article-journal",
    "bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
    "ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN"
  }.compact

  authors = meta.fetch("author", nil) || meta.fetch("creator", nil)
  # Authors should be objects; a plain string is not parsed and leaves
  # creators as nil.
  creators = get_authors(from_schema_org_creators(Array.wrap(authors))) unless authors.is_a?(String)

  contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
  translators = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("translator", nil))))
  translators.each { |translator| translator["contributorType"] = "Translator" }
  contributors += translators

  publisher = {
    "name" => parse_attributes(meta.fetch("publisher", nil), content: "name", first: true),
    "publisherIdentifier" => parse_attributes(meta.fetch("publisher", nil), content: "@id", first: true),
  }.compact if meta.fetch("publisher", nil).present?

  # Datasets take their container from includedInDataCatalog, everything
  # else from the "Periodical" key.
  ct = (schema_org == "Dataset") ? "includedInDataCatalog" : "Periodical"
  container = if meta.fetch(ct, nil).present?
    url = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "url", first: true)

    {
      "type" => (schema_org == "Dataset") ? "DataRepository" : "Periodical",
      "title" => parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "name", first: true),
      "identifier" => url,
      "identifierType" => url.present? ? "URL" : nil,
      "volume" => meta.fetch("volumeNumber", nil),
      "issue" => meta.fetch("issueNumber", nil),
      "firstPage" => meta.fetch("pageStart", nil),
      "lastPage" => meta.fetch("pageEnd", nil)
    }.compact
  else
    {}
  end

  related_identifiers = Array.wrap(schema_org_is_identical_to(meta)) +
    Array.wrap(schema_org_is_part_of(meta)) +
    Array.wrap(schema_org_has_part(meta)) +
    Array.wrap(schema_org_is_previous_version_of(meta)) +
    Array.wrap(schema_org_is_new_version_of(meta)) +
    Array.wrap(schema_org_references(meta)) +
    Array.wrap(schema_org_is_referenced_by(meta)) +
    Array.wrap(schema_org_is_supplement_to(meta)) +
    Array.wrap(schema_org_is_supplemented_by(meta)) +
    Array.wrap(schema_org_has_translation(meta)) +
    Array.wrap(schema_org_is_translation_of(meta))

  rights_list = Array.wrap(meta.fetch("license", nil)).compact.map do |rl|
    if rl.is_a?(String)
      # A bare string license is the license URL itself. Indexing it with
      # rl["name"] / rl["id"] would be String#[] substring matching.
      hsh_to_spdx("__content__" => nil, "rightsURI" => rl)
    else
      # Accept the JSON-LD "@id" as well as the plain "id" key.
      hsh_to_spdx("__content__" => rl["name"], "rightsURI" => rl["id"] || rl["@id"])
    end
  end

  funding_references = Array.wrap(meta.fetch("funder", nil)).compact.map do |fr|
    if fr["@id"].present?
      {
        "funderName" => fr["name"],
        "funderIdentifier" => fr["@id"],
        "funderIdentifierType" => fr["@id"].to_s.start_with?("https://doi.org/10.13039") ? "Crossref Funder ID" : "Other" }.compact
    else
      {
        "funderName" => fr["name"] }.compact
    end
  end

  dates = []
  dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if Date.edtf(meta.fetch("datePublished", nil)).present?
  dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if Date.edtf(meta.fetch("dateCreated", nil)).present?
  dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if Date.edtf(meta.fetch("dateModified", nil)).present?
  dates << { "date" => meta.fetch("temporalCoverage"), "dateType" => "Coverage" } if Date.edtf(meta.fetch("temporalCoverage", nil)).present?
  publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?

  # inLanguage is either a plain string or an object carrying
  # alternateName/name; any other shape yields nil instead of raising.
  in_language = meta.fetch("inLanguage", nil)
  language = case in_language
             when String then in_language
             when Hash then in_language["alternateName"] || in_language["name"]
             end

  state = (meta.present? || read_options.present?) ? "findable" : "not_found"

  geo_locations = Array.wrap(meta.fetch("spatialCoverage", nil)).map do |gl|
    if gl.dig("geo", "box")
      # The box string is split into south, west, north, east, in that order.
      s, w, n, e = gl.dig("geo", "box").split(" ", 4)
      geo_location_box = {
        "westBoundLongitude" => w,
        "eastBoundLongitude" => e,
        "southBoundLatitude" => s,
        "northBoundLatitude" => n,
      }.compact.presence
    else
      geo_location_box = nil
    end
    geo_location_point = { "pointLongitude" => gl.dig("geo", "longitude"), "pointLatitude" => gl.dig("geo", "latitude") }.compact.presence

    {
      "geoLocationPlace" => gl.dig("geo", "address"),
      "geoLocationPoint" => geo_location_point,
      "geoLocationBox" => geo_location_box
    }.compact
  end

  # handle keywords as array and as comma-separated string
  subjects = meta.fetch("keywords", nil)
  subjects = subjects.to_s.split(", ") if subjects.is_a?(String)
  subjects = Array.wrap(subjects).flat_map { |subject| name_to_subject(subject) }

  { "id" => id,
    "types" => types,
    "doi" => validate_doi(id),
    "identifiers" => identifiers,
    "url" => normalize_id(meta.fetch("url", nil)),
    "content_url" => Array.wrap(meta.fetch("contentUrl", nil)),
    # schema.org spells this property "contentSize".
    "sizes" => Array.wrap(meta.fetch("contentSize", nil)).presence,
    "formats" => Array.wrap(meta.fetch("encodingFormat", nil) || meta.fetch("fileFormat", nil)),
    "titles" => meta.fetch("name", nil).present? ? [{ "title" => meta.fetch("name", nil) }] : nil,
    "creators" => creators,
    "contributors" => contributors,
    "publisher" => publisher,
    "agency" => parse_attributes(meta.fetch("provider", nil), content: "name", first: true),
    "container" => container,
    "related_identifiers" => related_identifiers,
    "publication_year" => publication_year,
    "dates" => dates,
    "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : nil,
    "rights_list" => rights_list,
    "version_info" => meta.fetch("version", nil).to_s.presence,
    "subjects" => subjects,
    "language" => language,
    "state" => state,
    "schema_version" => meta.fetch("schemaVersion", nil).to_s.presence,
    "funding_references" => funding_references,
    "geo_locations" => geo_locations
  }.merge(read_options)
end
|
|
209
|
+
|
|
210
|
+
# Normalize the ids stored under the +relation_type+ property of +meta+,
# tagging them with the relation type SO_TO_DC_RELATION_TYPES holds for
# that property (nil when the property has no entry).
def schema_org_related_identifier(meta, relation_type: nil)
  raw_ids = meta.fetch(relation_type, nil)
  mapped_relation = SO_TO_DC_RELATION_TYPES[relation_type]

  normalize_ids(ids: raw_ids, relation_type: mapped_relation)
end
|
|
213
|
+
|
|
214
|
+
# Normalize the ids stored under meta["@reverse"][relation_type], tagging
# them with the relation type SO_TO_DC_REVERSE_RELATION_TYPES holds for
# that property (nil when the property has no entry).
def schema_org_reverse_related_identifier(meta, relation_type: nil)
  raw_ids = meta.dig("@reverse", relation_type)
  mapped_relation = SO_TO_DC_REVERSE_RELATION_TYPES[relation_type]

  normalize_ids(ids: raw_ids, relation_type: mapped_relation)
end
|
|
217
|
+
|
|
218
|
+
# Related identifiers read from the "sameAs" property of +meta+.
def schema_org_is_identical_to(meta)
  property = "sameAs"
  schema_org_related_identifier(meta, relation_type: property)
end
|
|
221
|
+
|
|
222
|
+
# Related identifiers read from the "isPartOf" property of +meta+.
def schema_org_is_part_of(meta)
  property = "isPartOf"
  schema_org_related_identifier(meta, relation_type: property)
end
|
|
225
|
+
|
|
226
|
+
# Related identifiers read from the "hasPart" property of +meta+.
def schema_org_has_part(meta)
  property = "hasPart"
  schema_org_related_identifier(meta, relation_type: property)
end
|
|
229
|
+
|
|
230
|
+
# Related identifiers read from the "PredecessorOf" property of +meta+.
# NOTE(review): the relation type only resolves if SO_TO_DC_RELATION_TYPES
# has a "PredecessorOf" entry — confirm the table and this key agree.
def schema_org_is_previous_version_of(meta)
  property = "PredecessorOf"
  schema_org_related_identifier(meta, relation_type: property)
end
|
|
233
|
+
|
|
234
|
+
# Related identifiers read from the "SuccessorOf" property of +meta+.
# NOTE(review): the relation type only resolves if SO_TO_DC_RELATION_TYPES
# has a "SuccessorOf" entry — confirm the table and this key agree.
def schema_org_is_new_version_of(meta)
  property = "SuccessorOf"
  schema_org_related_identifier(meta, relation_type: property)
end
|
|
237
|
+
|
|
238
|
+
# Related identifiers read from the "citation" property of +meta+.
def schema_org_references(meta)
  property = "citation"
  schema_org_related_identifier(meta, relation_type: property)
end
|
|
241
|
+
|
|
242
|
+
# Related identifiers read from the "citation" entry under "@reverse".
def schema_org_is_referenced_by(meta)
  property = "citation"
  schema_org_reverse_related_identifier(meta, relation_type: property)
end
|
|
245
|
+
|
|
246
|
+
# Related identifiers read from the "isBasedOn" entry under "@reverse".
def schema_org_is_supplement_to(meta)
  property = "isBasedOn"
  schema_org_reverse_related_identifier(meta, relation_type: property)
end
|
|
249
|
+
|
|
250
|
+
# Related identifiers read from the "isBasedOn" property of +meta+.
def schema_org_is_supplemented_by(meta)
  property = "isBasedOn"
  schema_org_related_identifier(meta, relation_type: property)
end
|
|
253
|
+
|
|
254
|
+
# Related identifiers read from the "workTranslation" property of +meta+.
def schema_org_has_translation(meta)
  property = "workTranslation"
  schema_org_related_identifier(meta, relation_type: property)
end
|
|
257
|
+
|
|
258
|
+
# Related identifiers read from the "translationOfWork" property of +meta+.
def schema_org_is_translation_of(meta)
  property = "translationOfWork"
  schema_org_related_identifier(meta, relation_type: property)
end
|
|
261
|
+
|
|
262
|
+
end
|
|
263
|
+
end
|
|
264
|
+
end
|