bolognese 0.2.2 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/build.yml +9 -0
- data/.github/workflows/changelog.yml +36 -0
- data/.github/workflows/ci.yml +22 -0
- data/.github/workflows/pull-request.yml +9 -0
- data/.github/workflows/release.yml +32 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +658 -0
- data/CHANGELOG.md +1864 -0
- data/CITATION +17 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +251 -99
- data/README.md +1026 -2
- data/Rakefile +1 -0
- data/bin/bolognese +5 -1
- data/bolognese.gemspec +33 -21
- data/lib/bolognese/array.rb +13 -0
- data/lib/bolognese/author_utils.rb +115 -39
- data/lib/bolognese/citeproc_extensions.rb +48 -0
- data/lib/bolognese/cli.rb +28 -15
- data/lib/bolognese/datacite_utils.rb +418 -0
- data/lib/bolognese/doi_utils.rb +45 -23
- data/lib/bolognese/metadata.rb +250 -18
- data/lib/bolognese/metadata_utils.rb +228 -0
- data/lib/bolognese/pubmed.rb +2 -0
- data/lib/bolognese/readers/bibtex_reader.rb +100 -0
- data/lib/bolognese/readers/citeproc_reader.rb +125 -0
- data/lib/bolognese/readers/codemeta_reader.rb +108 -0
- data/lib/bolognese/readers/crosscite_reader.rb +17 -0
- data/lib/bolognese/readers/crossref_reader.rb +413 -0
- data/lib/bolognese/readers/datacite_json_reader.rb +17 -0
- data/lib/bolognese/readers/datacite_reader.rb +338 -0
- data/lib/bolognese/readers/npm_reader.rb +115 -0
- data/lib/bolognese/readers/ris_reader.rb +114 -0
- data/lib/bolognese/readers/schema_org_reader.rb +264 -0
- data/lib/bolognese/string.rb +3 -1
- data/lib/bolognese/utils.rb +1403 -12
- data/lib/bolognese/version.rb +1 -1
- data/lib/bolognese/whitelist_scrubber.rb +47 -0
- data/lib/bolognese/writers/bibtex_writer.rb +32 -0
- data/lib/bolognese/writers/citation_writer.rb +14 -0
- data/lib/bolognese/writers/citeproc_writer.rb +11 -0
- data/lib/bolognese/writers/codemeta_writer.rb +29 -0
- data/lib/bolognese/writers/crosscite_writer.rb +11 -0
- data/lib/bolognese/writers/crossref_writer.rb +11 -0
- data/lib/bolognese/writers/csv_writer.rb +24 -0
- data/lib/bolognese/writers/datacite_json_writer.rb +13 -0
- data/lib/bolognese/writers/datacite_writer.rb +12 -0
- data/lib/bolognese/writers/jats_writer.rb +138 -0
- data/lib/bolognese/writers/rdf_xml_writer.rb +11 -0
- data/lib/bolognese/writers/ris_writer.rb +29 -0
- data/lib/bolognese/writers/schema_org_writer.rb +55 -0
- data/lib/bolognese/writers/turtle_writer.rb +11 -0
- data/lib/bolognese.rb +19 -4
- data/package.json +12 -0
- data/resources/2008/09/xsd.xsl +997 -0
- data/resources/datacite-contributorType-v4.xsd +35 -0
- data/resources/datacite-dateType-v4.xsd +25 -0
- data/resources/datacite-descriptionType-v4.xsd +19 -0
- data/resources/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/datacite-nameType-v4.xsd +10 -0
- data/resources/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/datacite-relationType-v4.xsd +49 -0
- data/resources/datacite-resourceType-v4.xsd +28 -0
- data/resources/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-2.1/include/datacite-contributorType-v1.1.xsd +22 -0
- data/resources/kernel-2.1/include/datacite-contributorType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-dateType-v1.1.xsd +31 -0
- data/resources/kernel-2.1/include/datacite-dateType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-descriptionType-v1.1.xsd +14 -0
- data/resources/kernel-2.1/include/datacite-descriptionType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v1.1.xsd +24 -0
- data/resources/kernel-2.1/include/datacite-relatedIdentifierType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-relationType-v1.1.xsd +29 -0
- data/resources/kernel-2.1/include/datacite-relationType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-resourceType-v1.1.xsd +22 -0
- data/resources/kernel-2.1/include/datacite-resourceType-v2.xsd +3 -0
- data/resources/kernel-2.1/include/datacite-titleType-v1.1.xsd +11 -0
- data/resources/kernel-2.1/include/datacite-titleType-v2.xsd +3 -0
- data/resources/kernel-2.1/metadata.xsd +315 -0
- data/resources/kernel-2.2/include/datacite-contributorType-v2.xsd +29 -0
- data/resources/kernel-2.2/include/datacite-dateType-v2.xsd +21 -0
- data/resources/kernel-2.2/include/datacite-descriptionType-v2.xsd +15 -0
- data/resources/kernel-2.2/include/datacite-relatedIdentifierType-v2.xsd +25 -0
- data/resources/kernel-2.2/include/datacite-relationType-v2.xsd +29 -0
- data/resources/kernel-2.2/include/datacite-resourceType-v2.xsd +23 -0
- data/resources/kernel-2.2/include/datacite-titleType-v2.xsd +10 -0
- data/resources/kernel-2.2/metadata.xsd +316 -0
- data/resources/kernel-3/include/datacite-contributorType-v3.1.xsd +35 -0
- data/resources/kernel-3/include/datacite-dateType-v3.xsd +21 -0
- data/resources/kernel-3/include/datacite-descriptionType-v3.xsd +17 -0
- data/resources/kernel-3/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
- data/resources/kernel-3/include/datacite-relationType-v3.1.xsd +38 -0
- data/resources/kernel-3/include/datacite-resourceType-v3.xsd +26 -0
- data/resources/kernel-3/include/datacite-titleType-v3.xsd +12 -0
- data/resources/kernel-3/include/xml.xsd +286 -0
- data/resources/kernel-3/metadata.xsd +380 -0
- data/resources/kernel-3.0/include/datacite-contributorType-v3.xsd +33 -0
- data/resources/kernel-3.0/include/datacite-dateType-v3.xsd +21 -0
- data/resources/kernel-3.0/include/datacite-descriptionType-v3.xsd +17 -0
- data/resources/kernel-3.0/include/datacite-relatedIdentifierType-v3.xsd +27 -0
- data/resources/kernel-3.0/include/datacite-relationType-v3.xsd +33 -0
- data/resources/kernel-3.0/include/datacite-resourceType-v3.xsd +26 -0
- data/resources/kernel-3.0/include/datacite-titleType-v3.xsd +12 -0
- data/resources/kernel-3.0/include/xml.xsd +286 -0
- data/resources/kernel-3.0/metadata.xsd +377 -0
- data/resources/kernel-3.1/include/datacite-contributorType-v3.1.xsd +35 -0
- data/resources/kernel-3.1/include/datacite-dateType-v3.xsd +21 -0
- data/resources/kernel-3.1/include/datacite-descriptionType-v3.xsd +17 -0
- data/resources/kernel-3.1/include/datacite-relatedIdentifierType-v3.1.xsd +30 -0
- data/resources/kernel-3.1/include/datacite-relationType-v3.1.xsd +38 -0
- data/resources/kernel-3.1/include/datacite-resourceType-v3.xsd +26 -0
- data/resources/kernel-3.1/include/datacite-titleType-v3.xsd +12 -0
- data/resources/kernel-3.1/include/xml.xsd +286 -0
- data/resources/kernel-3.1/metadata.xsd +380 -0
- data/resources/kernel-4/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4/include/datacite-relatedIdentifierType-v4.xsd +39 -0
- data/resources/kernel-4/include/datacite-relationType-v4.xsd +59 -0
- data/resources/kernel-4/include/datacite-resourceType-v4.xsd +52 -0
- data/resources/kernel-4/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4/include/xml.xsd +286 -0
- data/resources/kernel-4/metadata.xsd +715 -0
- data/resources/kernel-4.0/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.0/include/datacite-dateType-v4.xsd +21 -0
- data/resources/kernel-4.0/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.0/include/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/kernel-4.0/include/datacite-relatedIdentifierType-v4.xsd +32 -0
- data/resources/kernel-4.0/include/datacite-relationType-v4.xsd +39 -0
- data/resources/kernel-4.0/include/datacite-resourceType-v4.xsd +26 -0
- data/resources/kernel-4.0/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.0/include/xml.xsd +286 -0
- data/resources/kernel-4.0/metadata.xsd +470 -0
- data/resources/kernel-4.1/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.1/include/datacite-dateType-v4.1.xsd +23 -0
- data/resources/kernel-4.1/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.1/include/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/kernel-4.1/include/datacite-nameType-v4.1.xsd +10 -0
- data/resources/kernel-4.1/include/datacite-relatedIdentifierType-v4.xsd +32 -0
- data/resources/kernel-4.1/include/datacite-relationType-v4.1.xsd +46 -0
- data/resources/kernel-4.1/include/datacite-resourceType-v4.1.xsd +28 -0
- data/resources/kernel-4.1/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.1/include/xml.xsd +286 -0
- data/resources/kernel-4.1/metadata.xsd +483 -0
- data/resources/kernel-4.2/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.2/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.2/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.2/include/datacite-funderIdentifierType-v4.xsd +15 -0
- data/resources/kernel-4.2/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.2/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.2/include/datacite-relationType-v4.xsd +49 -0
- data/resources/kernel-4.2/include/datacite-resourceType-v4.xsd +28 -0
- data/resources/kernel-4.2/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.2/include/xml.xsd +286 -0
- data/resources/kernel-4.2/metadata.xsd +479 -0
- data/resources/kernel-4.3/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.3/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.3/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.3/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.3/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.3/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.3/include/datacite-relationType-v4.xsd +49 -0
- data/resources/kernel-4.3/include/datacite-resourceType-v4.xsd +28 -0
- data/resources/kernel-4.3/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.3/include/xml.xsd +286 -0
- data/resources/kernel-4.3/metadata.xsd +515 -0
- data/resources/kernel-4.4/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.4/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.4/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.4/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.4/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.4/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.4/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.4/include/datacite-relationType-v4.xsd +51 -0
- data/resources/kernel-4.4/include/datacite-resourceType-v4.xsd +43 -0
- data/resources/kernel-4.4/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.4/include/xml.xsd +286 -0
- data/resources/kernel-4.4/metadata.xsd +707 -0
- data/resources/kernel-4.5/include/datacite-contributorType-v4.xsd +35 -0
- data/resources/kernel-4.5/include/datacite-dateType-v4.xsd +25 -0
- data/resources/kernel-4.5/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.5/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.5/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.5/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.5/include/datacite-relatedIdentifierType-v4.xsd +34 -0
- data/resources/kernel-4.5/include/datacite-relationType-v4.xsd +53 -0
- data/resources/kernel-4.5/include/datacite-resourceType-v4.xsd +45 -0
- data/resources/kernel-4.5/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.5/include/xml.xsd +286 -0
- data/resources/kernel-4.5/metadata.xsd +711 -0
- data/resources/kernel-4.6/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4.6/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4.6/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.6/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.6/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.6/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.6/include/datacite-relatedIdentifierType-v4.xsd +37 -0
- data/resources/kernel-4.6/include/datacite-relationType-v4.xsd +57 -0
- data/resources/kernel-4.6/include/datacite-resourceType-v4.xsd +49 -0
- data/resources/kernel-4.6/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.6/include/xml.xsd +286 -0
- data/resources/kernel-4.6/metadata.xsd +712 -0
- data/resources/kernel-4.7/include/datacite-contributorType-v4.xsd +37 -0
- data/resources/kernel-4.7/include/datacite-dateType-v4.xsd +27 -0
- data/resources/kernel-4.7/include/datacite-descriptionType-v4.xsd +19 -0
- data/resources/kernel-4.7/include/datacite-funderIdentifierType-v4.xsd +16 -0
- data/resources/kernel-4.7/include/datacite-nameType-v4.xsd +10 -0
- data/resources/kernel-4.7/include/datacite-numberType-v4.xsd +12 -0
- data/resources/kernel-4.7/include/datacite-relatedIdentifierType-v4.xsd +39 -0
- data/resources/kernel-4.7/include/datacite-relationType-v4.xsd +59 -0
- data/resources/kernel-4.7/include/datacite-resourceType-v4.xsd +52 -0
- data/resources/kernel-4.7/include/datacite-titleType-v4.xsd +14 -0
- data/resources/kernel-4.7/include/xml.xsd +286 -0
- data/resources/kernel-4.7/metadata.xsd +715 -0
- data/resources/oecd/dfg-mappings.json +1866 -0
- data/resources/oecd/for-mappings.json +1101 -0
- data/resources/oecd/fos-mappings.json +198 -0
- data/resources/schema_org/jsonldcontext.json +7477 -0
- data/resources/spdx/licenses.json +5297 -0
- data/resources/xml.xsd +286 -0
- metadata +478 -150
- data/.travis.yml +0 -23
- data/lib/bolognese/crossref.rb +0 -202
- data/lib/bolognese/datacite.rb +0 -157
- data/lib/bolognese/date_utils.rb +0 -48
- data/lib/bolognese/github.rb +0 -106
- data/lib/bolognese/orcid.rb +0 -24
- data/lib/bolognese/pid_utils.rb +0 -23
- data/spec/cli_spec.rb +0 -37
- data/spec/crossref_spec.rb +0 -113
- data/spec/datacite_spec.rb +0 -49
- data/spec/doi_spec.rb +0 -89
- data/spec/fixtures/crossref.xml +0 -742
- data/spec/fixtures/datacite.xml +0 -40
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_crossref.yml +0 -760
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_schema_org.yml +0 -1476
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_datacite.yml +0 -214
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_schema_org.yml +0 -384
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/crossref.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/datacite.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/medra.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/doi_registration_agency/not_found.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_test.yml +0 -843
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_SICI_DOI.yml +0 -277
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/DOI_with_data_citation.yml +0 -15755
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/date_in_future.yml +0 -2691
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/journal_article.yml +0 -1857
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/not_found_error.yml +0 -93
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata/posted_content.yml +0 -5715
- data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/BlogPosting.yml +0 -307
- data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata/Dataset.yml +0 -343
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/crossref_doi_not_url.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/datacite_doi_http.yml +0 -44
- data/spec/fixtures/vcr_cassettes/Bolognese_Metadata/find_PID_provider/orcid.yml +0 -44
- data/spec/metadata_spec.rb +0 -35
- data/spec/orcid_spec.rb +0 -23
- data/spec/spec_helper.rb +0 -88
- /data/{LICENSE → LICENSE.md} +0 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bolognese
|
|
4
|
+
module Readers
|
|
5
|
+
module DataciteJsonReader
|
|
6
|
+
# Parse a DataCite JSON document into a hash whose top-level keys are underscored.
#
# string  - JSON text; a blank value produces an empty hash.
# options - accepted but not used by this method.
#
# Returns { "errors" => ... } when jsonlint reports problems, otherwise the
# parsed hash (with "publisher" passed through normalize_publisher when present).
def read_datacite_json(string: nil, **options)
  lint_errors = jsonlint(string)
  return { "errors" => lint_errors } if lint_errors.present?

  parsed = {}
  parsed = Maremma.from_json(string).transform_keys!(&:underscore) if string.present?

  # only rewrite the publisher when the document actually carries one
  if parsed.fetch("publisher", nil).present?
    parsed["publisher"] = normalize_publisher(parsed["publisher"])
  end

  parsed
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'base64'
|
|
4
|
+
|
|
5
|
+
module Bolognese
|
|
6
|
+
module Readers
|
|
7
|
+
module DataciteReader
|
|
8
|
+
# Look up a DOI via the URL built by doi_api_url and return its metadata XML
# together with registration details taken from the API response.
#
# id      - identifier to look up; blank returns a "not_found" result.
# options - forwarded to doi_api_url.
#
# Returns a hash with "string" (re-indented XML or nil), "url", "state",
# "date_registered", "date_updated", "provider_id", "client_id" and
# "content_url"; or { "string" => nil, "state" => "not_found" } when the id
# is blank or the response carries no attributes.
def get_datacite(id: nil, **options)
  return { "string" => nil, "state" => "not_found" } unless id.present?

  response = Maremma.get(doi_api_url(id, options))
  attributes = response.body.dig("data", "attributes")
  return { "string" => nil, "state" => "not_found" } unless attributes.present?

  # the API delivers the XML Base64-encoded
  encoded_xml = attributes.fetch('xml', nil)
  string = encoded_xml.present? ? Base64.decode64(encoded_xml) : encoded_xml

  if string.present?
    doc = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)

    # drop whitespace-only text nodes, trim the remaining ones
    doc.xpath("//text()").each do |text_node|
      if text_node.content !~ /\S/
        text_node.remove
      else
        text_node.content = text_node.content.strip
      end
    end
    string = doc.to_xml(:indent => 2)
  end

  # client and provider ids come from the "included" section of the response
  client = Array.wrap(response.body.fetch("included", nil)).find { |item| item["type"] == "clients" }
  client_hash = client.to_h
  client_id = client_hash.fetch("id", nil)
  provider_rel = Array.wrap(client_hash.fetch("relationships", nil)).find { |rel| rel["provider"].present? }
  provider_id = provider_rel.to_h.dig("provider", "data", "id")

  # prefer the explicit contentUrl attribute; otherwise collect media URLs
  content_url = attributes.fetch("contentUrl", nil) ||
    Array.wrap(response.body.fetch("included", nil))
      .select { |item| item["type"] == "media" }
      .map { |item| item.dig("attributes", "url") }
      .compact

  {
    "string" => string,
    "url" => attributes.fetch("url", nil),
    "state" => attributes.fetch("state", nil),
    "date_registered" => attributes.fetch("registered", nil),
    "date_updated" => attributes.fetch("updated", nil),
    "provider_id" => provider_id,
    "client_id" => client_id,
    "content_url" => content_url
  }
end
|
|
50
|
+
|
|
51
|
+
# Convert a DataCite XML document into the reader's metadata hash.
#
# string  - DataCite XML text.
# options - :doi, :id, :url, :sandbox, :validate and :ra steer parsing and are
#           not merged into the result; every other option is merged over the
#           parsed values at the end.
#
# Returns the metadata hash, or { "errors" => ... } when :validate is set and
# datacite_errors reports problems.
def read_datacite(string: nil, **options)
  read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))

  doc = Nokogiri::XML(string, nil, 'UTF-8', &:noblanks)
  # with extra read options the kernel-4 schema is used unconditionally;
  # otherwise take the kernel namespace declared in the document, if any
  schema_version =
    if read_options.present?
      "http://datacite.org/schema/kernel-4"
    else
      kernel_ns = doc.collect_namespaces.find { |_prefix, uri| uri.start_with?("http://datacite.org/schema/kernel") }
      Array.wrap(kernel_ns).last || "http://datacite.org/schema/kernel-4"
    end
  doc.remove_namespaces!
  string = doc.to_xml(:indent => 2)

  meta = Maremma.from_xml(string).to_h.fetch("resource", {})

  # validate only when option is set, as this step is expensive and
  # not needed if XML comes from DataCite MDS
  if options[:validate]
    errors = datacite_errors(xml: string, schema_version: schema_version)
    return { "errors" => errors } if errors.present?
  end

  # an explicit :doi option wins over the identifier in the XML, then :id
  id = normalize_doi(options[:doi] || meta.dig("identifier", "__content__") || options[:id], sandbox: options[:sandbox])

  identifiers = Array.wrap(meta.dig("alternateIdentifiers", "alternateIdentifier")).map do |alt|
    next unless alt["__content__"].present?

    { "identifierType" => get_identifier_type(alt["alternateIdentifierType"]), "identifier" => alt["__content__"] }
  end.compact

  resource_type_general = meta.dig("resourceType", "resourceTypeGeneral")
  resource_type = meta.dig("resourceType", "__content__")
  cr_key = resource_type.to_s.underscore.camelcase
  dc_key = resource_type_general.to_s.dasherize
  schema_org = Bolognese::Utils::CR_TO_SO_TRANSLATIONS[cr_key] || Bolognese::Utils::DC_TO_SO_TRANSLATIONS[dc_key] || "CreativeWork"
  types = {
    "resourceTypeGeneral" => resource_type_general,
    "resourceType" => resource_type,
    "schemaOrg" => schema_org,
    "citeproc" => Bolognese::Utils::CR_TO_CP_TRANSLATIONS[cr_key] || Bolognese::Utils::SO_TO_CP_TRANSLATIONS[schema_org] || "article",
    "bibtex" => Bolognese::Utils::CR_TO_BIB_TRANSLATIONS[cr_key] || Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
    "ris" => Bolognese::Utils::CR_TO_RIS_TRANSLATIONS[cr_key] || Bolognese::Utils::DC_TO_RIS_TRANSLATIONS[dc_key] || "GEN"
  }.compact

  titles = get_titles(meta)

  # publisher may be plain text or an element with attributes; first usable entry wins
  publisher = Array.wrap(meta.dig("publisher")).map do |pub|
    next if pub.blank?

    case pub
    when String
      { "name" => pub.strip }
    when Hash
      {
        "name" => pub["__content__"].presence&.strip,
        "publisherIdentifier" => pub["publisherIdentifierScheme"] == "ROR" ? normalize_ror(pub["publisherIdentifier"]) : pub["publisherIdentifier"],
        "publisherIdentifierScheme" => pub["publisherIdentifierScheme"],
        "schemeUri" => pub["schemeURI"],
        "lang" => pub["lang"]
      }.compact
    end
  end.compact.first

  descriptions = Array.wrap(meta.dig("descriptions", "description")).map do |desc|
    next if desc.blank?

    case desc
    when String
      { "description" => sanitize(desc, new_line: true), "descriptionType" => "Abstract" }
    when Hash
      { "description" => sanitize(desc["__content__"], new_line: true), "descriptionType" => desc["descriptionType"], "lang" => desc["lang"] }.compact
    end
  end.compact

  rights_list = Array.wrap(meta.dig("rightsList", "rights")).map do |rights|
    next if rights.blank?

    case rights
    when String then name_to_spdx(rights)
    when Hash then hsh_to_spdx(rights)
    end
  end.compact

  # each subject expands to a list of entries; duplicates are dropped afterwards
  subjects = Array.wrap(meta.dig("subjects", "subject")).each_with_object([]) do |subject, collected|
    if subject.is_a?(String)
      collected.concat(name_to_subject(subject))
    elsif subject.is_a?(Hash)
      collected.concat(hsh_to_subject(subject))
    end
  end.uniq

  dates = Array.wrap(meta.dig("dates", "date")).map do |entry|
    next unless entry.is_a?(Hash)

    date = sanitize(entry["__content__"]).presence
    next unless date

    {
      "date" => date,
      "dateType" => parse_attributes(entry, content: "dateType"),
      "dateInformation" => parse_attributes(entry, content: "dateInformation")
    }.compact
  end.compact

  sizes = Array.wrap(meta.dig("sizes", "size")).map do |size|
    next if size.blank?

    case size
    when String then sanitize(size).presence
    when Hash then sanitize(size["__content__"]).presence
    end
  end.compact

  sanitized_formats = Array.wrap(meta.dig("formats", "format")).map do |format|
    next if format.blank?

    case format
    when String then sanitize(format).presence
    when Hash then sanitize(format["__content__"]).presence
    end
  end.compact
  # collapse internal whitespace and drop entries that end up empty
  formats = sanitized_formats.map { |f| f.to_s.squish.presence }.compact

  funding_references = Array.wrap(meta.dig("fundingReferences", "fundingReference")).compact.map do |fr|
    scheme_uri = parse_attributes(fr["funderIdentifier"], content: "schemeURI")
    funder_identifier = parse_attributes(fr["funderIdentifier"])
    funder_identifier_type = parse_attributes(fr["funderIdentifier"], content: "funderIdentifierType")

    funder_identifier =
      case funder_identifier_type
      when "Crossref Funder ID"
        validate_funder_doi(funder_identifier)
      when "ROR"
        normalize_ror(funder_identifier)
      else
        # keep the raw value when it cannot be normalized
        normalize_id(funder_identifier) || funder_identifier
      end

    {
      "funderName" => fr["funderName"],
      "funderIdentifier" => funder_identifier,
      "funderIdentifierType" => funder_identifier_type,
      "schemeUri" => scheme_uri,
      "awardNumber" => parse_attributes(fr["awardNumber"]),
      "awardUri" => parse_attributes(fr["awardNumber"], content: "awardURI"),
      "awardTitle" => fr["awardTitle"]
    }.compact
  end

  related_identifiers = Array.wrap(meta.dig("relatedIdentifiers", "relatedIdentifier")).map do |ri|
    # DOIs are lowercased and validated; the lowercased value is kept if validation fails
    rid =
      if ri["relatedIdentifierType"] == "DOI"
        doi = ri["__content__"].to_s.downcase
        validate_doi(doi) || doi
      else
        ri["__content__"]
      end

    {
      "relatedIdentifier" => rid,
      "relatedIdentifierType" => ri["relatedIdentifierType"],
      "relationType" => ri["relationType"],
      "resourceTypeGeneral" => ri["resourceTypeGeneral"],
      "relatedMetadataScheme" => ri["relatedMetadataScheme"],
      "schemeUri" => ri["schemeURI"],
      "schemeType" => ri["schemeType"],
      "relationTypeInformation" => ri["relationTypeInformation"]
    }.compact
  end

  related_items = Array.wrap(meta.dig("relatedItems", "relatedItem")).map do |ri|
    rii = ri["relatedItemIdentifier"]
    related_item_identifier = nil
    if rii
      rid =
        if rii["relatedItemIdentifierType"] == "DOI"
          doi = rii["__content__"].to_s.downcase
          validate_doi(doi) || doi
        else
          rii["__content__"]
        end

      related_item_identifier = {
        "relatedItemIdentifier" => rid,
        "relatedItemIdentifierType" => rii["relatedItemIdentifierType"],
        "relatedMetadataScheme" => rii["relatedMetadataScheme"],
        "schemeURI" => rii["schemeURI"],
        "schemeType" => rii["schemeType"]
      }.compact
    end

    # number is either plain text or an element carrying a numberType attribute
    number = ri["number"]
    number_type = nil
    unless number.is_a?(String)
      number = ri.dig("number", "__content__")
      number_type = ri.dig("number", "numberType")
    end

    {
      "relationType" => ri["relationType"],
      "relationTypeInformation" => ri["relationTypeInformation"],
      "relatedItemType" => ri["relatedItemType"],
      "relatedItemIdentifier" => related_item_identifier,
      "creators" => get_authors(Array.wrap(ri.dig("creators", "creator"))),
      "titles" => get_titles(ri),
      "publicationYear" => ri["publicationYear"],
      "volume" => parse_attributes(ri["volume"]),
      "issue" => parse_attributes(ri["issue"]),
      "number" => number,
      "numberType" => number_type,
      "firstPage" => parse_attributes(ri["firstPage"]),
      "lastPage" => parse_attributes(ri["lastPage"]),
      "publisher" => parse_attributes(ri["publisher"]),
      "edition" => parse_attributes(ri["edition"]),
      "contributors" => get_authors(Array.wrap(ri.dig("contributors", "contributor")))
    }.compact
  end

  geo_locations = Array.wrap(meta.dig("geoLocations", "geoLocation")).map do |gl|
    # skip entries that are not hashes or whose point/box/polygon is bare text
    next unless gl.is_a?(Hash)
    next if gl["geoLocationPoint"].is_a?(String) || gl["geoLocationBox"].is_a?(String) || gl["geoLocationPolygon"].is_a?(String)

    # an array of geoLocationPolygons within one geoLocation is unsupported in JSON
    geo_location_polygon =
      if gl.dig("geoLocationPolygon").kind_of?(Array)
        nil
      else
        polygon_points = Array.wrap(gl.dig("geoLocationPolygon", "polygonPoint")).map { |pt| { "polygonPoint" => pt } }
        in_polygon_point = Array.wrap(gl.dig("geoLocationPolygon", "inPolygonPoint")).map { |pt| { "inPolygonPoint" => pt } }.first
        polygon_points.push(in_polygon_point).compact.presence
      end

    {
      "geoLocationPoint" => {
        "pointLatitude" => gl.dig("geoLocationPoint", "pointLatitude"),
        "pointLongitude" => gl.dig("geoLocationPoint", "pointLongitude")
      }.compact.presence,
      "geoLocationBox" => {
        "westBoundLongitude" => gl.dig("geoLocationBox", "westBoundLongitude"),
        "eastBoundLongitude" => gl.dig("geoLocationBox", "eastBoundLongitude"),
        "southBoundLatitude" => gl.dig("geoLocationBox", "southBoundLatitude"),
        "northBoundLatitude" => gl.dig("geoLocationBox", "northBoundLatitude")
      }.compact.presence,
      "geoLocationPolygon" => geo_location_polygon,
      "geoLocationPlace" => parse_attributes(gl["geoLocationPlace"], first: true).to_s.strip.presence
    }.compact
  end.compact

  state = (id.present? || read_options.present?) ? "findable" : "not_found"

  {
    "id" => id,
    "types" => types,
    "doi" => doi_from_url(id),
    "identifiers" => identifiers,
    "url" => options.fetch(:url, nil).to_s.strip.presence,
    "titles" => titles,
    "creators" => get_authors(Array.wrap(meta.dig("creators", "creator"))),
    "contributors" => get_authors(Array.wrap(meta.dig("contributors", "contributor"))),
    "publisher" => publisher,
    "agency" => "datacite",
    "funding_references" => funding_references,
    "dates" => dates,
    "publication_year" => parse_attributes(meta.fetch("publicationYear", nil), first: true).to_s.strip.presence,
    "descriptions" => descriptions,
    "rights_list" => Array.wrap(rights_list),
    "version_info" => meta.fetch("version", nil).to_s.presence,
    "subjects" => subjects,
    "language" => parse_attributes(meta.fetch("language", nil), first: true).to_s.strip.presence,
    "geo_locations" => geo_locations,
    "related_identifiers" => related_identifiers,
    "related_items" => related_items,
    "formats" => formats,
    "sizes" => sizes,
    "schema_version" => schema_version,
    "state" => state
  }.merge(read_options)
end
|
|
321
|
+
|
|
322
|
+
# Build the list of title hashes from parsed metadata.
#
# Reads meta["titles"]["title"] (wrapped into an array) and converts every
# non-blank entry:
# * a String becomes { "title" => sanitize(entry) }
# * anything else is read as a hash with "__content__", "titleType" and
#   "lang"; keys whose value is nil are dropped.
#
# Blank entries are skipped. Returns an array, possibly empty.
def get_titles(meta)
  raw_titles = Array.wrap(meta.dig("titles", "title"))

  raw_titles.each_with_object([]) do |entry, collected|
    next if entry.blank?

    title = if entry.is_a?(String)
      { "title" => sanitize(entry) }
    else
      {
        "title" => sanitize(entry["__content__"]),
        "titleType" => entry["titleType"],
        "lang" => entry["lang"]
      }.compact
    end

    collected << title
  end
end
|
|
335
|
+
|
|
336
|
+
end
|
|
337
|
+
end
|
|
338
|
+
end
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bolognese
|
|
4
|
+
module Readers
|
|
5
|
+
module NpmReader
|
|
6
|
+
# Fetch the raw document for an npm identifier.
#
# id      - identifier to look up; when blank nothing is requested.
# options - accepted for signature compatibility, not used in this method.
#
# Returns { "string" => nil, "state" => "not_found" } for a blank id.
# Otherwise the id is passed through normalize_id, requested with
# Maremma.get (accept: "json", raw: true), and the "data" entry of the
# response body is returned under "string" (nil when that entry is missing).
def get_npm(id: nil, **options)
  return { "string" => nil, "state" => "not_found" } if id.blank?

  package_url = normalize_id(id)
  body = Maremma.get(package_url, accept: "json", raw: true).body

  { "string" => body.fetch("data", nil) }
end
|
|
14
|
+
|
|
15
|
+
# Convert an npm package.json document into the internal metadata hash.
#
# string  - package.json content as a JSON string; may be nil or blank, in
#           which case an empty document is assumed.
# options - extra attributes. Everything except :doi, :id, :url, :sandbox,
#           :validate and :ra is merged over the parsed result, so these
#           values win over values read from the document.
#
# Returns { "errors" => errors } when jsonlint reports problems for a
# non-blank string. Otherwise returns a hash with "types", "titles",
# "creators", "descriptions", "rights_list", "version_info" and "subjects",
# merged with the remaining options.
def read_npm(string: nil, **options)
  if string.present?
    errors = jsonlint(string)
    return { "errors" => errors } if errors.present?
  end

  read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))

  meta = string.present? ? Maremma.from_json(string) : {}

  # Every npm package gets the same fixed type mapping.
  # The key was previously misspelled "reourceType", so the value was never
  # found under the "resourceType" key.
  types = {
    "resourceTypeGeneral" => "Software",
    "resourceType" => "NPM Package",
    "schemaOrg" => "SoftwareSourceCode",
    "citeproc" => "article",
    "bibtex" => "misc",
    "ris" => "GEN"
  }.compact

  # Fall back to the "(:unav)" placeholder when package.json has no author,
  # spelled the same way as the placeholder used by the RIS reader.
  creators = if meta.fetch("author", nil).present?
    get_authors(Array.wrap(meta.fetch("author", nil)))
  else
    [{ "nameType" => "Organizational", "name" => "(:unav)" }]
  end
  # contributors = get_authors(from_citeproc(Array.wrap(meta.fetch("editor", nil))))
  # dates = if date = get_date_from_date_parts(meta.fetch("issued", nil))
  #   if Date.edtf(date).present?
  #     [{ "date" => date,
  #        "dateType" => "Issued" }]
  #   end
  # end
  # publication_year = get_date_from_date_parts(meta.fetch("issued", nil)).to_s[0..3]

  # rights_list stays nil when the document declares no license.
  rights_list = if meta.fetch("license", nil)
    [{ "rights" => meta.fetch("license") }.compact]
  end
  # related_identifiers = if meta.fetch("container-title", nil).present? && meta.fetch("ISSN", nil).present?
  #   [{ "type" => "Periodical",
  #      "relationType" => "IsPartOf",
  #      "relatedIdentifierType" => "ISSN",
  #      "title" => meta.fetch("container-title", nil),
  #      "relatedIdentifier" => meta.fetch("ISSN", nil) }.compact]
  # end
  # container = if meta.fetch("container-title", nil).present?
  #   first_page = meta.fetch("page", nil).present? ? meta.fetch("page").split("-").map(&:strip)[0] : nil
  #   last_page = meta.fetch("page", nil).present? ? meta.fetch("page").split("-").map(&:strip)[1] : nil

  #   { "type" => "Periodical",
  #     "title" => meta.fetch("container-title", nil),
  #     "identifier" => meta.fetch("ISSN", nil),
  #     "identifierType" => meta.fetch("ISSN", nil).present? ? "ISSN" : nil,
  #     "volume" => meta.fetch("volume", nil),
  #     "issue" => meta.fetch("issue", nil),
  #     "firstPage" => first_page,
  #     "lastPage" => last_page
  #   }.compact
  # else
  #   nil
  # end

  # identifiers = [normalize_id(meta.fetch("id", nil)), normalize_doi(meta.fetch("DOI", nil))].compact.map do |r|
  #   r = normalize_id(r)

  #   if r.start_with?("https://doi.org")
  #     { "identifierType" => "DOI", "identifier" => r }
  #   else
  #     { "identifierType" => "URL", "identifier" => r }
  #   end
  # end.uniq

  # id = Array.wrap(identifiers).first.to_h.fetch("identifier", nil)
  # doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)

  # state = id.present? || read_options.present? ? "findable" : "not_found"

  # Each entry of "keywords" becomes one subject hash.
  subjects = Array.wrap(meta.fetch("keywords", nil)).map do |s|
    { "subject" => s }
  end

  {
    #"id" => id,
    #"identifiers" => identifiers,
    "types" => types,
    #"doi" => doi_from_url(doi),
    #"url" => normalize_id(meta.fetch("URL", nil)),
    "titles" => [{ "title" => meta.fetch("name", nil) }],
    "creators" => creators,
    #"contributors" => contributors,
    #"container" => container,
    #"publisher" => meta.fetch("publisher", nil),
    #"related_identifiers" => related_identifiers,
    #"dates" => dates,
    #"publication_year" => publication_year,
    "descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description"), new_line: true), "descriptionType" => "Abstract" }] : [],
    "rights_list" => rights_list,
    "version_info" => meta.fetch("version", nil),
    "subjects" => subjects
    #"state" => state
  }.merge(read_options)
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Bolognese
|
|
4
|
+
module Readers
|
|
5
|
+
module RisReader
|
|
6
|
+
# Lookup tables keyed by RIS reference type code (the value of the "TY" tag).
# They are frozen so the shared tables cannot be mutated at runtime.

# RIS type code => schema.org type name.
RIS_TO_SO_TRANSLATIONS = {
  "BLOG" => "BlogPosting",
  "GEN" => "CreativeWork",
  "CTLG" => "DataCatalog",
  "DATA" => "Dataset",
  "FIGURE" => "ImageObject",
  "THES" => "Thesis",
  "MPCT" => "Movie",
  "JOUR" => "ScholarlyArticle",
  "COMP" => "SoftwareSourceCode",
  "VIDEO" => "VideoObject",
  "ELEC" => "WebPage"
}.freeze

# RIS type code => citeproc type name.
RIS_TO_CP_TRANSLATIONS = {
  "JOUR" => "article-journal"
}.freeze

# RIS type code => BibTeX entry type.
RIS_TO_BIB_TRANSLATIONS = {
  "JOUR" => "article",
  "BOOK" => "book",
  "CHAP" => "inbook",
  "CPAPER" => "inproceedings",
  "GEN" => "misc",
  "THES" => "phdthesis",
  "CONF" => "proceedings",
  "RPRT" => "techreport",
  "UNPD" => "unpublished"
}.freeze
|
|
35
|
+
|
|
36
|
+
# Convert a RIS record into the internal metadata hash.
#
# string  - RIS text, parsed by ris_meta into a tag => value hash.
# options - extra attributes. options[:doi] takes precedence over the "DO"
#           tag for the id; everything except :doi, :id, :url, :sandbox,
#           :validate and :ra is merged over the parsed result.
#
# Returns a hash with "id", "types", "doi", "url", "titles", "creators",
# "publisher", "container", "related_identifiers", "dates",
# "publication_year", "descriptions", "subjects", "language" and "state",
# merged with the remaining options.
def read_ris(string: nil, **options)
  read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))

  meta = ris_meta(string: string)

  # Records without a "TY" tag are treated as generic.
  ris_type = meta.fetch("TY", nil) || "GEN"
  schema_org = RIS_TO_SO_TRANSLATIONS[ris_type] || "CreativeWork"
  types = {
    "resourceTypeGeneral" => Metadata::RIS_TO_DC_TRANSLATIONS[ris_type],
    "schemaOrg" => schema_org,
    # RIS_TO_CP_TRANSLATIONS is keyed by RIS type code, so it must be looked
    # up with ris_type. Indexing it with the schema.org name never matched
    # and every record ended up as "misc".
    "citeproc" => RIS_TO_CP_TRANSLATIONS[ris_type] || "misc",
    "ris" => ris_type
  }.compact

  id = normalize_doi(options[:doi] || meta.fetch("DO", nil))

  author = Array.wrap(meta.fetch("AU", nil)).map { |a| { "creatorName" => a } }

  # "PY" and "Y1" values are split on "/" and handed to get_date_from_parts.
  date_parts = meta.fetch("PY", nil).to_s.split("/")
  created_date_parts = meta.fetch("Y1", nil).to_s.split("/")
  dates = []
  dates << { "date" => get_date_from_parts(*date_parts), "dateType" => "Issued" } if meta.fetch("PY", nil).present?
  dates << { "date" => get_date_from_parts(*created_date_parts), "dateType" => "Created" } if meta.fetch("Y1", nil).present?
  publication_year = get_date_from_parts(*date_parts).to_s[0..3]

  # A related ISSN identifier is only emitted when both the container title
  # ("T2") and the serial number ("SN") are present.
  related_identifiers = if meta.fetch("T2", nil).present? && meta.fetch("SN", nil).present?
    [{ "type" => "Periodical",
       "id" => meta.fetch("SN", nil),
       "relatedIdentifierType" => "ISSN",
       "relationType" => "IsPartOf",
       "title" => meta.fetch("T2", nil) }.compact]
  else
    []
  end

  container = if meta.fetch("T2", nil).present?
    { "type" => "Journal",
      "title" => meta.fetch("T2", nil),
      "identifier" => meta.fetch("SN", nil),
      "volume" => meta.fetch("VL", nil),
      "issue" => meta.fetch("IS", nil),
      "firstPage" => meta.fetch("SP", nil),
      "lastPage" => meta.fetch("EP", nil) }.compact
  else
    nil
  end

  # NOTE(review): state only looks at the "DO" tag, not at options[:doi],
  # although the id above honors both -- confirm this is intended.
  state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"

  # name_to_subject is expected to return an array for each keyword; the
  # arrays are concatenated in keyword order.
  subjects = Array.wrap(meta.fetch("KW", nil)).reduce([]) do |sum, subject|
    sum + name_to_subject(subject)
  end

  { "id" => id,
    "types" => types,
    "doi" => doi_from_url(id),
    "url" => meta.fetch("UR", nil),
    "titles" => meta.fetch("T1", nil).present? ? [{ "title" => meta.fetch("T1", nil) }] : nil,
    "creators" => get_authors(author),
    "publisher" => { "name" => meta.fetch("PB", "(:unav)") },
    "container" => container,
    "related_identifiers" => related_identifiers,
    "dates" => dates,
    "publication_year" => publication_year,
    "descriptions" => meta.fetch("AB", nil).present? ? [{ "description" => sanitize(meta.fetch("AB"), new_line: true), "descriptionType" => "Abstract" }] : nil,
    "subjects" => subjects,
    "language" => meta.fetch("LA", nil),
    "state" => state
  }.merge(read_options)
end
|
|
103
|
+
|
|
104
|
+
# Parse RIS text into a hash of tag => value.
#
# string - RIS text with one "TAG  - value" entry per line; nil is treated
#          as empty input.
#
# Each line is split at the first "-"; the left part (stripped) is the tag
# and the right part (stripped) is the value, so dashes inside the value are
# preserved. Values of a repeated tag are collected in order. Lines without
# a tag (blank lines, or lines starting with "-") are ignored instead of
# raising NoMethodError.
#
# Returns a plain Hash. Each collected list is passed through Array#unwrap
# (defined elsewhere in the project; presumably it collapses a one-element
# list to its element and an empty list to nil -- verify), and nil values
# are dropped.
def ris_meta(string: nil)
  tags = Hash.new { |hash, key| hash[key] = [] }

  string.to_s.split("\n").each do |line|
    tag, value = line.split("-", 2)
    # "".split returns [], so tag is nil for an empty line.
    next if tag.nil? || tag.strip.empty?

    tags[tag.strip] << value.to_s.strip
  end

  tags.map { |tag, values| [tag, values.unwrap] }.to_h.compact
end
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
end
|