bolognese 0.5.3 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +10 -7
- data/README.md +101 -467
- data/bolognese.gemspec +1 -1
- data/lib/bolognese.rb +2 -0
- data/lib/bolognese/bibtex.rb +101 -0
- data/lib/bolognese/cli.rb +29 -0
- data/lib/bolognese/crossref.rb +37 -1
- data/lib/bolognese/datacite.rb +10 -1
- data/lib/bolognese/datacite_utils.rb +1 -1
- data/lib/bolognese/metadata.rb +33 -31
- data/lib/bolognese/schema_org.rb +5 -0
- data/lib/bolognese/version.rb +1 -1
- data/spec/bibtex_spec.rb +35 -0
- data/spec/cli_spec.rb +43 -0
- data/spec/crossref_spec.rb +29 -0
- data/spec/datacite_spec.rb +29 -3
- data/spec/fixtures/crossref.bib +12 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/crossref/as_bibtex.yml +760 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/datacite/as_bibtex.yml +214 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_CLI/read/schema_org/as_bibtex.yml +653 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata_as_bibtex/with_data_citation.yml +719 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Crossref/get_metadata_as_bibtex/with_pages.yml +366 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata_as_bibtex/BlogPosting.yml +155 -0
- data/spec/fixtures/vcr_cassettes/Bolognese_Datacite/get_metadata_as_bibtex/Dataset.yml +173 -0
- metadata +15 -11
data/bolognese.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.add_dependency 'nokogiri', '~> 1.6', '>= 1.6.8'
|
20
20
|
s.add_dependency 'builder', '~> 3.2', '>= 3.2.2'
|
21
21
|
s.add_dependency 'activesupport', '~> 4.2', '>= 4.2.5'
|
22
|
-
s.add_dependency '
|
22
|
+
s.add_dependency 'bibtex-ruby', '~> 4.1'
|
23
23
|
s.add_dependency 'thor', '~> 0.19'
|
24
24
|
s.add_dependency 'namae', '~> 0.10.2'
|
25
25
|
s.add_dependency 'postrank-uri', '~> 1.0', '>= 1.0.18'
|
data/lib/bolognese.rb
CHANGED
@@ -2,12 +2,14 @@ require 'active_support/all'
|
|
2
2
|
require 'nokogiri'
|
3
3
|
require 'maremma'
|
4
4
|
require 'postrank-uri'
|
5
|
+
require 'bibtex'
|
5
6
|
|
6
7
|
require "bolognese/version"
|
7
8
|
require "bolognese/metadata"
|
8
9
|
require "bolognese/crossref"
|
9
10
|
require "bolognese/datacite"
|
10
11
|
require "bolognese/schema_org"
|
12
|
+
require "bolognese/bibtex"
|
11
13
|
require "bolognese/orcid"
|
12
14
|
require "bolognese/cli"
|
13
15
|
require "bolognese/string"
|
@@ -0,0 +1,101 @@
|
|
1
|
+
module Bolognese
|
2
|
+
class Bibtex < Metadata
|
3
|
+
|
4
|
+
BIB_TO_SO_TRANSLATIONS = {
|
5
|
+
"article" => "ScholarlyArticle"
|
6
|
+
}
|
7
|
+
|
8
|
+
SO_TO_BIB_TRANSLATIONS = {
|
9
|
+
"Article" => "article",
|
10
|
+
"AudioObject" => "misc",
|
11
|
+
"Blog" => "misc",
|
12
|
+
"BlogPosting" => "article",
|
13
|
+
"Collection" => "misc",
|
14
|
+
"CreativeWork" => "misc",
|
15
|
+
"DataCatalog" => "misc",
|
16
|
+
"Dataset" => "misc",
|
17
|
+
"Event" => "misc",
|
18
|
+
"ImageObject" => "misc",
|
19
|
+
"Movie" => "misc",
|
20
|
+
"PublicationIssue" => "misc",
|
21
|
+
"ScholarlyArticle" => "article",
|
22
|
+
"Service" => "misc",
|
23
|
+
"SoftwareSourceCode" => "misc",
|
24
|
+
"VideoObject" => "misc",
|
25
|
+
"WebPage" => "misc",
|
26
|
+
"WebSite" => "misc"
|
27
|
+
}
|
28
|
+
|
29
|
+
def initialize(string: nil)
|
30
|
+
@raw = string
|
31
|
+
end
|
32
|
+
|
33
|
+
alias_method :schema_org, :as_schema_org
|
34
|
+
|
35
|
+
def metadata
|
36
|
+
@metadata ||= raw.present? ? BibTeX.parse(raw).first : {}
|
37
|
+
end
|
38
|
+
|
39
|
+
def exists?
|
40
|
+
metadata.present?
|
41
|
+
end
|
42
|
+
|
43
|
+
def type
|
44
|
+
BIB_TO_SO_TRANSLATIONS[metadata.type.to_s] || "ScholarlyArticle"
|
45
|
+
end
|
46
|
+
|
47
|
+
def resource_type_general
|
48
|
+
SO_TO_DC_TRANSLATIONS[type]
|
49
|
+
end
|
50
|
+
|
51
|
+
def doi
|
52
|
+
metadata.doi
|
53
|
+
end
|
54
|
+
|
55
|
+
def url
|
56
|
+
metadata.url
|
57
|
+
end
|
58
|
+
|
59
|
+
def id
|
60
|
+
normalize_doi(doi)
|
61
|
+
end
|
62
|
+
|
63
|
+
def author
|
64
|
+
Array(metadata.author).map do |a|
|
65
|
+
{ "@type" => "Person",
|
66
|
+
"givenName" => a.first,
|
67
|
+
"familyName" => a.last }.compact
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def name
|
72
|
+
metadata.title
|
73
|
+
end
|
74
|
+
|
75
|
+
def container_title
|
76
|
+
metadata.journal.to_s.presence || metadata.pubisher.to_s
|
77
|
+
end
|
78
|
+
|
79
|
+
def date_published
|
80
|
+
metadata.date.to_s.presence
|
81
|
+
end
|
82
|
+
|
83
|
+
def publication_year
|
84
|
+
metadata.year.to_s.presence
|
85
|
+
end
|
86
|
+
|
87
|
+
def is_part_of
|
88
|
+
if metadata.journal.present?
|
89
|
+
{ "@type" => "Periodical",
|
90
|
+
"name" => metadata.journal.to_s,
|
91
|
+
"issn" => metadata.issn.to_s.presence }.compact
|
92
|
+
else
|
93
|
+
nil
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def description
|
98
|
+
metadata.note.to_s.presence
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
data/lib/bolognese/cli.rb
CHANGED
@@ -10,6 +10,8 @@ module Bolognese
|
|
10
10
|
include Bolognese::DoiUtils
|
11
11
|
include Bolognese::Utils
|
12
12
|
|
13
|
+
default_task :open
|
14
|
+
|
13
15
|
def self.exit_on_failure?
|
14
16
|
true
|
15
17
|
end
|
@@ -42,5 +44,32 @@ module Bolognese
|
|
42
44
|
puts "not implemented"
|
43
45
|
end
|
44
46
|
end
|
47
|
+
|
48
|
+
desc "open file", "read metadata from file"
|
49
|
+
method_option :as, default: "schema_org"
|
50
|
+
method_option :schema_version
|
51
|
+
def open(file)
|
52
|
+
ext = File.extname(file)
|
53
|
+
unless %w(.bib).include? ext
|
54
|
+
$stderr.puts "File type #{ext} not supported"
|
55
|
+
exit 1
|
56
|
+
end
|
57
|
+
string = IO.read(file)
|
58
|
+
provider = "bibtex"
|
59
|
+
output = options[:as] || "schema_org"
|
60
|
+
|
61
|
+
if provider.present?
|
62
|
+
p = case provider
|
63
|
+
when "crossref" then Crossref.new(id: id)
|
64
|
+
when "datacite" then Datacite.new(id: id, schema_version: options[:schema_version])
|
65
|
+
when "bibtex" then Bibtex.new(string: string)
|
66
|
+
else SchemaOrg.new(id: id)
|
67
|
+
end
|
68
|
+
|
69
|
+
puts p.send(output)
|
70
|
+
else
|
71
|
+
puts "not implemented"
|
72
|
+
end
|
73
|
+
end
|
45
74
|
end
|
46
75
|
end
|
data/lib/bolognese/crossref.rb
CHANGED
@@ -31,6 +31,35 @@ module Bolognese
|
|
31
31
|
"PostedContent" => "ScholarlyArticle"
|
32
32
|
}
|
33
33
|
|
34
|
+
CR_TO_BIB_TRANSLATIONS = {
|
35
|
+
"Proceedings" => "proceedings",
|
36
|
+
"ReferenceBook" => "book",
|
37
|
+
"JournalIssue" => nil,
|
38
|
+
"ProceedingsArticle" => nil,
|
39
|
+
"Other" => nil,
|
40
|
+
"Dissertation" => "phdthesis",
|
41
|
+
"Dataset" => nil,
|
42
|
+
"EditedBook" => "book",
|
43
|
+
"JournalArticle" => "article",
|
44
|
+
"Journal" => nil,
|
45
|
+
"Report" => nil,
|
46
|
+
"BookSeries" => nil,
|
47
|
+
"ReportSeries" => nil,
|
48
|
+
"BookTrack" => nil,
|
49
|
+
"Standard" => nil,
|
50
|
+
"BookSection" => "inbook",
|
51
|
+
"BookPart" => nil,
|
52
|
+
"Book" => "book",
|
53
|
+
"BookChapter" => "inbook",
|
54
|
+
"StandardSeries" => nil,
|
55
|
+
"Monograph" => "book",
|
56
|
+
"Component" => nil,
|
57
|
+
"ReferenceEntry" => nil,
|
58
|
+
"JournalVolume" => nil,
|
59
|
+
"BookSet" => nil,
|
60
|
+
"PostedContent" => "article"
|
61
|
+
}
|
62
|
+
|
34
63
|
def initialize(id: nil, string: nil)
|
35
64
|
id = normalize_doi(id) if id.present?
|
36
65
|
|
@@ -45,6 +74,7 @@ module Bolognese
|
|
45
74
|
alias_method :crossref, :raw
|
46
75
|
alias_method :as_crossref, :raw
|
47
76
|
alias_method :schema_org, :as_schema_org
|
77
|
+
alias_method :bibtex, :as_bibtex
|
48
78
|
|
49
79
|
def metadata
|
50
80
|
@metadata ||= raw.present? ? Maremma.from_xml(raw).fetch("doi_records", {}).fetch("doi_record", {}) : {}
|
@@ -100,6 +130,10 @@ module Bolognese
|
|
100
130
|
end
|
101
131
|
end
|
102
132
|
|
133
|
+
def bibtex_type
|
134
|
+
CR_TO_BIB_TRANSLATIONS[additional_type] || "misc"
|
135
|
+
end
|
136
|
+
|
103
137
|
def name
|
104
138
|
parse_attribute(bibliographic_metadata.dig("titles", "title"))
|
105
139
|
end
|
@@ -190,9 +224,11 @@ module Bolognese
|
|
190
224
|
end
|
191
225
|
|
192
226
|
def container_title
|
193
|
-
is_part_of.fetch("name", nil)
|
227
|
+
is_part_of.to_h.fetch("name", nil)
|
194
228
|
end
|
195
229
|
|
230
|
+
alias_method :journal, :container_title
|
231
|
+
|
196
232
|
def citation
|
197
233
|
citations = bibliographic_metadata.dig("citation_list", "citation")
|
198
234
|
Array.wrap(citations).map do |c|
|
data/lib/bolognese/datacite.rb
CHANGED
@@ -32,6 +32,7 @@ module Bolognese
|
|
32
32
|
end
|
33
33
|
|
34
34
|
alias_method :schema_org, :as_schema_org
|
35
|
+
alias_method :bibtex, :as_bibtex
|
35
36
|
|
36
37
|
def schema_version
|
37
38
|
@schema_version ||= metadata.fetch("xsi:schemaLocation", "").split(" ").first
|
@@ -76,6 +77,10 @@ module Bolognese
|
|
76
77
|
metadata.fetch("resourceType", {}).fetch("resourceTypeGeneral", nil)
|
77
78
|
end
|
78
79
|
|
80
|
+
def bibtex_type
|
81
|
+
Bolognese::Bibtex::SO_TO_BIB_TRANSLATIONS[type] || "misc"
|
82
|
+
end
|
83
|
+
|
79
84
|
def name
|
80
85
|
metadata.dig("titles", "title")
|
81
86
|
end
|
@@ -157,13 +162,17 @@ module Bolognese
|
|
157
162
|
end
|
158
163
|
|
159
164
|
def date_published
|
160
|
-
date("Issued") ||
|
165
|
+
date("Issued") || publication_year
|
161
166
|
end
|
162
167
|
|
163
168
|
def date_modified
|
164
169
|
date("Updated")
|
165
170
|
end
|
166
171
|
|
172
|
+
def publication_year
|
173
|
+
metadata.fetch("publicationYear")
|
174
|
+
end
|
175
|
+
|
167
176
|
def language
|
168
177
|
metadata.fetch("language", nil)
|
169
178
|
end
|
data/lib/bolognese/metadata.rb
CHANGED
@@ -12,32 +12,23 @@ module Bolognese
|
|
12
12
|
include Bolognese::DataciteUtils
|
13
13
|
include Bolognese::Utils
|
14
14
|
|
15
|
-
attr_reader :id, :raw, :provider, :schema_version
|
15
|
+
attr_reader :id, :raw, :provider, :schema_version, :license, :citation,
|
16
|
+
:additional_type, :alternate_name, :url, :version, :keywords, :editor,
|
17
|
+
:page_start, :page_end, :date_modified, :language, :spatial_coverage,
|
18
|
+
:content_size, :funder, :journal, :bibtex_type
|
16
19
|
|
17
20
|
alias_method :datacite, :as_datacite
|
18
21
|
|
19
|
-
def
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
def version
|
24
|
-
|
25
|
-
end
|
26
|
-
|
27
|
-
def keywords
|
28
|
-
|
22
|
+
def publication_year
|
23
|
+
date_published && date_published[0..3]
|
29
24
|
end
|
30
25
|
|
31
26
|
def date_created
|
32
27
|
|
33
28
|
end
|
34
29
|
|
35
|
-
def
|
36
|
-
|
37
|
-
end
|
38
|
-
|
39
|
-
def page_end
|
40
|
-
|
30
|
+
def pagination
|
31
|
+
[page_start, page_end].compact.join("-").presence
|
41
32
|
end
|
42
33
|
|
43
34
|
def has_part
|
@@ -52,28 +43,21 @@ module Bolognese
|
|
52
43
|
|
53
44
|
end
|
54
45
|
|
55
|
-
def language
|
56
|
-
|
57
|
-
end
|
58
|
-
|
59
|
-
def spatial_coverage
|
60
|
-
|
61
|
-
end
|
62
|
-
|
63
|
-
def content_size
|
64
|
-
|
65
|
-
end
|
66
|
-
|
67
46
|
def schema_version
|
68
47
|
|
69
48
|
end
|
70
49
|
|
71
|
-
def
|
50
|
+
def author_string
|
51
|
+
author.map { |a| [a["familyName"], a["givenName"]].join(", ") }
|
52
|
+
.join(" and ").presence
|
53
|
+
end
|
72
54
|
|
55
|
+
def publisher_string
|
56
|
+
publisher.to_h.fetch("name", nil)
|
73
57
|
end
|
74
58
|
|
75
59
|
def as_schema_org
|
76
|
-
{ "@context" => "http://schema.org",
|
60
|
+
{ "@context" => id.present? ? "http://schema.org" : nil,
|
77
61
|
"@type" => type,
|
78
62
|
"@id" => id,
|
79
63
|
"url" => url,
|
@@ -103,5 +87,23 @@ module Bolognese
|
|
103
87
|
"provider" => provider
|
104
88
|
}.compact.to_json
|
105
89
|
end
|
90
|
+
|
91
|
+
def as_bibtex
|
92
|
+
bib = {
|
93
|
+
bibtex_type: bibtex_type.to_sym,
|
94
|
+
bibtex_key: id,
|
95
|
+
doi: doi,
|
96
|
+
url: url,
|
97
|
+
author: author_string,
|
98
|
+
keywords: keywords,
|
99
|
+
title: name,
|
100
|
+
journal: journal,
|
101
|
+
pages: pagination,
|
102
|
+
publisher: publisher_string,
|
103
|
+
year: publication_year
|
104
|
+
}.compact
|
105
|
+
|
106
|
+
BibTeX::Entry.new(bib).to_s
|
107
|
+
end
|
106
108
|
end
|
107
109
|
end
|
data/lib/bolognese/schema_org.rb
CHANGED
@@ -14,6 +14,7 @@ module Bolognese
|
|
14
14
|
end
|
15
15
|
|
16
16
|
alias_method :schema_org, :as_schema_org
|
17
|
+
alias_method :bibtex, :as_bibtex
|
17
18
|
|
18
19
|
def metadata
|
19
20
|
@metadata ||= raw.present? ? Maremma.from_json(raw) : {}
|
@@ -47,6 +48,10 @@ module Bolognese
|
|
47
48
|
metadata.fetch("additionalType", nil)
|
48
49
|
end
|
49
50
|
|
51
|
+
def bibtex_type
|
52
|
+
Bolognese::Bibtex::SO_TO_BIB_TRANSLATIONS[type] || "misc"
|
53
|
+
end
|
54
|
+
|
50
55
|
def name
|
51
56
|
metadata.fetch("name", nil)
|
52
57
|
end
|
data/lib/bolognese/version.rb
CHANGED
data/spec/bibtex_spec.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Bolognese::Bibtex, vcr: true do
|
4
|
+
let(:string) { IO.read(fixture_path + "crossref.bib") }
|
5
|
+
|
6
|
+
subject { Bolognese::Bibtex.new(string: string) }
|
7
|
+
|
8
|
+
context "get metadata" do
|
9
|
+
it "Crossref DOI" do
|
10
|
+
expect(subject.id).to eq("https://doi.org/10.7554/elife.01567")
|
11
|
+
expect(subject.type).to eq("ScholarlyArticle")
|
12
|
+
expect(subject.url).to eq("http://elifesciences.org/lookup/doi/10.7554/eLife.01567")
|
13
|
+
expect(subject.resource_type_general).to eq("Text")
|
14
|
+
expect(subject.author).to eq([{"@type"=>"Person", "givenName"=>"Martial", "familyName"=>"Sankar"},
|
15
|
+
{"@type"=>"Person", "givenName"=>"Kaisa", "familyName"=>"Nieminen"},
|
16
|
+
{"@type"=>"Person", "givenName"=>"Laura", "familyName"=>"Ragni"},
|
17
|
+
{"@type"=>"Person", "givenName"=>"Ioannis", "familyName"=>"Xenarios"},
|
18
|
+
{"@type"=>"Person", "givenName"=>"Christian S", "familyName"=>"Hardtke"}])
|
19
|
+
expect(subject.name).to eq("Automated quantitative histology reveals vascular morphodynamics during Arabidopsis hypocotyl secondary growth")
|
20
|
+
expect(subject.date_published).to eq("2014")
|
21
|
+
expect(subject.is_part_of).to eq("@type"=>"Periodical", "name"=>"eLife", "issn"=>"2050-084X")
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
context "get metadata as datacite xml" do
|
26
|
+
it "Crossref DOI" do
|
27
|
+
expect(subject.validation_errors).to be_empty
|
28
|
+
datacite = Maremma.from_xml(subject.as_datacite).fetch("resource", {})
|
29
|
+
expect(datacite.dig("resourceType", "resourceTypeGeneral")).to eq("Text")
|
30
|
+
expect(datacite.dig("titles", "title")).to eq("Automated quantitative histology reveals vascular morphodynamics during Arabidopsis hypocotyl secondary growth")
|
31
|
+
expect(datacite.dig("creators", "creator").count).to eq(5)
|
32
|
+
expect(datacite.dig("creators", "creator").first).to eq("creatorName"=>"Sankar, Martial", "givenName"=>"Martial", "familyName"=>"Sankar")
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
data/spec/cli_spec.rb
CHANGED
@@ -28,6 +28,11 @@ describe Bolognese::CLI do
|
|
28
28
|
subject.options = { as: "datacite" }
|
29
29
|
expect { subject.read id }.to output(/http:\/\/datacite.org\/schema\/kernel-4/).to_stdout
|
30
30
|
end
|
31
|
+
|
32
|
+
it 'as bibtex' do
|
33
|
+
subject.options = { as: "bibtex" }
|
34
|
+
expect { subject.read id }.to output(/@article{https:\/\/doi.org\/10.7554\/elife.01567/).to_stdout
|
35
|
+
end
|
31
36
|
end
|
32
37
|
|
33
38
|
context "datacite" do
|
@@ -42,6 +47,11 @@ describe Bolognese::CLI do
|
|
42
47
|
expect { subject.read id }.to output(/Phylogeny, Malaria, Parasites, Taxonomy, Mitochondrial genome, Africa, Plasmodium/).to_stdout
|
43
48
|
end
|
44
49
|
|
50
|
+
it 'as bibtex' do
|
51
|
+
subject.options = { as: "bibtex" }
|
52
|
+
expect { subject.read id }.to output(/@misc{https:\/\/doi.org\/10.5061\/dryad.8515/).to_stdout
|
53
|
+
end
|
54
|
+
|
45
55
|
it 'as datacite' do
|
46
56
|
subject.options = { as: "datacite" }
|
47
57
|
expect { subject.read id }.to output(/http:\/\/datacite.org\/schema\/kernel-3/).to_stdout
|
@@ -64,6 +74,39 @@ describe Bolognese::CLI do
|
|
64
74
|
subject.options = { as: "datacite" }
|
65
75
|
expect { subject.read id }.to output(/http:\/\/datacite.org\/schema\/kernel-4/).to_stdout
|
66
76
|
end
|
77
|
+
|
78
|
+
it 'as bibtex' do
|
79
|
+
subject.options = { as: "bibtex" }
|
80
|
+
expect { subject.read id }.to output(/@article{https:\/\/doi.org\/10.5438\/4k3m-nyvg/).to_stdout
|
81
|
+
end
|
67
82
|
end
|
68
83
|
end
|
84
|
+
|
85
|
+
describe "open" do
|
86
|
+
context "bibtex" do
|
87
|
+
let(:file) { fixture_path + "crossref.bib" }
|
88
|
+
|
89
|
+
it 'default' do
|
90
|
+
expect { subject.open file }.to output(/datePublished/).to_stdout
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'as schema_org' do
|
94
|
+
subject.options = { as: "schema_org" }
|
95
|
+
expect { subject.open file }.to output(/datePublished/).to_stdout
|
96
|
+
end
|
97
|
+
|
98
|
+
it 'as datacite' do
|
99
|
+
subject.options = { as: "datacite" }
|
100
|
+
expect { subject.open file }.to output(/http:\/\/datacite.org\/schema\/kernel-4/).to_stdout
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
# context "unsupported format" do
|
105
|
+
# let(:file) { fixture_path + "crossref.xxx" }
|
106
|
+
#
|
107
|
+
# it 'error' do
|
108
|
+
# expect { subject.open file }.to output(/datePublished/).to_stderr
|
109
|
+
# end
|
110
|
+
# end
|
111
|
+
end
|
69
112
|
end
|