bolognese 0.5.3 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
19
19
  s.add_dependency 'nokogiri', '~> 1.6', '>= 1.6.8'
20
20
  s.add_dependency 'builder', '~> 3.2', '>= 3.2.2'
21
21
  s.add_dependency 'activesupport', '~> 4.2', '>= 4.2.5'
22
- s.add_dependency 'dotenv', '~> 2.1', '>= 2.1.1'
22
+ s.add_dependency 'bibtex-ruby', '~> 4.1'
23
23
  s.add_dependency 'thor', '~> 0.19'
24
24
  s.add_dependency 'namae', '~> 0.10.2'
25
25
  s.add_dependency 'postrank-uri', '~> 1.0', '>= 1.0.18'
@@ -2,12 +2,14 @@ require 'active_support/all'
2
2
  require 'nokogiri'
3
3
  require 'maremma'
4
4
  require 'postrank-uri'
5
+ require 'bibtex'
5
6
 
6
7
  require "bolognese/version"
7
8
  require "bolognese/metadata"
8
9
  require "bolognese/crossref"
9
10
  require "bolognese/datacite"
10
11
  require "bolognese/schema_org"
12
+ require "bolognese/bibtex"
11
13
  require "bolognese/orcid"
12
14
  require "bolognese/cli"
13
15
  require "bolognese/string"
@@ -0,0 +1,101 @@
1
module Bolognese
  # Metadata reader for BibTeX input.
  #
  # Parses the raw string with BibTeX.parse and exposes the first entry
  # through schema.org-style accessors. Methods that are used but not defined
  # here (for example as_schema_org, aliased below) are expected to come from
  # the Metadata superclass.
  class Bibtex < Metadata
    # BibTeX entry type => schema.org type. Types not listed here fall back to
    # "ScholarlyArticle" in #type.
    BIB_TO_SO_TRANSLATIONS = {
      "article" => "ScholarlyArticle"
    }.freeze

    # schema.org type => BibTeX entry type.
    SO_TO_BIB_TRANSLATIONS = {
      "Article" => "article",
      "AudioObject" => "misc",
      "Blog" => "misc",
      "BlogPosting" => "article",
      "Collection" => "misc",
      "CreativeWork" => "misc",
      "DataCatalog" => "misc",
      "Dataset" => "misc",
      "Event" => "misc",
      "ImageObject" => "misc",
      "Movie" => "misc",
      "PublicationIssue" => "misc",
      "ScholarlyArticle" => "article",
      "Service" => "misc",
      "SoftwareSourceCode" => "misc",
      "VideoObject" => "misc",
      "WebPage" => "misc",
      "WebSite" => "misc"
    }.freeze

    # @param string [String, nil] raw BibTeX source
    def initialize(string: nil)
      @raw = string
    end

    alias_method :schema_org, :as_schema_org

    # First entry parsed from the raw BibTeX string, or an empty Hash when no
    # input was given.
    # NOTE(review): the accessors below call entry methods (type, doi, ...) on
    # this value, which the empty-Hash fallback does not provide - confirm
    # callers check #exists? first.
    def metadata
      @metadata ||= raw.present? ? BibTeX.parse(raw).first : {}
    end

    # True when an entry was parsed from the input.
    def exists?
      metadata.present?
    end

    # schema.org type for the entry, defaulting to "ScholarlyArticle".
    def type
      BIB_TO_SO_TRANSLATIONS[metadata.type.to_s] || "ScholarlyArticle"
    end

    # Looks the schema.org type up in SO_TO_DC_TRANSLATIONS, a constant that
    # is defined outside this class.
    def resource_type_general
      SO_TO_DC_TRANSLATIONS[type]
    end

    def doi
      metadata.doi
    end

    def url
      metadata.url
    end

    # The DOI passed through normalize_doi (a helper defined elsewhere).
    def id
      normalize_doi(doi)
    end

    # One "Person" hash per parsed author; nil name parts are dropped.
    def author
      Array(metadata.author).map do |a|
        { "@type" => "Person",
          "givenName" => a.first,
          "familyName" => a.last }.compact
      end
    end

    def name
      metadata.title
    end

    # Journal name when present, otherwise the publisher.
    def container_title
      metadata.journal.to_s.presence || metadata.publisher.to_s
    end

    def date_published
      metadata.date.to_s.presence
    end

    def publication_year
      metadata.year.to_s.presence
    end

    # "Periodical" hash describing the journal, or nil when the entry has no
    # journal. The "issn" key is omitted when the entry has no ISSN.
    def is_part_of
      if metadata.journal.present?
        { "@type" => "Periodical",
          "name" => metadata.journal.to_s,
          "issn" => metadata.issn.to_s.presence }.compact
      else
        nil
      end
    end

    # The entry's note field, used as the description.
    def description
      metadata.note.to_s.presence
    end
  end
end
@@ -10,6 +10,8 @@ module Bolognese
10
10
  include Bolognese::DoiUtils
11
11
  include Bolognese::Utils
12
12
 
13
+ default_task :open
14
+
13
15
  def self.exit_on_failure?
14
16
  true
15
17
  end
@@ -42,5 +44,32 @@ module Bolognese
42
44
  puts "not implemented"
43
45
  end
44
46
  end
47
+
48
desc "open file", "read metadata from file"
method_option :as, default: "schema_org"
method_option :schema_version
# Reads metadata from a local file and prints it in the requested format.
#
# Only BibTeX files (".bib") are supported; any other extension prints an
# error to stderr and exits with status 1. The output format is the name of
# a method on the reader, taken from the :as option ("schema_org" by default).
#
# @param file [String] path of the file to read
def open(file)
  ext = File.extname(file)
  unless %w(.bib).include? ext
    $stderr.puts "File type #{ext} not supported"
    exit 1
  end

  # File.read rather than IO.read: IO.read treats a path starting with "|"
  # as a command to run, and the path comes straight from the command line.
  string = File.read(file)
  output = options[:as] || "schema_org"

  # The format name is user input, so dispatch with public_send to keep the
  # lookup restricted to the reader's public methods.
  puts Bibtex.new(string: string).public_send(output)
end
45
74
  end
46
75
  end
@@ -31,6 +31,35 @@ module Bolognese
31
31
  "PostedContent" => "ScholarlyArticle"
32
32
  }
33
33
 
34
# Crossref type name => BibTeX entry type. A nil value marks a Crossref type
# with no dedicated BibTeX type; bibtex_type falls back to "misc" for those.
CR_TO_BIB_TRANSLATIONS = {
  "Proceedings" => "proceedings",
  "ReferenceBook" => "book",
  "JournalIssue" => nil,
  "ProceedingsArticle" => nil,
  "Other" => nil,
  "Dissertation" => "phdthesis",
  "Dataset" => nil,
  "EditedBook" => "book",
  "JournalArticle" => "article",
  "Journal" => nil,
  "Report" => nil,
  "BookSeries" => nil,
  "ReportSeries" => nil,
  "BookTrack" => nil,
  "Standard" => nil,
  "BookSection" => "inbook",
  "BookPart" => nil,
  "Book" => "book",
  "BookChapter" => "inbook",
  "StandardSeries" => nil,
  "Monograph" => "book",
  "Component" => nil,
  "ReferenceEntry" => nil,
  "JournalVolume" => nil,
  "BookSet" => nil,
  "PostedContent" => "article"
}.freeze
62
+
34
63
  def initialize(id: nil, string: nil)
35
64
  id = normalize_doi(id) if id.present?
36
65
 
@@ -45,6 +74,7 @@ module Bolognese
45
74
  alias_method :crossref, :raw
46
75
  alias_method :as_crossref, :raw
47
76
  alias_method :schema_org, :as_schema_org
77
+ alias_method :bibtex, :as_bibtex
48
78
 
49
79
  def metadata
50
80
  @metadata ||= raw.present? ? Maremma.from_xml(raw).fetch("doi_records", {}).fetch("doi_record", {}) : {}
@@ -100,6 +130,10 @@ module Bolognese
100
130
  end
101
131
  end
102
132
 
133
# BibTeX entry type for this record: looks additional_type up in
# CR_TO_BIB_TRANSLATIONS and falls back to "misc" when the lookup yields nil.
+ def bibtex_type
134
+ CR_TO_BIB_TRANSLATIONS[additional_type] || "misc"
135
+ end
136
+
103
137
  def name
104
138
  parse_attribute(bibliographic_metadata.dig("titles", "title"))
105
139
  end
@@ -190,9 +224,11 @@ module Bolognese
190
224
  end
191
225
 
192
226
  def container_title
193
- is_part_of.fetch("name", nil)
227
+ is_part_of.to_h.fetch("name", nil)
194
228
  end
195
229
 
230
+ alias_method :journal, :container_title
231
+
196
232
  def citation
197
233
  citations = bibliographic_metadata.dig("citation_list", "citation")
198
234
  Array.wrap(citations).map do |c|
@@ -32,6 +32,7 @@ module Bolognese
32
32
  end
33
33
 
34
34
  alias_method :schema_org, :as_schema_org
35
+ alias_method :bibtex, :as_bibtex
35
36
 
36
37
  def schema_version
37
38
  @schema_version ||= metadata.fetch("xsi:schemaLocation", "").split(" ").first
@@ -76,6 +77,10 @@ module Bolognese
76
77
  metadata.fetch("resourceType", {}).fetch("resourceTypeGeneral", nil)
77
78
  end
78
79
 
80
# BibTeX entry type for this record: looks the schema.org type up in
# Bolognese::Bibtex::SO_TO_BIB_TRANSLATIONS, falling back to "misc".
+ def bibtex_type
81
+ Bolognese::Bibtex::SO_TO_BIB_TRANSLATIONS[type] || "misc"
82
+ end
83
+
79
84
  def name
80
85
  metadata.dig("titles", "title")
81
86
  end
@@ -157,13 +162,17 @@ module Bolognese
157
162
  end
158
163
 
159
164
  def date_published
160
- date("Issued") || metadata.fetch("publicationYear")
165
+ date("Issued") || publication_year
161
166
  end
162
167
 
163
168
  def date_modified
164
169
  date("Updated")
165
170
  end
166
171
 
172
# Publication year as stored in the metadata.
#
# Returns nil instead of raising KeyError when the record has no
# "publicationYear", in line with the neighbouring accessors that fetch with a
# nil default.
#
# @return [Object, nil] the value stored under "publicationYear", or nil
def publication_year
  metadata.fetch("publicationYear", nil)
end
175
+
167
176
  def language
168
177
  metadata.fetch("language", nil)
169
178
  end
@@ -123,7 +123,7 @@ module Bolognese
123
123
  end
124
124
 
125
125
  def insert_publication_year(xml)
126
- xml.publicationYear(date_published && date_published[0..3])
126
+ xml.publicationYear(publication_year)
127
127
  end
128
128
 
129
129
  def resource_type
@@ -12,32 +12,23 @@ module Bolognese
12
12
  include Bolognese::DataciteUtils
13
13
  include Bolognese::Utils
14
14
 
15
- attr_reader :id, :raw, :provider, :schema_version
15
+ attr_reader :id, :raw, :provider, :schema_version, :license, :citation,
16
+ :additional_type, :alternate_name, :url, :version, :keywords, :editor,
17
+ :page_start, :page_end, :date_modified, :language, :spatial_coverage,
18
+ :content_size, :funder, :journal, :bibtex_type
16
19
 
17
20
  alias_method :datacite, :as_datacite
18
21
 
19
- def url
20
-
21
- end
22
-
23
- def version
24
-
25
- end
26
-
27
- def keywords
28
-
22
# Default publication year: the first four characters of date_published, or
# the falsy date_published value itself (nil/false) when there is no date.
# NOTE(review): assumes date_published is a String that starts with the year - confirm.
+ def publication_year
23
+ date_published && date_published[0..3]
29
24
  end
30
25
 
31
26
  def date_created
32
27
 
33
28
  end
34
29
 
35
- def page_start
36
-
37
- end
38
-
39
- def page_end
40
-
30
# Page range built from page_start and page_end joined with "-". A missing
# (nil) bound is dropped, and the result is nil when the joined string is blank.
+ def pagination
31
+ [page_start, page_end].compact.join("-").presence
41
32
  end
42
33
 
43
34
  def has_part
@@ -52,28 +43,21 @@ module Bolognese
52
43
 
53
44
  end
54
45
 
55
- def language
56
-
57
- end
58
-
59
- def spatial_coverage
60
-
61
- end
62
-
63
- def content_size
64
-
65
- end
66
-
67
46
  def schema_version
68
47
 
69
48
  end
70
49
 
71
- def funder
50
# Formats the authors as a BibTeX author list, e.g. "Doe, Jane and Roe, Sam".
#
# Each author is read as a Hash with "familyName" and "givenName" keys.
# Missing name parts are dropped, so an author with only a family name yields
# "Doe" rather than "Doe, ", and authors with neither part are skipped.
#
# @return [String, nil] the joined author list, or nil when there are no
#   usable authors (including when author is nil)
def author_string
  # A lone author Hash is treated as a one-element list.
  authors = author.is_a?(Hash) ? [author] : Array(author)
  names = authors.map { |a| [a["familyName"], a["givenName"]].compact.join(", ") }
                 .reject(&:empty?)
  names.empty? ? nil : names.join(" and ")
end
72
54
 
55
# Publisher name: the "name" entry of publisher converted with to_h, or nil
# when that key is absent (a nil publisher converts to an empty Hash).
+ def publisher_string
56
+ publisher.to_h.fetch("name", nil)
73
57
  end
74
58
 
75
59
  def as_schema_org
76
- { "@context" => "http://schema.org",
60
+ { "@context" => id.present? ? "http://schema.org" : nil,
77
61
  "@type" => type,
78
62
  "@id" => id,
79
63
  "url" => url,
@@ -103,5 +87,23 @@ module Bolognese
103
87
  "provider" => provider
104
88
  }.compact.to_json
105
89
  end
90
+
91
# Serializes the metadata as a BibTeX entry.
#
# Builds the field hash from the reader's accessors, drops fields whose value
# is nil, and lets BibTeX::Entry render it.
#
# @return [String] the result of BibTeX::Entry#to_s for the collected fields
def as_bibtex
  bib = {
    # Readers that do not define bibtex_type return nil from the attr_reader;
    # fall back to the "misc" default the other readers use instead of
    # calling to_sym on nil.
    bibtex_type: (bibtex_type || "misc").to_sym,
    bibtex_key: id,
    doi: doi,
    url: url,
    author: author_string,
    keywords: keywords,
    title: name,
    journal: journal,
    pages: pagination,
    publisher: publisher_string,
    year: publication_year
  }.compact

  BibTeX::Entry.new(bib).to_s
end
106
108
  end
107
109
  end
@@ -14,6 +14,7 @@ module Bolognese
14
14
  end
15
15
 
16
16
  alias_method :schema_org, :as_schema_org
17
+ alias_method :bibtex, :as_bibtex
17
18
 
18
19
  def metadata
19
20
  @metadata ||= raw.present? ? Maremma.from_json(raw) : {}
@@ -47,6 +48,10 @@ module Bolognese
47
48
  metadata.fetch("additionalType", nil)
48
49
  end
49
50
 
51
# BibTeX entry type for this record: looks the schema.org type up in
# Bolognese::Bibtex::SO_TO_BIB_TRANSLATIONS, falling back to "misc".
+ def bibtex_type
52
+ Bolognese::Bibtex::SO_TO_BIB_TRANSLATIONS[type] || "misc"
53
+ end
54
+
50
55
  def name
51
56
  metadata.fetch("name", nil)
52
57
  end
@@ -1,3 +1,3 @@
1
1
  module Bolognese
2
- VERSION = "0.5.3"
2
+ VERSION = "0.6.1"
3
3
  end
@@ -0,0 +1,35 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the BibTeX reader. The subject is built from the contents of the
# crossref.bib fixture file.
+ describe Bolognese::Bibtex, vcr: true do
4
+ let(:string) { IO.read(fixture_path + "crossref.bib") }
5
+
6
+ subject { Bolognese::Bibtex.new(string: string) }
7
+
# The accessors should expose the fixture entry's identifier, type, URL,
# authors, title, publication date and containing periodical.
8
+ context "get metadata" do
9
+ it "Crossref DOI" do
10
+ expect(subject.id).to eq("https://doi.org/10.7554/elife.01567")
11
+ expect(subject.type).to eq("ScholarlyArticle")
12
+ expect(subject.url).to eq("http://elifesciences.org/lookup/doi/10.7554/eLife.01567")
13
+ expect(subject.resource_type_general).to eq("Text")
14
+ expect(subject.author).to eq([{"@type"=>"Person", "givenName"=>"Martial", "familyName"=>"Sankar"},
15
+ {"@type"=>"Person", "givenName"=>"Kaisa", "familyName"=>"Nieminen"},
16
+ {"@type"=>"Person", "givenName"=>"Laura", "familyName"=>"Ragni"},
17
+ {"@type"=>"Person", "givenName"=>"Ioannis", "familyName"=>"Xenarios"},
18
+ {"@type"=>"Person", "givenName"=>"Christian S", "familyName"=>"Hardtke"}])
19
+ expect(subject.name).to eq("Automated quantitative histology reveals vascular morphodynamics during Arabidopsis hypocotyl secondary growth")
20
+ expect(subject.date_published).to eq("2014")
21
+ expect(subject.is_part_of).to eq("@type"=>"Periodical", "name"=>"eLife", "issn"=>"2050-084X")
22
+ end
23
+ end
24
+
# The same entry should report no validation errors and convert to DataCite
# XML carrying the resource type, title and all five creators.
25
+ context "get metadata as datacite xml" do
26
+ it "Crossref DOI" do
27
+ expect(subject.validation_errors).to be_empty
28
+ datacite = Maremma.from_xml(subject.as_datacite).fetch("resource", {})
29
+ expect(datacite.dig("resourceType", "resourceTypeGeneral")).to eq("Text")
30
+ expect(datacite.dig("titles", "title")).to eq("Automated quantitative histology reveals vascular morphodynamics during Arabidopsis hypocotyl secondary growth")
31
+ expect(datacite.dig("creators", "creator").count).to eq(5)
32
+ expect(datacite.dig("creators", "creator").first).to eq("creatorName"=>"Sankar, Martial", "givenName"=>"Martial", "familyName"=>"Sankar")
33
+ end
34
+ end
35
+ end
@@ -28,6 +28,11 @@ describe Bolognese::CLI do
28
28
  subject.options = { as: "datacite" }
29
29
  expect { subject.read id }.to output(/http:\/\/datacite.org\/schema\/kernel-4/).to_stdout
30
30
  end
31
+
32
# With the bibtex output option, `read` should print an @article entry whose
# key is the DOI in https://doi.org/ form (`id` is defined outside this hunk).
+ it 'as bibtex' do
33
+ subject.options = { as: "bibtex" }
34
+ expect { subject.read id }.to output(/@article{https:\/\/doi.org\/10.7554\/elife.01567/).to_stdout
35
+ end
31
36
  end
32
37
 
33
38
  context "datacite" do
@@ -42,6 +47,11 @@ describe Bolognese::CLI do
42
47
  expect { subject.read id }.to output(/Phylogeny, Malaria, Parasites, Taxonomy, Mitochondrial genome, Africa, Plasmodium/).to_stdout
43
48
  end
44
49
 
50
# With the bibtex output option, `read` should print a @misc entry whose key
# is the DOI in https://doi.org/ form (`id` is defined outside this hunk).
+ it 'as bibtex' do
51
+ subject.options = { as: "bibtex" }
52
+ expect { subject.read id }.to output(/@misc{https:\/\/doi.org\/10.5061\/dryad.8515/).to_stdout
53
+ end
54
+
45
55
  it 'as datacite' do
46
56
  subject.options = { as: "datacite" }
47
57
  expect { subject.read id }.to output(/http:\/\/datacite.org\/schema\/kernel-3/).to_stdout
@@ -64,6 +74,39 @@ describe Bolognese::CLI do
64
74
  subject.options = { as: "datacite" }
65
75
  expect { subject.read id }.to output(/http:\/\/datacite.org\/schema\/kernel-4/).to_stdout
66
76
  end
77
+
78
# With the bibtex output option, `read` should print an @article entry whose
# key is the DOI in https://doi.org/ form (`id` is defined outside this hunk).
+ it 'as bibtex' do
79
+ subject.options = { as: "bibtex" }
80
+ expect { subject.read id }.to output(/@article{https:\/\/doi.org\/10.5438\/4k3m-nyvg/).to_stdout
81
+ end
67
82
  end
68
83
  end
84
+
85
# Specs for the `open` command, which reads metadata from a file. All active
# examples use the crossref.bib fixture.
+ describe "open" do
86
+ context "bibtex" do
87
+ let(:file) { fixture_path + "crossref.bib" }
88
+
# No options set: the output is expected to contain datePublished, the same
# check as the explicit schema_org example below.
89
+ it 'default' do
90
+ expect { subject.open file }.to output(/datePublished/).to_stdout
91
+ end
92
+
93
+ it 'as schema_org' do
94
+ subject.options = { as: "schema_org" }
95
+ expect { subject.open file }.to output(/datePublished/).to_stdout
96
+ end
97
+
98
+ it 'as datacite' do
99
+ subject.options = { as: "datacite" }
100
+ expect { subject.open file }.to output(/http:\/\/datacite.org\/schema\/kernel-4/).to_stdout
101
+ end
102
+ end
103
+
# Disabled example for an unsupported file extension.
# NOTE(review): it expects /datePublished/ on stderr, but `open` writes
# "File type ... not supported" to stderr and then exits - fix the expectation
# before re-enabling.
104
+ # context "unsupported format" do
105
+ # let(:file) { fixture_path + "crossref.xxx" }
106
+ #
107
+ # it 'error' do
108
+ # expect { subject.open file }.to output(/datePublished/).to_stderr
109
+ # end
110
+ # end
111
+ end
69
112
  end