bergamasco 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/lib/bergamasco.rb +1 -1
- data/lib/bergamasco/markdown.rb +1 -26
- data/lib/bergamasco/pandoc.rb +60 -0
- data/lib/bergamasco/version.rb +1 -1
- data/spec/markdown_spec.rb +2 -9
- data/spec/pandoc_spec.rb +38 -0
- data/spec/summarize_spec.rb +1 -1
- metadata +4 -4
- data/lib/bergamasco/jats.rb +0 -33
- data/spec/jats_spec.rb +0 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c9fd041fa952b8ce0d759d55f1a5ea9fbb0eeba
|
4
|
+
data.tar.gz: 2fabed95db18a9ab876e1d6843fe01ca9d645c88
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c073e07cb08e34c90c5afdfd36ac11fb88fc341a26f7d892d7bd9c5aed6e90632f5e937f8bcfa015e40e2c191b076ff38707c365faa2da5e91e289fa143bb7a9
|
7
|
+
data.tar.gz: 1855153f47914dae465a7db87cfe2910eb53fa95bec68e7defaa8e764929bf4cb946109c15b52c0243da1c5d476bdcfc9566589be925ad8ff5784b8abd941ea5
|
data/Gemfile.lock
CHANGED
data/lib/bergamasco.rb
CHANGED
data/lib/bergamasco/markdown.rb
CHANGED
@@ -48,7 +48,7 @@ module Bergamasco
|
|
48
48
|
metadata = yaml.extract!(*keys).compact
|
49
49
|
|
50
50
|
content = YAML_FRONT_MATTER_REGEXP.match(file).post_match
|
51
|
-
html =
|
51
|
+
html = Bergamasco::Pandoc.convert(content, options)
|
52
52
|
metadata["summary"] = Bergamasco::Summarize.summary_from_html(html, options)
|
53
53
|
metadata["references"] = extract_references(html)
|
54
54
|
metadata["date"] = metadata["date"].iso8601
|
@@ -59,31 +59,6 @@ module Bergamasco
|
|
59
59
|
IO.write(filepath, content.to_yaml)
|
60
60
|
end
|
61
61
|
|
62
|
-
def self.render_html(text, options={})
|
63
|
-
PandocRuby.new(text, options.except(:skip_yaml_header,
|
64
|
-
:separator,
|
65
|
-
:sitepath,
|
66
|
-
:authorpath,
|
67
|
-
:referencespath,
|
68
|
-
:username,
|
69
|
-
:password,
|
70
|
-
:sandbox,
|
71
|
-
:prefix,
|
72
|
-
:number)).to_html
|
73
|
-
rescue Errno::ENOENT
|
74
|
-
# fallback to commonmarker if pandoc is not installed.
|
75
|
-
# Commonmarker doesn't parse or ignore yaml frontmatter
|
76
|
-
text = split_yaml_frontmatter(text).last if options[:skip_yaml_header]
|
77
|
-
CommonMarker.render_html(text, :default)
|
78
|
-
end
|
79
|
-
|
80
|
-
def self.write_bibliograpy_to_yaml(bib_path, yaml_path)
|
81
|
-
yaml = `pandoc-citeproc --bib2yaml #{bib_path} 2>&1`
|
82
|
-
return nil if $?.exitstatus > 0
|
83
|
-
IO.write(yaml_path, yaml)
|
84
|
-
SafeYAML.load(yaml)
|
85
|
-
end
|
86
|
-
|
87
62
|
# expects a references list generated by pandoc
|
88
63
|
def self.extract_references(html)
|
89
64
|
doc = Nokogiri::HTML(html)
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module Bergamasco
|
2
|
+
module Pandoc
|
3
|
+
# Options understood by pandoc, taken from http://pandoc.org/MANUAL.html.
|
4
|
+
# Ignore all other options passed to pandoc, unless overridden.
|
5
|
+
AVAILABLE_OPTIONS = Set.new %w(from read to write output data-dir strict
|
6
|
+
parse-raw smart old-dashes base-header-level indented-code-classes filter
|
7
|
+
normalize preserve-tabs tab-stop track-changes file-scope extract-media
|
8
|
+
standalone template metadata variable print-default-template
|
9
|
+
print-default-data-file no-wrap wrap columns toc table-of-contents toc-depth
|
10
|
+
no-highlight highlight-style include-in-header include-before-body
|
11
|
+
include-after-body self-contained offline html5 html-q-tags ascii
|
12
|
+
reference-links reference-location atx-headers chapters top-level-division
|
13
|
+
number-sections number-offsetS no-tex-ligatures listings incremental
|
14
|
+
slide-level section-divs default-image-extension email-obfuscation id-prefix
|
15
|
+
title-prefix css reference-odt reference-docx epub-stylesheet
|
16
|
+
epub-cover-image epub-metadata epub-embed-font epub-chapter-level
|
17
|
+
latex-engine latex-engine-opt bibliography csl citation-abbreviations natbib
|
18
|
+
biblatex latexmathml asciimathml mathml mimetex webtex jsmath mathjax katex
|
19
|
+
katex-stylesheet gladtex trace dump-args ignore-args verbose bash-completion
|
20
|
+
list-input-formats list-output-formats list-extensions
|
21
|
+
list-highlight-languages list-highlight-styles)
|
22
|
+
ALIAS_OPTIONS = Set.new %w(f r t w o R S F p s M V D H B A 5 N i T c m)
|
23
|
+
ALLOWED_OPTIONS = AVAILABLE_OPTIONS + ALIAS_OPTIONS
|
24
|
+
|
25
|
+
def self.convert(text, options={})
|
26
|
+
options = options.select { |k, v| ALLOWED_OPTIONS.include?(k.to_s.gsub('_', '-')) }
|
27
|
+
|
28
|
+
options[:from] ||= :markdown
|
29
|
+
options[:to] ||= :html
|
30
|
+
|
31
|
+
PandocRuby.convert(text, options)
|
32
|
+
rescue Errno::ENOENT
|
33
|
+
puts "Pandoc is not installed"
|
34
|
+
end
|
35
|
+
|
36
|
+
def self.convert_to_jats(text, options={})
|
37
|
+
options = options.merge(template: "templates/default.jats",
|
38
|
+
to: "lib/bergamasco/jats.lua",
|
39
|
+
csl: "lib/bergamasco/jats.csl")
|
40
|
+
options = options.merge(metadata: options[:metadata]) if options[:metadata].present?
|
41
|
+
|
42
|
+
convert(text, options)
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.write_jats(filepath, options={})
|
46
|
+
file = IO.read(filepath)
|
47
|
+
xml_path = File.join(File.dirname(filepath), File.basename(filepath, ".html.md")) + ".xml"
|
48
|
+
xml = convert_to_jats(file, options)
|
49
|
+
IO.write(xml_path, xml)
|
50
|
+
xml_path
|
51
|
+
end
|
52
|
+
|
53
|
+
def self.write_bibliograpy_to_yaml(bib_path, yaml_path)
|
54
|
+
yaml = `pandoc-citeproc --bib2yaml #{bib_path} 2>&1`
|
55
|
+
return nil if $?.exitstatus > 0
|
56
|
+
IO.write(yaml_path, yaml)
|
57
|
+
SafeYAML.load(yaml)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
data/lib/bergamasco/version.rb
CHANGED
data/spec/markdown_spec.rb
CHANGED
@@ -64,13 +64,6 @@ describe Bergamasco::Markdown do
|
|
64
64
|
expect(length).to eq(text.length)
|
65
65
|
end
|
66
66
|
|
67
|
-
it 'should write bibliography to yaml' do
|
68
|
-
bib_path = fixture_path + 'references.bib'
|
69
|
-
yaml_path = fixture_path + 'references.yaml'
|
70
|
-
yaml = subject.write_bibliograpy_to_yaml(bib_path, yaml_path)
|
71
|
-
expect(yaml["references"].length).to eq(61)
|
72
|
-
end
|
73
|
-
|
74
67
|
it 'should update file' do
|
75
68
|
filepath = fixture_path + 'cool-dois.html.md'
|
76
69
|
new_metadata = { "doi" => "10.23725/0000-03VC"}
|
@@ -81,14 +74,14 @@ describe Bergamasco::Markdown do
|
|
81
74
|
it 'should convert markdown' do
|
82
75
|
filepath = fixture_path + 'cool-dois.html.md'
|
83
76
|
file = IO.read(filepath)
|
84
|
-
html =
|
77
|
+
html = Bergamasco::Pandoc.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
85
78
|
expect(html).to start_with("<p>In 1998 Tim Berners-Lee coined the term cool URIs <span class=\"citation\">(1998)</span>, that is URIs that don’t change.")
|
86
79
|
end
|
87
80
|
|
88
81
|
it 'should extract references' do
|
89
82
|
filepath = fixture_path + 'cool-dois.html.md'
|
90
83
|
file = IO.read(filepath)
|
91
|
-
html =
|
84
|
+
html = Bergamasco::Pandoc.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
92
85
|
refs = subject.extract_references(html)
|
93
86
|
expect(refs).to eq(["https://www.w3.org/Provider/Style/URI", "https://doi.org/10.1371/journal.pone.0115253"])
|
94
87
|
end
|
data/spec/pandoc_spec.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Bergamasco::Pandoc do
|
4
|
+
subject { Bergamasco::Pandoc }
|
5
|
+
|
6
|
+
it 'should convert markdown' do
|
7
|
+
filepath = fixture_path + 'cool-dois.html.md'
|
8
|
+
file = IO.read(filepath)
|
9
|
+
html = subject.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
10
|
+
expect(html).to start_with("<p>In 1998 Tim Berners-Lee coined the term cool URIs <span class=\"citation\">(1998)</span>, that is URIs that don’t change.")
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'should convert to jats' do
|
14
|
+
filepath = fixture_path + 'cool-dois.html.md'
|
15
|
+
file = IO.read(filepath)
|
16
|
+
xml = subject.convert_to_jats(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
17
|
+
doc = Nokogiri::XML(xml)
|
18
|
+
article_id = doc.at_xpath("//article-id")
|
19
|
+
expect(article_id.text).to eq("10.23725/0000-03VC")
|
20
|
+
expect(article_id.values.first).to eq("doi")
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'should write jats xml' do
|
24
|
+
filepath = fixture_path + 'cool-dois.html.md'
|
25
|
+
xml_path = subject.write_jats(filepath, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
26
|
+
doc = File.open(xml_path) { |f| Nokogiri::XML(f) }
|
27
|
+
article_id = doc.at_xpath("//article-id")
|
28
|
+
expect(article_id.text).to eq("10.23725/0000-03VC")
|
29
|
+
expect(article_id.values.first).to eq("doi")
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'should write bibliography to yaml' do
|
33
|
+
bib_path = fixture_path + 'references.bib'
|
34
|
+
yaml_path = fixture_path + 'references.yaml'
|
35
|
+
yaml = subject.write_bibliograpy_to_yaml(bib_path, yaml_path)
|
36
|
+
expect(yaml["references"].length).to eq(61)
|
37
|
+
end
|
38
|
+
end
|
data/spec/summarize_spec.rb
CHANGED
@@ -30,7 +30,7 @@ describe Bergamasco::Summarize do
|
|
30
30
|
it 'should truncate at separator and convert to html' do
|
31
31
|
filepath = fixture_path + 'cool-dois-without-yml.md'
|
32
32
|
file = IO.read(filepath)
|
33
|
-
html = Bergamasco::
|
33
|
+
html = Bergamasco::Pandoc.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.bib')
|
34
34
|
content = subject.summary_from_html(html, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.bib')
|
35
35
|
expect(content).to start_with("In 1998 Tim Berners-Lee coined the term cool URIs (1998), that is URIs that don’t change.")
|
36
36
|
expect(content).to end_with("the referenced resource.")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bergamasco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin Fenner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-12-
|
11
|
+
date: 2016-12-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -269,8 +269,8 @@ files:
|
|
269
269
|
- lib/bergamasco.rb
|
270
270
|
- lib/bergamasco/jats.csl
|
271
271
|
- lib/bergamasco/jats.lua
|
272
|
-
- lib/bergamasco/jats.rb
|
273
272
|
- lib/bergamasco/markdown.rb
|
273
|
+
- lib/bergamasco/pandoc.rb
|
274
274
|
- lib/bergamasco/sanitize.rb
|
275
275
|
- lib/bergamasco/summarize.rb
|
276
276
|
- lib/bergamasco/version.rb
|
@@ -283,8 +283,8 @@ files:
|
|
283
283
|
- spec/fixtures/cool-dois.yml
|
284
284
|
- spec/fixtures/references.bib
|
285
285
|
- spec/fixtures/references.yaml
|
286
|
-
- spec/jats_spec.rb
|
287
286
|
- spec/markdown_spec.rb
|
287
|
+
- spec/pandoc_spec.rb
|
288
288
|
- spec/sanitize_spec.rb
|
289
289
|
- spec/spec_helper.rb
|
290
290
|
- spec/summarize_spec.rb
|
data/lib/bergamasco/jats.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
module Bergamasco
|
2
|
-
module Jats
|
3
|
-
|
4
|
-
def self.render_jats(text, options={})
|
5
|
-
options = options.merge(template: "templates/default.jats",
|
6
|
-
to: "lib/bergamasco/jats.lua",
|
7
|
-
csl: "lib/bergamasco/jats.csl")
|
8
|
-
options = options.merge(metadata: options[:metadata]) if options[:metadata].present?
|
9
|
-
converter = PandocRuby.new(text, options.except(:skip_yaml_header,
|
10
|
-
:separator,
|
11
|
-
:sitepath,
|
12
|
-
:authorpath,
|
13
|
-
:referencespath,
|
14
|
-
:username,
|
15
|
-
:password,
|
16
|
-
:sandbox,
|
17
|
-
:prefix,
|
18
|
-
:number))
|
19
|
-
converter.convert
|
20
|
-
rescue Errno::ENOENT
|
21
|
-
# if pandoc is not installed.
|
22
|
-
puts "Pandoc is not installed"
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.write_jats(filepath, options={})
|
26
|
-
file = IO.read(filepath)
|
27
|
-
xml_path = File.join(File.dirname(filepath), File.basename(filepath, ".html.md")) + ".xml"
|
28
|
-
xml = render_jats(file, options)
|
29
|
-
IO.write(xml_path, xml)
|
30
|
-
xml_path
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
data/spec/jats_spec.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Bergamasco::Jats do
|
4
|
-
subject { Bergamasco::Jats }
|
5
|
-
|
6
|
-
it 'should convert to jats' do
|
7
|
-
filepath = fixture_path + 'cool-dois.html.md'
|
8
|
-
file = IO.read(filepath)
|
9
|
-
xml = subject.render_jats(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
10
|
-
doc = Nokogiri::XML(xml)
|
11
|
-
article_id = doc.at_xpath("//article-id")
|
12
|
-
expect(article_id.text).to eq("10.23725/0000-03VC")
|
13
|
-
expect(article_id.values.first).to eq("doi")
|
14
|
-
end
|
15
|
-
|
16
|
-
it 'should write jats xml' do
|
17
|
-
filepath = fixture_path + 'cool-dois.html.md'
|
18
|
-
xml_path = subject.write_jats(filepath, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
19
|
-
doc = File.open(xml_path) { |f| Nokogiri::XML(f) }
|
20
|
-
article_id = doc.at_xpath("//article-id")
|
21
|
-
expect(article_id.text).to eq("10.23725/0000-03VC")
|
22
|
-
expect(article_id.values.first).to eq("doi")
|
23
|
-
end
|
24
|
-
end
|