bergamasco 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/lib/bergamasco.rb +1 -1
- data/lib/bergamasco/markdown.rb +1 -26
- data/lib/bergamasco/pandoc.rb +60 -0
- data/lib/bergamasco/version.rb +1 -1
- data/spec/markdown_spec.rb +2 -9
- data/spec/pandoc_spec.rb +38 -0
- data/spec/summarize_spec.rb +1 -1
- metadata +4 -4
- data/lib/bergamasco/jats.rb +0 -33
- data/spec/jats_spec.rb +0 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c9fd041fa952b8ce0d759d55f1a5ea9fbb0eeba
|
4
|
+
data.tar.gz: 2fabed95db18a9ab876e1d6843fe01ca9d645c88
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c073e07cb08e34c90c5afdfd36ac11fb88fc341a26f7d892d7bd9c5aed6e90632f5e937f8bcfa015e40e2c191b076ff38707c365faa2da5e91e289fa143bb7a9
|
7
|
+
data.tar.gz: 1855153f47914dae465a7db87cfe2910eb53fa95bec68e7defaa8e764929bf4cb946109c15b52c0243da1c5d476bdcfc9566589be925ad8ff5784b8abd941ea5
|
data/Gemfile.lock
CHANGED
data/lib/bergamasco.rb
CHANGED
data/lib/bergamasco/markdown.rb
CHANGED
@@ -48,7 +48,7 @@ module Bergamasco
|
|
48
48
|
metadata = yaml.extract!(*keys).compact
|
49
49
|
|
50
50
|
content = YAML_FRONT_MATTER_REGEXP.match(file).post_match
|
51
|
-
html = render_html(content, options)
|
51
|
+
html = Bergamasco::Pandoc.convert(content, options)
|
52
52
|
metadata["summary"] = Bergamasco::Summarize.summary_from_html(html, options)
|
53
53
|
metadata["references"] = extract_references(html)
|
54
54
|
metadata["date"] = metadata["date"].iso8601
|
@@ -59,31 +59,6 @@ module Bergamasco
|
|
59
59
|
IO.write(filepath, content.to_yaml)
|
60
60
|
end
|
61
61
|
|
62
|
-
def self.render_html(text, options={})
|
63
|
-
PandocRuby.new(text, options.except(:skip_yaml_header,
|
64
|
-
:separator,
|
65
|
-
:sitepath,
|
66
|
-
:authorpath,
|
67
|
-
:referencespath,
|
68
|
-
:username,
|
69
|
-
:password,
|
70
|
-
:sandbox,
|
71
|
-
:prefix,
|
72
|
-
:number)).to_html
|
73
|
-
rescue Errno::ENOENT
|
74
|
-
# fallback to commonmarker if pandoc is not installed.
|
75
|
-
# Commonmarker doesn't parse or ignore yaml frontmatter
|
76
|
-
text = split_yaml_frontmatter(text).last if options[:skip_yaml_header]
|
77
|
-
CommonMarker.render_html(text, :default)
|
78
|
-
end
|
79
|
-
|
80
|
-
def self.write_bibliograpy_to_yaml(bib_path, yaml_path)
|
81
|
-
yaml = `pandoc-citeproc --bib2yaml #{bib_path} 2>&1`
|
82
|
-
return nil if $?.exitstatus > 0
|
83
|
-
IO.write(yaml_path, yaml)
|
84
|
-
SafeYAML.load(yaml)
|
85
|
-
end
|
86
|
-
|
87
62
|
# expects a references list generated by pandoc
|
88
63
|
def self.extract_references(html)
|
89
64
|
doc = Nokogiri::HTML(html)
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module Bergamasco
|
2
|
+
module Pandoc
|
3
|
+
# Options understood by pandoc, taken from http://pandoc.org/MANUAL.html.
|
4
|
+
# Ignore all other options passed to pandoc, unless overridden.
|
5
|
+
AVAILABLE_OPTIONS = Set.new %w(from read to write output data-dir strict
|
6
|
+
parse-raw smart old-dashes base-header-level indented-code-classes filter
|
7
|
+
normalize preserve-tabs tab-stop track-changes file-scope extract-media
|
8
|
+
standalone template metadata variable print-default-template
|
9
|
+
print-default-data-file no-wrap wrap columns toc table-of-contents toc-depth
|
10
|
+
no-highlight highlight-style include-in-header include-before-body
|
11
|
+
include-after-body self-contained offline html5 html-q-tags ascii
|
12
|
+
reference-links reference-location atx-headers chapters top-level-division
|
13
|
+
number-sections number-offsetS no-tex-ligatures listings incremental
|
14
|
+
slide-level section-divs default-image-extension email-obfuscation id-prefix
|
15
|
+
title-prefix css reference-odt reference-docx epub-stylesheet
|
16
|
+
epub-cover-image epub-metadata epub-embed-font epub-chapter-level
|
17
|
+
latex-engine latex-engine-opt bibliography csl citation-abbreviations natbib
|
18
|
+
biblatex latexmathml asciimathml mathml mimetex webtex jsmath mathjax katex
|
19
|
+
katex-stylesheet gladtex trace dump-args ignore-args verbose bash-completion
|
20
|
+
list-input-formats list-output-formats list-extensions
|
21
|
+
list-highlight-languages list-highlight-styles)
|
22
|
+
ALIAS_OPTIONS = Set.new %w(f r t w o R S F p s M V D H B A 5 N i T c m)
|
23
|
+
ALLOWED_OPTIONS = AVAILABLE_OPTIONS + ALIAS_OPTIONS
|
24
|
+
|
25
|
+
def self.convert(text, options={})
|
26
|
+
options = options.select { |k, v| ALLOWED_OPTIONS.include?(k.to_s.gsub('_', '-')) }
|
27
|
+
|
28
|
+
options[:from] ||= :markdown
|
29
|
+
options[:to] ||= :html
|
30
|
+
|
31
|
+
PandocRuby.convert(text, options)
|
32
|
+
rescue Errno::ENOENT
|
33
|
+
puts "Pandoc is not installed"
|
34
|
+
end
|
35
|
+
|
36
|
+
def self.convert_to_jats(text, options={})
|
37
|
+
options = options.merge(template: "templates/default.jats",
|
38
|
+
to: "lib/bergamasco/jats.lua",
|
39
|
+
csl: "lib/bergamasco/jats.csl")
|
40
|
+
options = options.merge(metadata: options[:metadata]) if options[:metadata].present?
|
41
|
+
|
42
|
+
convert(text, options)
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.write_jats(filepath, options={})
|
46
|
+
file = IO.read(filepath)
|
47
|
+
xml_path = File.join(File.dirname(filepath), File.basename(filepath, ".html.md")) + ".xml"
|
48
|
+
xml = convert_to_jats(file, options)
|
49
|
+
IO.write(xml_path, xml)
|
50
|
+
xml_path
|
51
|
+
end
|
52
|
+
|
53
|
+
def self.write_bibliograpy_to_yaml(bib_path, yaml_path)
|
54
|
+
yaml = `pandoc-citeproc --bib2yaml #{bib_path} 2>&1`
|
55
|
+
return nil if $?.exitstatus > 0
|
56
|
+
IO.write(yaml_path, yaml)
|
57
|
+
SafeYAML.load(yaml)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
data/lib/bergamasco/version.rb
CHANGED
data/spec/markdown_spec.rb
CHANGED
@@ -64,13 +64,6 @@ describe Bergamasco::Markdown do
|
|
64
64
|
expect(length).to eq(text.length)
|
65
65
|
end
|
66
66
|
|
67
|
-
it 'should write bibliography to yaml' do
|
68
|
-
bib_path = fixture_path + 'references.bib'
|
69
|
-
yaml_path = fixture_path + 'references.yaml'
|
70
|
-
yaml = subject.write_bibliograpy_to_yaml(bib_path, yaml_path)
|
71
|
-
expect(yaml["references"].length).to eq(61)
|
72
|
-
end
|
73
|
-
|
74
67
|
it 'should update file' do
|
75
68
|
filepath = fixture_path + 'cool-dois.html.md'
|
76
69
|
new_metadata = { "doi" => "10.23725/0000-03VC"}
|
@@ -81,14 +74,14 @@ describe Bergamasco::Markdown do
|
|
81
74
|
it 'should convert markdown' do
|
82
75
|
filepath = fixture_path + 'cool-dois.html.md'
|
83
76
|
file = IO.read(filepath)
|
84
|
-
html =
|
77
|
+
html = Bergamasco::Pandoc.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
85
78
|
expect(html).to start_with("<p>In 1998 Tim Berners-Lee coined the term cool URIs <span class=\"citation\">(1998)</span>, that is URIs that don’t change.")
|
86
79
|
end
|
87
80
|
|
88
81
|
it 'should extract references' do
|
89
82
|
filepath = fixture_path + 'cool-dois.html.md'
|
90
83
|
file = IO.read(filepath)
|
91
|
-
html =
|
84
|
+
html = Bergamasco::Pandoc.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
92
85
|
refs = subject.extract_references(html)
|
93
86
|
expect(refs).to eq(["https://www.w3.org/Provider/Style/URI", "https://doi.org/10.1371/journal.pone.0115253"])
|
94
87
|
end
|
data/spec/pandoc_spec.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Bergamasco::Pandoc do
|
4
|
+
subject { Bergamasco::Pandoc }
|
5
|
+
|
6
|
+
it 'should convert markdown' do
|
7
|
+
filepath = fixture_path + 'cool-dois.html.md'
|
8
|
+
file = IO.read(filepath)
|
9
|
+
html = subject.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
10
|
+
expect(html).to start_with("<p>In 1998 Tim Berners-Lee coined the term cool URIs <span class=\"citation\">(1998)</span>, that is URIs that don’t change.")
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'should convert to jats' do
|
14
|
+
filepath = fixture_path + 'cool-dois.html.md'
|
15
|
+
file = IO.read(filepath)
|
16
|
+
xml = subject.convert_to_jats(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
17
|
+
doc = Nokogiri::XML(xml)
|
18
|
+
article_id = doc.at_xpath("//article-id")
|
19
|
+
expect(article_id.text).to eq("10.23725/0000-03VC")
|
20
|
+
expect(article_id.values.first).to eq("doi")
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'should write jats xml' do
|
24
|
+
filepath = fixture_path + 'cool-dois.html.md'
|
25
|
+
xml_path = subject.write_jats(filepath, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
26
|
+
doc = File.open(xml_path) { |f| Nokogiri::XML(f) }
|
27
|
+
article_id = doc.at_xpath("//article-id")
|
28
|
+
expect(article_id.text).to eq("10.23725/0000-03VC")
|
29
|
+
expect(article_id.values.first).to eq("doi")
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'should write bibliography to yaml' do
|
33
|
+
bib_path = fixture_path + 'references.bib'
|
34
|
+
yaml_path = fixture_path + 'references.yaml'
|
35
|
+
yaml = subject.write_bibliograpy_to_yaml(bib_path, yaml_path)
|
36
|
+
expect(yaml["references"].length).to eq(61)
|
37
|
+
end
|
38
|
+
end
|
data/spec/summarize_spec.rb
CHANGED
@@ -30,7 +30,7 @@ describe Bergamasco::Summarize do
|
|
30
30
|
it 'should truncate at separator and convert to html' do
|
31
31
|
filepath = fixture_path + 'cool-dois-without-yml.md'
|
32
32
|
file = IO.read(filepath)
|
33
|
-
html = Bergamasco::
|
33
|
+
html = Bergamasco::Pandoc.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.bib')
|
34
34
|
content = subject.summary_from_html(html, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.bib')
|
35
35
|
expect(content).to start_with("In 1998 Tim Berners-Lee coined the term cool URIs (1998), that is URIs that don’t change.")
|
36
36
|
expect(content).to end_with("the referenced resource.")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bergamasco
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin Fenner
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-12-
|
11
|
+
date: 2016-12-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -269,8 +269,8 @@ files:
|
|
269
269
|
- lib/bergamasco.rb
|
270
270
|
- lib/bergamasco/jats.csl
|
271
271
|
- lib/bergamasco/jats.lua
|
272
|
-
- lib/bergamasco/jats.rb
|
273
272
|
- lib/bergamasco/markdown.rb
|
273
|
+
- lib/bergamasco/pandoc.rb
|
274
274
|
- lib/bergamasco/sanitize.rb
|
275
275
|
- lib/bergamasco/summarize.rb
|
276
276
|
- lib/bergamasco/version.rb
|
@@ -283,8 +283,8 @@ files:
|
|
283
283
|
- spec/fixtures/cool-dois.yml
|
284
284
|
- spec/fixtures/references.bib
|
285
285
|
- spec/fixtures/references.yaml
|
286
|
-
- spec/jats_spec.rb
|
287
286
|
- spec/markdown_spec.rb
|
287
|
+
- spec/pandoc_spec.rb
|
288
288
|
- spec/sanitize_spec.rb
|
289
289
|
- spec/spec_helper.rb
|
290
290
|
- spec/summarize_spec.rb
|
data/lib/bergamasco/jats.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
module Bergamasco
|
2
|
-
module Jats
|
3
|
-
|
4
|
-
def self.render_jats(text, options={})
|
5
|
-
options = options.merge(template: "templates/default.jats",
|
6
|
-
to: "lib/bergamasco/jats.lua",
|
7
|
-
csl: "lib/bergamasco/jats.csl")
|
8
|
-
options = options.merge(metadata: options[:metadata]) if options[:metadata].present?
|
9
|
-
converter = PandocRuby.new(text, options.except(:skip_yaml_header,
|
10
|
-
:separator,
|
11
|
-
:sitepath,
|
12
|
-
:authorpath,
|
13
|
-
:referencespath,
|
14
|
-
:username,
|
15
|
-
:password,
|
16
|
-
:sandbox,
|
17
|
-
:prefix,
|
18
|
-
:number))
|
19
|
-
converter.convert
|
20
|
-
rescue Errno::ENOENT
|
21
|
-
# if pandoc is not installed.
|
22
|
-
puts "Pandoc is not installed"
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.write_jats(filepath, options={})
|
26
|
-
file = IO.read(filepath)
|
27
|
-
xml_path = File.join(File.dirname(filepath), File.basename(filepath, ".html.md")) + ".xml"
|
28
|
-
xml = render_jats(file, options)
|
29
|
-
IO.write(xml_path, xml)
|
30
|
-
xml_path
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
data/spec/jats_spec.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Bergamasco::Jats do
|
4
|
-
subject { Bergamasco::Jats }
|
5
|
-
|
6
|
-
it 'should convert to jats' do
|
7
|
-
filepath = fixture_path + 'cool-dois.html.md'
|
8
|
-
file = IO.read(filepath)
|
9
|
-
xml = subject.render_jats(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
10
|
-
doc = Nokogiri::XML(xml)
|
11
|
-
article_id = doc.at_xpath("//article-id")
|
12
|
-
expect(article_id.text).to eq("10.23725/0000-03VC")
|
13
|
-
expect(article_id.values.first).to eq("doi")
|
14
|
-
end
|
15
|
-
|
16
|
-
it 'should write jats xml' do
|
17
|
-
filepath = fixture_path + 'cool-dois.html.md'
|
18
|
-
xml_path = subject.write_jats(filepath, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
|
19
|
-
doc = File.open(xml_path) { |f| Nokogiri::XML(f) }
|
20
|
-
article_id = doc.at_xpath("//article-id")
|
21
|
-
expect(article_id.text).to eq("10.23725/0000-03VC")
|
22
|
-
expect(article_id.values.first).to eq("doi")
|
23
|
-
end
|
24
|
-
end
|