bergamasco 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 63d34d9a6d674818e54cedec4ba4ae26bfc98d1a
4
- data.tar.gz: 3d373fd955263556b5766c3e1319a720d39f62b9
3
+ metadata.gz: 4c9fd041fa952b8ce0d759d55f1a5ea9fbb0eeba
4
+ data.tar.gz: 2fabed95db18a9ab876e1d6843fe01ca9d645c88
5
5
  SHA512:
6
- metadata.gz: f8b6b4045b3ffacb31f7bf3f0c019b374cf534ffafa21850dd4a288295fb516e2dffec9793611e5ef8d64919cf3705f83a7eb099f790ef88a1e147b43cc1b51e
7
- data.tar.gz: a06d40fd69e0f7606e0b87fcba01d151ca85e4b1e99afb2523f90eadc121da5dedb25a2a42d3b1375e4a1bcac433ba231ff4fddf194c6cb42f5ae45fb562ade4
6
+ metadata.gz: c073e07cb08e34c90c5afdfd36ac11fb88fc341a26f7d892d7bd9c5aed6e90632f5e937f8bcfa015e40e2c191b076ff38707c365faa2da5e91e289fa143bb7a9
7
+ data.tar.gz: 1855153f47914dae465a7db87cfe2910eb53fa95bec68e7defaa8e764929bf4cb946109c15b52c0243da1c5d476bdcfc9566589be925ad8ff5784b8abd941ea5
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bergamasco (0.3.2)
4
+ bergamasco (0.3.3)
5
5
  activesupport (~> 4.2, >= 4.2.5)
6
6
  addressable (~> 2.3.8)
7
7
  builder (~> 3.2, >= 3.2.2)
@@ -79,4 +79,4 @@ DEPENDENCIES
79
79
  simplecov
80
80
 
81
81
  BUNDLED WITH
82
- 1.12.5
82
+ 1.13.6
data/lib/bergamasco.rb CHANGED
@@ -13,5 +13,5 @@ require 'addressable/uri'
13
13
  require "bergamasco/summarize"
14
14
  require "bergamasco/sanitize"
15
15
  require "bergamasco/markdown"
16
- require "bergamasco/jats"
16
+ require "bergamasco/pandoc"
17
17
  require "bergamasco/whitelist_scrubber"
@@ -48,7 +48,7 @@ module Bergamasco
48
48
  metadata = yaml.extract!(*keys).compact
49
49
 
50
50
  content = YAML_FRONT_MATTER_REGEXP.match(file).post_match
51
- html = render_html(content, options)
51
+ html = Bergamasco::Pandoc.convert(content, options)
52
52
  metadata["summary"] = Bergamasco::Summarize.summary_from_html(html, options)
53
53
  metadata["references"] = extract_references(html)
54
54
  metadata["date"] = metadata["date"].iso8601
@@ -59,31 +59,6 @@ module Bergamasco
59
59
  IO.write(filepath, content.to_yaml)
60
60
  end
61
61
 
62
- def self.render_html(text, options={})
63
- PandocRuby.new(text, options.except(:skip_yaml_header,
64
- :separator,
65
- :sitepath,
66
- :authorpath,
67
- :referencespath,
68
- :username,
69
- :password,
70
- :sandbox,
71
- :prefix,
72
- :number)).to_html
73
- rescue Errno::ENOENT
74
- # fallback to commonmarker if pandoc is not installed.
75
- # Commonmarker doesn't parse or ignore yaml frontmatter
76
- text = split_yaml_frontmatter(text).last if options[:skip_yaml_header]
77
- CommonMarker.render_html(text, :default)
78
- end
79
-
80
- def self.write_bibliograpy_to_yaml(bib_path, yaml_path)
81
- yaml = `pandoc-citeproc --bib2yaml #{bib_path} 2>&1`
82
- return nil if $?.exitstatus > 0
83
- IO.write(yaml_path, yaml)
84
- SafeYAML.load(yaml)
85
- end
86
-
87
62
  # expects a references list generated by pandoc
88
63
  def self.extract_references(html)
89
64
  doc = Nokogiri::HTML(html)
@@ -0,0 +1,60 @@
1
+ module Bergamasco
2
+ module Pandoc
3
+ # Options understood by pandoc, taken from http://pandoc.org/MANUAL.html.
4
+ # Ignore all other options passed to pandoc, unless overridden.
5
+ AVAILABLE_OPTIONS = Set.new %w(from read to write output data-dir strict
6
+ parse-raw smart old-dashes base-header-level indented-code-classes filter
7
+ normalize preserve-tabs tab-stop track-changes file-scope extract-media
8
+ standalone template metadata variable print-default-template
9
+ print-default-data-file no-wrap wrap columns toc table-of-contents toc-depth
10
+ no-highlight highlight-style include-in-header include-before-body
11
+ include-after-body self-contained offline html5 html-q-tags ascii
12
+ reference-links reference-location atx-headers chapters top-level-division
13
+ number-sections number-offsetS no-tex-ligatures listings incremental
14
+ slide-level section-divs default-image-extension email-obfuscation id-prefix
15
+ title-prefix css reference-odt reference-docx epub-stylesheet
16
+ epub-cover-image epub-metadata epub-embed-font epub-chapter-level
17
+ latex-engine latex-engine-opt bibliography csl citation-abbreviations natbib
18
+ biblatex latexmathml asciimathml mathml mimetex webtex jsmath mathjax katex
19
+ katex-stylesheet gladtex trace dump-args ignore-args verbose bash-completion
20
+ list-input-formats list-output-formats list-extensions
21
+ list-highlight-languages list-highlight-styles)
22
+ ALIAS_OPTIONS = Set.new %w(f r t w o R S F p s M V D H B A 5 N i T c m)
23
+ ALLOWED_OPTIONS = AVAILABLE_OPTIONS + ALIAS_OPTIONS
24
+
25
+ def self.convert(text, options={})
26
+ options = options.select { |k, v| ALLOWED_OPTIONS.include?(k.to_s.gsub('_', '-')) }
27
+
28
+ options[:from] ||= :markdown
29
+ options[:to] ||= :html
30
+
31
+ PandocRuby.convert(text, options)
32
+ rescue Errno::ENOENT
33
+ puts "Pandoc is not installed"
34
+ end
35
+
36
+ def self.convert_to_jats(text, options={})
37
+ options = options.merge(template: "templates/default.jats",
38
+ to: "lib/bergamasco/jats.lua",
39
+ csl: "lib/bergamasco/jats.csl")
40
+ options = options.merge(metadata: options[:metadata]) if options[:metadata].present?
41
+
42
+ convert(text, options)
43
+ end
44
+
45
+ def self.write_jats(filepath, options={})
46
+ file = IO.read(filepath)
47
+ xml_path = File.join(File.dirname(filepath), File.basename(filepath, ".html.md")) + ".xml"
48
+ xml = convert_to_jats(file, options)
49
+ IO.write(xml_path, xml)
50
+ xml_path
51
+ end
52
+
53
+ def self.write_bibliograpy_to_yaml(bib_path, yaml_path)
54
+ yaml = `pandoc-citeproc --bib2yaml #{bib_path} 2>&1`
55
+ return nil if $?.exitstatus > 0
56
+ IO.write(yaml_path, yaml)
57
+ SafeYAML.load(yaml)
58
+ end
59
+ end
60
+ end
@@ -1,3 +1,3 @@
1
1
  module Bergamasco
2
- VERSION = "0.3.2"
2
+ VERSION = "0.3.3"
3
3
  end
@@ -64,13 +64,6 @@ describe Bergamasco::Markdown do
64
64
  expect(length).to eq(text.length)
65
65
  end
66
66
 
67
- it 'should write bibliography to yaml' do
68
- bib_path = fixture_path + 'references.bib'
69
- yaml_path = fixture_path + 'references.yaml'
70
- yaml = subject.write_bibliograpy_to_yaml(bib_path, yaml_path)
71
- expect(yaml["references"].length).to eq(61)
72
- end
73
-
74
67
  it 'should update file' do
75
68
  filepath = fixture_path + 'cool-dois.html.md'
76
69
  new_metadata = { "doi" => "10.23725/0000-03VC"}
@@ -81,14 +74,14 @@ describe Bergamasco::Markdown do
81
74
  it 'should convert markdown' do
82
75
  filepath = fixture_path + 'cool-dois.html.md'
83
76
  file = IO.read(filepath)
84
- html = subject.render_html(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
77
+ html = Bergamasco::Pandoc.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
85
78
  expect(html).to start_with("<p>In 1998 Tim Berners-Lee coined the term cool URIs <span class=\"citation\">(1998)</span>, that is URIs that don’t change.")
86
79
  end
87
80
 
88
81
  it 'should extract references' do
89
82
  filepath = fixture_path + 'cool-dois.html.md'
90
83
  file = IO.read(filepath)
91
- html = subject.render_html(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
84
+ html = Bergamasco::Pandoc.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
92
85
  refs = subject.extract_references(html)
93
86
  expect(refs).to eq(["https://www.w3.org/Provider/Style/URI", "https://doi.org/10.1371/journal.pone.0115253"])
94
87
  end
@@ -0,0 +1,38 @@
1
+ require 'spec_helper'
2
+
3
+ describe Bergamasco::Pandoc do
4
+ subject { Bergamasco::Pandoc }
5
+
6
+ it 'should convert markdown' do
7
+ filepath = fixture_path + 'cool-dois.html.md'
8
+ file = IO.read(filepath)
9
+ html = subject.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
10
+ expect(html).to start_with("<p>In 1998 Tim Berners-Lee coined the term cool URIs <span class=\"citation\">(1998)</span>, that is URIs that don’t change.")
11
+ end
12
+
13
+ it 'should convert to jats' do
14
+ filepath = fixture_path + 'cool-dois.html.md'
15
+ file = IO.read(filepath)
16
+ xml = subject.convert_to_jats(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
17
+ doc = Nokogiri::XML(xml)
18
+ article_id = doc.at_xpath("//article-id")
19
+ expect(article_id.text).to eq("10.23725/0000-03VC")
20
+ expect(article_id.values.first).to eq("doi")
21
+ end
22
+
23
+ it 'should write jats xml' do
24
+ filepath = fixture_path + 'cool-dois.html.md'
25
+ xml_path = subject.write_jats(filepath, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
26
+ doc = File.open(xml_path) { |f| Nokogiri::XML(f) }
27
+ article_id = doc.at_xpath("//article-id")
28
+ expect(article_id.text).to eq("10.23725/0000-03VC")
29
+ expect(article_id.values.first).to eq("doi")
30
+ end
31
+
32
+ it 'should write bibliography to yaml' do
33
+ bib_path = fixture_path + 'references.bib'
34
+ yaml_path = fixture_path + 'references.yaml'
35
+ yaml = subject.write_bibliograpy_to_yaml(bib_path, yaml_path)
36
+ expect(yaml["references"].length).to eq(61)
37
+ end
38
+ end
@@ -30,7 +30,7 @@ describe Bergamasco::Summarize do
30
30
  it 'should truncate at separator and convert to html' do
31
31
  filepath = fixture_path + 'cool-dois-without-yml.md'
32
32
  file = IO.read(filepath)
33
- html = Bergamasco::Markdown.render_html(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.bib')
33
+ html = Bergamasco::Pandoc.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.bib')
34
34
  content = subject.summary_from_html(html, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.bib')
35
35
  expect(content).to start_with("In 1998 Tim Berners-Lee coined the term cool URIs (1998), that is URIs that don’t change.")
36
36
  expect(content).to end_with("the referenced resource.")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bergamasco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-21 00:00:00.000000000 Z
11
+ date: 2016-12-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -269,8 +269,8 @@ files:
269
269
  - lib/bergamasco.rb
270
270
  - lib/bergamasco/jats.csl
271
271
  - lib/bergamasco/jats.lua
272
- - lib/bergamasco/jats.rb
273
272
  - lib/bergamasco/markdown.rb
273
+ - lib/bergamasco/pandoc.rb
274
274
  - lib/bergamasco/sanitize.rb
275
275
  - lib/bergamasco/summarize.rb
276
276
  - lib/bergamasco/version.rb
@@ -283,8 +283,8 @@ files:
283
283
  - spec/fixtures/cool-dois.yml
284
284
  - spec/fixtures/references.bib
285
285
  - spec/fixtures/references.yaml
286
- - spec/jats_spec.rb
287
286
  - spec/markdown_spec.rb
287
+ - spec/pandoc_spec.rb
288
288
  - spec/sanitize_spec.rb
289
289
  - spec/spec_helper.rb
290
290
  - spec/summarize_spec.rb
@@ -1,33 +0,0 @@
1
- module Bergamasco
2
- module Jats
3
-
4
- def self.render_jats(text, options={})
5
- options = options.merge(template: "templates/default.jats",
6
- to: "lib/bergamasco/jats.lua",
7
- csl: "lib/bergamasco/jats.csl")
8
- options = options.merge(metadata: options[:metadata]) if options[:metadata].present?
9
- converter = PandocRuby.new(text, options.except(:skip_yaml_header,
10
- :separator,
11
- :sitepath,
12
- :authorpath,
13
- :referencespath,
14
- :username,
15
- :password,
16
- :sandbox,
17
- :prefix,
18
- :number))
19
- converter.convert
20
- rescue Errno::ENOENT
21
- # if pandoc is not installed.
22
- puts "Pandoc is not installed"
23
- end
24
-
25
- def self.write_jats(filepath, options={})
26
- file = IO.read(filepath)
27
- xml_path = File.join(File.dirname(filepath), File.basename(filepath, ".html.md")) + ".xml"
28
- xml = render_jats(file, options)
29
- IO.write(xml_path, xml)
30
- xml_path
31
- end
32
- end
33
- end
data/spec/jats_spec.rb DELETED
@@ -1,24 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Bergamasco::Jats do
4
- subject { Bergamasco::Jats }
5
-
6
- it 'should convert to jats' do
7
- filepath = fixture_path + 'cool-dois.html.md'
8
- file = IO.read(filepath)
9
- xml = subject.render_jats(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
10
- doc = Nokogiri::XML(xml)
11
- article_id = doc.at_xpath("//article-id")
12
- expect(article_id.text).to eq("10.23725/0000-03VC")
13
- expect(article_id.values.first).to eq("doi")
14
- end
15
-
16
- it 'should write jats xml' do
17
- filepath = fixture_path + 'cool-dois.html.md'
18
- xml_path = subject.write_jats(filepath, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
19
- doc = File.open(xml_path) { |f| Nokogiri::XML(f) }
20
- article_id = doc.at_xpath("//article-id")
21
- expect(article_id.text).to eq("10.23725/0000-03VC")
22
- expect(article_id.values.first).to eq("doi")
23
- end
24
- end