bergamasco 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 63d34d9a6d674818e54cedec4ba4ae26bfc98d1a
4
- data.tar.gz: 3d373fd955263556b5766c3e1319a720d39f62b9
3
+ metadata.gz: 4c9fd041fa952b8ce0d759d55f1a5ea9fbb0eeba
4
+ data.tar.gz: 2fabed95db18a9ab876e1d6843fe01ca9d645c88
5
5
  SHA512:
6
- metadata.gz: f8b6b4045b3ffacb31f7bf3f0c019b374cf534ffafa21850dd4a288295fb516e2dffec9793611e5ef8d64919cf3705f83a7eb099f790ef88a1e147b43cc1b51e
7
- data.tar.gz: a06d40fd69e0f7606e0b87fcba01d151ca85e4b1e99afb2523f90eadc121da5dedb25a2a42d3b1375e4a1bcac433ba231ff4fddf194c6cb42f5ae45fb562ade4
6
+ metadata.gz: c073e07cb08e34c90c5afdfd36ac11fb88fc341a26f7d892d7bd9c5aed6e90632f5e937f8bcfa015e40e2c191b076ff38707c365faa2da5e91e289fa143bb7a9
7
+ data.tar.gz: 1855153f47914dae465a7db87cfe2910eb53fa95bec68e7defaa8e764929bf4cb946109c15b52c0243da1c5d476bdcfc9566589be925ad8ff5784b8abd941ea5
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bergamasco (0.3.2)
4
+ bergamasco (0.3.3)
5
5
  activesupport (~> 4.2, >= 4.2.5)
6
6
  addressable (~> 2.3.8)
7
7
  builder (~> 3.2, >= 3.2.2)
@@ -79,4 +79,4 @@ DEPENDENCIES
79
79
  simplecov
80
80
 
81
81
  BUNDLED WITH
82
- 1.12.5
82
+ 1.13.6
data/lib/bergamasco.rb CHANGED
@@ -13,5 +13,5 @@ require 'addressable/uri'
13
13
  require "bergamasco/summarize"
14
14
  require "bergamasco/sanitize"
15
15
  require "bergamasco/markdown"
16
- require "bergamasco/jats"
16
+ require "bergamasco/pandoc"
17
17
  require "bergamasco/whitelist_scrubber"
@@ -48,7 +48,7 @@ module Bergamasco
48
48
  metadata = yaml.extract!(*keys).compact
49
49
 
50
50
  content = YAML_FRONT_MATTER_REGEXP.match(file).post_match
51
- html = render_html(content, options)
51
+ html = Bergamasco::Pandoc.convert(content, options)
52
52
  metadata["summary"] = Bergamasco::Summarize.summary_from_html(html, options)
53
53
  metadata["references"] = extract_references(html)
54
54
  metadata["date"] = metadata["date"].iso8601
@@ -59,31 +59,6 @@ module Bergamasco
59
59
  IO.write(filepath, content.to_yaml)
60
60
  end
61
61
 
62
- def self.render_html(text, options={})
63
- PandocRuby.new(text, options.except(:skip_yaml_header,
64
- :separator,
65
- :sitepath,
66
- :authorpath,
67
- :referencespath,
68
- :username,
69
- :password,
70
- :sandbox,
71
- :prefix,
72
- :number)).to_html
73
- rescue Errno::ENOENT
74
- # fallback to commonmarker if pandoc is not installed.
75
- # Commonmarker doesn't parse or ignore yaml frontmatter
76
- text = split_yaml_frontmatter(text).last if options[:skip_yaml_header]
77
- CommonMarker.render_html(text, :default)
78
- end
79
-
80
- def self.write_bibliograpy_to_yaml(bib_path, yaml_path)
81
- yaml = `pandoc-citeproc --bib2yaml #{bib_path} 2>&1`
82
- return nil if $?.exitstatus > 0
83
- IO.write(yaml_path, yaml)
84
- SafeYAML.load(yaml)
85
- end
86
-
87
62
  # expects a references list generated by pandoc
88
63
  def self.extract_references(html)
89
64
  doc = Nokogiri::HTML(html)
@@ -0,0 +1,60 @@
1
+ module Bergamasco
2
+ module Pandoc
3
+ # Options understood by pandoc, taken from http://pandoc.org/MANUAL.html.
4
+ # Ignore all other options passed to pandoc, unless overridden.
5
+ AVAILABLE_OPTIONS = Set.new %w(from read to write output data-dir strict
6
+ parse-raw smart old-dashes base-header-level indented-code-classes filter
7
+ normalize preserve-tabs tab-stop track-changes file-scope extract-media
8
+ standalone template metadata variable print-default-template
9
+ print-default-data-file no-wrap wrap columns toc table-of-contents toc-depth
10
+ no-highlight highlight-style include-in-header include-before-body
11
+ include-after-body self-contained offline html5 html-q-tags ascii
12
+ reference-links reference-location atx-headers chapters top-level-division
13
+ number-sections number-offsetS no-tex-ligatures listings incremental
14
+ slide-level section-divs default-image-extension email-obfuscation id-prefix
15
+ title-prefix css reference-odt reference-docx epub-stylesheet
16
+ epub-cover-image epub-metadata epub-embed-font epub-chapter-level
17
+ latex-engine latex-engine-opt bibliography csl citation-abbreviations natbib
18
+ biblatex latexmathml asciimathml mathml mimetex webtex jsmath mathjax katex
19
+ katex-stylesheet gladtex trace dump-args ignore-args verbose bash-completion
20
+ list-input-formats list-output-formats list-extensions
21
+ list-highlight-languages list-highlight-styles)
22
+ ALIAS_OPTIONS = Set.new %w(f r t w o R S F p s M V D H B A 5 N i T c m)
23
+ ALLOWED_OPTIONS = AVAILABLE_OPTIONS + ALIAS_OPTIONS
24
+
25
+ def self.convert(text, options={})
26
+ options = options.select { |k, v| ALLOWED_OPTIONS.include?(k.to_s.gsub('_', '-')) }
27
+
28
+ options[:from] ||= :markdown
29
+ options[:to] ||= :html
30
+
31
+ PandocRuby.convert(text, options)
32
+ rescue Errno::ENOENT
33
+ puts "Pandoc is not installed"
34
+ end
35
+
36
+ def self.convert_to_jats(text, options={})
37
+ options = options.merge(template: "templates/default.jats",
38
+ to: "lib/bergamasco/jats.lua",
39
+ csl: "lib/bergamasco/jats.csl")
40
+ options = options.merge(metadata: options[:metadata]) if options[:metadata].present?
41
+
42
+ convert(text, options)
43
+ end
44
+
45
+ def self.write_jats(filepath, options={})
46
+ file = IO.read(filepath)
47
+ xml_path = File.join(File.dirname(filepath), File.basename(filepath, ".html.md")) + ".xml"
48
+ xml = convert_to_jats(file, options)
49
+ IO.write(xml_path, xml)
50
+ xml_path
51
+ end
52
+
53
+ def self.write_bibliograpy_to_yaml(bib_path, yaml_path)
54
+ yaml = `pandoc-citeproc --bib2yaml #{bib_path} 2>&1`
55
+ return nil if $?.exitstatus > 0
56
+ IO.write(yaml_path, yaml)
57
+ SafeYAML.load(yaml)
58
+ end
59
+ end
60
+ end
@@ -1,3 +1,3 @@
1
1
  module Bergamasco
2
- VERSION = "0.3.2"
2
+ VERSION = "0.3.3"
3
3
  end
@@ -64,13 +64,6 @@ describe Bergamasco::Markdown do
64
64
  expect(length).to eq(text.length)
65
65
  end
66
66
 
67
- it 'should write bibliography to yaml' do
68
- bib_path = fixture_path + 'references.bib'
69
- yaml_path = fixture_path + 'references.yaml'
70
- yaml = subject.write_bibliograpy_to_yaml(bib_path, yaml_path)
71
- expect(yaml["references"].length).to eq(61)
72
- end
73
-
74
67
  it 'should update file' do
75
68
  filepath = fixture_path + 'cool-dois.html.md'
76
69
  new_metadata = { "doi" => "10.23725/0000-03VC"}
@@ -81,14 +74,14 @@ describe Bergamasco::Markdown do
81
74
  it 'should convert markdown' do
82
75
  filepath = fixture_path + 'cool-dois.html.md'
83
76
  file = IO.read(filepath)
84
- html = subject.render_html(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
77
+ html = Bergamasco::Pandoc.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
85
78
  expect(html).to start_with("<p>In 1998 Tim Berners-Lee coined the term cool URIs <span class=\"citation\">(1998)</span>, that is URIs that don’t change.")
86
79
  end
87
80
 
88
81
  it 'should extract references' do
89
82
  filepath = fixture_path + 'cool-dois.html.md'
90
83
  file = IO.read(filepath)
91
- html = subject.render_html(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
84
+ html = Bergamasco::Pandoc.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
92
85
  refs = subject.extract_references(html)
93
86
  expect(refs).to eq(["https://www.w3.org/Provider/Style/URI", "https://doi.org/10.1371/journal.pone.0115253"])
94
87
  end
@@ -0,0 +1,38 @@
1
+ require 'spec_helper'
2
+
3
+ describe Bergamasco::Pandoc do
4
+ subject { Bergamasco::Pandoc }
5
+
6
+ it 'should convert markdown' do
7
+ filepath = fixture_path + 'cool-dois.html.md'
8
+ file = IO.read(filepath)
9
+ html = subject.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
10
+ expect(html).to start_with("<p>In 1998 Tim Berners-Lee coined the term cool URIs <span class=\"citation\">(1998)</span>, that is URIs that don’t change.")
11
+ end
12
+
13
+ it 'should convert to jats' do
14
+ filepath = fixture_path + 'cool-dois.html.md'
15
+ file = IO.read(filepath)
16
+ xml = subject.convert_to_jats(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
17
+ doc = Nokogiri::XML(xml)
18
+ article_id = doc.at_xpath("//article-id")
19
+ expect(article_id.text).to eq("10.23725/0000-03VC")
20
+ expect(article_id.values.first).to eq("doi")
21
+ end
22
+
23
+ it 'should write jats xml' do
24
+ filepath = fixture_path + 'cool-dois.html.md'
25
+ xml_path = subject.write_jats(filepath, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
26
+ doc = File.open(xml_path) { |f| Nokogiri::XML(f) }
27
+ article_id = doc.at_xpath("//article-id")
28
+ expect(article_id.text).to eq("10.23725/0000-03VC")
29
+ expect(article_id.values.first).to eq("doi")
30
+ end
31
+
32
+ it 'should write bibliography to yaml' do
33
+ bib_path = fixture_path + 'references.bib'
34
+ yaml_path = fixture_path + 'references.yaml'
35
+ yaml = subject.write_bibliograpy_to_yaml(bib_path, yaml_path)
36
+ expect(yaml["references"].length).to eq(61)
37
+ end
38
+ end
@@ -30,7 +30,7 @@ describe Bergamasco::Summarize do
30
30
  it 'should truncate at separator and convert to html' do
31
31
  filepath = fixture_path + 'cool-dois-without-yml.md'
32
32
  file = IO.read(filepath)
33
- html = Bergamasco::Markdown.render_html(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.bib')
33
+ html = Bergamasco::Pandoc.convert(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.bib')
34
34
  content = subject.summary_from_html(html, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.bib')
35
35
  expect(content).to start_with("In 1998 Tim Berners-Lee coined the term cool URIs (1998), that is URIs that don’t change.")
36
36
  expect(content).to end_with("the referenced resource.")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bergamasco
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Fenner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-21 00:00:00.000000000 Z
11
+ date: 2016-12-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -269,8 +269,8 @@ files:
269
269
  - lib/bergamasco.rb
270
270
  - lib/bergamasco/jats.csl
271
271
  - lib/bergamasco/jats.lua
272
- - lib/bergamasco/jats.rb
273
272
  - lib/bergamasco/markdown.rb
273
+ - lib/bergamasco/pandoc.rb
274
274
  - lib/bergamasco/sanitize.rb
275
275
  - lib/bergamasco/summarize.rb
276
276
  - lib/bergamasco/version.rb
@@ -283,8 +283,8 @@ files:
283
283
  - spec/fixtures/cool-dois.yml
284
284
  - spec/fixtures/references.bib
285
285
  - spec/fixtures/references.yaml
286
- - spec/jats_spec.rb
287
286
  - spec/markdown_spec.rb
287
+ - spec/pandoc_spec.rb
288
288
  - spec/sanitize_spec.rb
289
289
  - spec/spec_helper.rb
290
290
  - spec/summarize_spec.rb
@@ -1,33 +0,0 @@
1
- module Bergamasco
2
- module Jats
3
-
4
- def self.render_jats(text, options={})
5
- options = options.merge(template: "templates/default.jats",
6
- to: "lib/bergamasco/jats.lua",
7
- csl: "lib/bergamasco/jats.csl")
8
- options = options.merge(metadata: options[:metadata]) if options[:metadata].present?
9
- converter = PandocRuby.new(text, options.except(:skip_yaml_header,
10
- :separator,
11
- :sitepath,
12
- :authorpath,
13
- :referencespath,
14
- :username,
15
- :password,
16
- :sandbox,
17
- :prefix,
18
- :number))
19
- converter.convert
20
- rescue Errno::ENOENT
21
- # if pandoc is not installed.
22
- puts "Pandoc is not installed"
23
- end
24
-
25
- def self.write_jats(filepath, options={})
26
- file = IO.read(filepath)
27
- xml_path = File.join(File.dirname(filepath), File.basename(filepath, ".html.md")) + ".xml"
28
- xml = render_jats(file, options)
29
- IO.write(xml_path, xml)
30
- xml_path
31
- end
32
- end
33
- end
data/spec/jats_spec.rb DELETED
@@ -1,24 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Bergamasco::Jats do
4
- subject { Bergamasco::Jats }
5
-
6
- it 'should convert to jats' do
7
- filepath = fixture_path + 'cool-dois.html.md'
8
- file = IO.read(filepath)
9
- xml = subject.render_jats(file, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
10
- doc = Nokogiri::XML(xml)
11
- article_id = doc.at_xpath("//article-id")
12
- expect(article_id.text).to eq("10.23725/0000-03VC")
13
- expect(article_id.values.first).to eq("doi")
14
- end
15
-
16
- it 'should write jats xml' do
17
- filepath = fixture_path + 'cool-dois.html.md'
18
- xml_path = subject.write_jats(filepath, skip_yaml_header: true, csl: 'spec/fixtures/apa.csl', bibliography: 'spec/fixtures/references.yaml')
19
- doc = File.open(xml_path) { |f| Nokogiri::XML(f) }
20
- article_id = doc.at_xpath("//article-id")
21
- expect(article_id.text).to eq("10.23725/0000-03VC")
22
- expect(article_id.values.first).to eq("doi")
23
- end
24
- end