reverse_adoc 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: de7821c8ca445117a045f3c2c28ae78f20a2185f61af5de3fff7ed064dd98c3e
4
- data.tar.gz: 53151bcdd028e693da6b496df31c7a7da5ffa1dea8b0ee22696011a91fae074a
3
+ metadata.gz: d3d387a4fbedd246900150be67a80bf62ee70a70cf697b1a2a7a247080d879ad
4
+ data.tar.gz: 956f8db0ca1d2f34e1f165f323e6db6d1f5e68a91dc886d6fd3cf3f1f7aa0886
5
5
  SHA512:
6
- metadata.gz: 4ae2e50d201e868dc7caa44a599695c6d03f155dd72013888d3cdee5505b074aa37cfc93cfc96435e43fa9bc0096ed46742dd2ff77280957c99cde1ca6454cee
7
- data.tar.gz: 8a628ecfe850737831d5f1e10ef30ee65390f9599d7d75f7323de4ca39445c2d873fbaca49db7178b913d696a8fce85c50ce780a7e1715a37a12dce7c4e63cc4
6
+ metadata.gz: a72c96ffc8e41f7e38b28108107b12bec33a22e3b1722416e7b4d5a493ab3f47d6c3fab49a98b27969ba8235cb3ed6fff86f027a8c9ff5e4b3276af8d2d0a752
7
+ data.tar.gz: 1f0de4f61007133d49c0dfcd0dd85e8a87f8445a59fb41ba5dd1001eeec4e1405402449389b3e1355a453ff5a21bf6fa7d4b8bfd1fc883c654feee83600e55f3
@@ -1,11 +1,12 @@
1
1
  = AsciiDoc from HTML and Microsoft Word: reverse_adoc
2
2
 
3
- image:https://img.shields.io/gem/v/reverse_asciidoctor.svg["Gem Version", link="https://rubygems.org/gems/reverse_asciidoctor"]
4
- image:https://travis-ci.com/metanorma/reverse_asciidoctor.svg["Build Status", link="https://travis-ci.com/metanorma/reverse_asciidoctor"]
5
- image:https://ci.appveyor.com/api/projects/status/9dui2fs4pc590f4k?svg=true["Appveyor Build Status", link="https://ci.appveyor.com/project/metanorma/reverse-asciidoctor"]
6
- image:https://codeclimate.com/github/metanorma/reverse_asciidoctor/badges/gpa.svg["Code Climate", link="https://codeclimate.com/github/metanorma/reverse_asciidoctor"]
7
- image:https://img.shields.io/github/issues-pr-raw/metanorma/reverse_asciidoctor.svg["Pull Requests", link="https://github.com/metanorma/reverse_asciidoctor/pulls"]
8
- image:https://img.shields.io/github/commits-since/metanorma/reverse_asciidoctor/latest.svg["Commits since latest",link="https://github.com/metanorma/reverse_asciidoctor/releases"]
3
+ https://github.com/metanorma/reverse_adoc[reverse_adoc] image:https://img.shields.io/gem/v/reverse_adoc.svg["Gem Version", link="https://rubygems.org/gems/reverse_adoc"]::
4
+ image:https://github.com/metanorma/reverse_adoc/workflows/macos/badge.svg["Build Status", link="https://github.com/metanorma/reverse_adoc/actions?workflow=macos"]
5
+ image:https://github.com/metanorma/reverse_adoc/workflows/windows/badge.svg["Build Status", link="https://github.com/metanorma/reverse_adoc/actions?workflow=windows"]
6
+ image:https://github.com/metanorma/reverse_adoc/workflows/ubuntu/badge.svg["Build Status", link="https://github.com/metanorma/reverse_adoc/actions?workflow=ubuntu"]
7
+ image:https://codeclimate.com/github/metanorma/reverse_adoc/badges/gpa.svg["Code Climate", link="https://codeclimate.com/github/metanorma/reverse_adoc"]
8
+ image:https://img.shields.io/github/issues-pr-raw/metanorma/reverse_adoc.svg["Pull Requests", link="https://github.com/metanorma/reverse_adoc/pulls"]
9
+ image:https://img.shields.io/github/commits-since/metanorma/reverse_adoc/latest.svg["Commits since latest",link="https://github.com/metanorma/reverse_adoc/releases"]
9
10
 
10
11
  == Purpose
11
12
 
@@ -1,6 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
2
  # Usage: reverse_asciidoctor [FILE]...
3
3
  # Usage: cat FILE | reverse_asciidoctor
4
+ require 'rubygems'
5
+ require 'bundler/setup'
6
+
4
7
  require 'reverse_asciidoctor'
5
8
  require 'optparse'
6
9
  require 'fileutils'
data/bin/w2a CHANGED
@@ -1,25 +1,12 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
+ require 'rubygems'
4
+ require 'bundler/setup'
3
5
 
4
6
  require 'word-to-markdown'
5
7
  require 'optparse'
6
8
  require 'reverse_asciidoctor'
7
9
 
8
- def scrub_whitespace(string)
9
- string = string.dup
10
- string.gsub!(/ |\ |\u00a0/i, ' ') # HTML encoded spaces
11
- string.sub!(/^\A[[:space:]]+/m, '') # document leading whitespace
12
- string.sub!(/[[:space:]]+\z$/m, '') # document trailing whitespace
13
- string.gsub!(/([ ]+)$/, ' ') # line trailing whitespace
14
- string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
15
- #string.delete!(' ') # Unicode non-breaking spaces, injected as tabs
16
- # following added by me
17
- string.gsub!(%r{<h[1-9][^>]*></h1>}, " ") # I don't know why Libre Office is inserting them, but they need to go
18
- string.gsub!(%r{<h1[^>]* style="vertical-align: super;[^>]*>([^<]+)</h1>},
19
- "<sup>\\1</sup>") # I absolutely don't know why Libre Office is rendering superscripts as h1
20
- string
21
- end
22
-
23
10
  ARGV.push('-h') if ARGV.empty?
24
11
 
25
12
  OptionParser.new do |opts|
@@ -66,9 +53,9 @@ ReverseAsciidoctor.config.sourcedir = Dir.mktmpdir
66
53
  # puts "ReverseAsciidoctor.config.sourcedir #{ReverseAsciidoctor.config.sourcedir}"
67
54
 
68
55
  doc = WordToMarkdown.new(filename, ReverseAsciidoctor.config.sourcedir)
69
- #File.open("test.html", "w:UTF-8") { |f| f.write doc.document.html }
56
+ File.open("test.html", "w:UTF-8") { |f| f.write doc.document.html }
70
57
  adoc_content = ReverseAsciidoctor.convert(
71
- scrub_whitespace(doc.document.html),
58
+ ReverseAsciidoctor.cleaner.preprocess_word_html(doc.document.html),
72
59
  WordToMarkdown::REVERSE_MARKDOWN_OPTIONS
73
60
  )
74
61
  # puts scrub_whitespace(doc.document.html)
@@ -64,6 +64,29 @@ module ReverseAsciidoctor
64
64
  string.gsub(/(\*\*|~~|__)\s([\.!\?'"])/, "\\1".strip + "\\2")
65
65
  end
66
66
 
67
+ # preprocesses HTML, rather than postprocessing it
68
+ def preprocess_word_html(string)
69
+ clean_headings(scrub_whitespace(string.dup))
70
+ end
71
+
72
+ def scrub_whitespace(string)
73
+ string.gsub!(/&nbsp;|\&#xA0;|\u00a0/i, '&#xA0;') # HTML encoded spaces
74
+ string.sub!(/^\A[[:space:]]+/m, '') # document leading whitespace
75
+ string.sub!(/[[:space:]]+\z$/m, '') # document trailing whitespace
76
+ string.gsub!(/([ ]+)$/, ' ') # line trailing whitespace
77
+ string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
78
+ #string.delete!('?| ') # Unicode non-breaking spaces, injected as tabs
79
+ string
80
+ end
81
+
82
+ # following added by me
83
+ def clean_headings(string)
84
+ string.gsub!(%r{<h([1-9])[^>]*></h\1>}, " ") # I don't know why Libre Office is inserting them, but they need to go
85
+ string.gsub!(%r{<h([1-9])[^>]* style="vertical-align: super;[^>]*>(.+?)</h\1>},
86
+ "<sup>\\2</sup>") # I absolutely don't know why Libre Office is rendering superscripts as h1
87
+ string
88
+ end
89
+
67
90
  private
68
91
 
69
92
  def preserve_border_whitespaces(string, options = {}, &block)
@@ -3,9 +3,24 @@ module ReverseAsciidoctor
3
3
  class H < Base
4
4
  def convert(node, state = {})
5
5
  id = node['id']
6
- anchor = id ? "[[#{id}]]\n" : ""
6
+ anchor = id ? "[[#{id}]]" : ""
7
+ internal_anchor = treat_children_anchors(node, state) || ""
8
+ anchor.empty? and anchor = internal_anchor
9
+ anchor.empty? or anchor += "\n"
7
10
  prefix = '=' * (node.name[/\d/].to_i + 1)
8
- ["\n", anchor, prefix, ' ', treat_children(node, state), "\n"].join
11
+ ["\n", anchor, prefix, ' ', treat_children_no_anchors(node, state), "\n"].join
12
+ end
13
+
14
+ def treat_children_no_anchors(node, state)
15
+ node.children.reject { |a| a.name == "a" }.inject('') do |memo, child|
16
+ memo << treat(child, state)
17
+ end
18
+ end
19
+
20
+ def treat_children_anchors(node, state)
21
+ node.children.select { |a| a.name == "a" }.inject('') do |memo, child|
22
+ memo << treat(child, state)
23
+ end
9
24
  end
10
25
  end
11
26
 
@@ -1,5 +1,8 @@
1
1
  require "fileutils"
2
2
  require "pathname"
3
+ require "tempfile"
4
+ require "base64"
5
+ require "mimemagic"
3
6
 
4
7
  module ReverseAsciidoctor
5
8
  module Converters
@@ -23,29 +26,7 @@ module ReverseAsciidoctor
23
26
  images_dir = dest_dir + 'images'
24
27
  FileUtils.mkdir_p(images_dir)
25
28
 
26
- ext = ""
27
-
28
- if imgdata
29
- file = Tempfile.open(["radoc", ".jpg"]) do |f|
30
- begin
31
- f.binmode
32
- f.write(Base64.strict_decode64(imgdata))
33
- f.rewind
34
- ext = MimeMagic.by_magic(f)
35
- ensure
36
- f.close!
37
- end
38
- end
39
-
40
- image_src_path = file.path
41
- # puts "tempfile: #{file}"
42
-
43
- else
44
- ext = File.extname(src).strip.downcase[1..-1]
45
- image_src_path = Pathname.new(ReverseAsciidoctor.config.sourcedir) + src
46
-
47
- end
48
-
29
+ ext, image_src_path = determine_image_src_path(imgdata)
49
30
  image_dest_path = images_dir + "#{image_number}.#{ext}"
50
31
 
51
32
  # puts "image_dest_path: #{image_dest_path.to_s}"
@@ -57,6 +38,23 @@ module ReverseAsciidoctor
57
38
  image_dest_path.relative_path_from(dest_dir)
58
39
  end
59
40
 
41
+ def determine_image_src_path(imgdata)
42
+ return copy_temp_file(imgdata) if imgdata
43
+
44
+ ext = File.extname(src).strip.downcase[1..-1]
45
+ [ext, Pathname.new(ReverseAsciidoctor.config.sourcedir) + src]
46
+ end
47
+
48
+ def copy_temp_file(imgdata)
49
+ Tempfile.open(['radoc', '.jpg']) do |f|
50
+ f.binmode
51
+ f.write(Base64.strict_decode64(imgdata))
52
+ f.rewind
53
+ ext = MimeMagic.by_magic(f).subtype
54
+ [ext, f.path]
55
+ end
56
+ end
57
+
60
58
  def convert(node, state = {})
61
59
  alt = node['alt']
62
60
  src = node['src']
@@ -1,3 +1,3 @@
1
1
  module ReverseAsciidoctor
2
- VERSION = '0.2.5'
2
+ VERSION = '0.2.6'
3
3
  end
@@ -22,6 +22,7 @@ Gem::Specification.new do |s|
22
22
  # specify any dependencies here; for example:
23
23
  s.add_dependency 'nokogiri', ">= 1.10.4"
24
24
  s.add_dependency 'mathml2asciimath'
25
+ s.add_dependency 'mimemagic'
25
26
  s.add_development_dependency 'rspec'
26
27
  s.add_development_dependency 'simplecov'
27
28
  s.add_development_dependency 'rake'
@@ -24,5 +24,7 @@
24
24
  <img alt="foobar image" src="http://foobar.com/foobar.png">
25
25
  <img alt="foobar image 2" title="this is the foobar image 2" src="http://foobar.com/foobar2.png">
26
26
  some text...
27
+
28
+ <h1><a id="a__Foreword"></a>Text</h1>
27
29
  </body>
28
30
  </html>
Binary file
@@ -0,0 +1,35 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd">
2
+ <?xml version="1.0" encoding="UTF-8"??><html xmlns="http://www.w3.org/1999/xhtml" style="margin: 0;">
3
+ <!--This file was converted to xhtml by LibreOffice - see http://cgit.freedesktop.org/libreoffice/core/tree/filter/source/xslt for the code.--><head profile="http://dublincore.org/documents/dcmi-terms/">
4
+ <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8">
5
+ <meta name="DCTERMS.title" content="" xml:lang="en-US">
6
+ <meta name="DCTERMS.language" content="en-US" scheme="DCTERMS.RFC4646">
7
+ <meta name="DCTERMS.source" content="http://xml.openoffice.org/odf2xhtml">
8
+ <meta name="DCTERMS.creator" content="Nick Nicholas">
9
+ <meta name="DCTERMS.issued" content="2019-11-21T08:48:00" scheme="DCTERMS.W3CDTF">
10
+ <meta name="DCTERMS.contributor" content="Nick Nicholas">
11
+ <meta name="DCTERMS.modified" content="2019-11-21T10:01:00" scheme="DCTERMS.W3CDTF">
12
+ <meta name="DCTERMS.provenance" content="" xml:lang="en-US">
13
+ <meta name="DCTERMS.subject" content="," xml:lang="en-US">
14
+ <link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" hreflang="en">
15
+ <link rel="schema.DCTERMS" href="http://purl.org/dc/terms/" hreflang="en">
16
+ <link rel="schema.DCTYPE" href="http://purl.org/dc/dcmitype/" hreflang="en">
17
+ <link rel="schema.DCAM" href="http://purl.org/dc/dcam/" hreflang="en">
18
+ </head>
19
+ <body dir="ltr" style="max-width: 21.001cm; margin: 2.54cm 3.175cm;">
20
+ <h1 class="P3" style="clear: both; color: #2f5496; font-size: 16pt; font-family: Calibri Light; writing-mode: lr-tb; margin: 0.423cm 0 0cm;" align="left ! important">
21
+ <a id="a__Hello" style="margin: 0;"><span style="margin: 0;"></span></a><span class="T1" style="margin: 0;">Hello</span>
22
+ </h1>
23
+ <p class="P1" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important"> </p>
24
+ <p class="Standard" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important"><span class="T1" style="margin: 0;">H</span><h1 class="T3" style="vertical-align: super; font-size: 58%; margin: 0;">2</h1><span class="T1" style="margin: 0;">0</span></p>
25
+ <p class="P1" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important"> </p>
26
+ <!--Next 'div' was a 'text:p'.--><div class="Standard" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important">
27
+ <!--Next ' span' is a draw:frame. --><span style="margin: 0;"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block" style="margin: 0;"><mrow style="margin: 0;"><mrow style="margin: 0;"><mrow style="margin: 0;"></mrow><mrow style="margin: 0;"><mi style="margin: 0;">i</mi><mo stretchy="false" style="margin: 0;">=</mo><mn style="margin: 0;">1</mn></mrow></mrow><mrow style="margin: 0;"><mrow style="margin: 0;"></mrow><mi style="margin: 0;">n</mi><msubsup style="margin: 0;"><mi style="margin: 0;">β</mi><mn style="margin: 0;">2</mn><mi style="margin: 0;">i</mi></msubsup></mrow></mrow></math></span>
28
+ </div>
29
+ <div style="clear: both; line-height: 0; width: 0; height: 0; margin: 0; padding: 0;"> </div>
30
+ <!--Next 'div' was a 'text:p'.--><div class="Standard" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important">
31
+ <a id="_GoBack" style="margin: 0;"></a><!--Next ' span' is a draw:frame. --><span style="height: 1.764cm; margin: 0cm; padding: 0; border: none; width: 1.764cm; font-size: 12pt; font-family: Calibri; text-align: center; vertical-align: top; background-color: transparent;" class="fr1" id="Εικόνα_2"><img style="height: 1.764cm; width: 1.764cm; margin: 0;" alt="" src="data:image/gif;base64,R0lGODlhZABkAIAAAAAAAP///ywAAAAAZABkAEAC+4yPqcvtD6OctNqLs968XwCGokd24kidYckuKri9cFvKQCzTdG7x+u+y+YDECqqIzByTzKbzCY1Kp9Sq9eoQKrTY1Ev57TJsuLA4Meylz8HTxMz2LOPFOR1ov+v3/L7/DxgoOEj4IcRVeLA2Bue3CEHGF2nYyDaJcWn5+LZ5lamm0tf5MHpWilYJ6INymmiQ52q0EqsBSythews5qwt60wscLDxMXGx8jJysvMzc7PwM7XmYWzh9SPuJmuq43Raq2ooQbpUdMT5Vbn4ela7+fdfu/h4Xv9stfS/vBr+uPa+Zz94/gPvADCRY0FdCfgdxBezSr1+Vh+Iaclu4KoSiKGsSL9YTRu1YSGMji5UkdnJYSpC8nq0M9hJYzF4zo9m8iTOnzigFAAAh/tNUaGlzIGZpbGUgd2FzIGNyZWF0ZWQgYnkNDUdyYXBoaWMgV29ya3Nob3CZIFByb2Zlc3Npb25hbCAyLjBhDQ1mcm9tIEFsY2hlbXkgTWluZHdvcmtzIEluYy4NaHR0cDovL3d3dy5taW5kd29ya3Nob3AuY29tDQ1UaGlzIGltYWdlIG1heSBoYXZlIGJlZW4gY3JlYXRlZCBieQ1hIHBhcnR5IG90aGVyIHRoYW4gQWxjaGVteSBNaW5kd29ya3MgSW5jLg0NVXNlIG5vIGhvb2tzADs="></span>
32
+ </div>
33
+ <div style="clear: both; line-height: 0; width: 0; height: 0; margin: 0; padding: 0;"> </div>
34
+ </body>
35
+ </html>
@@ -20,6 +20,7 @@ describe ReverseAsciidoctor do
20
20
 
21
21
  it { is_expected.to include "<<a_bspaced,Double \\_\\_ anchor with space>>" }
22
22
  it { is_expected.to include "[[a_bspaced]]" }
23
+ it { is_expected.to include "[[a_Foreword]]\n== Text" }
23
24
  it { is_expected.not_to include "[[_Toc12345]]" }
24
25
 
25
26
  end
@@ -3,6 +3,30 @@ require 'spec_helper'
3
3
  describe ReverseAsciidoctor::Cleaner do
4
4
  let(:cleaner) { ReverseAsciidoctor::Cleaner.new }
5
5
 
6
+ describe '#scrub_whitespace' do
7
+ it "makes consistent nonbreaking spaces" do
8
+ result = cleaner.scrub_whitespace("&nbsp; &#xA0;  ")
9
+ expect(result).to eq "&#xA0; &#xA0; &#xA0;"
10
+ end
11
+
12
+ it "makes four linebreaks into two" do
13
+ result = cleaner.scrub_whitespace("A\n\n\n\nB")
14
+ expect(result).to eq "A\n\nB"
15
+ end
16
+ end
17
+
18
+ describe '#clean_headings' do
19
+ it "removes empty headings" do
20
+ result = cleaner.clean_headings("<h2></h2>")
21
+ expect(result).to eq " "
22
+ end
23
+
24
+ it "cleans superscripts rendered as headings" do
25
+ result = cleaner.clean_headings(%{<p class="Standard" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important"><span class="T1" style="margin: 0;">H</span><h1 class="T2" style="vertical-align: super; font-size: 58%; margin: 0;">2</h1><span class="T1" style="margin: 0;">0</span></p>})
26
+ expect(result).to eq %{<p class="Standard" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important"><span class="T1" style="margin: 0;">H</span><sup>2</sup><span class="T1" style="margin: 0;">0</span></p>}
27
+ end
28
+ end
29
+
6
30
  describe '#remove_newlines' do
7
31
  it 'removes more than 2 subsequent newlines' do
8
32
  result = cleaner.remove_newlines("foo\n\n\nbar")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: reverse_adoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-19 00:00:00.000000000 Z
11
+ date: 2019-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: mimemagic
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rspec
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -195,6 +209,8 @@ files:
195
209
  - spec/assets/paragraphs.html
196
210
  - spec/assets/quotation.html
197
211
  - spec/assets/tables.html
212
+ - spec/assets/test.docx
213
+ - spec/assets/test.html
198
214
  - spec/assets/unknown_tags.html
199
215
  - spec/components/anchors_spec.rb
200
216
  - spec/components/basic_spec.rb
@@ -265,6 +281,8 @@ test_files:
265
281
  - spec/assets/paragraphs.html
266
282
  - spec/assets/quotation.html
267
283
  - spec/assets/tables.html
284
+ - spec/assets/test.docx
285
+ - spec/assets/test.html
268
286
  - spec/assets/unknown_tags.html
269
287
  - spec/components/anchors_spec.rb
270
288
  - spec/components/basic_spec.rb