reverse_adoc 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: de7821c8ca445117a045f3c2c28ae78f20a2185f61af5de3fff7ed064dd98c3e
4
- data.tar.gz: 53151bcdd028e693da6b496df31c7a7da5ffa1dea8b0ee22696011a91fae074a
3
+ metadata.gz: d3d387a4fbedd246900150be67a80bf62ee70a70cf697b1a2a7a247080d879ad
4
+ data.tar.gz: 956f8db0ca1d2f34e1f165f323e6db6d1f5e68a91dc886d6fd3cf3f1f7aa0886
5
5
  SHA512:
6
- metadata.gz: 4ae2e50d201e868dc7caa44a599695c6d03f155dd72013888d3cdee5505b074aa37cfc93cfc96435e43fa9bc0096ed46742dd2ff77280957c99cde1ca6454cee
7
- data.tar.gz: 8a628ecfe850737831d5f1e10ef30ee65390f9599d7d75f7323de4ca39445c2d873fbaca49db7178b913d696a8fce85c50ce780a7e1715a37a12dce7c4e63cc4
6
+ metadata.gz: a72c96ffc8e41f7e38b28108107b12bec33a22e3b1722416e7b4d5a493ab3f47d6c3fab49a98b27969ba8235cb3ed6fff86f027a8c9ff5e4b3276af8d2d0a752
7
+ data.tar.gz: 1f0de4f61007133d49c0dfcd0dd85e8a87f8445a59fb41ba5dd1001eeec4e1405402449389b3e1355a453ff5a21bf6fa7d4b8bfd1fc883c654feee83600e55f3
@@ -1,11 +1,12 @@
1
1
  = AsciiDoc from HTML and Microsoft Word: reverse_adoc
2
2
 
3
- image:https://img.shields.io/gem/v/reverse_asciidoctor.svg["Gem Version", link="https://rubygems.org/gems/reverse_asciidoctor"]
4
- image:https://travis-ci.com/metanorma/reverse_asciidoctor.svg["Build Status", link="https://travis-ci.com/metanorma/reverse_asciidoctor"]
5
- image:https://ci.appveyor.com/api/projects/status/9dui2fs4pc590f4k?svg=true["Appveyor Build Status", link="https://ci.appveyor.com/project/metanorma/reverse-asciidoctor"]
6
- image:https://codeclimate.com/github/metanorma/reverse_asciidoctor/badges/gpa.svg["Code Climate", link="https://codeclimate.com/github/metanorma/reverse_asciidoctor"]
7
- image:https://img.shields.io/github/issues-pr-raw/metanorma/reverse_asciidoctor.svg["Pull Requests", link="https://github.com/metanorma/reverse_asciidoctor/pulls"]
8
- image:https://img.shields.io/github/commits-since/metanorma/reverse_asciidoctor/latest.svg["Commits since latest",link="https://github.com/metanorma/reverse_asciidoctor/releases"]
3
+ https://github.com/metanorma/reverse_adoc[reverse_adoc] image:https://img.shields.io/gem/v/reverse_adoc.svg["Gem Version", link="https://rubygems.org/gems/reverse_adoc"]::
4
+ image:https://github.com/metanorma/reverse_adoc/workflows/macos/badge.svg["Build Status", link="https://github.com/metanorma/reverse_adoc/actions?workflow=macos"]
5
+ image:https://github.com/metanorma/reverse_adoc/workflows/windows/badge.svg["Build Status", link="https://github.com/metanorma/reverse_adoc/actions?workflow=windows"]
6
+ image:https://github.com/metanorma/reverse_adoc/workflows/ubuntu/badge.svg["Build Status", link="https://github.com/metanorma/reverse_adoc/actions?workflow=ubuntu"]
7
+ image:https://codeclimate.com/github/metanorma/reverse_adoc/badges/gpa.svg["Code Climate", link="https://codeclimate.com/github/metanorma/reverse_adoc"]
8
+ image:https://img.shields.io/github/issues-pr-raw/metanorma/reverse_adoc.svg["Pull Requests", link="https://github.com/metanorma/reverse_adoc/pulls"]
9
+ image:https://img.shields.io/github/commits-since/metanorma/reverse_adoc/latest.svg["Commits since latest",link="https://github.com/metanorma/reverse_adoc/releases"]
9
10
 
10
11
  == Purpose
11
12
 
@@ -1,6 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
2
  # Usage: reverse_asciidoctor [FILE]...
3
3
  # Usage: cat FILE | reverse_asciidoctor
4
+ require 'rubygems'
5
+ require 'bundler/setup'
6
+
4
7
  require 'reverse_asciidoctor'
5
8
  require 'optparse'
6
9
  require 'fileutils'
data/bin/w2a CHANGED
@@ -1,25 +1,12 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
+ require 'rubygems'
4
+ require 'bundler/setup'
3
5
 
4
6
  require 'word-to-markdown'
5
7
  require 'optparse'
6
8
  require 'reverse_asciidoctor'
7
9
 
8
- def scrub_whitespace(string)
9
- string = string.dup
10
- string.gsub!(/&nbsp;|\&#xA0;|\u00a0/i, '&#xA0;') # HTML encoded spaces
11
- string.sub!(/^\A[[:space:]]+/m, '') # document leading whitespace
12
- string.sub!(/[[:space:]]+\z$/m, '') # document trailing whitespace
13
- string.gsub!(/([ ]+)$/, ' ') # line trailing whitespace
14
- string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
15
- #string.delete!(' ') # Unicode non-breaking spaces, injected as tabs
16
- # following added by me
17
- string.gsub!(%r{<h[1-9][^>]*></h1>}, " ") # I don't know why Libre Office is inserting them, but they need to go
18
- string.gsub!(%r{<h1[^>]* style="vertical-align: super;[^>]*>([^<]+)</h1>},
19
- "<sup>\\1</sup>") # I absolutely don't know why Libre Office is rendering superscripts as h1
20
- string
21
- end
22
-
23
10
  ARGV.push('-h') if ARGV.empty?
24
11
 
25
12
  OptionParser.new do |opts|
@@ -66,9 +53,9 @@ ReverseAsciidoctor.config.sourcedir = Dir.mktmpdir
66
53
  # puts "ReverseAsciidoctor.config.sourcedir #{ReverseAsciidoctor.config.sourcedir}"
67
54
 
68
55
  doc = WordToMarkdown.new(filename, ReverseAsciidoctor.config.sourcedir)
69
- #File.open("test.html", "w:UTF-8") { |f| f.write doc.document.html }
56
+ File.open("test.html", "w:UTF-8") { |f| f.write doc.document.html }
70
57
  adoc_content = ReverseAsciidoctor.convert(
71
- scrub_whitespace(doc.document.html),
58
+ ReverseAsciidoctor.cleaner.preprocess_word_html(doc.document.html),
72
59
  WordToMarkdown::REVERSE_MARKDOWN_OPTIONS
73
60
  )
74
61
  # puts scrub_whitespace(doc.document.html)
@@ -64,6 +64,29 @@ module ReverseAsciidoctor
64
64
  string.gsub(/(\*\*|~~|__)\s([\.!\?'"])/, "\\1".strip + "\\2")
65
65
  end
66
66
 
67
+ # preprocesses HTML, rather than postprocessing it
68
+ def preprocess_word_html(string)
69
+ clean_headings(scrub_whitespace(string.dup))
70
+ end
71
+
72
+ def scrub_whitespace(string)
73
+ string.gsub!(/&nbsp;|\&#xA0;|\u00a0/i, '&#xA0;') # HTML encoded spaces
74
+ string.sub!(/^\A[[:space:]]+/m, '') # document leading whitespace
75
+ string.sub!(/[[:space:]]+\z$/m, '') # document trailing whitespace
76
+ string.gsub!(/([ ]+)$/, ' ') # line trailing whitespace
77
+ string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
78
+ #string.delete!('?| ') # Unicode non-breaking spaces, injected as tabs
79
+ string
80
+ end
81
+
82
+ # following added by me
83
+ def clean_headings(string)
84
+ string.gsub!(%r{<h([1-9])[^>]*></h\1>}, " ") # I don't know why Libre Office is inserting them, but they need to go
85
+ string.gsub!(%r{<h([1-9])[^>]* style="vertical-align: super;[^>]*>(.+?)</h\1>},
86
+ "<sup>\\2</sup>") # I absolutely don't know why Libre Office is rendering superscripts as h1
87
+ string
88
+ end
89
+
67
90
  private
68
91
 
69
92
  def preserve_border_whitespaces(string, options = {}, &block)
@@ -3,9 +3,24 @@ module ReverseAsciidoctor
3
3
  class H < Base
4
4
  def convert(node, state = {})
5
5
  id = node['id']
6
- anchor = id ? "[[#{id}]]\n" : ""
6
+ anchor = id ? "[[#{id}]]" : ""
7
+ internal_anchor = treat_children_anchors(node, state) || ""
8
+ anchor.empty? and anchor = internal_anchor
9
+ anchor.empty? or anchor += "\n"
7
10
  prefix = '=' * (node.name[/\d/].to_i + 1)
8
- ["\n", anchor, prefix, ' ', treat_children(node, state), "\n"].join
11
+ ["\n", anchor, prefix, ' ', treat_children_no_anchors(node, state), "\n"].join
12
+ end
13
+
14
+ def treat_children_no_anchors(node, state)
15
+ node.children.reject { |a| a.name == "a" }.inject('') do |memo, child|
16
+ memo << treat(child, state)
17
+ end
18
+ end
19
+
20
+ def treat_children_anchors(node, state)
21
+ node.children.select { |a| a.name == "a" }.inject('') do |memo, child|
22
+ memo << treat(child, state)
23
+ end
9
24
  end
10
25
  end
11
26
 
@@ -1,5 +1,8 @@
1
1
  require "fileutils"
2
2
  require "pathname"
3
+ require "tempfile"
4
+ require "base64"
5
+ require "mimemagic"
3
6
 
4
7
  module ReverseAsciidoctor
5
8
  module Converters
@@ -23,29 +26,7 @@ module ReverseAsciidoctor
23
26
  images_dir = dest_dir + 'images'
24
27
  FileUtils.mkdir_p(images_dir)
25
28
 
26
- ext = ""
27
-
28
- if imgdata
29
- file = Tempfile.open(["radoc", ".jpg"]) do |f|
30
- begin
31
- f.binmode
32
- f.write(Base64.strict_decode64(imgdata))
33
- f.rewind
34
- ext = MimeMagic.by_magic(f)
35
- ensure
36
- f.close!
37
- end
38
- end
39
-
40
- image_src_path = file.path
41
- # puts "tempfile: #{file}"
42
-
43
- else
44
- ext = File.extname(src).strip.downcase[1..-1]
45
- image_src_path = Pathname.new(ReverseAsciidoctor.config.sourcedir) + src
46
-
47
- end
48
-
29
+ ext, image_src_path = determine_image_src_path(imgdata)
49
30
  image_dest_path = images_dir + "#{image_number}.#{ext}"
50
31
 
51
32
  # puts "image_dest_path: #{image_dest_path.to_s}"
@@ -57,6 +38,23 @@ module ReverseAsciidoctor
57
38
  image_dest_path.relative_path_from(dest_dir)
58
39
  end
59
40
 
41
+ def determine_image_src_path(imgdata)
42
+ return copy_temp_file(imgdata) if imgdata
43
+
44
+ ext = File.extname(src).strip.downcase[1..-1]
45
+ [ext, Pathname.new(ReverseAsciidoctor.config.sourcedir) + src]
46
+ end
47
+
48
+ def copy_temp_file(imgdata)
49
+ Tempfile.open(['radoc', '.jpg']) do |f|
50
+ f.binmode
51
+ f.write(Base64.strict_decode64(imgdata))
52
+ f.rewind
53
+ ext = MimeMagic.by_magic(f).subtype
54
+ [ext, f.path]
55
+ end
56
+ end
57
+
60
58
  def convert(node, state = {})
61
59
  alt = node['alt']
62
60
  src = node['src']
@@ -1,3 +1,3 @@
1
1
  module ReverseAsciidoctor
2
- VERSION = '0.2.5'
2
+ VERSION = '0.2.6'
3
3
  end
@@ -22,6 +22,7 @@ Gem::Specification.new do |s|
22
22
  # specify any dependencies here; for example:
23
23
  s.add_dependency 'nokogiri', ">= 1.10.4"
24
24
  s.add_dependency 'mathml2asciimath'
25
+ s.add_dependency 'mimemagic'
25
26
  s.add_development_dependency 'rspec'
26
27
  s.add_development_dependency 'simplecov'
27
28
  s.add_development_dependency 'rake'
@@ -24,5 +24,7 @@
24
24
  <img alt="foobar image" src="http://foobar.com/foobar.png">
25
25
  <img alt="foobar image 2" title="this is the foobar image 2" src="http://foobar.com/foobar2.png">
26
26
  some text...
27
+
28
+ <h1><a id="a__Foreword"></a>Text</h1>
27
29
  </body>
28
30
  </html>
Binary file
@@ -0,0 +1,35 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd">
2
+ <?xml version="1.0" encoding="UTF-8"??><html xmlns="http://www.w3.org/1999/xhtml" style="margin: 0;">
3
+ <!--This file was converted to xhtml by LibreOffice - see http://cgit.freedesktop.org/libreoffice/core/tree/filter/source/xslt for the code.--><head profile="http://dublincore.org/documents/dcmi-terms/">
4
+ <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8">
5
+ <meta name="DCTERMS.title" content="" xml:lang="en-US">
6
+ <meta name="DCTERMS.language" content="en-US" scheme="DCTERMS.RFC4646">
7
+ <meta name="DCTERMS.source" content="http://xml.openoffice.org/odf2xhtml">
8
+ <meta name="DCTERMS.creator" content="Nick Nicholas">
9
+ <meta name="DCTERMS.issued" content="2019-11-21T08:48:00" scheme="DCTERMS.W3CDTF">
10
+ <meta name="DCTERMS.contributor" content="Nick Nicholas">
11
+ <meta name="DCTERMS.modified" content="2019-11-21T10:01:00" scheme="DCTERMS.W3CDTF">
12
+ <meta name="DCTERMS.provenance" content="" xml:lang="en-US">
13
+ <meta name="DCTERMS.subject" content="," xml:lang="en-US">
14
+ <link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" hreflang="en">
15
+ <link rel="schema.DCTERMS" href="http://purl.org/dc/terms/" hreflang="en">
16
+ <link rel="schema.DCTYPE" href="http://purl.org/dc/dcmitype/" hreflang="en">
17
+ <link rel="schema.DCAM" href="http://purl.org/dc/dcam/" hreflang="en">
18
+ </head>
19
+ <body dir="ltr" style="max-width: 21.001cm; margin: 2.54cm 3.175cm;">
20
+ <h1 class="P3" style="clear: both; color: #2f5496; font-size: 16pt; font-family: Calibri Light; writing-mode: lr-tb; margin: 0.423cm 0 0cm;" align="left ! important">
21
+ <a id="a__Hello" style="margin: 0;"><span style="margin: 0;"></span></a><span class="T1" style="margin: 0;">Hello</span>
22
+ </h1>
23
+ <p class="P1" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important"> </p>
24
+ <p class="Standard" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important"><span class="T1" style="margin: 0;">H</span><h1 class="T3" style="vertical-align: super; font-size: 58%; margin: 0;">2</h1><span class="T1" style="margin: 0;">0</span></p>
25
+ <p class="P1" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important"> </p>
26
+ <!--Next 'div' was a 'text:p'.--><div class="Standard" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important">
27
+ <!--Next ' span' is a draw:frame. --><span style="margin: 0;"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block" style="margin: 0;"><mrow style="margin: 0;"><mrow style="margin: 0;"><mrow style="margin: 0;"></mrow><mrow style="margin: 0;"><mi style="margin: 0;">i</mi><mo stretchy="false" style="margin: 0;">=</mo><mn style="margin: 0;">1</mn></mrow></mrow><mrow style="margin: 0;"><mrow style="margin: 0;"></mrow><mi style="margin: 0;">n</mi><msubsup style="margin: 0;"><mi style="margin: 0;">β</mi><mn style="margin: 0;">2</mn><mi style="margin: 0;">i</mi></msubsup></mrow></mrow></math></span>
28
+ </div>
29
+ <div style="clear: both; line-height: 0; width: 0; height: 0; margin: 0; padding: 0;"> </div>
30
+ <!--Next 'div' was a 'text:p'.--><div class="Standard" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important">
31
+ <a id="_GoBack" style="margin: 0;"></a><!--Next ' span' is a draw:frame. --><span style="height: 1.764cm; margin: 0cm; padding: 0; border: none; width: 1.764cm; font-size: 12pt; font-family: Calibri; text-align: center; vertical-align: top; background-color: transparent;" class="fr1" id="Εικόνα_2"><img style="height: 1.764cm; width: 1.764cm; margin: 0;" alt="" src=""></span>
32
+ </div>
33
+ <div style="clear: both; line-height: 0; width: 0; height: 0; margin: 0; padding: 0;"> </div>
34
+ </body>
35
+ </html>
@@ -20,6 +20,7 @@ describe ReverseAsciidoctor do
20
20
 
21
21
  it { is_expected.to include "<<a_bspaced,Double \\_\\_ anchor with space>>" }
22
22
  it { is_expected.to include "[[a_bspaced]]" }
23
+ it { is_expected.to include "[[a_Foreword]]\n== Text" }
23
24
  it { is_expected.not_to include "[[_Toc12345]]" }
24
25
 
25
26
  end
@@ -3,6 +3,30 @@ require 'spec_helper'
3
3
  describe ReverseAsciidoctor::Cleaner do
4
4
  let(:cleaner) { ReverseAsciidoctor::Cleaner.new }
5
5
 
6
+ describe '#scrub_whitespace' do
7
+ it "makes consistent nonbreaking spaces" do
8
+ result = cleaner.scrub_whitespace("&nbsp; &#xA0;  ")
9
+ expect(result).to eq "&#xA0; &#xA0; &#xA0;"
10
+ end
11
+
12
+ it "makes four linebreaks into two" do
13
+ result = cleaner.scrub_whitespace("A\n\n\n\nB")
14
+ expect(result).to eq "A\n\nB"
15
+ end
16
+ end
17
+
18
+ describe '#clean_headings' do
19
+ it "removes empty headings" do
20
+ result = cleaner.clean_headings("<h2></h2>")
21
+ expect(result).to eq " "
22
+ end
23
+
24
+ it "cleans superscripts rendered as headings" do
25
+ result = cleaner.clean_headings(%{<p class="Standard" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important"><span class="T1" style="margin: 0;">H</span><h1 class="T2" style="vertical-align: super; font-size: 58%; margin: 0;">2</h1><span class="T1" style="margin: 0;">0</span></p>})
26
+ expect(result).to eq %{<p class="Standard" style="font-size: 12pt; font-family: Calibri; writing-mode: lr-tb; margin: 0;" align="left ! important"><span class="T1" style="margin: 0;">H</span><sup>2</sup><span class="T1" style="margin: 0;">0</span></p>}
27
+ end
28
+ end
29
+
6
30
  describe '#remove_newlines' do
7
31
  it 'removes more than 2 subsequent newlines' do
8
32
  result = cleaner.remove_newlines("foo\n\n\nbar")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: reverse_adoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-19 00:00:00.000000000 Z
11
+ date: 2019-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: mimemagic
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rspec
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -195,6 +209,8 @@ files:
195
209
  - spec/assets/paragraphs.html
196
210
  - spec/assets/quotation.html
197
211
  - spec/assets/tables.html
212
+ - spec/assets/test.docx
213
+ - spec/assets/test.html
198
214
  - spec/assets/unknown_tags.html
199
215
  - spec/components/anchors_spec.rb
200
216
  - spec/components/basic_spec.rb
@@ -265,6 +281,8 @@ test_files:
265
281
  - spec/assets/paragraphs.html
266
282
  - spec/assets/quotation.html
267
283
  - spec/assets/tables.html
284
+ - spec/assets/test.docx
285
+ - spec/assets/test.html
268
286
  - spec/assets/unknown_tags.html
269
287
  - spec/components/anchors_spec.rb
270
288
  - spec/components/basic_spec.rb