reverse_adoc 0.2.4 → 0.2.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/macos.yml +16 -5
- data/.github/workflows/ubuntu.yml +18 -5
- data/.github/workflows/windows.yml +20 -6
- data/Gemfile +2 -2
- data/Gemfile.lock +84 -0
- data/README.adoc +40 -7
- data/Rakefile +1 -1
- data/bin/reverse_adoc +24 -17
- data/bin/w2a +19 -31
- data/lib/reverse_adoc.rb +30 -0
- data/lib/{reverse_asciidoctor → reverse_adoc}/cleaner.rb +27 -4
- data/lib/{reverse_asciidoctor → reverse_adoc}/config.rb +3 -2
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters.rb +6 -6
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/a.rb +7 -3
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/aside.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/audio.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/base.rb +2 -2
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/blockquote.rb +2 -2
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/br.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/bypass.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/code.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/div.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/drop.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/em.rb +1 -1
- data/lib/reverse_adoc/converters/example.rb +17 -0
- data/lib/reverse_adoc/converters/express_ref.rb +12 -0
- data/lib/reverse_adoc/converters/ext_description.rb +16 -0
- data/lib/reverse_adoc/converters/ext_descriptions.rb +12 -0
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/figure.rb +1 -1
- data/lib/reverse_adoc/converters/h.rb +34 -0
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/head.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/hr.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/ignore.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/img.rb +27 -25
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/li.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/mark.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/math.rb +3 -3
- data/lib/reverse_adoc/converters/note.rb +17 -0
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/ol.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/p.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/pass_through.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/pre.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/q.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/strong.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/sub.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/sup.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/table.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/td.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/text.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/th.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/tr.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/converters/video.rb +1 -1
- data/lib/{reverse_asciidoctor → reverse_adoc}/errors.rb +1 -1
- data/lib/reverse_adoc/html_converter.rb +56 -0
- data/lib/reverse_adoc/smrl_description_converter.rb +45 -0
- data/lib/reverse_adoc/version.rb +3 -0
- data/reverse_adoc.gemspec +3 -2
- data/spec/assets/anchors.html +8 -0
- data/spec/assets/external_images.docx +0 -0
- data/spec/assets/external_images.html +35 -0
- data/spec/bin/reverse_adoc_spec.rb +32 -0
- data/spec/bin/w2a_spec.rb +35 -0
- data/spec/components/anchors_spec.rb +7 -2
- data/spec/components/basic_spec.rb +2 -2
- data/spec/components/code_spec.rb +4 -4
- data/spec/components/escapables_spec.rb +2 -2
- data/spec/components/from_the_wild_spec.rb +2 -2
- data/spec/components/html_fragment_spec.rb +2 -2
- data/spec/components/lists_spec.rb +2 -2
- data/spec/components/paragraphs_spec.rb +2 -2
- data/spec/components/quotation_spec.rb +2 -2
- data/spec/components/tables_spec.rb +2 -2
- data/spec/components/unknown_tags_spec.rb +9 -9
- data/spec/lib/reverse_adoc.rb +90 -0
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/cleaner_spec.rb +28 -4
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/config_spec.rb +5 -5
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/aside_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/audio_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/blockquote_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/br_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/code_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/div_spec.rb +2 -2
- data/spec/lib/reverse_adoc/converters/example_spec.rb +22 -0
- data/spec/lib/reverse_adoc/converters/express_ref_spec.rb +14 -0
- data/spec/lib/reverse_adoc/converters/ext_description_spec.rb +20 -0
- data/spec/lib/reverse_adoc/converters/ext_descriptions_spec.rb +13 -0
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/figure_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/img_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/li_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/mark_spec.rb +2 -2
- data/spec/lib/reverse_adoc/converters/note_spec.rb +22 -0
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/p_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/pre_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/q_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/strong_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/text_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters/video_spec.rb +2 -2
- data/spec/lib/{reverse_asciidoctor → reverse_adoc}/converters_spec.rb +5 -5
- data/spec/spec_helper.rb +7 -2
- data/spec/support/shell_helpers.rb +15 -0
- metadata +125 -84
- data/lib/reverse_asciidoctor.rb +0 -70
- data/lib/reverse_asciidoctor/converters/h.rb +0 -19
- data/lib/reverse_asciidoctor/version.rb +0 -3
- data/spec/lib/reverse_asciidoctor_spec.rb +0 -37
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 906cadff19bcc530a5aa527f3a7cb87bd80050163638ce87d052d3615e82c68d
|
4
|
+
data.tar.gz: ffaf10410be78e4971c80a7c45c946aa4d4803546102b90c0c5bc01342dd962d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1436a34f5bd46e9c73d41028562f9edd78970bc5c88acf3cf815c6b76975c228045266c32e6cbc63ffcef3fc9d05101f51de446476f10ace57d565cc306d8629
|
7
|
+
data.tar.gz: 78e5514b5d5e01e5407d12644db5deda0d73e9d62f274932a2dee50de098dd65e36ddd96ef0a523726ec1915d80e42ab847d6227e8d927b5a85e229f53aaff86
|
data/.github/workflows/macos.yml
CHANGED
@@ -1,26 +1,37 @@
|
|
1
|
-
# Auto-generated
|
2
|
-
#
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
3
3
|
name: macos
|
4
4
|
|
5
|
-
on:
|
5
|
+
on:
|
6
|
+
push:
|
7
|
+
branches: [ master ]
|
8
|
+
pull_request:
|
9
|
+
paths-ignore:
|
10
|
+
- .github/workflows/ubuntu.yml
|
11
|
+
- .github/workflows/windows.yml
|
6
12
|
|
7
13
|
jobs:
|
8
14
|
test-macos:
|
9
15
|
name: Test on Ruby ${{ matrix.ruby }} macOS
|
10
16
|
runs-on: macos-latest
|
17
|
+
continue-on-error: ${{ matrix.experimental }}
|
11
18
|
strategy:
|
19
|
+
fail-fast: false
|
12
20
|
matrix:
|
13
21
|
ruby: [ '2.6', '2.5', '2.4' ]
|
22
|
+
experimental: [false]
|
23
|
+
include:
|
24
|
+
- ruby: '2.7'
|
25
|
+
experimental: true
|
14
26
|
steps:
|
15
27
|
- uses: actions/checkout@master
|
16
28
|
- name: Use Ruby
|
17
29
|
uses: actions/setup-ruby@v1
|
18
30
|
with:
|
19
31
|
ruby-version: ${{ matrix.ruby }}
|
20
|
-
architecture: 'x64'
|
21
32
|
- name: Update gems
|
22
33
|
run: |
|
23
|
-
sudo gem install bundler
|
34
|
+
sudo gem install bundler --force
|
24
35
|
bundle install --jobs 4 --retry 3
|
25
36
|
- name: Run specs
|
26
37
|
run: |
|
@@ -1,26 +1,39 @@
|
|
1
|
-
# Auto-generated
|
2
|
-
#
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
3
3
|
name: ubuntu
|
4
4
|
|
5
|
-
on:
|
5
|
+
on:
|
6
|
+
push:
|
7
|
+
branches: [ master ]
|
8
|
+
pull_request:
|
9
|
+
paths-ignore:
|
10
|
+
- .github/workflows/macos.yml
|
11
|
+
- .github/workflows/windows.yml
|
6
12
|
|
7
13
|
jobs:
|
8
14
|
test-linux:
|
9
15
|
name: Test on Ruby ${{ matrix.ruby }} Ubuntu
|
10
16
|
runs-on: ubuntu-latest
|
17
|
+
continue-on-error: ${{ matrix.experimental }}
|
11
18
|
strategy:
|
19
|
+
fail-fast: false
|
12
20
|
matrix:
|
13
21
|
ruby: [ '2.6', '2.5', '2.4' ]
|
22
|
+
experimental: [false]
|
23
|
+
include:
|
24
|
+
- ruby: '2.7'
|
25
|
+
experimental: true
|
14
26
|
steps:
|
15
27
|
- uses: actions/checkout@master
|
16
28
|
- name: Use Ruby
|
17
29
|
uses: actions/setup-ruby@v1
|
18
30
|
with:
|
19
31
|
ruby-version: ${{ matrix.ruby }}
|
20
|
-
|
32
|
+
- name: Install LibreOffice
|
33
|
+
run: sudo apt-get -y --no-install-recommends install libreoffice
|
21
34
|
- name: Update gems
|
22
35
|
run: |
|
23
|
-
gem install bundler
|
36
|
+
gem install bundler
|
24
37
|
bundle install --jobs 4 --retry 3
|
25
38
|
- name: Run specs
|
26
39
|
run: |
|
@@ -1,29 +1,43 @@
|
|
1
|
-
# Auto-generated
|
2
|
-
#
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
3
3
|
name: windows
|
4
4
|
|
5
|
-
on:
|
5
|
+
on:
|
6
|
+
push:
|
7
|
+
branches: [ master ]
|
8
|
+
pull_request:
|
9
|
+
paths-ignore:
|
10
|
+
- .github/workflows/macos.yml
|
11
|
+
- .github/workflows/ubuntu.yml
|
6
12
|
|
7
13
|
jobs:
|
8
14
|
test-windows:
|
9
15
|
name: Test on Ruby ${{ matrix.ruby }} Windows
|
10
16
|
runs-on: windows-latest
|
17
|
+
continue-on-error: ${{ matrix.experimental }}
|
11
18
|
strategy:
|
19
|
+
fail-fast: false
|
12
20
|
matrix:
|
13
21
|
ruby: [ '2.6', '2.5', '2.4' ]
|
22
|
+
experimental: [false]
|
23
|
+
include:
|
24
|
+
- ruby: '2.7'
|
25
|
+
experimental: true
|
14
26
|
steps:
|
15
27
|
- uses: actions/checkout@master
|
16
28
|
- name: Use Ruby
|
17
29
|
uses: actions/setup-ruby@v1
|
18
30
|
with:
|
19
31
|
ruby-version: ${{ matrix.ruby }}
|
20
|
-
|
32
|
+
- name: Install LibreOffice
|
33
|
+
run: |
|
34
|
+
choco install libreoffice-still
|
35
|
+
echo "::add-path::C:\Program Files\LibreOffice\program"
|
21
36
|
- name: Update gems
|
22
37
|
shell: pwsh
|
23
38
|
run: |
|
24
|
-
gem install bundler
|
39
|
+
gem install bundler
|
25
40
|
bundle config --local path vendor/bundle
|
26
|
-
bundle update
|
27
41
|
bundle install --jobs 4 --retry 3
|
28
42
|
- name: Run specs
|
29
43
|
run: |
|
data/Gemfile
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
reverse_adoc (0.2.8)
|
5
|
+
mathml2asciimath
|
6
|
+
mimemagic
|
7
|
+
nokogiri (>= 1.10.4)
|
8
|
+
word-to-markdown
|
9
|
+
|
10
|
+
GEM
|
11
|
+
remote: https://rubygems.org/
|
12
|
+
specs:
|
13
|
+
addressable (2.7.0)
|
14
|
+
public_suffix (>= 2.0.2, < 5.0)
|
15
|
+
byebug (11.1.1)
|
16
|
+
cliver (0.3.2)
|
17
|
+
codeclimate-test-reporter (1.0.7)
|
18
|
+
simplecov
|
19
|
+
css_parser (1.7.1)
|
20
|
+
addressable
|
21
|
+
descriptive_statistics (2.5.1)
|
22
|
+
diff-lcs (1.3)
|
23
|
+
docile (1.3.2)
|
24
|
+
ffi (1.13.1)
|
25
|
+
htmlentities (4.3.4)
|
26
|
+
mathml2asciimath (0.0.10)
|
27
|
+
htmlentities (~> 4.3.4)
|
28
|
+
nokogiri (>= 1.10.4)
|
29
|
+
mimemagic (0.3.5)
|
30
|
+
mini_portile2 (2.4.0)
|
31
|
+
nokogiri (1.10.9)
|
32
|
+
mini_portile2 (~> 2.4.0)
|
33
|
+
nokogiri-styles (0.1.2)
|
34
|
+
nokogiri
|
35
|
+
premailer (1.11.1)
|
36
|
+
addressable
|
37
|
+
css_parser (>= 1.6.0)
|
38
|
+
htmlentities (>= 4.0.0)
|
39
|
+
public_suffix (4.0.5)
|
40
|
+
rake (13.0.1)
|
41
|
+
redcarpet (3.5.0)
|
42
|
+
reverse_markdown (1.4.0)
|
43
|
+
nokogiri
|
44
|
+
rspec (3.9.0)
|
45
|
+
rspec-core (~> 3.9.0)
|
46
|
+
rspec-expectations (~> 3.9.0)
|
47
|
+
rspec-mocks (~> 3.9.0)
|
48
|
+
rspec-core (3.9.1)
|
49
|
+
rspec-support (~> 3.9.1)
|
50
|
+
rspec-expectations (3.9.0)
|
51
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
52
|
+
rspec-support (~> 3.9.0)
|
53
|
+
rspec-mocks (3.9.1)
|
54
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
55
|
+
rspec-support (~> 3.9.0)
|
56
|
+
rspec-support (3.9.2)
|
57
|
+
simplecov (0.18.5)
|
58
|
+
docile (~> 1.1)
|
59
|
+
simplecov-html (~> 0.11)
|
60
|
+
simplecov-html (0.12.1)
|
61
|
+
sys-proctable (1.2.5)
|
62
|
+
ffi
|
63
|
+
word-to-markdown (1.1.8)
|
64
|
+
cliver (~> 0.3)
|
65
|
+
descriptive_statistics (~> 2.5)
|
66
|
+
nokogiri-styles (~> 0.1)
|
67
|
+
premailer (~> 1.8)
|
68
|
+
reverse_markdown (~> 1.0)
|
69
|
+
sys-proctable (~> 1.0)
|
70
|
+
|
71
|
+
PLATFORMS
|
72
|
+
ruby
|
73
|
+
|
74
|
+
DEPENDENCIES
|
75
|
+
byebug
|
76
|
+
codeclimate-test-reporter
|
77
|
+
rake
|
78
|
+
redcarpet
|
79
|
+
reverse_adoc!
|
80
|
+
rspec
|
81
|
+
simplecov
|
82
|
+
|
83
|
+
BUNDLED WITH
|
84
|
+
2.0.2
|
data/README.adoc
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
= AsciiDoc from HTML and Microsoft Word: reverse_adoc
|
2
2
|
|
3
|
-
image:https://img.shields.io/gem/v/reverse_adoc.svg["Gem Version", link="https://rubygems.org/gems/reverse_adoc"]
|
4
|
-
image:https://
|
3
|
+
https://github.com/metanorma/reverse_adoc[reverse_adoc] image:https://img.shields.io/gem/v/reverse_adoc.svg["Gem Version", link="https://rubygems.org/gems/reverse_adoc"]::
|
4
|
+
image:https://github.com/metanorma/reverse_adoc/workflows/macos/badge.svg["Build Status", link="https://github.com/metanorma/reverse_adoc/actions?workflow=macos"]
|
5
|
+
image:https://github.com/metanorma/reverse_adoc/workflows/windows/badge.svg["Build Status", link="https://github.com/metanorma/reverse_adoc/actions?workflow=windows"]
|
6
|
+
image:https://github.com/metanorma/reverse_adoc/workflows/ubuntu/badge.svg["Build Status", link="https://github.com/metanorma/reverse_adoc/actions?workflow=ubuntu"]
|
5
7
|
image:https://codeclimate.com/github/metanorma/reverse_adoc/badges/gpa.svg["Code Climate", link="https://codeclimate.com/github/metanorma/reverse_adoc"]
|
6
|
-
image:https://
|
8
|
+
image:https://img.shields.io/github/issues-pr-raw/metanorma/reverse_adoc.svg["Pull Requests", link="https://github.com/metanorma/reverse_adoc/pulls"]
|
9
|
+
image:https://img.shields.io/github/commits-since/metanorma/reverse_adoc/latest.svg["Commits since latest",link="https://github.com/metanorma/reverse_adoc/releases"]
|
7
10
|
|
8
11
|
== Purpose
|
9
12
|
|
@@ -41,10 +44,21 @@ $ reverse_adoc file.html > file.adoc
|
|
41
44
|
$ cat file.html | reverse_adoc > file.adoc
|
42
45
|
----
|
43
46
|
|
47
|
+
=== XML smrl description to AsciiDoc: `reverse_adoc`
|
48
|
+
|
49
|
+
Convert XML smrl description files to AsciiDoc:
|
50
|
+
|
51
|
+
[source,console]
|
52
|
+
----
|
53
|
+
$ reverse_adoc -f smrl_description descriptions.xml > file.adoc
|
54
|
+
$ cat descriptions.xml | reverse_adoc -f smrl_description > file.adoc
|
55
|
+
----
|
56
|
+
|
44
57
|
[source,console]
|
45
58
|
----
|
46
59
|
$ reverse_adoc -h
|
47
60
|
Usage: reverse_adoc [options] <file>
|
61
|
+
-f, --input_format Supply input format to choose converter (html or smrl_description)
|
48
62
|
-m, --mathml2asciimath Convert MathML to AsciiMath
|
49
63
|
-o, --output=FILENAME Output file to write to
|
50
64
|
-e, --external-images Export images if data URI
|
@@ -92,6 +106,13 @@ document, and it may not cope with ordered lists or headings with customised app
|
|
92
106
|
For best results, reset the styles in the document you're converting to those in
|
93
107
|
the default `Normal.dot` template.
|
94
108
|
|
109
|
+
NOTE: `w2a` requires the command-line version of LibreOffice, `soffice`. As it turns out,
|
110
|
+
LibreOffice v6 appears to render formulae in HTML as images instead of MathML expressions;
|
111
|
+
use LibreOffice v5. If you have both LibreOffice v5 and LibreOffice v6 installed, make sure
|
112
|
+
that your OS path searches for the LibreOffice v5 version of `soffice` first; e.g. on Mac,
|
113
|
+
include something like `/Applications/LibreOffice5.4.7.2.app/Contents/MacOS` in your PATH
|
114
|
+
environment.
|
115
|
+
|
95
116
|
NOTE: Some information in OMML is not preserved in the export to MathML from LibreOffice;
|
96
117
|
in particular, font shifts such as double-struck fonts.
|
97
118
|
The LibreOffice exporter does seem to drop some text (possibly associated with
|
@@ -226,7 +247,7 @@ The gem does not support:
|
|
226
247
|
=== MathML support
|
227
248
|
|
228
249
|
If you are using this gem in the context of https://www.metanorma.com[Metanorma],
|
229
|
-
Metanorma
|
250
|
+
Metanorma AsciiDoc accepts MathML as a native mathematical format. So you do not need
|
230
251
|
to convert the MathML to AsciiMath.
|
231
252
|
|
232
253
|
The gem will optionally invoke the https://github.com/metanorma/mathml2asciimath
|
@@ -242,6 +263,18 @@ AsciiMath, and fail. But of course, MathJax has no problem with MathML, and some
|
|
242
263
|
on the Asciidoctor output can ensure that the MathML is treated by MathJax (or whatever else
|
243
264
|
uses the output) as such; so this is still much better than nothing for stem processing.
|
244
265
|
|
266
|
+
=== Word cleanup
|
267
|
+
|
268
|
+
This gem is routinely used in the Metanorma project to export Word documents to AsciiDoc.
|
269
|
+
The HTML export from Word that the gem uses, from LibreOffice, is much cleaner than the
|
270
|
+
native HTML 4 export from Word; but it has some infelicities which this gem cleans up:
|
271
|
+
|
272
|
+
* The HTML export has trouble with subscripts, and routinely exports them as headings; the `w2a`
|
273
|
+
script tries to clean them up.
|
274
|
+
* The `w2a` script cleans up spaces, but it does not strip them.
|
275
|
+
* Spaces are removed from anchors and cross-references.
|
276
|
+
* Double underscores are removed from anchors and cross-references.
|
277
|
+
* Cross-references to `_GoBack` and to `_Toc` followed by numbers (used to construct tables of contents) are ignored.
|
245
278
|
|
246
279
|
== Ruby library usage
|
247
280
|
|
@@ -251,7 +284,7 @@ Simple to use.
|
|
251
284
|
|
252
285
|
[source,ruby]
|
253
286
|
----
|
254
|
-
result =
|
287
|
+
result = ReverseAdoc.convert input
|
255
288
|
result.inspect # " *feelings* "
|
256
289
|
----
|
257
290
|
|
@@ -261,7 +294,7 @@ Just pass your chosen configuration options in after the input. The given option
|
|
261
294
|
|
262
295
|
[source,ruby]
|
263
296
|
----
|
264
|
-
|
297
|
+
ReverseAdoc.convert(input, unknown_tags: :raise, mathml2asciimath: true)
|
265
298
|
----
|
266
299
|
|
267
300
|
|
@@ -271,7 +304,7 @@ Or configure it block style on a initializer level. These configurations will la
|
|
271
304
|
|
272
305
|
[source,ruby]
|
273
306
|
----
|
274
|
-
|
307
|
+
ReverseAdoc.config do |config|
|
275
308
|
config.unknown_tags = :bypass
|
276
309
|
config.mathml2asciimath = true
|
277
310
|
config.tag_border = ''
|
data/Rakefile
CHANGED
data/bin/reverse_adoc
CHANGED
@@ -1,31 +1,38 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
# Usage:
|
3
|
-
# Usage: cat FILE |
|
4
|
-
require '
|
2
|
+
# Usage: reverse_adoc [FILE]...
|
3
|
+
# Usage: cat FILE | reverse_adoc
|
4
|
+
require 'rubygems'
|
5
|
+
require 'bundler/setup'
|
6
|
+
|
7
|
+
require 'reverse_adoc'
|
5
8
|
require 'optparse'
|
6
9
|
require 'fileutils'
|
7
10
|
|
8
11
|
OptionParser.new do |opts|
|
9
12
|
opts.banner = "Usage: reverse_adoc [options] <file>"
|
10
13
|
opts.on('-m', '--mathml2asciimath', 'Convert MathML to AsciiMath') do |v|
|
11
|
-
|
14
|
+
ReverseAdoc.config.mathml2asciimath = true
|
12
15
|
end
|
13
16
|
|
14
17
|
opts.on('-oFILENAME', '--output=FILENAME', 'Output file to write to') do |v|
|
15
|
-
|
16
|
-
# puts "output goes to #{
|
18
|
+
ReverseAdoc.config.destination = File.expand_path(v)
|
19
|
+
# puts "output goes to #{ReverseAdoc.config.destination}"
|
17
20
|
end
|
18
21
|
|
19
22
|
opts.on('-e', '--external-images', 'Export images if data URI') do |v|
|
20
|
-
|
23
|
+
ReverseAdoc.config.external_images = true
|
24
|
+
end
|
25
|
+
|
26
|
+
opts.on('-f', '--input_format [html, smrl_description]', 'Unknown input format (default: html)') do |v|
|
27
|
+
ReverseAdoc.config.input_format = v
|
21
28
|
end
|
22
29
|
|
23
30
|
opts.on('-u', '--unknown_tags [pass_through, drop, bypass, raise]', 'Unknown tag handling (default: pass_through)') do |v|
|
24
|
-
|
31
|
+
ReverseAdoc.config.unknown_tags = v
|
25
32
|
end
|
26
33
|
|
27
34
|
opts.on('-v', '--version', 'Version information') do |v|
|
28
|
-
puts "reverse_adoc: v#{
|
35
|
+
puts "reverse_adoc: v#{ReverseAdoc::VERSION}"
|
29
36
|
exit
|
30
37
|
end
|
31
38
|
|
@@ -38,30 +45,30 @@ end.parse!
|
|
38
45
|
|
39
46
|
if filename = ARGV.pop
|
40
47
|
input_content = IO.read(filename)
|
41
|
-
|
48
|
+
ReverseAdoc.config.sourcedir = File.dirname(File.expand_path(filename))
|
42
49
|
else
|
43
|
-
if
|
50
|
+
if ReverseAdoc.config.external_images
|
44
51
|
raise "The -e | --external-images feature cannot be used with STDIN input. Exiting."
|
45
52
|
end
|
46
53
|
|
47
54
|
input_content = ARGF.read
|
48
55
|
end
|
49
56
|
|
50
|
-
if
|
57
|
+
if ReverseAdoc.config.external_images && ReverseAdoc.config.destination.nil?
|
51
58
|
raise "The -e | --external-images feature must be used with -o | --output. Exiting."
|
52
59
|
end
|
53
60
|
|
54
61
|
# Read from STDIN
|
55
|
-
adoc_content =
|
62
|
+
adoc_content = ReverseAdoc.convert(input_content)
|
56
63
|
|
57
64
|
# Print to STDOUT
|
58
|
-
unless
|
65
|
+
unless ReverseAdoc.config.destination
|
59
66
|
puts adoc_content
|
60
67
|
exit
|
61
68
|
end
|
62
69
|
|
63
|
-
# Write output to
|
64
|
-
FileUtils.mkdir_p(File.dirname(
|
65
|
-
File.open(
|
70
|
+
# Write output to ReverseAdoc.config.destination
|
71
|
+
FileUtils.mkdir_p(File.dirname(ReverseAdoc.config.destination))
|
72
|
+
File.open(ReverseAdoc.config.destination, "w") do |file|
|
66
73
|
file.write(adoc_content)
|
67
74
|
end
|
data/bin/w2a
CHANGED
@@ -1,44 +1,32 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
|
+
require 'rubygems'
|
5
|
+
require 'bundler/setup'
|
6
|
+
|
4
7
|
require 'word-to-markdown'
|
5
8
|
require 'optparse'
|
6
|
-
require '
|
7
|
-
|
8
|
-
def scrub_whitespace(string)
|
9
|
-
string = string.dup
|
10
|
-
string.gsub!(/ |\ |\u00a0/i, ' ') # HTML encoded spaces
|
11
|
-
string.sub!(/^\A[[:space:]]+/m, '') # document leading whitespace
|
12
|
-
string.sub!(/[[:space:]]+\z$/m, '') # document trailing whitespace
|
13
|
-
string.gsub!(/([ ]+)$/, ' ') # line trailing whitespace
|
14
|
-
string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
|
15
|
-
#string.delete!(' ') # Unicode non-breaking spaces, injected as tabs
|
16
|
-
# following added by me
|
17
|
-
string.gsub!(%r{<h[1-9][^>]*></h1>}, " ") # I don't know why Libre Office is inserting them, but they need to go
|
18
|
-
string.gsub!(%r{<h1[^>]* style="vertical-align: super;[^>]*>([^<]+)</h1>},
|
19
|
-
"<sup>\\1</sup>") # I absolutely don't know why Libre Office is rendering superscripts as h1
|
20
|
-
string
|
21
|
-
end
|
9
|
+
require 'reverse_adoc'
|
22
10
|
|
23
11
|
ARGV.push('-h') if ARGV.empty?
|
24
12
|
|
25
13
|
OptionParser.new do |opts|
|
26
14
|
opts.banner = "Usage: w2a [options] <file>"
|
27
15
|
opts.on('-m', '--mathml2asciimath', 'Convert MathML to AsciiMath') do |v|
|
28
|
-
|
16
|
+
ReverseAdoc.config.mathml2asciimath = true
|
29
17
|
end
|
30
18
|
|
31
19
|
opts.on('-oFILENAME', '--output=FILENAME', 'Output file to write to') do |v|
|
32
|
-
|
33
|
-
# puts "output goes to #{
|
20
|
+
ReverseAdoc.config.destination = File.expand_path(v)
|
21
|
+
# puts "output goes to #{ReverseAdoc.config.destination}"
|
34
22
|
end
|
35
23
|
|
36
24
|
opts.on('-e', '--external-images', 'Export images if data URI') do |v|
|
37
|
-
|
25
|
+
ReverseAdoc.config.external_images = true
|
38
26
|
end
|
39
27
|
|
40
28
|
opts.on('-v', '--version', 'Version information') do |v|
|
41
|
-
puts "reverse_adoc: v#{
|
29
|
+
puts "reverse_adoc: v#{ReverseAdoc::VERSION}"
|
42
30
|
puts "[dependency] WordToMarkdown: v#{WordToMarkdown::VERSION}"
|
43
31
|
unless Gem.win_platform?
|
44
32
|
puts "[dependency] LibreOffice: v#{WordToMarkdown.soffice.version}"
|
@@ -58,28 +46,28 @@ end.parse!
|
|
58
46
|
filename = ARGV.pop
|
59
47
|
raise "Please provide an input file to process. Exiting." unless filename
|
60
48
|
|
61
|
-
if
|
49
|
+
if ReverseAdoc.config.external_images && ReverseAdoc.config.destination.nil?
|
62
50
|
raise "The -e | --external-images feature must be used with -o | --output. Exiting."
|
63
51
|
end
|
64
52
|
|
65
|
-
|
66
|
-
# puts "ReverseAsciidoctor.config.sourcedir #{ReverseAsciidoctor.config.sourcedir}"
|
53
|
+
ReverseAdoc.config.sourcedir = Dir.mktmpdir
|
67
54
|
|
68
|
-
doc = WordToMarkdown.new(filename,
|
69
|
-
|
70
|
-
|
55
|
+
doc = WordToMarkdown.new(filename, ReverseAdoc.config.sourcedir)
|
56
|
+
#File.open("test.html", "w:UTF-8") { |f| f.write doc.document.html }
|
57
|
+
adoc_content = ReverseAdoc.convert(
|
58
|
+
ReverseAdoc.cleaner.preprocess_word_html(doc.document.html),
|
71
59
|
WordToMarkdown::REVERSE_MARKDOWN_OPTIONS
|
72
60
|
)
|
73
61
|
# puts scrub_whitespace(doc.document.html)
|
74
62
|
|
75
63
|
# Print to STDOUT
|
76
|
-
unless
|
64
|
+
unless ReverseAdoc.config.destination
|
77
65
|
puts adoc_content
|
78
66
|
exit
|
79
67
|
end
|
80
68
|
|
81
|
-
# Write output to
|
82
|
-
FileUtils.mkdir_p(File.dirname(
|
83
|
-
File.open(
|
69
|
+
# Write output to ReverseAdoc.config.destination
|
70
|
+
FileUtils.mkdir_p(File.dirname(ReverseAdoc.config.destination))
|
71
|
+
File.open(ReverseAdoc.config.destination, "w") do |file|
|
84
72
|
file.write(adoc_content)
|
85
73
|
end
|