reverse_asciidoctor 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE +32 -0
- data/README.adoc +186 -0
- data/Rakefile +14 -0
- data/bin/reverse_asciidoctor +14 -0
- data/bin/w2m +31 -0
- data/lib/reverse_asciidoctor.rb +70 -0
- data/lib/reverse_asciidoctor/cleaner.rb +85 -0
- data/lib/reverse_asciidoctor/config.rb +28 -0
- data/lib/reverse_asciidoctor/converters.rb +33 -0
- data/lib/reverse_asciidoctor/converters/a.rb +38 -0
- data/lib/reverse_asciidoctor/converters/aside.rb +14 -0
- data/lib/reverse_asciidoctor/converters/audio.rb +34 -0
- data/lib/reverse_asciidoctor/converters/base.rb +24 -0
- data/lib/reverse_asciidoctor/converters/blockquote.rb +18 -0
- data/lib/reverse_asciidoctor/converters/br.rb +11 -0
- data/lib/reverse_asciidoctor/converters/bypass.rb +77 -0
- data/lib/reverse_asciidoctor/converters/code.rb +15 -0
- data/lib/reverse_asciidoctor/converters/div.rb +14 -0
- data/lib/reverse_asciidoctor/converters/drop.rb +18 -0
- data/lib/reverse_asciidoctor/converters/em.rb +18 -0
- data/lib/reverse_asciidoctor/converters/figure.rb +21 -0
- data/lib/reverse_asciidoctor/converters/h.rb +19 -0
- data/lib/reverse_asciidoctor/converters/head.rb +18 -0
- data/lib/reverse_asciidoctor/converters/hr.rb +11 -0
- data/lib/reverse_asciidoctor/converters/ignore.rb +12 -0
- data/lib/reverse_asciidoctor/converters/img.rb +23 -0
- data/lib/reverse_asciidoctor/converters/li.rb +24 -0
- data/lib/reverse_asciidoctor/converters/mark.rb +12 -0
- data/lib/reverse_asciidoctor/converters/math.rb +14 -0
- data/lib/reverse_asciidoctor/converters/ol.rb +46 -0
- data/lib/reverse_asciidoctor/converters/p.rb +17 -0
- data/lib/reverse_asciidoctor/converters/pass_through.rb +9 -0
- data/lib/reverse_asciidoctor/converters/pre.rb +38 -0
- data/lib/reverse_asciidoctor/converters/q.rb +12 -0
- data/lib/reverse_asciidoctor/converters/strong.rb +17 -0
- data/lib/reverse_asciidoctor/converters/sub.rb +12 -0
- data/lib/reverse_asciidoctor/converters/sup.rb +12 -0
- data/lib/reverse_asciidoctor/converters/table.rb +64 -0
- data/lib/reverse_asciidoctor/converters/td.rb +67 -0
- data/lib/reverse_asciidoctor/converters/text.rb +65 -0
- data/lib/reverse_asciidoctor/converters/th.rb +16 -0
- data/lib/reverse_asciidoctor/converters/tr.rb +22 -0
- data/lib/reverse_asciidoctor/converters/video.rb +36 -0
- data/lib/reverse_asciidoctor/errors.rb +10 -0
- data/lib/reverse_asciidoctor/version.rb +3 -0
- data/reverse_asciidoctor.gemspec +30 -0
- data/spec/assets/anchors.html +22 -0
- data/spec/assets/basic.html +58 -0
- data/spec/assets/code.html +22 -0
- data/spec/assets/escapables.html +15 -0
- data/spec/assets/from_the_wild.html +23 -0
- data/spec/assets/full_example.html +49 -0
- data/spec/assets/html_fragment.html +3 -0
- data/spec/assets/lists.html +137 -0
- data/spec/assets/minimum.html +4 -0
- data/spec/assets/paragraphs.html +24 -0
- data/spec/assets/quotation.html +12 -0
- data/spec/assets/tables.html +99 -0
- data/spec/assets/unknown_tags.html +9 -0
- data/spec/components/anchors_spec.rb +21 -0
- data/spec/components/basic_spec.rb +49 -0
- data/spec/components/code_spec.rb +28 -0
- data/spec/components/escapables_spec.rb +23 -0
- data/spec/components/from_the_wild_spec.rb +17 -0
- data/spec/components/html_fragment_spec.rb +11 -0
- data/spec/components/lists_spec.rb +86 -0
- data/spec/components/paragraphs_spec.rb +15 -0
- data/spec/components/quotation_spec.rb +12 -0
- data/spec/components/tables_spec.rb +31 -0
- data/spec/components/unknown_tags_spec.rb +39 -0
- data/spec/lib/reverse_asciidoctor/cleaner_spec.rb +157 -0
- data/spec/lib/reverse_asciidoctor/config_spec.rb +26 -0
- data/spec/lib/reverse_asciidoctor/converters/aside_spec.rb +12 -0
- data/spec/lib/reverse_asciidoctor/converters/audio_spec.rb +18 -0
- data/spec/lib/reverse_asciidoctor/converters/blockquote_spec.rb +24 -0
- data/spec/lib/reverse_asciidoctor/converters/br_spec.rb +9 -0
- data/spec/lib/reverse_asciidoctor/converters/code_spec.rb +18 -0
- data/spec/lib/reverse_asciidoctor/converters/div_spec.rb +18 -0
- data/spec/lib/reverse_asciidoctor/converters/figure_spec.rb +13 -0
- data/spec/lib/reverse_asciidoctor/converters/img_spec.rb +28 -0
- data/spec/lib/reverse_asciidoctor/converters/li_spec.rb +13 -0
- data/spec/lib/reverse_asciidoctor/converters/mark_spec.rb +10 -0
- data/spec/lib/reverse_asciidoctor/converters/p_spec.rb +12 -0
- data/spec/lib/reverse_asciidoctor/converters/pre_spec.rb +45 -0
- data/spec/lib/reverse_asciidoctor/converters/q_spec.rb +10 -0
- data/spec/lib/reverse_asciidoctor/converters/strong_spec.rb +20 -0
- data/spec/lib/reverse_asciidoctor/converters/text_spec.rb +62 -0
- data/spec/lib/reverse_asciidoctor/converters/video_spec.rb +18 -0
- data/spec/lib/reverse_asciidoctor/converters_spec.rb +19 -0
- data/spec/lib/reverse_asciidoctor_spec.rb +37 -0
- data/spec/spec_helper.rb +21 -0
- metadata +281 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: cb94b96658189613a9f4cdf21a3fa5d3eefbad17
|
|
4
|
+
data.tar.gz: 392dd9b562dd8a1d24193d4031f62916edda8fd2
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: a51d1071f41d2627def8bb06929bcb1308a673741067b29be58ad847dfb2d70ca7028e1cca1f52f9844310c378009395e61e0e0ffad1b8686a9ab3a21b7966a7
|
|
7
|
+
data.tar.gz: b829c7f3f60651a916c2fd23322406a581c7524825a804e6e26ea6953cb45cfe44f6ba316baeb2cebc92c9bd6388dfa976757f4ae1a0cd6bc487e279a2c0b611
|
data/Gemfile
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
BSD 2-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2018, Ribose
|
|
4
|
+
All rights reserved.
|
|
5
|
+
|
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
|
7
|
+
modification, are permitted provided that the following conditions are met:
|
|
8
|
+
|
|
9
|
+
* Redistributions of source code must retain the above copyright notice, this
|
|
10
|
+
list of conditions and the following disclaimer.
|
|
11
|
+
|
|
12
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
and/or other materials provided with the distribution.
|
|
15
|
+
|
|
16
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
17
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
19
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
20
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
21
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
22
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
23
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
24
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
25
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
LICENSE OF https://github.com/xijo/reverse_markdown
|
|
30
|
+
|
|
31
|
+
https://github.com/xijo/reverse_markdown, on which this gem is based, was
|
|
32
|
+
licensed with the DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE, v2.
|
data/README.adoc
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
= reverse_asciidoctor
|
|
2
|
+
|
|
3
|
+
Based on https://github.com/xijo/reverse_markdown
|
|
4
|
+
|
|
5
|
+
Transforms HTML into asciidoctor.
|
|
6
|
+
|
|
7
|
+
== Requirements
|
|
8
|
+
|
|
9
|
+
. http://nokogiri.org/[Nokogiri]
|
|
10
|
+
. Ruby 1.9.3 or higher
|
|
11
|
+
|
|
12
|
+
== Installation
|
|
13
|
+
|
|
14
|
+
Install the gem
|
|
15
|
+
|
|
16
|
+
[source,console]
|
|
17
|
+
----
|
|
18
|
+
[sudo] gem install reverse_asciidoctor
|
|
19
|
+
----
|
|
20
|
+
|
|
21
|
+
or add it to your Gemfile
|
|
22
|
+
|
|
23
|
+
[source,ruby]
|
|
24
|
+
----
|
|
25
|
+
gem 'reverse_asciidoctor'
|
|
26
|
+
----
|
|
27
|
+
|
|
28
|
+
== Features
|
|
29
|
+
|
|
30
|
+
As a port of reverse_markdown, reverse_asciidoctor shares its features:
|
|
31
|
+
|
|
32
|
+
* Module based - if you miss a tag, just add it
|
|
33
|
+
* Can deal with nested lists
|
|
34
|
+
* Inline and block code is supported
|
|
35
|
+
* Supports blockquote
|
|
36
|
+
|
|
37
|
+
It supports the following html tags supported by reverse_markdown:
|
|
38
|
+
|
|
39
|
+
* `a`
|
|
40
|
+
* `blockquote`
|
|
41
|
+
* `br`
|
|
42
|
+
* `code`, `tt` (added: `kbd`, `samp`, `var`)
|
|
43
|
+
* `div`, `article`
|
|
44
|
+
* `em`, `i` (added: `cite`)
|
|
45
|
+
* `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `hr`
|
|
46
|
+
* `img`
|
|
47
|
+
* `li`, `ol`, `ul` (added: `dir`)
|
|
48
|
+
* `p`, `pre`
|
|
49
|
+
* `strong`, `b`
|
|
50
|
+
* `table`, `td`, `th`, `tr`
|
|
51
|
+
|
|
52
|
+
NOTE:
|
|
53
|
+
* reverse_asciidoctor does *not* support `del` or `strike`, because Asciidoctor out of the box does not support them.
|
|
54
|
+
* As with reverse_markdown, `pre` is only treated as sourcecode if it is contained in a `div@class = highlight-` element, or has a `@brush` attribute naming the language (Confluence).
|
|
55
|
+
* The gem does not support `p@align`, because Asciidoctor doesn't
|
|
56
|
+
|
|
57
|
+
In addition, it supports:
|
|
58
|
+
|
|
59
|
+
* `aside`
|
|
60
|
+
* `audio`, `video` (with `@src` attributes)
|
|
61
|
+
* `figure`, `figcaption`
|
|
62
|
+
* `mark`
|
|
63
|
+
* `q`
|
|
64
|
+
* `sub`, `sup`
|
|
65
|
+
* `@id` anchors
|
|
66
|
+
* `blockquote@cite`
|
|
67
|
+
* `img/@width`, `img/@height`
|
|
68
|
+
* `ol/@style`, `ol/@start`, `ol/@reversed`, `ul/@type`
|
|
69
|
+
* `td/@colspan`, `td/@rowspan`, `td/@align`, `td/@valign`
|
|
70
|
+
* `table/caption`, `table/@width`, `table/@frame` (partial), `table/@rules` (partial)
|
|
71
|
+
* Lists and paragraphs within cells
|
|
72
|
+
** Not tables within cells: Asciidoctor cannot deal with nested tables
|
|
73
|
+
|
|
74
|
+
It also supports MathML... sort of.
|
|
75
|
+
|
|
76
|
+
* Asciidoctor supports AsciiMath and LaTeX for stem expressions. HTML uses MathML.
|
|
77
|
+
The gem will recognise MathML expressions in HTML, and will wrap them in Asciidoctor
|
|
78
|
+
`stem:[ ]` macros. The result of this gem is not actually legal Asciidoctor for stem:
|
|
79
|
+
Asciidoctor will presumably
|
|
80
|
+
think this is AsciiMath in the `stem:[ ]` macro, try to pass it into MathJax as
|
|
81
|
+
AsciiMath, and fail. But of course, MathJax has no problem with MathML, and some postprocessing
|
|
82
|
+
on the Asciidoctor output can ensure that the MathML is treated by MathJax (or whatever else
|
|
83
|
+
uses the output) as such; so this is still much better than nothing for stem processing.
|
|
84
|
+
* An alternative would be to attempt to map MathML to either LaTeX or AsciiMath.
|
|
85
|
+
** The self-description of https://github.com/learningobjectsinc/mathml-to-asciimath
|
|
86
|
+
("subset"... "this module is not: comprehensive, performant") does not recommend it,
|
|
87
|
+
when MathJax is entirely happy with MathML anyway.
|
|
88
|
+
** https://github.com/transpect/mml2tex looks rather more robust, and is also used
|
|
89
|
+
to export Word documents and their OOMML to LaTeX via MathML. But we'd still rather
|
|
90
|
+
keep the MathML in place.
|
|
91
|
+
|
|
92
|
+
The gem does not support:
|
|
93
|
+
|
|
94
|
+
* `col`, `colgroup`
|
|
95
|
+
* `source`, `picture`
|
|
96
|
+
* `bdi`, `bdo`, `ruby`, `rt`, `rp`, `wbr`
|
|
97
|
+
* `frame`, `frameset`, `iframe`, `noframes`, `noscript`, `script`, `input`, `output`, `progress`
|
|
98
|
+
* `map`, `canvas`, `dialog`, `embed`, `object`, `param`, `svg`, `track`
|
|
99
|
+
* `fieldset`, `button`, `datalist`, `form`, `label`, `legend`, `menu`, `menulist`, `optgroup`, `option`, `select`, `textarea`
|
|
100
|
+
* `big`, `dfn`, `font`, `s`, `small`, `span`, `strike`, `u`
|
|
101
|
+
* `center`
|
|
102
|
+
* `data`, `meter`
|
|
103
|
+
* `del`, `ins`
|
|
104
|
+
* `footer`, `header`, `main`, `nav`, `details`, `section`, `summary`, `template`
|
|
105
|
+
|
|
106
|
+
== Usage
|
|
107
|
+
|
|
108
|
+
=== Ruby
|
|
109
|
+
|
|
110
|
+
You can convert html content as string or Nokogiri document:
|
|
111
|
+
|
|
112
|
+
[source,ruby]
|
|
113
|
+
----
|
|
114
|
+
input = '<strong>feelings</strong>'
|
|
115
|
+
result = ReverseAsciidoctor.convert input
|
|
116
|
+
result.inspect # " *feelings* "
|
|
117
|
+
----
|
|
118
|
+
|
|
119
|
+
=== Commandline
|
|
120
|
+
|
|
121
|
+
It's also possible to convert HTML files to AsciiDoc using the binary:
|
|
122
|
+
|
|
123
|
+
[source,console]
|
|
124
|
+
----
|
|
125
|
+
$ bin/reverse_asciidoctor file.html > file.adoc
|
|
126
|
+
$ cat file.html | bin/reverse_asciidoctor > file.adoc
|
|
127
|
+
----
|
|
128
|
+
|
|
129
|
+
In addition, the `bin/w2m` script (
|
|
130
|
+
adapted from https://github.com/benbalter/word-to-markdown[Ben Balter's word-to-markdown])
|
|
131
|
+
extracts HTML from Word docx documents, and converts it to AsciiDoc.
|
|
132
|
+
|
|
133
|
+
[source,console]
|
|
134
|
+
----
|
|
135
|
+
$ bundle exec bin/w2m document.docx > document.adoc
|
|
136
|
+
----
|
|
137
|
+
|
|
138
|
+
The script presumes that LibreOffice has already been installed: it uses LibreOffice's
|
|
139
|
+
export to XHTML. LibreOffice's export of XHTML is superior to the native Microsoft Word export
|
|
140
|
+
to HTML: it exports lists (which Word keeps as paragraphs), and it exports OOMML into MathML.
|
|
141
|
+
On the other hand, the LibreOffice export relies on
|
|
142
|
+
|
|
143
|
+
=== Configuration
|
|
144
|
+
|
|
145
|
+
The following options are available:
|
|
146
|
+
|
|
147
|
+
* `unknown_tags` (default `pass_through`) - how to handle unknown tags. Valid options are:
|
|
148
|
+
** `pass_through` - Include the unknown tag completely into the result
|
|
149
|
+
** `drop` - Drop the unknown tag and its content
|
|
150
|
+
** `bypass` - Ignore the unknown tag but try to convert its content
|
|
151
|
+
** `raise` - Raise an error to let you know
|
|
152
|
+
* `tag_border` (default `' '`) - how to handle tag borders. Valid options are:
|
|
153
|
+
** `' '` - Add whitespace if there is none at tag borders.
|
|
154
|
+
** `''` - Do not add whitespace.
|
|
155
|
+
|
|
156
|
+
==== As options
|
|
157
|
+
|
|
158
|
+
Just pass your chosen configuration options in after the input. The given options will last for this operation only.
|
|
159
|
+
|
|
160
|
+
[source,ruby]
|
|
161
|
+
----
|
|
162
|
+
ReverseAsciidoctor.convert(input, unknown_tags: :raise)
|
|
163
|
+
----
|
|
164
|
+
|
|
165
|
+
==== Preconfigure
|
|
166
|
+
|
|
167
|
+
Or configure it block style on an initializer level. These configurations will last for all conversions until they are set to something different.
|
|
168
|
+
|
|
169
|
+
[source,ruby]
|
|
170
|
+
----
|
|
171
|
+
ReverseAsciidoctor.config do |config|
|
|
172
|
+
config.unknown_tags = :bypass
|
|
173
|
+
config.github_flavored = true
|
|
174
|
+
config.tag_border = ''
|
|
175
|
+
end
|
|
176
|
+
----
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
== Related stuff
|
|
180
|
+
|
|
181
|
+
* https://github.com/xijo/reverse_markdown[Xijo's original reverse_markdown gem]
|
|
182
|
+
* https://github.com/xijo/reverse_markdown/wiki/Write-your-own-converter[Write custom converters] - Wiki entry about how to write your own converter
|
|
183
|
+
* https://github.com/harlantwood/html_massage[html_massage] - A gem by Harlan T. Wood to convert regular sites into markdown using reverse_markdown
|
|
184
|
+
* https://github.com/benbalter/word-to-markdown[word-to-markdown] - Convert word docs into markdown while using reverse_markdown, by Ben Balter
|
|
185
|
+
* https://github.com/asciidocfx/HtmlToAsciidoc[HtmlToAsciidoc] - Javascript regexp-based converter of HTML to Asciidoctor
|
|
186
|
+
* https://asciidoctor.org/docs/user-manual/[The Asciidoctor User Manual]
|
data/Rakefile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
require 'bundler/gem_tasks'

# When a local .codeclimate file exists, export its (stripped) contents as
# CODECLIMATE_REPO_TOKEN for anything run from this Rakefile.
if File.exist?('.codeclimate')
  ENV["CODECLIMATE_REPO_TOKEN"] = File.read('.codeclimate').strip
end

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)
task :default => :spec

desc 'Open an irb session preloaded with this library'
task :console do
  # The former `-rubygems` switch made Ruby load `ubygems.rb`, which no longer
  # ships with Ruby (removed in 2.5), so the task aborted before irb started.
  # RubyGems is loaded by default, so the switch is simply dropped.
  sh 'irb -I lib -r reverse_asciidoctor.rb'
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
# Usage: reverse_asciidoctor [options] [FILE]...
# Usage: cat FILE | reverse_asciidoctor
#
# Reads HTML from the named files (or standard input, via ARGF) and prints the
# result of ReverseAsciidoctor.convert to standard output.
require 'reverse_asciidoctor'
require 'optparse'

OptionParser.new do |opts|
  opts.banner = "Usage: reverse_asciidoctor [options] <file>"

  # The handler must configure this gem. It previously assigned to
  # ReverseMarkdown.config, a constant that is never loaded here, so passing
  # -u raised NameError.
  opts.on('-u', '--unknown_tags [pass_through, drop, bypass, raise]', 'Unknown tag handling (default: pass_through)') { |v| ReverseAsciidoctor.config.unknown_tags = v }
end.parse!

# parse! strips recognised options from ARGV, so ARGF only sees file names.
puts ReverseAsciidoctor.convert(ARGF.read)
|
data/bin/w2m
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require 'word-to-markdown'
|
|
5
|
+
require 'reverse_asciidoctor'
|
|
6
|
+
|
|
7
|
+
# Normalises whitespace in an HTML string before it is converted.
#
# Works on a copy, so the caller's string (which may be frozen) is untouched.
#
# @param string [String] raw HTML text
# @return [String] a new string with the whitespace clean-ups below applied
def scrub_whitespace(string)
  string = string.dup
  # The entity and the non-breaking space are spelled out explicitly so they
  # cannot be mistaken for (or silently turned into) ordinary spaces.
  string.gsub!('&nbsp;', ' ')              # HTML encoded spaces
  string.sub!(/\A[[:space:]]+/, '')        # document leading whitespace
  string.sub!(/[[:space:]]+\z/, '')        # document trailing whitespace
  string.gsub!(/([ ]+)$/, '')              # line trailing whitespace
  string.gsub!(/\n\n\n\n/, "\n\n")         # Quadruple line breaks
  string.delete!("\u00A0")                 # Unicode non-breaking spaces, injected as tabs
  string
end
|
|
17
|
+
|
|
18
|
+
# Anything other than exactly one argument, or an explicit --help, prints the
# usage line and exits with status 1.
unless ARGV.size == 1 && ARGV[0] != '--help'
  puts 'Usage: bundle exec w2m path/to/document.docx'
  exit 1
end

case ARGV[0]
when '--version'
  puts "WordToMarkdown v#{WordToMarkdown::VERSION}"
  # The LibreOffice version line is skipped on Windows.
  puts "LibreOffice v#{WordToMarkdown.soffice.version}" unless Gem.win_platform?
else
  # The single argument is handed to WordToMarkdown as the document; its HTML
  # is whitespace-scrubbed and converted with this gem.
  doc = WordToMarkdown.new ARGV[0]
  # puts doc.to_s
  puts ReverseAsciidoctor.convert(scrub_whitespace(doc.document.html), WordToMarkdown::REVERSE_MARKDOWN_OPTIONS)
end
|
|
31
|
+
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
require 'digest'
|
|
2
|
+
require 'nokogiri'
|
|
3
|
+
require 'reverse_asciidoctor/version'
|
|
4
|
+
require 'reverse_asciidoctor/errors'
|
|
5
|
+
require 'reverse_asciidoctor/cleaner'
|
|
6
|
+
require 'reverse_asciidoctor/config'
|
|
7
|
+
require 'reverse_asciidoctor/converters'
|
|
8
|
+
require 'reverse_asciidoctor/converters/base'
|
|
9
|
+
require 'reverse_asciidoctor/converters/a'
|
|
10
|
+
require 'reverse_asciidoctor/converters/aside'
|
|
11
|
+
require 'reverse_asciidoctor/converters/audio'
|
|
12
|
+
require 'reverse_asciidoctor/converters/blockquote'
|
|
13
|
+
require 'reverse_asciidoctor/converters/br'
|
|
14
|
+
require 'reverse_asciidoctor/converters/bypass'
|
|
15
|
+
require 'reverse_asciidoctor/converters/code'
|
|
16
|
+
require 'reverse_asciidoctor/converters/div'
|
|
17
|
+
require 'reverse_asciidoctor/converters/drop'
|
|
18
|
+
require 'reverse_asciidoctor/converters/em'
|
|
19
|
+
require 'reverse_asciidoctor/converters/figure'
|
|
20
|
+
require 'reverse_asciidoctor/converters/h'
|
|
21
|
+
require 'reverse_asciidoctor/converters/head'
|
|
22
|
+
require 'reverse_asciidoctor/converters/hr'
|
|
23
|
+
require 'reverse_asciidoctor/converters/ignore'
|
|
24
|
+
require 'reverse_asciidoctor/converters/img'
|
|
25
|
+
require 'reverse_asciidoctor/converters/mark'
|
|
26
|
+
require 'reverse_asciidoctor/converters/li'
|
|
27
|
+
require 'reverse_asciidoctor/converters/ol'
|
|
28
|
+
require 'reverse_asciidoctor/converters/p'
|
|
29
|
+
require 'reverse_asciidoctor/converters/pass_through'
|
|
30
|
+
require 'reverse_asciidoctor/converters/pre'
|
|
31
|
+
require 'reverse_asciidoctor/converters/q'
|
|
32
|
+
require 'reverse_asciidoctor/converters/strong'
|
|
33
|
+
require 'reverse_asciidoctor/converters/sup'
|
|
34
|
+
require 'reverse_asciidoctor/converters/sub'
|
|
35
|
+
require 'reverse_asciidoctor/converters/table'
|
|
36
|
+
require 'reverse_asciidoctor/converters/td'
|
|
37
|
+
require 'reverse_asciidoctor/converters/th'
|
|
38
|
+
require 'reverse_asciidoctor/converters/text'
|
|
39
|
+
require 'reverse_asciidoctor/converters/tr'
|
|
40
|
+
require 'reverse_asciidoctor/converters/video'
|
|
41
|
+
require 'reverse_asciidoctor/converters/math'
|
|
42
|
+
|
|
43
|
+
# Entry points of the gem: conversion plus access to the shared configuration
# and cleaner objects.
module ReverseAsciidoctor

  # Converts HTML to AsciiDoc text.
  #
  # @param input [String, Nokogiri::XML::Document, Nokogiri::XML::Node]
  #   a String is parsed with Nokogiri::HTML; a document contributes its root
  #   node; a node is used as is. Any other input is treated as empty.
  # @param options [Hash] configuration overrides, handed to +config.with+
  #   for the duration of this call
  # @return [String] '' when there is no root node; otherwise the value of the
  #   +config.with+ block, i.e. the converter output passed through
  #   +cleaner.tidy+ (presumably returned unchanged by Config#with, which is
  #   defined elsewhere - confirm there)
  def self.convert(input, options = {})
    root = case input
           when String                  then Nokogiri::HTML(input).root
           when Nokogiri::XML::Document then input.root
           when Nokogiri::XML::Node     then input
           end

    # Unsupported input types and empty documents both end up here.
    return '' unless root

    config.with(options) do
      result = ReverseAsciidoctor::Converters.lookup(root.name).convert(root)
      cleaner.tidy(result)
    end
  end

  # Returns the module-wide Config instance, creating it on first use.
  # When a block is given, the instance is yielded to it first.
  #
  # @yieldparam config [Config] the shared configuration object
  # @return [Config]
  def self.config
    @config ||= Config.new
    yield @config if block_given?
    @config
  end

  # Returns the module-wide Cleaner instance, creating it on first use.
  #
  # @return [Cleaner]
  def self.cleaner
    @cleaner ||= Cleaner.new
  end

end
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
module ReverseAsciidoctor
  # Whitespace and punctuation clean-up applied to converter output.
  class Cleaner

    # Runs every clean-up step in order.
    #
    # @param string [String] raw converter output
    # @return [String] the cleaned text
    def tidy(string)
      result = remove_inner_whitespaces(string)
      result = remove_newlines(result)
      result = remove_leading_newlines(result)
      result = clean_tag_borders(result)
      clean_punctuation_characters(result)
    end

    # Collapses every run of three or more newlines into exactly two.
    def remove_newlines(string)
      string.gsub(/\n{3,}/, "\n\n")
    end

    # Drops all newlines at the very start of the string.
    def remove_leading_newlines(string)
      # The pattern is anchored with \A, so it can match at most once.
      string.sub(/\A\n+/, '')
    end

    # Squeezes runs of two or more spaces/tabs inside each line down to one
    # space. Whitespace at the start and end of a line (including its
    # newline) is kept as is, and whitespace-only lines are left untouched.
    def remove_inner_whitespaces(string)
      string.each_line.inject("") do |memo, line|
        memo + preserve_border_whitespaces(line) do
          line.strip.gsub(/[ \t]{2,}/, ' ')
        end
      end
    end

    # Finds content enclosed by two or more asterisks, underscores or tildes,
    # removes one space directly inside the opening and closing marker, and
    # makes sure the match is bordered by whitespace (falling back to the
    # configured tag_border when there is none).
    # Square brackets get the same inner clean-up, but never a default border.
    def clean_tag_borders(string)
      marker_patterns = [
        ['**', /\s?\*{2,}.*?\*{2,}\s?/],
        ['__', /\s?_{2,}.*?_{2,}\s?/],
        ['~~', /\s?~{2,}.*?~{2,}\s?/]
      ]

      result = marker_patterns.inject(string) do |text, (marker, pattern)|
        text.gsub(pattern) do |match|
          preserve_border_whitespaces(match, default_border: ReverseAsciidoctor.config.tag_border) do
            match.strip.sub("#{marker} ", marker).sub(" #{marker}", marker)
          end
        end
      end

      result.gsub(/\s?\[.*?\]\s?/) do |match|
        preserve_border_whitespaces(match) do
          match.strip.sub('[ ', '[').sub(' ]', ']')
        end
      end
    end

    # Removes the single whitespace character between a closing **, ~~ or __
    # marker and a following . ! ? ' or " character.
    def clean_punctuation_characters(string)
      string.gsub(/(\*\*|~~|__)\s([\.!\?'"])/, '\1\2')
    end

    private

    # Yields to compute a replacement for +string+ and re-attaches the
    # leading and trailing whitespace +string+ originally had.
    #
    # @param string [String] the original text (returned unchanged when it is
    #   whitespace-only)
    # @param options [Hash] :default_border - text used on a side that has no
    #   whitespace of its own (default '')
    # @return [String]
    def preserve_border_whitespaces(string, options = {})
      return string if string =~ /\A\s*\Z/
      default_border = options.fetch(:default_border, '')
      # If the string contains part of a link so the characters [,],(,)
      # then don't add any extra spaces
      default_border = '' if string =~ /[\[\(\]\)]/
      string_start = present_or_default(string[/\A\s*/], default_border)
      string_end   = present_or_default(string[/\s*\Z/], default_border)
      string_start + yield + string_end
    end

    # Returns +default+ when +string+ is nil or empty, +string+ otherwise.
    def present_or_default(string, default)
      string.nil? || string.empty? ? default : string
    end

  end
end
|