reverse_asciidoctor 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE +32 -0
- data/README.adoc +186 -0
- data/Rakefile +14 -0
- data/bin/reverse_asciidoctor +14 -0
- data/bin/w2m +31 -0
- data/lib/reverse_asciidoctor.rb +70 -0
- data/lib/reverse_asciidoctor/cleaner.rb +85 -0
- data/lib/reverse_asciidoctor/config.rb +28 -0
- data/lib/reverse_asciidoctor/converters.rb +33 -0
- data/lib/reverse_asciidoctor/converters/a.rb +38 -0
- data/lib/reverse_asciidoctor/converters/aside.rb +14 -0
- data/lib/reverse_asciidoctor/converters/audio.rb +34 -0
- data/lib/reverse_asciidoctor/converters/base.rb +24 -0
- data/lib/reverse_asciidoctor/converters/blockquote.rb +18 -0
- data/lib/reverse_asciidoctor/converters/br.rb +11 -0
- data/lib/reverse_asciidoctor/converters/bypass.rb +77 -0
- data/lib/reverse_asciidoctor/converters/code.rb +15 -0
- data/lib/reverse_asciidoctor/converters/div.rb +14 -0
- data/lib/reverse_asciidoctor/converters/drop.rb +18 -0
- data/lib/reverse_asciidoctor/converters/em.rb +18 -0
- data/lib/reverse_asciidoctor/converters/figure.rb +21 -0
- data/lib/reverse_asciidoctor/converters/h.rb +19 -0
- data/lib/reverse_asciidoctor/converters/head.rb +18 -0
- data/lib/reverse_asciidoctor/converters/hr.rb +11 -0
- data/lib/reverse_asciidoctor/converters/ignore.rb +12 -0
- data/lib/reverse_asciidoctor/converters/img.rb +23 -0
- data/lib/reverse_asciidoctor/converters/li.rb +24 -0
- data/lib/reverse_asciidoctor/converters/mark.rb +12 -0
- data/lib/reverse_asciidoctor/converters/math.rb +14 -0
- data/lib/reverse_asciidoctor/converters/ol.rb +46 -0
- data/lib/reverse_asciidoctor/converters/p.rb +17 -0
- data/lib/reverse_asciidoctor/converters/pass_through.rb +9 -0
- data/lib/reverse_asciidoctor/converters/pre.rb +38 -0
- data/lib/reverse_asciidoctor/converters/q.rb +12 -0
- data/lib/reverse_asciidoctor/converters/strong.rb +17 -0
- data/lib/reverse_asciidoctor/converters/sub.rb +12 -0
- data/lib/reverse_asciidoctor/converters/sup.rb +12 -0
- data/lib/reverse_asciidoctor/converters/table.rb +64 -0
- data/lib/reverse_asciidoctor/converters/td.rb +67 -0
- data/lib/reverse_asciidoctor/converters/text.rb +65 -0
- data/lib/reverse_asciidoctor/converters/th.rb +16 -0
- data/lib/reverse_asciidoctor/converters/tr.rb +22 -0
- data/lib/reverse_asciidoctor/converters/video.rb +36 -0
- data/lib/reverse_asciidoctor/errors.rb +10 -0
- data/lib/reverse_asciidoctor/version.rb +3 -0
- data/reverse_asciidoctor.gemspec +30 -0
- data/spec/assets/anchors.html +22 -0
- data/spec/assets/basic.html +58 -0
- data/spec/assets/code.html +22 -0
- data/spec/assets/escapables.html +15 -0
- data/spec/assets/from_the_wild.html +23 -0
- data/spec/assets/full_example.html +49 -0
- data/spec/assets/html_fragment.html +3 -0
- data/spec/assets/lists.html +137 -0
- data/spec/assets/minimum.html +4 -0
- data/spec/assets/paragraphs.html +24 -0
- data/spec/assets/quotation.html +12 -0
- data/spec/assets/tables.html +99 -0
- data/spec/assets/unknown_tags.html +9 -0
- data/spec/components/anchors_spec.rb +21 -0
- data/spec/components/basic_spec.rb +49 -0
- data/spec/components/code_spec.rb +28 -0
- data/spec/components/escapables_spec.rb +23 -0
- data/spec/components/from_the_wild_spec.rb +17 -0
- data/spec/components/html_fragment_spec.rb +11 -0
- data/spec/components/lists_spec.rb +86 -0
- data/spec/components/paragraphs_spec.rb +15 -0
- data/spec/components/quotation_spec.rb +12 -0
- data/spec/components/tables_spec.rb +31 -0
- data/spec/components/unknown_tags_spec.rb +39 -0
- data/spec/lib/reverse_asciidoctor/cleaner_spec.rb +157 -0
- data/spec/lib/reverse_asciidoctor/config_spec.rb +26 -0
- data/spec/lib/reverse_asciidoctor/converters/aside_spec.rb +12 -0
- data/spec/lib/reverse_asciidoctor/converters/audio_spec.rb +18 -0
- data/spec/lib/reverse_asciidoctor/converters/blockquote_spec.rb +24 -0
- data/spec/lib/reverse_asciidoctor/converters/br_spec.rb +9 -0
- data/spec/lib/reverse_asciidoctor/converters/code_spec.rb +18 -0
- data/spec/lib/reverse_asciidoctor/converters/div_spec.rb +18 -0
- data/spec/lib/reverse_asciidoctor/converters/figure_spec.rb +13 -0
- data/spec/lib/reverse_asciidoctor/converters/img_spec.rb +28 -0
- data/spec/lib/reverse_asciidoctor/converters/li_spec.rb +13 -0
- data/spec/lib/reverse_asciidoctor/converters/mark_spec.rb +10 -0
- data/spec/lib/reverse_asciidoctor/converters/p_spec.rb +12 -0
- data/spec/lib/reverse_asciidoctor/converters/pre_spec.rb +45 -0
- data/spec/lib/reverse_asciidoctor/converters/q_spec.rb +10 -0
- data/spec/lib/reverse_asciidoctor/converters/strong_spec.rb +20 -0
- data/spec/lib/reverse_asciidoctor/converters/text_spec.rb +62 -0
- data/spec/lib/reverse_asciidoctor/converters/video_spec.rb +18 -0
- data/spec/lib/reverse_asciidoctor/converters_spec.rb +19 -0
- data/spec/lib/reverse_asciidoctor_spec.rb +37 -0
- data/spec/spec_helper.rb +21 -0
- metadata +281 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: cb94b96658189613a9f4cdf21a3fa5d3eefbad17
|
4
|
+
data.tar.gz: 392dd9b562dd8a1d24193d4031f62916edda8fd2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a51d1071f41d2627def8bb06929bcb1308a673741067b29be58ad847dfb2d70ca7028e1cca1f52f9844310c378009395e61e0e0ffad1b8686a9ab3a21b7966a7
|
7
|
+
data.tar.gz: b829c7f3f60651a916c2fd23322406a581c7524825a804e6e26ea6953cb45cfe44f6ba316baeb2cebc92c9bd6388dfa976757f4ae1a0cd6bc487e279a2c0b611
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
BSD 2-Clause License
|
2
|
+
|
3
|
+
Copyright (c) 2018, Ribose
|
4
|
+
All rights reserved.
|
5
|
+
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
7
|
+
modification, are permitted provided that the following conditions are met:
|
8
|
+
|
9
|
+
* Redistributions of source code must retain the above copyright notice, this
|
10
|
+
list of conditions and the following disclaimer.
|
11
|
+
|
12
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
13
|
+
this list of conditions and the following disclaimer in the documentation
|
14
|
+
and/or other materials provided with the distribution.
|
15
|
+
|
16
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
17
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
18
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
19
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
20
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
21
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
22
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
23
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
24
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
25
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
LICENSE OF https://github.com/xijo/reverse_markdown
|
30
|
+
|
31
|
+
https://github.com/xijo/reverse_markdown, on which this gem is based, was
|
32
|
+
licensed with the DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE, v2.
|
data/README.adoc
ADDED
@@ -0,0 +1,186 @@
|
|
1
|
+
= reverse_asciidoctor
|
2
|
+
|
3
|
+
Based on https://github.com/xijo/reverse_markdown
|
4
|
+
|
5
|
+
Transforms HTML into asciidoctor.
|
6
|
+
|
7
|
+
== Requirements
|
8
|
+
|
9
|
+
. http://nokogiri.org/[Nokogiri]
|
10
|
+
. Ruby 1.9.3 or higher
|
11
|
+
|
12
|
+
== Installation
|
13
|
+
|
14
|
+
Install the gem
|
15
|
+
|
16
|
+
[source,console]
|
17
|
+
----
|
18
|
+
[sudo] gem install reverse_asciidoctor
|
19
|
+
----
|
20
|
+
|
21
|
+
or add it to your Gemfile
|
22
|
+
|
23
|
+
[source,ruby]
|
24
|
+
----
|
25
|
+
gem 'reverse_asciidoctor'
|
26
|
+
----
|
27
|
+
|
28
|
+
== Features
|
29
|
+
|
30
|
+
As a port of reverse_markdown, reverse_asciidoctor shares its features:
|
31
|
+
|
32
|
+
* Module based - if you miss a tag, just add it
|
33
|
+
* Can deal with nested lists
|
34
|
+
* Inline and block code is supported
|
35
|
+
* Supports blockquote
|
36
|
+
|
37
|
+
It supports the following html tags supported by reverse_markdown:
|
38
|
+
|
39
|
+
* `a`
|
40
|
+
* `blockquote`
|
41
|
+
* `br`
|
42
|
+
* `code`, `tt` (added: `kbd`, `samp`, `var`)
|
43
|
+
* `div`, `article`
|
44
|
+
* `em`, `i` (added: `cite`)
|
45
|
+
* `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `hr`
|
46
|
+
* `img`
|
47
|
+
* `li`, `ol`, `ul` (added: `dir`)
|
48
|
+
* `p`, `pre`
|
49
|
+
* `strong`, `b`
|
50
|
+
* `table`, `td`, `th`, `tr`
|
51
|
+
|
52
|
+
NOTE:
|
53
|
+
* reverse_asciidoctor does *not* support `del` or `strike`, because Asciidoctor out of the box does not
|
54
|
+
* As with reverse_markdown, `pre` is only treated as sourcecode if it is contained in a `div@class = highlight-` element, or has a `@brush` attribute naming the language (Confluence).
|
55
|
+
* The gem does not support `p@align`, because Asciidoctor doesn't
|
56
|
+
|
57
|
+
In addition, it supports:
|
58
|
+
|
59
|
+
* `aside`
|
60
|
+
* `audio`, `video` (with `@src` attributes)
|
61
|
+
* `figure`, `figcaption`
|
62
|
+
* `mark`
|
63
|
+
* `q`
|
64
|
+
* `sub`, `sup`
|
65
|
+
* `@id` anchors
|
66
|
+
* `blockquote@cite`
|
67
|
+
* `img/@width`, `img/@height`
|
68
|
+
* `ol/@style`, `ol/@start`, `ol/@reversed`, `ul/@type`
|
69
|
+
* `td/@colspan`, `td/@rowspan`, `td/@align`, `td/@valign`
|
70
|
+
* `table/caption`, `table/@width`, `table/@frame` (partial), `table/@rules` (partial)
|
71
|
+
* Lists and paragraphs within cells
|
72
|
+
** Not tables within cells: Asciidoctor cannot deal with nested tables
|
73
|
+
|
74
|
+
It also supports MathML... sort of.
|
75
|
+
|
76
|
+
* Asciidoctor supports AsciiMath and LaTeX for stem expressions. HTML uses MathML.
|
77
|
+
The gem will recognise MathML expressions in HTML, and will wrap them in Asciidoctor
|
78
|
+
`stem:[ ]` macros. The result of this gem is not actually legal Asciidoctor for stem:
|
79
|
+
Asciidoctor will presumably
|
80
|
+
think this is AsciiMath in the `stem:[ ]` macro, try to pass it into MathJax as
|
81
|
+
AsciiMath, and fail. But of course, MathJax has no problem with MathML, and some postprocessing
|
82
|
+
on the Asciidoctor output can ensure that the MathML is treated by MathJax (or whatever else
|
83
|
+
uses the output) as such; so this is still much better than nothing for stem processing.
|
84
|
+
* An alternative would be to attempt to map MathML to either LaTeX or AsciiMath.
|
85
|
+
** The self-description of https://github.com/learningobjectsinc/mathml-to-asciimath
|
86
|
+
("subset"... "this module is not: comprehensive, performant") does not recommend it,
|
87
|
+
when MathJax is entirely happy with MathML anyway.
|
88
|
+
** https://github.com/transpect/mml2tex looks rather more robust, and is also used
|
89
|
+
to export Word documents and their OOMML to LaTeX via MathML. But we'd still rather
|
90
|
+
keep the MathML in place.
|
91
|
+
|
92
|
+
The gem does not support:
|
93
|
+
|
94
|
+
* `col`, `colgroup`
|
95
|
+
* `source`, `picture`
|
96
|
+
* `bdi`, `bdo`, `ruby`, `rt`, `rp`, `wbr`
|
97
|
+
* `frame`, `frameset`, `iframe`, `noframes`, `noscript`, `script`, `input`, `output`, `progress`
|
98
|
+
* `map`, `canvas`, `dialog`, `embed`, `object`, `param`, `svg`, `track`
|
99
|
+
* `fieldset`, `button`, `datalist`, `form`, `label`, `legend`, `menu`, `menulist`, `optgroup`, `option`, `select`, `textarea`
|
100
|
+
* `big`, `dfn`, `font`, `s`, `small`, `span`, `strike`, `u`
|
101
|
+
* `center`
|
102
|
+
* `data`, `meter`
|
103
|
+
* `del`, `ins`
|
104
|
+
* `footer`, `header`, `main`, `nav`, `details`, `section`, `summary`, `template`
|
105
|
+
|
106
|
+
== Usage
|
107
|
+
|
108
|
+
=== Ruby
|
109
|
+
|
110
|
+
You can convert html content as string or Nokogiri document:
|
111
|
+
|
112
|
+
[source,ruby]
|
113
|
+
----
|
114
|
+
input = '<strong>feelings</strong>'
|
115
|
+
result = ReverseAsciidoctor.convert input
|
116
|
+
result.inspect # " *feelings* "
|
117
|
+
----
|
118
|
+
|
119
|
+
=== Commandline
|
120
|
+
|
121
|
+
It's also possible to convert HTML files to Asciidoc using the binary:
|
122
|
+
|
123
|
+
[source,console]
|
124
|
+
----
|
125
|
+
$ bin/reverse_asciidoctor file.html > file.adoc
|
126
|
+
$ cat file.html | bin/reverse_asciidoctor > file.adoc
|
127
|
+
----
|
128
|
+
|
129
|
+
In addition, the `bin/w2m` script (
|
130
|
+
adapted from https://github.com/benbalter/word-to-markdown[Ben Balter's word-to-markdown])
|
131
|
+
extracts HTML from Word docx documents, and converts it to Asciidoc.
|
132
|
+
|
133
|
+
[source,console]
|
134
|
+
----
|
135
|
+
$ bundle exec bin/w2m document.docx > document.adoc
|
136
|
+
----
|
137
|
+
|
138
|
+
The script presumes that LibreOffice has already been installed: it uses LibreOffice's
|
139
|
+
export to XHTML. LibreOffice's export of XHTML is superior to the native Microsoft Word export
|
140
|
+
to HTML: it exports lists (which Word keeps as paragraphs), and it exports OOMML into MathML.
|
141
|
+
On the other hand, the LibreOffice export relies on
|
142
|
+
|
143
|
+
=== Configuration
|
144
|
+
|
145
|
+
The following options are available:
|
146
|
+
|
147
|
+
* `unknown_tags` (default `pass_through`) - how to handle unknown tags. Valid options are:
|
148
|
+
** `pass_through` - Include the unknown tag completely into the result
|
149
|
+
** `drop` - Drop the unknown tag and its content
|
150
|
+
** `bypass` - Ignore the unknown tag but try to convert its content
|
151
|
+
** `raise` - Raise an error to let you know
|
152
|
+
* `tag_border` (default `' '`) - how to handle tag borders. valid options are:
|
153
|
+
** `' '` - Add whitespace if there is none at tag borders.
|
154
|
+
** `''` - Do not add whitespace.
|
155
|
+
|
156
|
+
==== As options
|
157
|
+
|
158
|
+
Just pass your chosen configuration options in after the input. The given options will last for this operation only.
|
159
|
+
|
160
|
+
[source,ruby]
|
161
|
+
----
|
162
|
+
ReverseAsciidoctor.convert(input, unknown_tags: :raise)
|
163
|
+
----
|
164
|
+
|
165
|
+
==== Preconfigure
|
166
|
+
|
167
|
+
Or configure it block style on an initializer level. These configurations will last for all conversions until they are set to something different.
|
168
|
+
|
169
|
+
[source,ruby]
|
170
|
+
----
|
171
|
+
ReverseAsciidoctor.config do |config|
|
172
|
+
config.unknown_tags = :bypass
|
173
|
+
config.github_flavored = true
|
174
|
+
config.tag_border = ''
|
175
|
+
end
|
176
|
+
----
|
177
|
+
|
178
|
+
|
179
|
+
== Related stuff
|
180
|
+
|
181
|
+
* https://github.com/xijo/reverse_markdown[Xijo's original reverse_markdown gem]
|
182
|
+
* https://github.com/xijo/reverse_markdown/wiki/Write-your-own-converter[Write custom converters] - Wiki entry about how to write your own converter
|
183
|
+
* https://github.com/harlantwood/html_massage[html_massage] - A gem by Harlan T. Wood to convert regular sites into markdown using reverse_markdown
|
184
|
+
* https://github.com/benbalter/word-to-markdown[word-to-markdown] - Convert word docs into markdown while using reverse_markdown, by Ben Balter
|
185
|
+
* https://github.com/asciidocfx/HtmlToAsciidoc[HtmlToAsciidoc] - Javascript regexp-based converter of HTML to Asciidoctor
|
186
|
+
* https://asciidoctor.org/docs/user-manual/[The Asciidoctor User Manual]
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
|
3
|
+
if File.exist?('.codeclimate')
|
4
|
+
ENV["CODECLIMATE_REPO_TOKEN"] = File.read('.codeclimate').strip
|
5
|
+
end
|
6
|
+
|
7
|
+
require 'rspec/core/rake_task'
|
8
|
+
RSpec::Core::RakeTask.new(:spec)
|
9
|
+
task :default => :spec
|
10
|
+
|
11
|
+
desc 'Open an irb session preloaded with this library'
|
12
|
+
task :console do
|
13
|
+
# No `-rubygems` flag: it made irb require `ubygems`, which was removed in Ruby 2.5; RubyGems is loaded by default anyway.
sh 'irb -I lib -r reverse_asciidoctor.rb'
|
14
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Usage: reverse_asciidoctor [FILE]...
|
3
|
+
# Usage: cat FILE | reverse_asciidoctor
|
4
|
+
require 'reverse_asciidoctor'
|
5
|
+
require 'optparse'
|
6
|
+
|
7
|
+
options = {}
|
8
|
+
OptionParser.new do |opts|
|
9
|
+
opts.banner = "Usage: reverse_asciidoctor [options] <file>"
|
10
|
+
|
11
|
+
# Store the choice in this gem's own configuration (this script only requires reverse_asciidoctor, so ReverseMarkdown is not defined here).
opts.on('-u', '--unknown_tags [pass_through, drop, bypass, raise]', 'Unknown tag handling (default: pass_through)') { |v| ReverseAsciidoctor.config.unknown_tags = v }
|
12
|
+
end.parse!
|
13
|
+
|
14
|
+
puts ReverseAsciidoctor.convert(ARGF.read)
|
data/bin/w2m
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'word-to-markdown'
|
5
|
+
require 'reverse_asciidoctor'
|
6
|
+
|
7
|
+
# Normalises whitespace in an HTML string before it is handed to the converter.
#
# string - the HTML markup as a String. It is duplicated first, so the
#          caller's object (which may be frozen) is never mutated.
#
# Returns a new String with the clean-ups below applied in order.
def scrub_whitespace(string)
  string = string.dup
  string.gsub!('&nbsp;', ' ')        # HTML encoded spaces
  string.sub!(/\A[[:space:]]+/, '')  # document leading whitespace
  string.sub!(/[[:space:]]+\z/, '')  # document trailing whitespace
  string.gsub!(/([ ]+)$/, '')        # line trailing whitespace
  string.gsub!(/\n\n\n\n/, "\n\n")   # Quadruple line breaks
  # Spelled as an escape so the character cannot be mistaken for (or silently
  # turned into) an ordinary space, which would delete every space in the text.
  string.delete!("\u00A0")           # Unicode non-breaking spaces, injected as tabs
  string
end
|
17
|
+
|
18
|
+
if ARGV.size != 1 || ARGV[0] == '--help'
|
19
|
+
puts 'Usage: bundle exec w2m path/to/document.docx'
|
20
|
+
exit 1
|
21
|
+
end
|
22
|
+
|
23
|
+
if ARGV[0] == '--version'
|
24
|
+
puts "WordToMarkdown v#{WordToMarkdown::VERSION}"
|
25
|
+
puts "LibreOffice v#{WordToMarkdown.soffice.version}" unless Gem.win_platform?
|
26
|
+
else
|
27
|
+
doc = WordToMarkdown.new ARGV[0]
|
28
|
+
# puts doc.to_s
|
29
|
+
puts ReverseAsciidoctor.convert(scrub_whitespace(doc.document.html), WordToMarkdown::REVERSE_MARKDOWN_OPTIONS)
|
30
|
+
end
|
31
|
+
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'digest'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'reverse_asciidoctor/version'
|
4
|
+
require 'reverse_asciidoctor/errors'
|
5
|
+
require 'reverse_asciidoctor/cleaner'
|
6
|
+
require 'reverse_asciidoctor/config'
|
7
|
+
require 'reverse_asciidoctor/converters'
|
8
|
+
require 'reverse_asciidoctor/converters/base'
|
9
|
+
require 'reverse_asciidoctor/converters/a'
|
10
|
+
require 'reverse_asciidoctor/converters/aside'
|
11
|
+
require 'reverse_asciidoctor/converters/audio'
|
12
|
+
require 'reverse_asciidoctor/converters/blockquote'
|
13
|
+
require 'reverse_asciidoctor/converters/br'
|
14
|
+
require 'reverse_asciidoctor/converters/bypass'
|
15
|
+
require 'reverse_asciidoctor/converters/code'
|
16
|
+
require 'reverse_asciidoctor/converters/div'
|
17
|
+
require 'reverse_asciidoctor/converters/drop'
|
18
|
+
require 'reverse_asciidoctor/converters/em'
|
19
|
+
require 'reverse_asciidoctor/converters/figure'
|
20
|
+
require 'reverse_asciidoctor/converters/h'
|
21
|
+
require 'reverse_asciidoctor/converters/head'
|
22
|
+
require 'reverse_asciidoctor/converters/hr'
|
23
|
+
require 'reverse_asciidoctor/converters/ignore'
|
24
|
+
require 'reverse_asciidoctor/converters/img'
|
25
|
+
require 'reverse_asciidoctor/converters/mark'
|
26
|
+
require 'reverse_asciidoctor/converters/li'
|
27
|
+
require 'reverse_asciidoctor/converters/ol'
|
28
|
+
require 'reverse_asciidoctor/converters/p'
|
29
|
+
require 'reverse_asciidoctor/converters/pass_through'
|
30
|
+
require 'reverse_asciidoctor/converters/pre'
|
31
|
+
require 'reverse_asciidoctor/converters/q'
|
32
|
+
require 'reverse_asciidoctor/converters/strong'
|
33
|
+
require 'reverse_asciidoctor/converters/sup'
|
34
|
+
require 'reverse_asciidoctor/converters/sub'
|
35
|
+
require 'reverse_asciidoctor/converters/table'
|
36
|
+
require 'reverse_asciidoctor/converters/td'
|
37
|
+
require 'reverse_asciidoctor/converters/th'
|
38
|
+
require 'reverse_asciidoctor/converters/text'
|
39
|
+
require 'reverse_asciidoctor/converters/tr'
|
40
|
+
require 'reverse_asciidoctor/converters/video'
|
41
|
+
require 'reverse_asciidoctor/converters/math'
|
42
|
+
|
43
|
+
# Entry points of the gem: conversion plus lazily created, memoised
# configuration and cleaner objects.
module ReverseAsciidoctor

  # Converts +input+ and returns the tidied result.
  #
  # input   - a String (parsed with Nokogiri::HTML), a Nokogiri::XML::Document
  #           (its root is used) or a Nokogiri::XML::Node (used as is).
  # options - Hash handed to config.with for the duration of this call.
  #
  # Returns '' when no root node can be obtained from +input+.
  def self.convert(input, options = {})
    root =
      case input
      when String
        Nokogiri::HTML(input).root
      when Nokogiri::XML::Document
        input.root
      when Nokogiri::XML::Node
        input
      end

    return '' unless root

    config.with(options) do
      converter = ReverseAsciidoctor::Converters.lookup(root.name)
      cleaner.tidy(converter.convert(root))
    end
  end

  # Returns the memoised Config instance, yielding it first when a block is
  # given so callers can adjust settings in place.
  def self.config
    @config ||= Config.new
    yield @config if block_given?
    @config
  end

  # Returns the memoised Cleaner instance.
  def self.cleaner
    @cleaner ||= Cleaner.new
  end

end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module ReverseAsciidoctor
  # Post-processes converted text: collapses redundant whitespace and blank
  # lines and normalises the spacing around inline delimiters and brackets.
  class Cleaner

    # Delimiter => pattern matching content enclosed by two or more of the
    # delimiter character, including at most one whitespace on either side.
    # Patterns are applied in insertion order.
    INLINE_MARKER_PATTERNS = {
      '**' => /\s?\*{2,}.*?\*{2,}\s?/,
      '__' => /\s?\_{2,}.*?\_{2,}\s?/,
      '~~' => /\s?~{2,}.*?~{2,}\s?/
    }.freeze

    # Bracketed content with at most one whitespace on either side.
    BRACKET_PATTERN = /\s?\[.*?\]\s?/

    # Runs every clean-up step, in a fixed order, and returns the result.
    def tidy(string)
      result = remove_inner_whitespaces(string)
      result = remove_newlines(result)
      result = remove_leading_newlines(result)
      result = clean_tag_borders(result)
      clean_punctuation_characters(result)
    end

    # Collapses runs of three or more newlines into exactly two.
    def remove_newlines(string)
      string.gsub(/\n{3,}/, "\n\n")
    end

    # Drops all newlines at the very start of the string.
    def remove_leading_newlines(string)
      string.gsub(/\A\n+/, '')
    end

    # Squeezes runs of spaces/tabs inside each line down to one space while
    # keeping the line's own leading and trailing whitespace untouched.
    def remove_inner_whitespaces(string)
      string.each_line.inject("") do |memo, line|
        memo + preserve_border_whitespaces(line) do
          line.strip.gsub(/[ \t]{2,}/, ' ')
        end
      end
    end

    # Find non-asterisk content that is enclosed by two or
    # more asterisks. Ensure that only one whitespace occurs
    # in the border area.
    # Same for underscores, tildes and brackets.
    #
    # For the delimiters the configured tag_border is used when a side has no
    # whitespace; for brackets nothing is added.
    def clean_tag_borders(string)
      result = INLINE_MARKER_PATTERNS.inject(string) do |text, (marker, pattern)|
        text.gsub(pattern) do |match|
          preserve_border_whitespaces(match, default_border: ReverseAsciidoctor.config.tag_border) do
            match.strip.sub("#{marker} ", marker).sub(" #{marker}", marker)
          end
        end
      end

      result.gsub(BRACKET_PATTERN) do |match|
        preserve_border_whitespaces(match) do
          match.strip.sub('[ ', '[').sub(' ]', ']')
        end
      end
    end

    # Removes the single whitespace between a closing delimiter (**, ~~, __)
    # and a following punctuation character.
    def clean_punctuation_characters(string)
      string.gsub(/(\*\*|~~|__)\s([\.!\?'"])/, '\1\2')
    end

    private

    # Yields to obtain the cleaned inner text and re-attaches the leading and
    # trailing whitespace of +string+ around it.
    #
    # string  - the original text whose border whitespace is to be kept.
    # options - Hash; :default_border is used for a side that has no
    #           whitespace (default '').
    #
    # Whitespace-only input is returned unchanged without yielding.
    def preserve_border_whitespaces(string, options = {})
      return string if string =~ /\A\s*\Z/
      default_border = options.fetch(:default_border, '')
      # If the string contains part of a link so the characters [,],(,)
      # then don't add any extra spaces
      default_border = '' if string =~ /[\[\(\]\)]/
      string_start = present_or_default(string[/\A\s*/], default_border)
      string_end = present_or_default(string[/\s*\Z/], default_border)
      string_start + yield + string_end
    end

    # Returns +string+ unless it is nil or empty, in which case +default+.
    def present_or_default(string, default)
      return default if string.nil? || string.empty?
      string
    end

  end
end
|