reverse_asciidoctor 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE +32 -0
  4. data/README.adoc +186 -0
  5. data/Rakefile +14 -0
  6. data/bin/reverse_asciidoctor +14 -0
  7. data/bin/w2m +31 -0
  8. data/lib/reverse_asciidoctor.rb +70 -0
  9. data/lib/reverse_asciidoctor/cleaner.rb +85 -0
  10. data/lib/reverse_asciidoctor/config.rb +28 -0
  11. data/lib/reverse_asciidoctor/converters.rb +33 -0
  12. data/lib/reverse_asciidoctor/converters/a.rb +38 -0
  13. data/lib/reverse_asciidoctor/converters/aside.rb +14 -0
  14. data/lib/reverse_asciidoctor/converters/audio.rb +34 -0
  15. data/lib/reverse_asciidoctor/converters/base.rb +24 -0
  16. data/lib/reverse_asciidoctor/converters/blockquote.rb +18 -0
  17. data/lib/reverse_asciidoctor/converters/br.rb +11 -0
  18. data/lib/reverse_asciidoctor/converters/bypass.rb +77 -0
  19. data/lib/reverse_asciidoctor/converters/code.rb +15 -0
  20. data/lib/reverse_asciidoctor/converters/div.rb +14 -0
  21. data/lib/reverse_asciidoctor/converters/drop.rb +18 -0
  22. data/lib/reverse_asciidoctor/converters/em.rb +18 -0
  23. data/lib/reverse_asciidoctor/converters/figure.rb +21 -0
  24. data/lib/reverse_asciidoctor/converters/h.rb +19 -0
  25. data/lib/reverse_asciidoctor/converters/head.rb +18 -0
  26. data/lib/reverse_asciidoctor/converters/hr.rb +11 -0
  27. data/lib/reverse_asciidoctor/converters/ignore.rb +12 -0
  28. data/lib/reverse_asciidoctor/converters/img.rb +23 -0
  29. data/lib/reverse_asciidoctor/converters/li.rb +24 -0
  30. data/lib/reverse_asciidoctor/converters/mark.rb +12 -0
  31. data/lib/reverse_asciidoctor/converters/math.rb +14 -0
  32. data/lib/reverse_asciidoctor/converters/ol.rb +46 -0
  33. data/lib/reverse_asciidoctor/converters/p.rb +17 -0
  34. data/lib/reverse_asciidoctor/converters/pass_through.rb +9 -0
  35. data/lib/reverse_asciidoctor/converters/pre.rb +38 -0
  36. data/lib/reverse_asciidoctor/converters/q.rb +12 -0
  37. data/lib/reverse_asciidoctor/converters/strong.rb +17 -0
  38. data/lib/reverse_asciidoctor/converters/sub.rb +12 -0
  39. data/lib/reverse_asciidoctor/converters/sup.rb +12 -0
  40. data/lib/reverse_asciidoctor/converters/table.rb +64 -0
  41. data/lib/reverse_asciidoctor/converters/td.rb +67 -0
  42. data/lib/reverse_asciidoctor/converters/text.rb +65 -0
  43. data/lib/reverse_asciidoctor/converters/th.rb +16 -0
  44. data/lib/reverse_asciidoctor/converters/tr.rb +22 -0
  45. data/lib/reverse_asciidoctor/converters/video.rb +36 -0
  46. data/lib/reverse_asciidoctor/errors.rb +10 -0
  47. data/lib/reverse_asciidoctor/version.rb +3 -0
  48. data/reverse_asciidoctor.gemspec +30 -0
  49. data/spec/assets/anchors.html +22 -0
  50. data/spec/assets/basic.html +58 -0
  51. data/spec/assets/code.html +22 -0
  52. data/spec/assets/escapables.html +15 -0
  53. data/spec/assets/from_the_wild.html +23 -0
  54. data/spec/assets/full_example.html +49 -0
  55. data/spec/assets/html_fragment.html +3 -0
  56. data/spec/assets/lists.html +137 -0
  57. data/spec/assets/minimum.html +4 -0
  58. data/spec/assets/paragraphs.html +24 -0
  59. data/spec/assets/quotation.html +12 -0
  60. data/spec/assets/tables.html +99 -0
  61. data/spec/assets/unknown_tags.html +9 -0
  62. data/spec/components/anchors_spec.rb +21 -0
  63. data/spec/components/basic_spec.rb +49 -0
  64. data/spec/components/code_spec.rb +28 -0
  65. data/spec/components/escapables_spec.rb +23 -0
  66. data/spec/components/from_the_wild_spec.rb +17 -0
  67. data/spec/components/html_fragment_spec.rb +11 -0
  68. data/spec/components/lists_spec.rb +86 -0
  69. data/spec/components/paragraphs_spec.rb +15 -0
  70. data/spec/components/quotation_spec.rb +12 -0
  71. data/spec/components/tables_spec.rb +31 -0
  72. data/spec/components/unknown_tags_spec.rb +39 -0
  73. data/spec/lib/reverse_asciidoctor/cleaner_spec.rb +157 -0
  74. data/spec/lib/reverse_asciidoctor/config_spec.rb +26 -0
  75. data/spec/lib/reverse_asciidoctor/converters/aside_spec.rb +12 -0
  76. data/spec/lib/reverse_asciidoctor/converters/audio_spec.rb +18 -0
  77. data/spec/lib/reverse_asciidoctor/converters/blockquote_spec.rb +24 -0
  78. data/spec/lib/reverse_asciidoctor/converters/br_spec.rb +9 -0
  79. data/spec/lib/reverse_asciidoctor/converters/code_spec.rb +18 -0
  80. data/spec/lib/reverse_asciidoctor/converters/div_spec.rb +18 -0
  81. data/spec/lib/reverse_asciidoctor/converters/figure_spec.rb +13 -0
  82. data/spec/lib/reverse_asciidoctor/converters/img_spec.rb +28 -0
  83. data/spec/lib/reverse_asciidoctor/converters/li_spec.rb +13 -0
  84. data/spec/lib/reverse_asciidoctor/converters/mark_spec.rb +10 -0
  85. data/spec/lib/reverse_asciidoctor/converters/p_spec.rb +12 -0
  86. data/spec/lib/reverse_asciidoctor/converters/pre_spec.rb +45 -0
  87. data/spec/lib/reverse_asciidoctor/converters/q_spec.rb +10 -0
  88. data/spec/lib/reverse_asciidoctor/converters/strong_spec.rb +20 -0
  89. data/spec/lib/reverse_asciidoctor/converters/text_spec.rb +62 -0
  90. data/spec/lib/reverse_asciidoctor/converters/video_spec.rb +18 -0
  91. data/spec/lib/reverse_asciidoctor/converters_spec.rb +19 -0
  92. data/spec/lib/reverse_asciidoctor_spec.rb +37 -0
  93. data/spec/spec_helper.rb +21 -0
  94. metadata +281 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cb94b96658189613a9f4cdf21a3fa5d3eefbad17
4
+ data.tar.gz: 392dd9b562dd8a1d24193d4031f62916edda8fd2
5
+ SHA512:
6
+ metadata.gz: a51d1071f41d2627def8bb06929bcb1308a673741067b29be58ad847dfb2d70ca7028e1cca1f52f9844310c378009395e61e0e0ffad1b8686a9ab3a21b7966a7
7
+ data.tar.gz: b829c7f3f60651a916c2fd23322406a581c7524825a804e6e26ea6953cb45cfe44f6ba316baeb2cebc92c9bd6388dfa976757f4ae1a0cd6bc487e279a2c0b611
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in reverse_asciidoctor.gemspec
gemspec
data/LICENSE ADDED
@@ -0,0 +1,32 @@
1
+ BSD 2-Clause License
2
+
3
+ Copyright (c) 2018, Ribose
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ * Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ * Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+
27
+
28
+
29
+ LICENSE OF https://github.com/xijo/reverse_markdown
30
+
31
+ https://github.com/xijo/reverse_markdown, on which this gem is based, was
32
+ licensed with the DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE, v2.
@@ -0,0 +1,186 @@
1
+ = reverse_asciidoctor
2
+
3
+ Based on https://github.com/xijo/reverse_markdown
4
+
5
+ Transforms HTML into asciidoctor.
6
+
7
+ == Requirements
8
+
9
+ . http://nokogiri.org/[Nokogiri]
10
+ . Ruby 1.9.3 or higher
11
+
12
+ == Installation
13
+
14
+ Install the gem
15
+
16
+ [source,console]
17
+ ----
18
+ [sudo] gem install reverse_asciidoctor
19
+ ----
20
+
21
+ or add it to your Gemfile
22
+
23
+ [source,ruby]
24
+ ----
25
+ gem 'reverse_asciidoctor'
26
+ ----
27
+
28
+ == Features
29
+
30
+ As a port of reverse_markdown, reverse_asciidoctor shares its features:
31
+
32
+ * Module based - if you miss a tag, just add it
33
+ * Can deal with nested lists
34
+ * Inline and block code is supported
35
+ * Supports blockquote
36
+
37
+ It supports the following html tags supported by reverse_markdown:
38
+
39
+ * `a`
40
+ * `blockquote`
41
+ * `br`
42
+ * `code`, `tt` (added: `kbd`, `samp`, `var`)
43
+ * `div`, `article`
44
+ * `em`, `i` (added: `cite`)
45
+ * `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `hr`
46
+ * `img`
47
+ * `li`, `ol`, `ul` (added: `dir`)
48
+ * `p`, `pre`
49
+ * `strong`, `b`
50
+ * `table`, `td`, `th`, `tr`
51
+
52
+ NOTE:
53
+ * reverse_asciidoctor does *not* support `del` or `strike`, because Asciidoctor out of the box does not
54
+ * As with reverse_markdown, `pre` is only treated as sourcecode if it is contained in a `div@class = highlight-` element, or has a `@brush` attribute naming the language (Confluence).
55
+ * The gem does not support `p@align`, because Asciidoctor doesn't
56
+
57
+ In addition, it supports:
58
+
59
+ * `aside`
60
+ * `audio`, `video` (with `@src` attributes)
61
+ * `figure`, `figcaption`
62
+ * `mark`
63
+ * `q`
64
+ * `sub`, `sup`
65
+ * `@id` anchors
66
+ * `blockquote@cite`
67
+ * `img/@width`, `img/@height`
68
+ * `ol/@style`, `ol/@start`, `ol/@reversed`, `ul/@type`
69
+ * `td/@colspan`, `td/@rowspan`, `td/@align`, `td/@valign`
70
+ * `table/caption`, `table/@width`, `table/@frame` (partial), `table/@rules` (partial)
71
+ * Lists and paragraphs within cells
72
+ ** Not tables within cells: Asciidoctor cannot deal with nested tables
73
+
74
+ It also supports MathML... sort of.
75
+
76
+ * Asciidoctor supports AsciiMath and LaTeX for stem expressions. HTML uses MathML.
77
+ The gem will recognise MathML expressions in HTML, and will wrap them in Asciidoctor
78
+ `stem:[ ]` macros. The result of this gem is not actually legal Asciidoctor for stem:
79
+ Asciidoctor will presumably
80
+ think this is AsciiMath in the `stem:[ ]` macro, try to pass it into MathJax as
81
+ AsciiMath, and fail. But of course, MathJax has no problem with MathML, and some postprocessing
82
+ on the Asciidoctor output can ensure that the MathML is treated by MathJax (or whatever else
83
+ uses the output) as such; so this is still much better than nothing for stem processing.
84
+ * An alternative would be to attempt to map MathML to either LaTeX or AsciiMath.
85
+ ** The self-description of https://github.com/learningobjectsinc/mathml-to-asciimath
86
+ ("subset"... "this module is not: comprehensive, performant") does not recommend it,
87
+ when MathJax is entirely happy with MathML anyway.
88
+ ** https://github.com/transpect/mml2tex looks rather more robust, and is also used
89
+ to export Word documents and their OMML to LaTeX via MathML. But we'd still rather
90
+ keep the MathML in place.
91
+
92
+ The gem does not support:
93
+
94
+ * `col`, `colgroup`
95
+ * `source`, `picture`
96
+ * `bdi`, `bdo`, `ruby`, `rt`, `rp`, `wbr`
97
+ * `frame`, `frameset`, `iframe`, `noframes`, `noscript`, `script`, `input`, `output`, `progress`
98
+ * `map`, `canvas`, `dialog`, `embed`, `object`, `param`, `svg`, `track`
99
+ * `fieldset`, `button`, `datalist`, `form`, `label`, `legend`, `menu`, `menulist`, `optgroup`, `option`, `select`, `textarea`
100
+ * `big`, `dfn`, `font`, `s`, `small`, `span`, `strike`, `u`
101
+ * `center`
102
+ * `data`, `meter`
103
+ * `del`, `ins`
104
+ * `footer`, `header`, `main`, `nav`, `details`, `section`, `summary`, `template`
105
+
106
+ == Usage
107
+
108
+ === Ruby
109
+
110
+ You can convert html content as string or Nokogiri document:
111
+
112
+ [source,ruby]
113
+ ----
114
+ input = '<strong>feelings</strong>'
115
+ result = ReverseAsciidoctor.convert input
116
+ result.inspect # " *feelings* "
117
+ ----
118
+
119
+ === Commandline
120
+
121
+ It's also possible to convert HTML files to AsciiDoc using the binary:
122
+
123
+ [source,console]
124
+ ----
125
+ $ bin/reverse_asciidoctor file.html > file.adoc
126
+ $ cat file.html | bin/reverse_asciidoctor > file.adoc
127
+ ----
128
+
129
+ In addition, the `bin/w2m` script (
130
+ adapted from https://github.com/benbalter/word-to-markdown[Ben Balter's word-to-markdown])
131
+ extracts HTML from Word docx documents, and converts it to AsciiDoc.
132
+
133
+ [source,console]
134
+ ----
135
+ $ bundle exec bin/w2m document.docx > document.adoc
136
+ ----
137
+
138
+ The script presumes that LibreOffice has already been installed: it uses LibreOffice's
139
+ export to XHTML. LibreOffice's export of XHTML is superior to the native Microsoft Word export
140
+ to HTML: it exports lists (which Word keeps as paragraphs), and it exports OMML into MathML.
141
+ On the other hand, the LibreOffice export relies on
142
+
143
+ === Configuration
144
+
145
+ The following options are available:
146
+
147
+ * `unknown_tags` (default `pass_through`) - how to handle unknown tags. Valid options are:
148
+ ** `pass_through` - Include the unknown tag completely into the result
149
+ ** `drop` - Drop the unknown tag and its content
150
+ ** `bypass` - Ignore the unknown tag but try to convert its content
151
+ ** `raise` - Raise an error to let you know
152
+ * `tag_border` (default `' '`) - how to handle tag borders. valid options are:
153
+ ** `' '` - Add whitespace if there is none at tag borders.
154
+ ** `''` - Do not add whitespace.
155
+
156
+ ==== As options
157
+
158
+ Just pass your chosen configuration options in after the input. The given options will last for this operation only.
159
+
160
+ [source,ruby]
161
+ ----
162
+ ReverseAsciidoctor.convert(input, unknown_tags: :raise)
163
+ ----
164
+
165
+ ==== Preconfigure
166
+
167
+ Or configure it block style at an initializer level. These configurations will last for all conversions until they are set to something different.
168
+
169
+ [source,ruby]
170
+ ----
171
+ ReverseAsciidoctor.config do |config|
172
+ config.unknown_tags = :bypass
173
+ config.github_flavored = true
174
+ config.tag_border = ''
175
+ end
176
+ ----
177
+
178
+
179
+ == Related stuff
180
+
181
+ * https://github.com/xijo/reverse_markdown[Xijo's original reverse_markdown gem]
182
+ * https://github.com/xijo/reverse_markdown/wiki/Write-your-own-converter[Write custom converters] - Wiki entry about how to write your own converter
183
+ * https://github.com/harlantwood/html_massage[html_massage] - A gem by Harlan T. Wood to convert regular sites into markdown using reverse_markdown
184
+ * https://github.com/benbalter/word-to-markdown[word-to-markdown] - Convert word docs into markdown while using reverse_markdown, by Ben Balter
185
+ * https://github.com/asciidocfx/HtmlToAsciidoc[HtmlToAsciidoc] - Javascript regexp-based converter of HTML to Asciidoctor
186
+ * https://asciidoctor.org/docs/user-manual/[The Asciidoctor User Manual]
@@ -0,0 +1,14 @@
1
# Rake tasks for the gem: packaging tasks from Bundler, the RSpec suite
# (also the default task) and an interactive console.
require 'bundler/gem_tasks'

# Expose the Code Climate token to the environment when a local
# `.codeclimate` file is present.
if File.exist?('.codeclimate')
  ENV["CODECLIMATE_REPO_TOKEN"] = File.read('.codeclimate').strip
end

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)
task :default => :spec

desc 'Open an irb session preloaded with this library'
task :console do
  # `-rubygems` (i.e. `-r ubygems`) is intentionally omitted: ubygems.rb was
  # removed in Ruby 2.5 and RubyGems is loaded by default anyway, so passing
  # the flag makes irb fail on load with current Rubies.
  sh 'irb -I lib -r reverse_asciidoctor.rb'
end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ # Usage: reverse_asciidoctor [FILE]...
3
+ # Usage: cat FILE | reverse_asciidoctor
4
+ require 'reverse_asciidoctor'
5
+ require 'optparse'
6
+
7
# Parse command-line switches; any remaining arguments are left in ARGV so
# that ARGF reads them as input files (or STDIN when none are given).
OptionParser.new do |opts|
  opts.banner = "Usage: reverse_asciidoctor [options] <file>"

  # The handler must write to this gem's own configuration. It previously
  # referenced ReverseMarkdown, a constant this script never loads, so using
  # -u raised a NameError.
  opts.on('-u', '--unknown_tags [pass_through, drop, bypass, raise]', 'Unknown tag handling (default: pass_through)') { |v| ReverseAsciidoctor.config.unknown_tags = v }
end.parse!

# Convert the whole input in one go and print the result to STDOUT.
puts ReverseAsciidoctor.convert(ARGF.read)
data/bin/w2m ADDED
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'word-to-markdown'
5
+ require 'reverse_asciidoctor'
6
+
7
# Normalises whitespace in an HTML string before it is converted.
#
# Works on a copy, so the caller's string is never mutated.
#
# @param string [String] raw HTML
# @return [String] the cleaned copy
def scrub_whitespace(string)
  string = string.dup
  string.gsub!('&nbsp;', ' ') # HTML encoded spaces
  string.sub!(/\A[[:space:]]+/, '') # document leading whitespace
  string.sub!(/[[:space:]]+\z/, '') # document trailing whitespace
  string.gsub!(/([ ]+)$/, '') # line trailing whitespace
  string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
  # Unicode non-breaking spaces, injected as tabs. Written as an explicit
  # escape so the literal cannot silently degrade into an ordinary space
  # (which would strip every space from the document).
  string.delete!("\u00A0")
  string
end
17
+
18
# Exactly one argument is required; `--help` (or any other argument count)
# prints the usage line and exits with status 1.
unless ARGV.size == 1 && ARGV[0] != '--help'
  puts 'Usage: bundle exec w2m path/to/document.docx'
  exit 1
end

case ARGV[0]
when '--version'
  # Report the converter version, plus the LibreOffice version on non-Windows hosts.
  puts "WordToMarkdown v#{WordToMarkdown::VERSION}"
  puts "LibreOffice v#{WordToMarkdown.soffice.version}" unless Gem.win_platform?
else
  # Treat the argument as a document path: take the HTML that WordToMarkdown
  # produces for it, scrub its whitespace and convert the result.
  doc = WordToMarkdown.new ARGV[0]
  # puts doc.to_s
  html = scrub_whitespace(doc.document.html)
  puts ReverseAsciidoctor.convert(html, WordToMarkdown::REVERSE_MARKDOWN_OPTIONS)
end
31
+
@@ -0,0 +1,70 @@
1
+ require 'digest'
2
+ require 'nokogiri'
3
+ require 'reverse_asciidoctor/version'
4
+ require 'reverse_asciidoctor/errors'
5
+ require 'reverse_asciidoctor/cleaner'
6
+ require 'reverse_asciidoctor/config'
7
+ require 'reverse_asciidoctor/converters'
8
+ require 'reverse_asciidoctor/converters/base'
9
+ require 'reverse_asciidoctor/converters/a'
10
+ require 'reverse_asciidoctor/converters/aside'
11
+ require 'reverse_asciidoctor/converters/audio'
12
+ require 'reverse_asciidoctor/converters/blockquote'
13
+ require 'reverse_asciidoctor/converters/br'
14
+ require 'reverse_asciidoctor/converters/bypass'
15
+ require 'reverse_asciidoctor/converters/code'
16
+ require 'reverse_asciidoctor/converters/div'
17
+ require 'reverse_asciidoctor/converters/drop'
18
+ require 'reverse_asciidoctor/converters/em'
19
+ require 'reverse_asciidoctor/converters/figure'
20
+ require 'reverse_asciidoctor/converters/h'
21
+ require 'reverse_asciidoctor/converters/head'
22
+ require 'reverse_asciidoctor/converters/hr'
23
+ require 'reverse_asciidoctor/converters/ignore'
24
+ require 'reverse_asciidoctor/converters/img'
25
+ require 'reverse_asciidoctor/converters/mark'
26
+ require 'reverse_asciidoctor/converters/li'
27
+ require 'reverse_asciidoctor/converters/ol'
28
+ require 'reverse_asciidoctor/converters/p'
29
+ require 'reverse_asciidoctor/converters/pass_through'
30
+ require 'reverse_asciidoctor/converters/pre'
31
+ require 'reverse_asciidoctor/converters/q'
32
+ require 'reverse_asciidoctor/converters/strong'
33
+ require 'reverse_asciidoctor/converters/sup'
34
+ require 'reverse_asciidoctor/converters/sub'
35
+ require 'reverse_asciidoctor/converters/table'
36
+ require 'reverse_asciidoctor/converters/td'
37
+ require 'reverse_asciidoctor/converters/th'
38
+ require 'reverse_asciidoctor/converters/text'
39
+ require 'reverse_asciidoctor/converters/tr'
40
+ require 'reverse_asciidoctor/converters/video'
41
+ require 'reverse_asciidoctor/converters/math'
42
+
43
# Entry points of the gem: conversion plus access to the shared
# configuration and cleaner instances.
module ReverseAsciidoctor
  # Converts HTML input and returns the tidied result.
  #
  # input   - a String of HTML, a Nokogiri::XML::Document or a
  #           Nokogiri::XML::Node; any other input yields ''.
  # options - configuration overrides handed to `config.with` for the
  #           duration of this call.
  def self.convert(input, options = {})
    root =
      case input
      when String
        Nokogiri::HTML(input).root
      when Nokogiri::XML::Document
        input.root
      when Nokogiri::XML::Node
        input
      end

    # Unsupported input types and documents without a root node convert to
    # the empty string.
    return '' unless root

    config.with(options) do
      converter = ReverseAsciidoctor::Converters.lookup(root.name)
      cleaner.tidy(converter.convert(root))
    end
  end

  # Returns the memoised Config instance, yielding it first when a block is
  # given so callers can set options in block style.
  def self.config
    (@config ||= Config.new).tap do |settings|
      yield settings if block_given?
    end
  end

  # Returns the memoised Cleaner instance.
  def self.cleaner
    @cleaner ||= Cleaner.new
  end
end
@@ -0,0 +1,85 @@
1
module ReverseAsciidoctor
  # Whitespace and punctuation clean-up applied to converter output.
  class Cleaner
    # Runs every clean-up pass, in order, over +string+ and returns the result.
    def tidy(string)
      passes = %i[
        remove_inner_whitespaces
        remove_newlines
        remove_leading_newlines
        clean_tag_borders
        clean_punctuation_characters
      ]
      passes.reduce(string) { |text, pass| public_send(pass, text) }
    end

    # Collapses any run of three or more newlines into exactly two.
    def remove_newlines(string)
      blank_line = "\n\n"
      string.gsub(/\n{3,}/, blank_line)
    end

    # Drops the newlines at the very start of the string.
    def remove_leading_newlines(string)
      # \A can only match at offset 0, so a single substitution is enough.
      string.sub(/\A\n+/, '')
    end

    # Squeezes runs of spaces/tabs inside each line down to one space while
    # keeping each line's leading and trailing whitespace as it was.
    def remove_inner_whitespaces(string)
      string.each_line.reduce("") do |cleaned, line|
        squeezed = preserve_border_whitespaces(line) { line.strip.gsub(/[ \t]{2,}/, ' ') }
        cleaned + squeezed
      end
    end

    # For content enclosed by doubled (or longer) runs of asterisks,
    # underscores or tildes, and for content enclosed in square brackets,
    # removes one space directly inside the opening and closing delimiter
    # and keeps the whitespace around the whole match. The doubled markers
    # fall back to the configured tag_border when no whitespace is present.
    def clean_tag_borders(string)
      doubled_markers = [
        [/\s?\*{2,}.*?\*{2,}\s?/, '**'],
        [/\s?\_{2,}.*?\_{2,}\s?/, '__'],
        [/\s?~{2,}.*?~{2,}\s?/, '~~']
      ]

      marked = doubled_markers.reduce(string) do |text, (pattern, marker)|
        text.gsub(pattern) do |hit|
          preserve_border_whitespaces(hit, default_border: ReverseAsciidoctor.config.tag_border) do
            hit.strip.sub("#{marker} ", marker).sub(" #{marker}", marker)
          end
        end
      end

      marked.gsub(/\s?\[.*?\]\s?/) do |hit|
        preserve_border_whitespaces(hit) { hit.strip.sub('[ ', '[').sub(' ]', ']') }
      end
    end

    # Removes the single whitespace character between a closing **, ~~ or __
    # marker and a following punctuation or quote character.
    def clean_punctuation_characters(string)
      string.gsub(/(\*\*|~~|__)\s([\.!\?'"])/, '\1\2')
    end

    private

    # Yields to obtain the cleaned core of +string+, then wraps it in the
    # original leading/trailing whitespace. Whitespace-only strings are
    # returned untouched. When a border has no whitespace, the
    # :default_border option (default '') is used instead, unless the string
    # contains any of [ ] ( ), in which case nothing is added.
    def preserve_border_whitespaces(string, options = {}, &block)
      return string if string =~ /\A\s*\Z/

      # Link-like fragments (containing [, ], ( or )) never get extra spaces.
      fallback = string =~ /[\[\(\]\)]/ ? '' : options.fetch(:default_border, '')
      leading = present_or_default(string[/\A\s*/], fallback)
      trailing = present_or_default(string[/\s*\Z/], fallback)
      leading + yield + trailing
    end

    # Returns +string+ unless it is nil or empty, in which case +default+.
    def present_or_default(string, default)
      return default if string.nil? || string.empty?

      string
    end
  end
end