reverse_asciidoctor 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE +32 -0
  4. data/README.adoc +186 -0
  5. data/Rakefile +14 -0
  6. data/bin/reverse_asciidoctor +14 -0
  7. data/bin/w2m +31 -0
  8. data/lib/reverse_asciidoctor.rb +70 -0
  9. data/lib/reverse_asciidoctor/cleaner.rb +85 -0
  10. data/lib/reverse_asciidoctor/config.rb +28 -0
  11. data/lib/reverse_asciidoctor/converters.rb +33 -0
  12. data/lib/reverse_asciidoctor/converters/a.rb +38 -0
  13. data/lib/reverse_asciidoctor/converters/aside.rb +14 -0
  14. data/lib/reverse_asciidoctor/converters/audio.rb +34 -0
  15. data/lib/reverse_asciidoctor/converters/base.rb +24 -0
  16. data/lib/reverse_asciidoctor/converters/blockquote.rb +18 -0
  17. data/lib/reverse_asciidoctor/converters/br.rb +11 -0
  18. data/lib/reverse_asciidoctor/converters/bypass.rb +77 -0
  19. data/lib/reverse_asciidoctor/converters/code.rb +15 -0
  20. data/lib/reverse_asciidoctor/converters/div.rb +14 -0
  21. data/lib/reverse_asciidoctor/converters/drop.rb +18 -0
  22. data/lib/reverse_asciidoctor/converters/em.rb +18 -0
  23. data/lib/reverse_asciidoctor/converters/figure.rb +21 -0
  24. data/lib/reverse_asciidoctor/converters/h.rb +19 -0
  25. data/lib/reverse_asciidoctor/converters/head.rb +18 -0
  26. data/lib/reverse_asciidoctor/converters/hr.rb +11 -0
  27. data/lib/reverse_asciidoctor/converters/ignore.rb +12 -0
  28. data/lib/reverse_asciidoctor/converters/img.rb +23 -0
  29. data/lib/reverse_asciidoctor/converters/li.rb +24 -0
  30. data/lib/reverse_asciidoctor/converters/mark.rb +12 -0
  31. data/lib/reverse_asciidoctor/converters/math.rb +14 -0
  32. data/lib/reverse_asciidoctor/converters/ol.rb +46 -0
  33. data/lib/reverse_asciidoctor/converters/p.rb +17 -0
  34. data/lib/reverse_asciidoctor/converters/pass_through.rb +9 -0
  35. data/lib/reverse_asciidoctor/converters/pre.rb +38 -0
  36. data/lib/reverse_asciidoctor/converters/q.rb +12 -0
  37. data/lib/reverse_asciidoctor/converters/strong.rb +17 -0
  38. data/lib/reverse_asciidoctor/converters/sub.rb +12 -0
  39. data/lib/reverse_asciidoctor/converters/sup.rb +12 -0
  40. data/lib/reverse_asciidoctor/converters/table.rb +64 -0
  41. data/lib/reverse_asciidoctor/converters/td.rb +67 -0
  42. data/lib/reverse_asciidoctor/converters/text.rb +65 -0
  43. data/lib/reverse_asciidoctor/converters/th.rb +16 -0
  44. data/lib/reverse_asciidoctor/converters/tr.rb +22 -0
  45. data/lib/reverse_asciidoctor/converters/video.rb +36 -0
  46. data/lib/reverse_asciidoctor/errors.rb +10 -0
  47. data/lib/reverse_asciidoctor/version.rb +3 -0
  48. data/reverse_asciidoctor.gemspec +30 -0
  49. data/spec/assets/anchors.html +22 -0
  50. data/spec/assets/basic.html +58 -0
  51. data/spec/assets/code.html +22 -0
  52. data/spec/assets/escapables.html +15 -0
  53. data/spec/assets/from_the_wild.html +23 -0
  54. data/spec/assets/full_example.html +49 -0
  55. data/spec/assets/html_fragment.html +3 -0
  56. data/spec/assets/lists.html +137 -0
  57. data/spec/assets/minimum.html +4 -0
  58. data/spec/assets/paragraphs.html +24 -0
  59. data/spec/assets/quotation.html +12 -0
  60. data/spec/assets/tables.html +99 -0
  61. data/spec/assets/unknown_tags.html +9 -0
  62. data/spec/components/anchors_spec.rb +21 -0
  63. data/spec/components/basic_spec.rb +49 -0
  64. data/spec/components/code_spec.rb +28 -0
  65. data/spec/components/escapables_spec.rb +23 -0
  66. data/spec/components/from_the_wild_spec.rb +17 -0
  67. data/spec/components/html_fragment_spec.rb +11 -0
  68. data/spec/components/lists_spec.rb +86 -0
  69. data/spec/components/paragraphs_spec.rb +15 -0
  70. data/spec/components/quotation_spec.rb +12 -0
  71. data/spec/components/tables_spec.rb +31 -0
  72. data/spec/components/unknown_tags_spec.rb +39 -0
  73. data/spec/lib/reverse_asciidoctor/cleaner_spec.rb +157 -0
  74. data/spec/lib/reverse_asciidoctor/config_spec.rb +26 -0
  75. data/spec/lib/reverse_asciidoctor/converters/aside_spec.rb +12 -0
  76. data/spec/lib/reverse_asciidoctor/converters/audio_spec.rb +18 -0
  77. data/spec/lib/reverse_asciidoctor/converters/blockquote_spec.rb +24 -0
  78. data/spec/lib/reverse_asciidoctor/converters/br_spec.rb +9 -0
  79. data/spec/lib/reverse_asciidoctor/converters/code_spec.rb +18 -0
  80. data/spec/lib/reverse_asciidoctor/converters/div_spec.rb +18 -0
  81. data/spec/lib/reverse_asciidoctor/converters/figure_spec.rb +13 -0
  82. data/spec/lib/reverse_asciidoctor/converters/img_spec.rb +28 -0
  83. data/spec/lib/reverse_asciidoctor/converters/li_spec.rb +13 -0
  84. data/spec/lib/reverse_asciidoctor/converters/mark_spec.rb +10 -0
  85. data/spec/lib/reverse_asciidoctor/converters/p_spec.rb +12 -0
  86. data/spec/lib/reverse_asciidoctor/converters/pre_spec.rb +45 -0
  87. data/spec/lib/reverse_asciidoctor/converters/q_spec.rb +10 -0
  88. data/spec/lib/reverse_asciidoctor/converters/strong_spec.rb +20 -0
  89. data/spec/lib/reverse_asciidoctor/converters/text_spec.rb +62 -0
  90. data/spec/lib/reverse_asciidoctor/converters/video_spec.rb +18 -0
  91. data/spec/lib/reverse_asciidoctor/converters_spec.rb +19 -0
  92. data/spec/lib/reverse_asciidoctor_spec.rb +37 -0
  93. data/spec/spec_helper.rb +21 -0
  94. metadata +281 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cb94b96658189613a9f4cdf21a3fa5d3eefbad17
4
+ data.tar.gz: 392dd9b562dd8a1d24193d4031f62916edda8fd2
5
+ SHA512:
6
+ metadata.gz: a51d1071f41d2627def8bb06929bcb1308a673741067b29be58ad847dfb2d70ca7028e1cca1f52f9844310c378009395e61e0e0ffad1b8686a9ab3a21b7966a7
7
+ data.tar.gz: b829c7f3f60651a916c2fd23322406a581c7524825a804e6e26ea6953cb45cfe44f6ba316baeb2cebc92c9bd6388dfa976757f4ae1a0cd6bc487e279a2c0b611
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so dependency downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in reverse_asciidoctor.gemspec
gemspec
data/LICENSE ADDED
@@ -0,0 +1,32 @@
1
+ BSD 2-Clause License
2
+
3
+ Copyright (c) 2018, Ribose
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ * Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ * Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+
27
+
28
+
29
+ LICENSE OF https://github.com/xijo/reverse_markdown
30
+
31
+ https://github.com/xijo/reverse_markdown, on which this gem is based, was
32
+ licensed with the DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE, v2.
@@ -0,0 +1,186 @@
1
+ = reverse_asciidoctor
2
+
3
+ Based on https://github.com/xijo/reverse_markdown
4
+
5
+ Transforms HTML into asciidoctor.
6
+
7
+ == Requirements
8
+
9
+ . http://nokogiri.org/[Nokogiri]
10
+ . Ruby 1.9.3 or higher
11
+
12
+ == Installation
13
+
14
+ Install the gem
15
+
16
+ [source,console]
17
+ ----
18
+ [sudo] gem install reverse_asciidoctor
19
+ ----
20
+
21
+ or add it to your Gemfile
22
+
23
+ [source,ruby]
24
+ ----
25
+ gem 'reverse_asciidoctor'
26
+ ----
27
+
28
+ == Features
29
+
30
+ As a port of reverse_markdown, reverse_asciidoctor shares its features:
31
+
32
+ * Module based - if you miss a tag, just add it
33
+ * Can deal with nested lists
34
+ * Inline and block code is supported
35
+ * Supports blockquote
36
+
37
+ It supports the following html tags supported by reverse_markdown:
38
+
39
+ * `a`
40
+ * `blockquote`
41
+ * `br`
42
+ * `code`, `tt` (added: `kbd`, `samp`, `var`)
43
+ * `div`, `article`
44
+ * `em`, `i` (added: `cite`)
45
+ * `h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `hr`
46
+ * `img`
47
+ * `li`, `ol`, `ul` (added: `dir`)
48
+ * `p`, `pre`
49
+ * `strong`, `b`
50
+ * `table`, `td`, `th`, `tr`
51
+
52
+ NOTE:
53
+ * reverse_asciidoctor does *not* support `del` or `strike`, because Asciidoctor out of the box does not
54
+ * As with reverse_markdown, `pre` is only treated as sourcecode if it is contained in a `div@class = highlight-` element, or has a `@brush` attribute naming the language (Confluence).
55
+ * The gem does not support `p@align`, because Asciidoctor doesn't
56
+
57
+ In addition, it supports:
58
+
59
+ * `aside`
60
+ * `audio`, `video` (with `@src` attributes)
61
+ * `figure`, `figcaption`
62
+ * `mark`
63
+ * `q`
64
+ * `sub`, `sup`
65
+ * `@id` anchors
66
+ * `blockquote@cite`
67
+ * `img/@width`, `img/@height`
68
+ * `ol/@style`, `ol/@start`, `ol/@reversed`, `ul/@type`
69
+ * `td/@colspan`, `td/@rowspan`, `td/@align`, `td/@valign`
70
+ * `table/caption`, `table/@width`, `table/@frame` (partial), `table/@rules` (partial)
71
+ * Lists and paragraphs within cells
72
+ ** Not tables within cells: Asciidoctor cannot deal with nested tables
73
+
74
+ It also supports MathML... sort of.
75
+
76
+ * Asciidoctor supports AsciiMath and LaTeX for stem expressions. HTML uses MathML.
77
+ The gem will recognise MathML expressions in HTML, and will wrap them in Asciidoctor
78
+ `stem:[ ]` macros. The result of this gem is not actually legal Asciidoctor for stem:
79
+ Asciidoctor will presumably
80
+ think this is AsciiMath in the `stem:[ ]` macro, try to pass it into MathJax as
81
+ AsciiMath, and fail. But of course, MathJax has no problem with MathML, and some postprocessing
82
+ on the Asciidoctor output can ensure that the MathML is treated by MathJax (or whatever else
83
+ uses the output) as such; so this is still much better than nothing for stem processing.
84
+ * An alternative would be to attempt to map MathML to either LaTeX or AsciiMath.
85
+ ** The self-description of https://github.com/learningobjectsinc/mathml-to-asciimath
86
+ ("subset"... "this module is not: comprehensive, performant") does not recommend it,
87
+ when MathJax is entirely happy with MathML anyway.
88
+ ** https://github.com/transpect/mml2tex looks rather more robust, and is also used
89
+ to export Word documents and their OOMML to LaTeX via MathML. But we'd still rather
90
+ keep the MathML in place.
91
+
92
+ The gem does not support:
93
+
94
+ * `col`, `colgroup`
95
+ * `source`, `picture`
96
+ * `bdi`, `bdo`, `ruby`, `rt`, `rp`, `wbr`
97
+ * `frame`, `frameset`, `iframe`, `noframes`, `noscript`, `script`, `input`, `output`, `progress`
98
+ * `map`, `canvas`, `dialog`, `embed`, `object`, `param`, `svg`, `track`
99
+ * `fieldset`, `button`, `datalist`, `form`, `label`, `legend`, `menu`, `menulist`, `optgroup`, `option`, `select`, `textarea`
100
+ * `big`, `dfn`, `font`, `s`, `small`, `span`, `strike`, `u`
101
+ * `center`
102
+ * `data`, `meter`
103
+ * `del`, `ins`
104
+ * `footer`, `header`, `main`, `nav`, `details`, `section`, `summary`, `template`
105
+
106
+ == Usage
107
+
108
+ === Ruby
109
+
110
+ You can convert html content as string or Nokogiri document:
111
+
112
+ [source,ruby]
113
+ ----
114
+ input = '<strong>feelings</strong>'
115
+ result = ReverseAsciidoctor.convert input
116
+ result.inspect # " *feelings* "
117
+ ----
118
+
119
+ === Commandline
120
+
121
+ It's also possible to convert HTML files to AsciiDoc using the binary:
122
+
123
+ [source,console]
124
+ ----
125
+ $ bin/reverse_asciidoctor file.html > file.adoc
126
+ $ cat file.html | bin/reverse_asciidoctor > file.adoc
127
+ ----
128
+
129
+ In addition, the `bin/w2m` script (
130
+ adapted from https://github.com/benbalter/word-to-markdown[Ben Balter's word-to-markdown])
131
+ extracts HTML from Word docx documents, and converts it to AsciiDoc.
132
+
133
+ [source,console]
134
+ ----
135
+ $ bundle exec bin/w2m document.docx > document.adoc
136
+ ----
137
+
138
+ The script presumes that LibreOffice has already been installed: it uses LibreOffice's
139
+ export to XHTML. LibreOffice's export of XHTML is superior to the native Microsoft Word export
140
+ to HTML: it exports lists (which Word keeps as paragraphs), and it exports OOMML into MathML.
141
+ On the other hand, the LibreOffice export relies on
142
+
143
+ === Configuration
144
+
145
+ The following options are available:
146
+
147
+ * `unknown_tags` (default `pass_through`) - how to handle unknown tags. Valid options are:
148
+ ** `pass_through` - Include the unknown tag completely into the result
149
+ ** `drop` - Drop the unknown tag and its content
150
+ ** `bypass` - Ignore the unknown tag but try to convert its content
151
+ ** `raise` - Raise an error to let you know
152
+ * `tag_border` (default `' '`) - how to handle tag borders. Valid options are:
153
+ ** `' '` - Add whitespace if there is none at tag borders.
154
+ ** `''` - Do not add whitespace.
155
+
156
+ ==== As options
157
+
158
+ Just pass your chosen configuration options in after the input. The given options will last for this operation only.
159
+
160
+ [source,ruby]
161
+ ----
162
+ ReverseAsciidoctor.convert(input, unknown_tags: :raise)
163
+ ----
164
+
165
+ ==== Preconfigure
166
+
167
+ Or configure it block-style at the initializer level. These settings stay in effect for all conversions until they are changed.
168
+
169
+ [source,ruby]
170
+ ----
171
+ ReverseAsciidoctor.config do |config|
172
+ config.unknown_tags = :bypass
173
+ config.github_flavored = true
174
+ config.tag_border = ''
175
+ end
176
+ ----
177
+
178
+
179
+ == Related stuff
180
+
181
+ * https://github.com/xijo/reverse_markdown[Xijo's original reverse_markdown gem]
182
+ * https://github.com/xijo/reverse_markdown/wiki/Write-your-own-converter[Write custom converters] - Wiki entry about how to write your own converter
183
+ * https://github.com/harlantwood/html_massage[html_massage] - A gem by Harlan T. Wood to convert regular sites into markdown using reverse_markdown
184
+ * https://github.com/benbalter/word-to-markdown[word-to-markdown] - Convert word docs into markdown while using reverse_markdown, by Ben Balter
185
+ * https://github.com/asciidocfx/HtmlToAsciidoc[HtmlToAsciidoc] - Javascript regexp-based converter of HTML to Asciidoctor
186
+ * https://asciidoctor.org/docs/user-manual/[The Asciidoctor User Manual]
@@ -0,0 +1,14 @@
1
require 'bundler/gem_tasks'

# When a local `.codeclimate` file exists, export its (whitespace-stripped)
# contents as the Code Climate repo token for this process.
if File.exist?('.codeclimate')
  ENV["CODECLIMATE_REPO_TOKEN"] = File.read('.codeclimate').strip
end

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)
task :default => :spec

desc 'Open an irb session preloaded with this library'
task :console do
  # No `-rubygems` flag: RubyGems is loaded by default since Ruby 1.9, and the
  # `ubygems.rb` shim the flag relied on no longer exists in Ruby >= 2.5.
  sh 'irb -I lib -r reverse_asciidoctor.rb'
end
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Usage: reverse_asciidoctor [FILE]...
# Usage: cat FILE | reverse_asciidoctor
require 'reverse_asciidoctor'
require 'optparse'

OptionParser.new do |opts|
  opts.banner = "Usage: reverse_asciidoctor [options] <file>"

  # The option must be stored on ReverseAsciidoctor's configuration: this
  # script never loads ReverseMarkdown, so referencing it raised NameError.
  opts.on('-u', '--unknown_tags [pass_through, drop, bypass, raise]', 'Unknown tag handling (default: pass_through)') { |v| ReverseAsciidoctor.config.unknown_tags = v }
end.parse!

# ARGF reads the files left in ARGV after option parsing, or STDIN if none remain.
puts ReverseAsciidoctor.convert(ARGF.read)
data/bin/w2m ADDED
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'word-to-markdown'
5
+ require 'reverse_asciidoctor'
6
+
7
# Normalises whitespace in an HTML string before conversion.
#
# The argument is never mutated; a new string is returned. Steps, in order:
# replace `&nbsp;` entities with a plain space, trim leading and trailing
# whitespace of the whole document, strip trailing spaces from every line,
# collapse each run of four newlines into two, and finally delete literal
# non-breaking space characters (U+00A0).
#
# @param string [String] raw HTML
# @return [String] the scrubbed copy
def scrub_whitespace(string)
  string
    .gsub('&nbsp;', ' ')               # HTML encoded spaces
    .sub(/\A[[:space:]]+/, '')         # document leading whitespace
    .sub(/[[:space:]]+\z/, '')         # document trailing whitespace
    .gsub(/ +$/, '')                   # line trailing whitespace
    .gsub(/\n\n\n\n/, "\n\n")          # quadruple line breaks
    .delete("\u00A0")                  # Unicode non-breaking spaces, written as an
                                       # escape so it cannot be confused with ' '
end
17
+
18
# Command-line entry point. Exactly one argument is expected; anything else,
# or an explicit `--help`, prints the usage line and exits with status 1.
show_usage = ARGV.size != 1 || ARGV[0] == '--help'
if show_usage
  puts 'Usage: bundle exec w2m path/to/document.docx'
  exit 1
end

case ARGV[0]
when '--version'
  puts "WordToMarkdown v#{WordToMarkdown::VERSION}"
  # The LibreOffice version line is skipped on Windows.
  puts "LibreOffice v#{WordToMarkdown.soffice.version}" unless Gem.win_platform?
else
  # Treat the argument as a document path: take the HTML that WordToMarkdown
  # exposes for it, scrub its whitespace, and print the converted result.
  doc = WordToMarkdown.new ARGV[0]
  html = scrub_whitespace(doc.document.html)
  puts ReverseAsciidoctor.convert(html, WordToMarkdown::REVERSE_MARKDOWN_OPTIONS)
end
31
+
@@ -0,0 +1,70 @@
1
+ require 'digest'
2
+ require 'nokogiri'
3
+ require 'reverse_asciidoctor/version'
4
+ require 'reverse_asciidoctor/errors'
5
+ require 'reverse_asciidoctor/cleaner'
6
+ require 'reverse_asciidoctor/config'
7
+ require 'reverse_asciidoctor/converters'
8
+ require 'reverse_asciidoctor/converters/base'
9
+ require 'reverse_asciidoctor/converters/a'
10
+ require 'reverse_asciidoctor/converters/aside'
11
+ require 'reverse_asciidoctor/converters/audio'
12
+ require 'reverse_asciidoctor/converters/blockquote'
13
+ require 'reverse_asciidoctor/converters/br'
14
+ require 'reverse_asciidoctor/converters/bypass'
15
+ require 'reverse_asciidoctor/converters/code'
16
+ require 'reverse_asciidoctor/converters/div'
17
+ require 'reverse_asciidoctor/converters/drop'
18
+ require 'reverse_asciidoctor/converters/em'
19
+ require 'reverse_asciidoctor/converters/figure'
20
+ require 'reverse_asciidoctor/converters/h'
21
+ require 'reverse_asciidoctor/converters/head'
22
+ require 'reverse_asciidoctor/converters/hr'
23
+ require 'reverse_asciidoctor/converters/ignore'
24
+ require 'reverse_asciidoctor/converters/img'
25
+ require 'reverse_asciidoctor/converters/mark'
26
+ require 'reverse_asciidoctor/converters/li'
27
+ require 'reverse_asciidoctor/converters/ol'
28
+ require 'reverse_asciidoctor/converters/p'
29
+ require 'reverse_asciidoctor/converters/pass_through'
30
+ require 'reverse_asciidoctor/converters/pre'
31
+ require 'reverse_asciidoctor/converters/q'
32
+ require 'reverse_asciidoctor/converters/strong'
33
+ require 'reverse_asciidoctor/converters/sup'
34
+ require 'reverse_asciidoctor/converters/sub'
35
+ require 'reverse_asciidoctor/converters/table'
36
+ require 'reverse_asciidoctor/converters/td'
37
+ require 'reverse_asciidoctor/converters/th'
38
+ require 'reverse_asciidoctor/converters/text'
39
+ require 'reverse_asciidoctor/converters/tr'
40
+ require 'reverse_asciidoctor/converters/video'
41
+ require 'reverse_asciidoctor/converters/math'
42
+
43
# Top-level entry points of the gem: conversion plus access to the shared
# configuration and cleaner objects.
module ReverseAsciidoctor

  # Converts HTML into the text produced by the registered converters.
  #
  # @param input [String, Nokogiri::XML::Document, Nokogiri::XML::Node]
  #   a String is parsed with Nokogiri::HTML; a document contributes its root
  #   node; a node is used as is. Any other input yields no root.
  # @param options [Hash] per-call options handed to `config.with`
  # @return [String] the tidied conversion result, or '' when there is no
  #   root node to convert
  def self.convert(input, options = {})
    root = case input
           when String                  then Nokogiri::HTML(input).root
           when Nokogiri::XML::Document then input.root
           when Nokogiri::XML::Node     then input
           end

    return '' unless root

    config.with(options) do
      # The converter is selected by the root element's tag name.
      result = ReverseAsciidoctor::Converters.lookup(root.name).convert(root)
      cleaner.tidy(result)
    end
  end

  # Returns the memoized Config instance, yielding it first when a block is
  # given so callers can set options block-style.
  #
  # @yieldparam config [Config] the shared configuration
  # @return [Config]
  def self.config
    @config ||= Config.new
    yield @config if block_given?
    @config
  end

  # Returns the memoized Cleaner used to tidy converter output.
  #
  # @return [Cleaner]
  def self.cleaner
    @cleaner ||= Cleaner.new
  end

end
@@ -0,0 +1,85 @@
1
module ReverseAsciidoctor
  # Post-processes converter output: collapses redundant whitespace and
  # newlines and normalises the spacing around inline markers and brackets.
  class Cleaner

    # Inline markers whose borders get normalised, each mapped to the pattern
    # matching a span enclosed by two or more of that marker character
    # (optionally with one whitespace character on either side).
    # Insertion order is the processing order.
    TAG_BORDER_PATTERNS = {
      '**' => /\s?\*{2,}.*?\*{2,}\s?/,
      '__' => /\s?_{2,}.*?_{2,}\s?/,
      '~~' => /\s?~{2,}.*?~{2,}\s?/
    }.freeze

    # Runs every cleaning step, in a fixed order, over +string+.
    #
    # @param string [String] raw converter output
    # @return [String] the cleaned text
    def tidy(string)
      result = remove_inner_whitespaces(string)
      result = remove_newlines(result)
      result = remove_leading_newlines(result)
      result = clean_tag_borders(result)
      clean_punctuation_characters(result)
    end

    # Collapses every run of three or more newlines into exactly two.
    def remove_newlines(string)
      string.gsub(/\n{3,}/, "\n\n")
    end

    # Drops all newlines at the very start of the string.
    def remove_leading_newlines(string)
      string.gsub(/\A\n+/, '')
    end

    # Squeezes runs of two or more spaces/tabs inside each line into a single
    # space. Leading and trailing whitespace of each line (including its line
    # terminator) is kept, and whitespace-only lines pass through unchanged.
    def remove_inner_whitespaces(string)
      # map + join builds the result once instead of re-allocating an ever
      # growing string for every line.
      cleaned_lines = string.each_line.map do |line|
        preserve_border_whitespaces(line) do
          line.strip.gsub(/[ \t]{2,}/, ' ')
        end
      end
      cleaned_lines.join
    end

    # Find content that is enclosed by two or more asterisks, underscores or
    # tildes and remove the space directly inside the markers; a side that has
    # no whitespace outside the markers receives the configured `tag_border`.
    # Afterwards the same inner-space removal is applied to square brackets,
    # which never receive an added border.
    def clean_tag_borders(string)
      result = TAG_BORDER_PATTERNS.inject(string) do |text, (marker, pattern)|
        text.gsub(pattern) do |match|
          preserve_border_whitespaces(match, default_border: ReverseAsciidoctor.config.tag_border) do
            # Only the first "marker + space" and "space + marker" are removed.
            match.strip.sub("#{marker} ", marker).sub(" #{marker}", marker)
          end
        end
      end

      result.gsub(/\s?\[.*?\]\s?/) do |match|
        preserve_border_whitespaces(match) do
          match.strip.sub('[ ', '[').sub(' ]', ']')
        end
      end
    end

    # Removes the single whitespace character between a `**`, `~~` or `__`
    # marker and a directly following `.`, `!`, `?`, `'` or `"`.
    def clean_punctuation_characters(string)
      string.gsub(/(\*\*|~~|__)\s([\.!\?'"])/, '\1\2')
    end

    private

    # Yields to obtain the cleaned inner text and re-attaches the leading and
    # trailing whitespace of +string+ around it.
    #
    # @param string [String] the original text, whose borders are preserved
    # @param options [Hash] `:default_border` (default '') is used for a side
    #   that has no whitespace of its own
    # @return [String] +string+ itself, unchanged, when it is whitespace-only;
    #   otherwise border + block result + border
    def preserve_border_whitespaces(string, options = {})
      return string if string =~ /\A\s*\Z/
      default_border = options.fetch(:default_border, '')
      # If the string contains part of a link so the characters [,],(,)
      # then don't add any extra spaces
      default_border = '' if string =~ /[\[\(\]\)]/
      leading  = present_or_default(string[/\A\s*/], default_border)
      trailing = present_or_default(string[/\s*\Z/], default_border)
      leading + yield + trailing
    end

    # Returns +default+ when +string+ is nil or empty, +string+ otherwise.
    def present_or_default(string, default)
      string.nil? || string.empty? ? default : string
    end

  end
end