reverse_adoc 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/macos.yml +27 -0
  3. data/.github/workflows/ubuntu.yml +27 -0
  4. data/.github/workflows/windows.yml +30 -0
  5. data/.hound.yml +3 -0
  6. data/.rubocop.yml +10 -0
  7. data/Gemfile +6 -0
  8. data/LICENSE.txt +25 -0
  9. data/README.adoc +290 -0
  10. data/Rakefile +14 -0
  11. data/bin/reverse_adoc +67 -0
  12. data/bin/w2a +85 -0
  13. data/lib/reverse_asciidoctor.rb +70 -0
  14. data/lib/reverse_asciidoctor/cleaner.rb +90 -0
  15. data/lib/reverse_asciidoctor/config.rb +53 -0
  16. data/lib/reverse_asciidoctor/converters.rb +33 -0
  17. data/lib/reverse_asciidoctor/converters/a.rb +38 -0
  18. data/lib/reverse_asciidoctor/converters/aside.rb +14 -0
  19. data/lib/reverse_asciidoctor/converters/audio.rb +34 -0
  20. data/lib/reverse_asciidoctor/converters/base.rb +24 -0
  21. data/lib/reverse_asciidoctor/converters/blockquote.rb +18 -0
  22. data/lib/reverse_asciidoctor/converters/br.rb +11 -0
  23. data/lib/reverse_asciidoctor/converters/bypass.rb +77 -0
  24. data/lib/reverse_asciidoctor/converters/code.rb +15 -0
  25. data/lib/reverse_asciidoctor/converters/div.rb +14 -0
  26. data/lib/reverse_asciidoctor/converters/drop.rb +18 -0
  27. data/lib/reverse_asciidoctor/converters/em.rb +18 -0
  28. data/lib/reverse_asciidoctor/converters/figure.rb +21 -0
  29. data/lib/reverse_asciidoctor/converters/h.rb +19 -0
  30. data/lib/reverse_asciidoctor/converters/head.rb +18 -0
  31. data/lib/reverse_asciidoctor/converters/hr.rb +11 -0
  32. data/lib/reverse_asciidoctor/converters/ignore.rb +12 -0
  33. data/lib/reverse_asciidoctor/converters/img.rb +80 -0
  34. data/lib/reverse_asciidoctor/converters/li.rb +24 -0
  35. data/lib/reverse_asciidoctor/converters/mark.rb +12 -0
  36. data/lib/reverse_asciidoctor/converters/math.rb +20 -0
  37. data/lib/reverse_asciidoctor/converters/ol.rb +46 -0
  38. data/lib/reverse_asciidoctor/converters/p.rb +17 -0
  39. data/lib/reverse_asciidoctor/converters/pass_through.rb +9 -0
  40. data/lib/reverse_asciidoctor/converters/pre.rb +38 -0
  41. data/lib/reverse_asciidoctor/converters/q.rb +12 -0
  42. data/lib/reverse_asciidoctor/converters/strong.rb +17 -0
  43. data/lib/reverse_asciidoctor/converters/sub.rb +12 -0
  44. data/lib/reverse_asciidoctor/converters/sup.rb +12 -0
  45. data/lib/reverse_asciidoctor/converters/table.rb +64 -0
  46. data/lib/reverse_asciidoctor/converters/td.rb +67 -0
  47. data/lib/reverse_asciidoctor/converters/text.rb +65 -0
  48. data/lib/reverse_asciidoctor/converters/th.rb +16 -0
  49. data/lib/reverse_asciidoctor/converters/tr.rb +22 -0
  50. data/lib/reverse_asciidoctor/converters/video.rb +36 -0
  51. data/lib/reverse_asciidoctor/errors.rb +10 -0
  52. data/lib/reverse_asciidoctor/version.rb +3 -0
  53. data/reverse_adoc.gemspec +35 -0
  54. data/spec/assets/anchors.html +22 -0
  55. data/spec/assets/basic.html +58 -0
  56. data/spec/assets/code.html +22 -0
  57. data/spec/assets/escapables.html +15 -0
  58. data/spec/assets/from_the_wild.html +23 -0
  59. data/spec/assets/full_example.html +49 -0
  60. data/spec/assets/html_fragment.html +3 -0
  61. data/spec/assets/lists.html +137 -0
  62. data/spec/assets/minimum.html +4 -0
  63. data/spec/assets/paragraphs.html +24 -0
  64. data/spec/assets/quotation.html +12 -0
  65. data/spec/assets/tables.html +99 -0
  66. data/spec/assets/unknown_tags.html +9 -0
  67. data/spec/components/anchors_spec.rb +21 -0
  68. data/spec/components/basic_spec.rb +49 -0
  69. data/spec/components/code_spec.rb +28 -0
  70. data/spec/components/escapables_spec.rb +23 -0
  71. data/spec/components/from_the_wild_spec.rb +17 -0
  72. data/spec/components/html_fragment_spec.rb +11 -0
  73. data/spec/components/lists_spec.rb +86 -0
  74. data/spec/components/paragraphs_spec.rb +15 -0
  75. data/spec/components/quotation_spec.rb +12 -0
  76. data/spec/components/tables_spec.rb +31 -0
  77. data/spec/components/unknown_tags_spec.rb +39 -0
  78. data/spec/lib/reverse_asciidoctor/cleaner_spec.rb +157 -0
  79. data/spec/lib/reverse_asciidoctor/config_spec.rb +26 -0
  80. data/spec/lib/reverse_asciidoctor/converters/aside_spec.rb +12 -0
  81. data/spec/lib/reverse_asciidoctor/converters/audio_spec.rb +18 -0
  82. data/spec/lib/reverse_asciidoctor/converters/blockquote_spec.rb +24 -0
  83. data/spec/lib/reverse_asciidoctor/converters/br_spec.rb +9 -0
  84. data/spec/lib/reverse_asciidoctor/converters/code_spec.rb +18 -0
  85. data/spec/lib/reverse_asciidoctor/converters/div_spec.rb +18 -0
  86. data/spec/lib/reverse_asciidoctor/converters/figure_spec.rb +13 -0
  87. data/spec/lib/reverse_asciidoctor/converters/img_spec.rb +28 -0
  88. data/spec/lib/reverse_asciidoctor/converters/li_spec.rb +13 -0
  89. data/spec/lib/reverse_asciidoctor/converters/mark_spec.rb +10 -0
  90. data/spec/lib/reverse_asciidoctor/converters/p_spec.rb +12 -0
  91. data/spec/lib/reverse_asciidoctor/converters/pre_spec.rb +45 -0
  92. data/spec/lib/reverse_asciidoctor/converters/q_spec.rb +10 -0
  93. data/spec/lib/reverse_asciidoctor/converters/strong_spec.rb +20 -0
  94. data/spec/lib/reverse_asciidoctor/converters/text_spec.rb +62 -0
  95. data/spec/lib/reverse_asciidoctor/converters/video_spec.rb +18 -0
  96. data/spec/lib/reverse_asciidoctor/converters_spec.rb +19 -0
  97. data/spec/lib/reverse_asciidoctor_spec.rb +37 -0
  98. data/spec/spec_helper.rb +21 -0
  99. metadata +299 -0
@@ -0,0 +1,10 @@
1
+ module ReverseAsciidoctor
2
+ class Error < StandardError
3
+ end
4
+
5
+ class UnknownTagError < Error
6
+ end
7
+
8
+ class InvalidConfigurationError < Error
9
+ end
10
+ end
@@ -0,0 +1,3 @@
1
+ module ReverseAsciidoctor
2
+ VERSION = '0.2.3'
3
+ end
@@ -0,0 +1,35 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "reverse_asciidoctor/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "reverse_adoc"
7
+ s.version = ReverseAsciidoctor::VERSION
8
+ s.authors = ["Ribose Inc."]
9
+ s.email = ["open.source@ribose.com"]
10
+
11
+ s.homepage = "http://github.com/metanorma/reverse_adoc"
12
+ s.summary = %q{Generate AsciiDoc from HTML and Microsoft Word via CLI or library.}
13
+ s.description = %q{Generate AsciiDoc from HTML and Microsoft Word via CLI or library.}
14
+ s.license = "BSD-2-Clause"
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
+ s.require_paths = ["lib"]
20
+ s.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
21
+
22
+ # specify any dependencies here; for example:
23
+ s.add_dependency 'nokogiri', ">= 1.10.4"
24
+ s.add_dependency 'mathml2asciimath'
25
+ s.add_development_dependency 'rspec'
26
+ s.add_development_dependency 'simplecov'
27
+ s.add_development_dependency 'rake'
28
+ s.add_development_dependency 'redcarpet'
29
+ s.add_development_dependency 'codeclimate-test-reporter'
30
+
31
+ # spec.add_runtime_dependency "thor"
32
+
33
+ # All the following are for bin/w2m
34
+ s.add_dependency 'word-to-markdown'
35
+ end
@@ -0,0 +1,22 @@
1
+ <html>
2
+ <body>
3
+ some text...
4
+ <a href="http://foobar.com">Foobar</a>
5
+ <a href="http://foobar.com" title="f***** up beyond all recognition">Fubar</a>
6
+ <a href="http://foobar.com" title="f***** up beyond all redemption"/>
7
+ <a href="http://strong.foobar.com"><strong>Strong foobar</strong></a>
8
+ There should be space before but not after the anchor (<a href="http://foobar.com">stripped</a>).
9
+
10
+ do not ignore <a href="foo.html"> </a> anchor tags with no link text
11
+ link <a href="#content">internal jumplinks</a> with anchors
12
+ link <a href="#content2"/>internal jumplinks without anchors
13
+ treat <a id="content">anchor tags with no href</a> as bookmarks
14
+
15
+ some text...
16
+
17
+ <img src="http://foobar.com/logo.png">
18
+ <img alt="foobar image" src="http://foobar.com/foobar.png">
19
+ <img alt="foobar image 2" title="this is the foobar image 2" src="http://foobar.com/foobar2.png">
20
+ some text...
21
+ </body>
22
+ </html>
@@ -0,0 +1,58 @@
1
+ <html>
2
+ <body>
3
+ plain text
4
+ <h1>h1</h1>
5
+ <h1 id="A">h1 with anchor</h1>
6
+ <h2>h2</h2>
7
+ <h3>h3</h3>
8
+ <h4>h4</h4>
9
+ <h5>h5</h5>
10
+ <h6>h6</h6>
11
+
12
+ <em>em tag content</em>
13
+ before <em></em> and after empty em tags
14
+ before <em> </em> and after em tags containing whitespace
15
+ before <em> <em> <br /> </em> </em> and after em tags containing whitespace
16
+ <em><em>double em tags</em></em>
17
+ <p><em><em>double em tags in p tag</em></em></p>
18
+ a<em> em with leading and trailing </em>whitespace
19
+ a<em>
20
+ em with extra leading and trailing
21
+ </em>whitespace
22
+
23
+ <strong>strong tag content</strong>
24
+ before <strong></strong> and after empty strong tags
25
+ before <strong> </strong> and after strong tags containing whitespace
26
+ before <strong> <strong> <br /> </strong> </strong> and after strong tags containing whitespace
27
+ <strong><strong>double strong tags</strong></strong>
28
+ <p><strong><strong>double strong tags in p tag</strong></strong></p>
29
+ before
30
+ <strong>
31
+ <strong>
32
+ double strong tags containing whitespace
33
+ </strong>
34
+ </strong> after
35
+ a<strong> strong with leading and trailing </strong>whitespace
36
+ a<strong>
37
+ strong with extra leading and trailing
38
+ </strong>whitespace
39
+
40
+ <b>b tag content</b>
41
+ <i>i tag content</i>
42
+
43
+ H<sub>2</sub>O
44
+ A<sup>2</sup>B
45
+
46
+ br tags become double space followed by newline<br/>
47
+
48
+ before hr
49
+ <hr/>
50
+ after hr
51
+
52
+ <div>section 1</div>
53
+ <div>section 2</div>
54
+
55
+ ignore <abbr>abbr</abbr>
56
+
57
+ </body>
58
+ </html>
@@ -0,0 +1,22 @@
1
+ <html>
2
+ <body>
3
+ <pre>pre block</pre>
4
+ <code>code block</code>
5
+ <pre><code>pre code block</code></pre>
6
+
7
+ <p>Paragraph with inline <code>code</code> block</p>
8
+
9
+ <pre><code>var this;
10
+ this.is("A multi line code block")
11
+ console.log("Yup, it is")
12
+ </code></pre>
13
+
14
+ Code with indentation:
15
+ <pre><code>tell application "Foo"
16
+ beep
17
+ end tell
18
+ </code></pre>
19
+
20
+ </body>
21
+ </html>
22
+
@@ -0,0 +1,15 @@
1
+ <html>
2
+ <body>
3
+ some text...
4
+
5
+ **two asterisks**
6
+ ***three asterisks***
7
+ __two underscores__
8
+ ___three underscores___
9
+
10
+ some text...
11
+
12
+ <pre><code>var theoretical_max_infin = 1.0;</code></pre>
13
+
14
+ </body>
15
+ </html>
@@ -0,0 +1,23 @@
1
+ <p>
2
+ <strong>
3
+ <strong>
4
+ .<br />
5
+ </strong>
6
+ *** intentcast
7
+ </strong>
8
+ : logo design
9
+ <strong>
10
+ <strong>
11
+ <br />
12
+ </strong>
13
+ </strong>
14
+ <strong>
15
+ <strong>
16
+ .
17
+ </strong>
18
+ </strong>
19
+ </p>
20
+
21
+ <a href="example.com/foo_bar">
22
+ <img src="example.com/foo_bar.png"> I\_AM\_HELPFUL
23
+ </a>
@@ -0,0 +1,49 @@
1
+ <html>
2
+ <body>
3
+ <ul>
4
+ <li>li 1</li>
5
+ <li>li 2</li>
6
+ <li>li 3</li>
7
+ </ul>
8
+ <ul>
9
+ <li>li 1</li>
10
+ <li>li 2</li>
11
+ <li>li 3</li>
12
+ </ul>
13
+ <ol>
14
+ <li>li 1</li>
15
+ <li>
16
+ <ul>
17
+ <li>eins</li>
18
+ <li>eins</li>
19
+ <li>eins</li>
20
+ </ul>
21
+ </li>
22
+ </ol>
23
+ <ol>
24
+ <li>li 1</li>
25
+ <li>li 2</li>
26
+ </ol>
27
+ <h1>h1</h1>
28
+ <h2>h2</h2>
29
+ <h3>h3</h3>
30
+ <h4>h4</h4>
31
+ <p>
32
+ Hallo <em>em</em> Text
33
+ </p>
34
+ <p>
35
+ <strong>strong</strong>
36
+ </p>
37
+ <pre>
38
+ <code>Block of code</code>
39
+ </pre>
40
+
41
+ <blockquote>
42
+ <p>First quoted paragraph</p>
43
+ <p>Second quoted paragraph</p>
44
+ </blockquote>
45
+ <a href="http://www.bla.com">link</a>
46
+ <img src="http://raw.com">
47
+ <hr>
48
+ </body>
49
+ </html>
@@ -0,0 +1,3 @@
1
+ naked text 1
2
+ <p>paragraph text</p>
3
+ naked text 2
@@ -0,0 +1,137 @@
1
+ <html>
2
+ <body>
3
+ <p>some text...</p>
4
+
5
+ <ul>
6
+ <li>unordered list entry</li>
7
+ <li>unordered list entry 2</li>
8
+ </ul>
9
+
10
+ <ol>
11
+ <li>ordered list entry</li>
12
+ <li>ordered list entry 2</li>
13
+ </ol>
14
+
15
+ <ol start="3">
16
+ <li>another ordered list entry</li>
17
+ </ol>
18
+
19
+ <ol reversed>
20
+ <li>a reversed ordered list entry</li>
21
+ </ol>
22
+
23
+ <ol>
24
+ <li>list entry 1st hierarchy</li>
25
+ <li>
26
+ <ul>
27
+ <li>nested unsorted list entry</li>
28
+ <li>
29
+ <ol>
30
+ <li>deep nested list entry</li>
31
+ </ol>
32
+ </li>
33
+ </ul>
34
+ </li>
35
+ </ol>
36
+
37
+ <ol id="1">
38
+ <li>arabic1</li>
39
+ </ol>
40
+
41
+ <ul id="A">
42
+ <li>upperalpha1</li>
43
+ </ul>
44
+
45
+ <ol style="1">
46
+ <li>arabic</li>
47
+ </ol>
48
+
49
+ <ol style="A">
50
+ <li>upperalpha</li>
51
+ </ol>
52
+
53
+ <ol style="a">
54
+ <li>loweralpha</li>
55
+ </ol>
56
+
57
+ <ol style="I">
58
+ <li>upperroman</li>
59
+ </ol>
60
+
61
+ <ol style="i">
62
+ <li>lowerroman</li>
63
+ </ol>
64
+
65
+ <ul type="disc">
66
+ <li>disc</li>
67
+ </ul>
68
+
69
+ <p>a nested list with no whitespace:</p>
70
+ <ul><li>item a</li><li>item b<ul><li>item bb</li><li>item bc</li></ul></li></ul>
71
+
72
+ <p>a nested list with lots of whitespace:</p>
73
+ <ul> <li> item wa </li> <li> item wb <ul> <li> item wbb </li> <li> item wbc </li> </ul> </li> </ul>
74
+
75
+ <ul>
76
+ <li class="toclevel-1 tocsection-1"><a href="Basic_concepts"><span class="tocnumber">1</span> <span class="toctext">Basic concepts</span></a></li>
77
+ <li class="toclevel-1 tocsection-2"><a href="History_of_the_idea"><span class="tocnumber">2</span> <span class="toctext">History of the idea</span></a></li>
78
+ <li class="toclevel-1 tocsection-3"><a href="Intelligence_explosion"><span class="tocnumber">3</span> <span class="toctext">Intelligence explosion</span></a>
79
+ </ul>
80
+
81
+ <ul>
82
+ <li>
83
+ <p dir="ltr">I want to have a party at my house!</p>
84
+ </li>
85
+ </ul>
86
+
87
+ <ul>
88
+ <li>
89
+ <p>li 1, p 1</p>
90
+ <p>li 1, p 2</p>
91
+ </li>
92
+ <li><p>li 2, p 1</p></li>
93
+ </ul>
94
+
95
+ <ol>
96
+ <li>
97
+ one
98
+ <ol>
99
+ <li>one one</li>
100
+ <li>one two</li>
101
+ </ol>
102
+ </li>
103
+ <li>
104
+ two
105
+ <ol>
106
+ <li>
107
+ two one
108
+ <ol>
109
+ <li>two one one</li>
110
+ <li>two one two</li>
111
+ </ol>
112
+ </li>
113
+ <li>two two</li>
114
+ </ol>
115
+ </li>
116
+ <li>three</li>
117
+ </ol>
118
+
119
+ <p>a nested list between adjacent list items</p>
120
+ <ul>
121
+ <li>alpha</li>
122
+ <li>bravo
123
+ <ul>
124
+ <li>bravo alpha</li>
125
+ <li>bravo bravo
126
+ <ul>
127
+ <li>bravo bravo alpha</i>
128
+ </ul>
129
+ </li>
130
+ </ul>
131
+ </li>
132
+ <li>charlie</li>
133
+ <li>delta</li>
134
+ </ul>
135
+
136
+ </body>
137
+ </html>
@@ -0,0 +1,4 @@
1
+ <html>
2
+ <body>
3
+ </body>
4
+ </html>
@@ -0,0 +1,24 @@
1
+ <html>
2
+ <body>
3
+ <p>First content</p>
4
+ <p>
5
+ Second
6
+ content
7
+ </p>
8
+ <p>
9
+ <em>Complex</em>
10
+ <pre>
11
+ <code>Content</code>
12
+ </pre>
13
+ </p>
14
+ <p>
15
+ <strong>Trailing whitespace: </strong>
16
+ </p>
17
+ <p>
18
+ <strong>Trailing non-breaking space:&nbsp;</strong>
19
+ </p>
20
+ <p>
21
+ <strong><em>Combination:&nbsp;</em></strong>
22
+ </p>
23
+ </body>
24
+ </html>
@@ -0,0 +1,12 @@
1
+ <html>
2
+ <body>
3
+ <pre>
4
+ <code>Block of code</code>
5
+ </pre>
6
+
7
+ <blockquote>
8
+ <p>First quoted paragraph</p>
9
+ <p>Second quoted paragraph</p>
10
+ </blockquote>
11
+ </body>
12
+ </html>
@@ -0,0 +1,99 @@
1
+ <html>
2
+ <body>
3
+ some text...
4
+
5
+ <table id="A">
6
+ <thead>
7
+ <colgroup>
8
+ <col span="2" style="background-color:red">
9
+ <col style="background-color:yellow">
10
+ </colgroup>
11
+ </thead>
12
+ <tbody>
13
+ <tr id="B">
14
+ <th id="C">header 1</th>
15
+ <th>header 2</th>
16
+ <th>header 3</th>
17
+ </tr>
18
+ <tr>
19
+ <th id="D">data 1-1</td>
20
+ <td>data 2-1</td>
21
+ <td>data 3-1</td>
22
+ </tr>
23
+ <tr>
24
+ <th>data 1-2</td>
25
+ <td>data 2-2</td>
26
+ <td>data 3-2</td>
27
+ </tr>
28
+ </tbody>
29
+ </table>
30
+
31
+ <table>
32
+ <tr>
33
+ <th><i>header oblique</i></th>
34
+ <th><strong>header bold</strong></th>
35
+ <th><code>header code</code></th>
36
+ </tr>
37
+ <tr>
38
+ <td><i>data oblique</i></td>
39
+ <td><strong>data bold</strong></td>
40
+ <td><code>data code</code></td>
41
+ </tr>
42
+ </table>
43
+
44
+ <table>
45
+ <tr>
46
+ <td colspan=2>colspan 2</td>
47
+ </tr>
48
+ <tr>
49
+ <td rowspan=2>rowspan 2</td>
50
+ </tr>
51
+ <tr>
52
+ <td rowspan=2 colspan=2>colrowspan 2</td>
53
+ </tr>
54
+ </table>
55
+
56
+ <table>
57
+ <tr>
58
+ <td align="left">horizontal left</td>
59
+ <td align="center">horizontal center</td>
60
+ <td align="right">horizontal right</td>
61
+ </tr>
62
+ <tr>
63
+ <td valign="top">vertical top</td>
64
+ <td valign="middle">vertical middle</td>
65
+ <td valign="bottom">vertical bottom</td>
66
+ </tr>
67
+ <tr>
68
+ <td align="center" valign="middle">center middle</td>
69
+ </tr>
70
+ </table>
71
+
72
+ <table>
73
+ <caption>Table <i>caption</i></caption>
74
+ <tr>
75
+ <td>
76
+ <p>Hello</p>
77
+ <p>This cell has multiple paragraphs</p>
78
+ </td>
79
+ <td>
80
+ <p>This cell has a single paragraph</p>
81
+ </td>
82
+ </tr>
83
+ </table>
84
+
85
+ <table width="75%">
86
+ <tr>
87
+ <td>75% width table</td>
88
+ </tr>
89
+ </table>
90
+
91
+ <table frame="hsides" rules="cols">
92
+ <tr>
93
+ <td>topbot</td>
94
+ </tr>
95
+ </table>
96
+
97
+ some text...
98
+ </body>
99
+ </html>