reverse_markdown 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +1 -0
- data/License-MIT +7 -0
- data/README.md +2 -2
- data/lib/reverse_markdown.rb +2 -2
- data/lib/reverse_markdown/mapper.rb +132 -28
- data/lib/reverse_markdown/version.rb +1 -1
- data/reverse_markdown.gemspec +1 -1
- data/spec/assets/anchors.html +12 -3
- data/spec/assets/basic.html +33 -3
- data/spec/assets/code.html +22 -0
- data/spec/assets/escapables.html +15 -0
- data/spec/assets/from_the_wild.html +19 -0
- data/spec/assets/html_fragment.html +3 -0
- data/spec/assets/lists.html +32 -3
- data/spec/assets/paragraphs.html +4 -1
- data/spec/components/anchors_spec.rb +13 -4
- data/spec/components/basic_spec.rb +27 -8
- data/spec/components/code_spec.rb +28 -0
- data/spec/components/escapables_spec.rb +22 -0
- data/spec/components/from_the_wild_spec.rb +16 -0
- data/spec/components/html_fragment_spec.rb +11 -0
- data/spec/components/lists_spec.rb +35 -5
- data/spec/components/paragraphs_spec.rb +4 -3
- data/spec/components/quotation_spec.rb +2 -2
- data/spec/html_to_markdown_to_html_spec.rb +104 -0
- metadata +39 -4
data/.travis.yml
CHANGED
data/License-MIT
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
Copyright (c) 2012 Johannes Opper
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
4
|
+
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
6
|
+
|
7
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Transform existing html into markdown in a simple way, for example if you want to import existing tags into your markdown-based application.
|
4
4
|
|
5
|
-
[![
|
5
|
+
[![Build Status](https://secure.travis-ci.org/xijo/reverse_markdown.png?branch=master)](https://travis-ci.org/xijo/reverse_markdown)
|
6
6
|
|
7
7
|
# Installation
|
8
8
|
|
@@ -46,4 +46,4 @@ Only basic html tags are supported right now. However, it should not be to diffi
|
|
46
46
|
|
47
47
|
# Thanks
|
48
48
|
|
49
|
-
..to Ben Woosley for his improvements to the first version.
|
49
|
+
..to Ben Woosley for his improvements to the first version.
|
data/lib/reverse_markdown.rb
CHANGED
@@ -5,13 +5,13 @@ require 'nokogiri'
|
|
5
5
|
|
6
6
|
module ReverseMarkdown
|
7
7
|
|
8
|
-
def self.parse(input)
|
8
|
+
def self.parse(input, opts={})
|
9
9
|
root = case input
|
10
10
|
when String then Nokogiri::HTML(input).root
|
11
11
|
when Nokogiri::XML::Document then input.root
|
12
12
|
when Nokogiri::XML::Node then input
|
13
13
|
end
|
14
|
-
ReverseMarkdown::Mapper.new.
|
14
|
+
ReverseMarkdown::Mapper.new(opts).process_root(root)
|
15
15
|
end
|
16
16
|
|
17
17
|
# 2012/08/11 joe: possibly deprecate in favour of #parse
|
@@ -3,29 +3,76 @@ module ReverseMarkdown
|
|
3
3
|
attr_accessor :raise_errors
|
4
4
|
attr_accessor :log_enabled, :log_level
|
5
5
|
attr_accessor :li_counter
|
6
|
+
attr_accessor :github_style_code_blocks
|
6
7
|
|
7
|
-
def initialize
|
8
|
+
def initialize(opts={})
|
8
9
|
self.log_level = :info
|
9
10
|
self.log_enabled = true
|
10
11
|
self.li_counter = 0
|
12
|
+
self.github_style_code_blocks = opts[:github_style_code_blocks] || false
|
13
|
+
end
|
14
|
+
|
15
|
+
def process_root(element)
|
16
|
+
markdown = process_element(element) # recursively process all elements to get full markdown
|
17
|
+
|
18
|
+
# Extract github style code blocks
|
19
|
+
extractions = {}
|
20
|
+
markdown.gsub!(%r{```.*?```}m) do |match|
|
21
|
+
md5 = Digest::MD5.hexdigest(match)
|
22
|
+
extractions[md5] = match
|
23
|
+
"{code-block-extraction-#{md5}}"
|
24
|
+
end
|
25
|
+
|
26
|
+
markdown = markdown.split("\n").map do |line|
|
27
|
+
if line.match(/^( {4}|\t)/)
|
28
|
+
line
|
29
|
+
else
|
30
|
+
"#{ ' ' if line.match(/^ {2,3}/) }" +
|
31
|
+
normalize_whitespace(line).strip +
|
32
|
+
"#{ ' ' if line.match(/ {2}$/) }"
|
33
|
+
end
|
34
|
+
end.join("\n")
|
35
|
+
|
36
|
+
markdown.gsub!(/\n{3,}/, "\n\n")
|
37
|
+
|
38
|
+
# Insert pre block extractions
|
39
|
+
markdown.gsub!(/\{code-block-extraction-([0-9a-f]{32})\}/){ extractions[$1] }
|
40
|
+
|
41
|
+
markdown
|
11
42
|
end
|
12
43
|
|
13
44
|
def process_element(element)
|
14
45
|
output = ''
|
15
|
-
|
16
|
-
element
|
46
|
+
if element.text?
|
47
|
+
text = process_text(element)
|
48
|
+
if output.end_with?(' ') && text.start_with?(' ')
|
49
|
+
output << text.lstrip
|
50
|
+
else
|
51
|
+
output << text
|
52
|
+
end
|
17
53
|
else
|
18
|
-
opening(element)
|
19
|
-
|
20
|
-
|
21
|
-
|
54
|
+
output << opening(element).to_s
|
55
|
+
|
56
|
+
markdown_chunks = element.children.map { |child| process_element(child) }
|
57
|
+
remove_adjacent_whitespace!(markdown_chunks)
|
58
|
+
output << markdown_chunks.join
|
59
|
+
|
60
|
+
output << ending(element).to_s
|
22
61
|
end
|
23
|
-
output << ending(element) unless element.text?
|
24
62
|
output
|
25
63
|
end
|
26
64
|
|
27
65
|
private
|
28
66
|
|
67
|
+
# removes whitespace-only chunk if the previous chunk ends with whitespace
|
68
|
+
def remove_adjacent_whitespace!(chunks)
|
69
|
+
(chunks.size - 1).downto(1).each do |i|
|
70
|
+
chunk = chunks[i]
|
71
|
+
previous_chunk = chunks[i-1]
|
72
|
+
chunks.delete_at(i) if chunk == ' ' && previous_chunk.end_with?(' ')
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
29
76
|
def opening(element)
|
30
77
|
parent = element.parent ? element.parent.name.to_sym : nil
|
31
78
|
case element.name.to_sym
|
@@ -45,29 +92,49 @@ module ReverseMarkdown
|
|
45
92
|
"\n"
|
46
93
|
when :ul, :root#, :p
|
47
94
|
"\n"
|
95
|
+
when :div
|
96
|
+
"\n"
|
48
97
|
when :p
|
49
98
|
if element.ancestors.map(&:name).include?('blockquote')
|
50
99
|
"\n\n> "
|
100
|
+
elsif [nil, :body].include? parent
|
101
|
+
is_first = true
|
102
|
+
previous = element.previous
|
103
|
+
while is_first == true and previous do
|
104
|
+
is_first = false unless previous.content.strip == "" || previous.text?
|
105
|
+
previous = previous.previous
|
106
|
+
end
|
107
|
+
is_first ? "" : "\n\n"
|
51
108
|
else
|
52
109
|
"\n\n"
|
53
110
|
end
|
54
111
|
when :h1, :h2, :h3, :h4 # /h(\d)/ for 1.9
|
55
112
|
element.name =~ /h(\d)/
|
56
|
-
'#' * $1.to_i + ' '
|
57
|
-
when :em
|
58
|
-
|
59
|
-
when :strong
|
60
|
-
|
113
|
+
"\n" + ('#' * $1.to_i) + ' '
|
114
|
+
when :em, :i
|
115
|
+
element.text.strip.empty? ? '' : '_' if (element.ancestors('em') + element.ancestors('i')).empty?
|
116
|
+
when :strong, :b
|
117
|
+
element.text.strip.empty? ? '' : '**' if (element.ancestors('strong') + element.ancestors('b')).empty?
|
61
118
|
when :blockquote
|
62
119
|
"> "
|
63
120
|
when :code
|
64
|
-
parent == :pre
|
121
|
+
if parent == :pre
|
122
|
+
self.github_style_code_blocks ? "\n```\n" : "\n "
|
123
|
+
else
|
124
|
+
" `"
|
125
|
+
end
|
65
126
|
when :a
|
66
|
-
|
127
|
+
if !element.text.strip.empty? && element['href'] && !element['href'].start_with?('#')
|
128
|
+
" ["
|
129
|
+
else
|
130
|
+
" "
|
131
|
+
end
|
67
132
|
when :img
|
68
|
-
"!["
|
133
|
+
" !["
|
69
134
|
when :hr
|
70
|
-
"
|
135
|
+
"\n* * *\n"
|
136
|
+
when :br
|
137
|
+
" \n"
|
71
138
|
else
|
72
139
|
handle_error "unknown start tag: #{element.name.to_s}"
|
73
140
|
""
|
@@ -77,32 +144,69 @@ module ReverseMarkdown
|
|
77
144
|
def ending(element)
|
78
145
|
parent = element.parent ? element.parent.name.to_sym : nil
|
79
146
|
case element.name.to_sym
|
80
|
-
when :html, :body, :pre, :hr
|
147
|
+
when :html, :body, :pre, :hr
|
81
148
|
""
|
149
|
+
when :p
|
150
|
+
"\n\n"
|
151
|
+
when :div
|
152
|
+
"\n"
|
82
153
|
when :h1, :h2, :h3, :h4 # /h(\d)/ for 1.9
|
83
154
|
"\n"
|
84
|
-
when :em
|
85
|
-
'
|
86
|
-
when :strong
|
87
|
-
'**'
|
155
|
+
when :em, :i
|
156
|
+
element.text.strip.empty? ? '' : '_' if (element.ancestors('em') + element.ancestors('i')).empty?
|
157
|
+
when :strong, :b
|
158
|
+
element.text.strip.empty? ? '' : '**' if (element.ancestors('strong') + element.ancestors('b')).empty?
|
88
159
|
when :li, :blockquote, :root, :ol, :ul
|
89
160
|
"\n"
|
90
161
|
when :code
|
91
|
-
parent == :pre
|
162
|
+
if parent == :pre
|
163
|
+
self.github_style_code_blocks ? "\n```" : "\n"
|
164
|
+
else
|
165
|
+
'` '
|
166
|
+
end
|
92
167
|
when :a
|
93
|
-
|
94
|
-
|
95
|
-
if element.has_attribute?('alt')
|
96
|
-
"#{element.attribute('alt')}][#{element.attribute('src')}] "
|
168
|
+
if !element.text.strip.empty? && element['href'] && !element['href'].start_with?('#')
|
169
|
+
"](#{element['href']}#{title_markdown(element)}) "
|
97
170
|
else
|
98
|
-
"
|
171
|
+
""
|
99
172
|
end
|
173
|
+
when :img
|
174
|
+
"#{element['alt']}](#{element['src']}#{title_markdown(element)}) "
|
100
175
|
else
|
101
176
|
handle_error "unknown end tag: #{element.name}"
|
102
177
|
""
|
103
178
|
end
|
104
179
|
end
|
105
180
|
|
181
|
+
def title_markdown(element)
|
182
|
+
title = element['title']
|
183
|
+
title ? %[ "#{title}"] : ''
|
184
|
+
end
|
185
|
+
|
186
|
+
def process_text(element)
|
187
|
+
parent = element.parent ? element.parent.name.to_sym : nil
|
188
|
+
case
|
189
|
+
when parent == :code
|
190
|
+
if self.github_style_code_blocks
|
191
|
+
element.text
|
192
|
+
else
|
193
|
+
element.text.strip.gsub(/\n/,"\n ")
|
194
|
+
end
|
195
|
+
else
|
196
|
+
normalize_whitespace(escape_text(element.text))
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
def normalize_whitespace(text)
|
201
|
+
text.tr("\n\t", ' ').squeeze(' ')
|
202
|
+
end
|
203
|
+
|
204
|
+
def escape_text(text)
|
205
|
+
text.
|
206
|
+
gsub('*', '\*').
|
207
|
+
gsub('_', '\_')
|
208
|
+
end
|
209
|
+
|
106
210
|
def handle_error(message)
|
107
211
|
if raise_errors
|
108
212
|
raise ReverseMarkdown::ParserError, message
|
data/reverse_markdown.gemspec
CHANGED
data/spec/assets/anchors.html
CHANGED
@@ -1,10 +1,19 @@
|
|
1
1
|
<html>
|
2
2
|
<body>
|
3
|
+
some text...
|
3
4
|
<a href="http://foobar.com">Foobar</a>
|
4
|
-
<a href="http://
|
5
|
-
|
6
|
-
|
5
|
+
<a href="http://foobar.com" title="f***** up beyond all recognition">Fubar</a>
|
6
|
+
<a href="http://strong.foobar.com"><strong>Strong foobar</strong></a>
|
7
|
+
|
8
|
+
ignore <a href="foo.html"> </a> anchor tags with no link text
|
9
|
+
pass through the text of <a href="#content">internal jumplinks</a> without treating them as links
|
10
|
+
pass through the text of <a id="content">anchor tags with no href</a> without treating them as links
|
11
|
+
|
12
|
+
some text...
|
13
|
+
|
7
14
|
<img src="http://foobar.com/logo.png">
|
8
15
|
<img alt="foobar image" src="http://foobar.com/foobar.png">
|
16
|
+
<img alt="foobar image 2" title="this is the foobar image 2" src="http://foobar.com/foobar2.png">
|
17
|
+
some text...
|
9
18
|
</body>
|
10
19
|
</html>
|
data/spec/assets/basic.html
CHANGED
@@ -1,12 +1,42 @@
|
|
1
1
|
<html>
|
2
2
|
<body>
|
3
|
+
plain text
|
3
4
|
<h1>h1</h1>
|
4
5
|
<h2>h2</h2>
|
5
6
|
<h3>h3</h3>
|
6
7
|
<h4>h4</h4>
|
7
|
-
|
8
|
-
<
|
9
|
-
<
|
8
|
+
|
9
|
+
<em>em tag content</em>
|
10
|
+
before <em></em> and after empty em tags
|
11
|
+
before <em> </em> and after em tags containing whitespace
|
12
|
+
before <em> <em> <br /> </em> </em> and after em tags containing whitespace
|
13
|
+
<em><em>double em tags</em></em>
|
14
|
+
<p><em><em>double em tags in p tag</em></em></p>
|
15
|
+
|
16
|
+
<strong>strong tag content</strong>
|
17
|
+
before <strong></strong> and after empty strong tags
|
18
|
+
before <strong> </strong> and after strong tags containing whitespace
|
19
|
+
before <strong> <strong> <br /> </strong> </strong> and after strong tags containing whitespace
|
20
|
+
<strong><strong>double strong tags</strong></strong>
|
21
|
+
<p><strong><strong>double strong tags in p tag</strong></strong></p>
|
22
|
+
before
|
23
|
+
<strong>
|
24
|
+
<strong>
|
25
|
+
double strong tags containing whitespace
|
26
|
+
</strong>
|
27
|
+
</strong> after
|
28
|
+
|
29
|
+
<b>b tag content</b>
|
30
|
+
<i>i tag content</i>
|
31
|
+
|
32
|
+
br tags become double space followed by newline<br/>
|
33
|
+
|
34
|
+
before hr
|
10
35
|
<hr/>
|
36
|
+
after hr
|
37
|
+
|
38
|
+
<div>section 1</div>
|
39
|
+
<div>section 2</div>
|
40
|
+
|
11
41
|
</body>
|
12
42
|
</html>
|
@@ -0,0 +1,22 @@
|
|
1
|
+
<html>
|
2
|
+
<body>
|
3
|
+
<pre>pre block</pre>
|
4
|
+
<code>code block</code>
|
5
|
+
<pre><code>pre code block</code></pre>
|
6
|
+
|
7
|
+
<p>Paragraph with inline <code>code</code> block</p>
|
8
|
+
|
9
|
+
<pre><code>var this;
|
10
|
+
this.is("A multi line code block")
|
11
|
+
console.log("Yup, it is")
|
12
|
+
</code></pre>
|
13
|
+
|
14
|
+
Code with indentation:
|
15
|
+
<pre><code>tell application "Foo"
|
16
|
+
beep
|
17
|
+
end tell
|
18
|
+
</code></pre>
|
19
|
+
|
20
|
+
</body>
|
21
|
+
</html>
|
22
|
+
|
data/spec/assets/lists.html
CHANGED
@@ -1,13 +1,15 @@
|
|
1
1
|
<html>
|
2
2
|
<body>
|
3
|
+
some text...
|
4
|
+
|
3
5
|
<ul>
|
4
6
|
<li>unordered list entry</li>
|
5
|
-
<li>unordered list entry</li>
|
7
|
+
<li>unordered list entry 2</li>
|
6
8
|
</ul>
|
7
9
|
|
8
10
|
<ol>
|
9
11
|
<li>ordered list entry</li>
|
10
|
-
<li>ordered list entry</li>
|
12
|
+
<li>ordered list entry 2</li>
|
11
13
|
</ol>
|
12
14
|
|
13
15
|
<ol>
|
@@ -18,10 +20,37 @@
|
|
18
20
|
<li>
|
19
21
|
<ol>
|
20
22
|
<li>deep nested list entry</li>
|
21
|
-
|
23
|
+
</ol>
|
22
24
|
</li>
|
23
25
|
</ul>
|
24
26
|
</li>
|
25
27
|
</ol>
|
28
|
+
|
29
|
+
a nested list with no whitespace:
|
30
|
+
<ul><li>item a</li><li>item b<ul><li>item bb</li><li>item bc</li></ul></li></ul>
|
31
|
+
|
32
|
+
a nested list with lots of whitespace:
|
33
|
+
<ul> <li> item wa </li> <li> item wb <ul> <li> item wbb </li> <li> item wbc </li> </ul> </li> </ul>
|
34
|
+
|
35
|
+
<ul>
|
36
|
+
<li class="toclevel-1 tocsection-1"><a href="Basic_concepts"><span class="tocnumber">1</span> <span class="toctext">Basic concepts</span></a></li>
|
37
|
+
<li class="toclevel-1 tocsection-2"><a href="History_of_the_idea"><span class="tocnumber">2</span> <span class="toctext">History of the idea</span></a></li>
|
38
|
+
<li class="toclevel-1 tocsection-3"><a href="Intelligence_explosion"><span class="tocnumber">3</span> <span class="toctext">Intelligence explosion</span></a>
|
39
|
+
</ul>
|
40
|
+
|
41
|
+
<ul>
|
42
|
+
<li>
|
43
|
+
<p dir="ltr">I want to have a party at my house!</p>
|
44
|
+
</li>
|
45
|
+
</ul>
|
46
|
+
|
47
|
+
<ul>
|
48
|
+
<li>
|
49
|
+
<p>li 1, p 1</p>
|
50
|
+
<p>li 1, p 2</p>
|
51
|
+
</li>
|
52
|
+
<li><p>li 2, p 1</p></li>
|
53
|
+
</ul>
|
54
|
+
|
26
55
|
</body>
|
27
56
|
</html>
|
data/spec/assets/paragraphs.html
CHANGED
@@ -6,9 +6,18 @@ describe ReverseMarkdown::Mapper do
|
|
6
6
|
let(:document) { Nokogiri::HTML(input) }
|
7
7
|
subject { ReverseMarkdown.parse_string(input) }
|
8
8
|
|
9
|
-
it {
|
10
|
-
it {
|
11
|
-
it {
|
12
|
-
|
9
|
+
it { should include ' [Foobar](http://foobar.com) ' }
|
10
|
+
it { should include ' [Fubar](http://foobar.com "f***** up beyond all recognition") ' }
|
11
|
+
it { should include ' [**Strong foobar**](http://strong.foobar.com) ' }
|
12
|
+
|
13
|
+
it { should include ' ![](http://foobar.com/logo.png) ' }
|
14
|
+
it { should include ' ![foobar image](http://foobar.com/foobar.png) ' }
|
15
|
+
it { should include ' ![foobar image 2](http://foobar.com/foobar2.png "this is the foobar image 2") ' }
|
16
|
+
|
17
|
+
context "links to ignore" do
|
18
|
+
it { should include ' ignore anchor tags with no link text ' }
|
19
|
+
it { should include ' pass through the text of internal jumplinks without treating them as links ' }
|
20
|
+
it { should include ' pass through the text of anchor tags with no href without treating them as links ' }
|
21
|
+
end
|
13
22
|
|
14
23
|
end
|
@@ -6,13 +6,32 @@ describe ReverseMarkdown::Mapper do
|
|
6
6
|
let(:document) { Nokogiri::HTML(input) }
|
7
7
|
subject { ReverseMarkdown.parse_string(input) }
|
8
8
|
|
9
|
-
it {
|
10
|
-
it {
|
11
|
-
it {
|
12
|
-
it {
|
13
|
-
it {
|
14
|
-
it { subject.should match /\*\*strong\*\*/ }
|
15
|
-
it { subject.should match /`code`/ }
|
16
|
-
it { subject.should match /---/ }
|
9
|
+
it { should match /plain text ?\n/ }
|
10
|
+
it { should match /# h1\n/ }
|
11
|
+
it { should match /## h2\n/ }
|
12
|
+
it { should match /### h3\n/ }
|
13
|
+
it { should match /#### h4\n/ }
|
17
14
|
|
15
|
+
it { should match /_em tag content_/ }
|
16
|
+
it { should match /before and after empty em tags/ }
|
17
|
+
it { should match /before and after em tags containing whitespace/ }
|
18
|
+
it { should match /_double em tags_/ }
|
19
|
+
it { should match /_double em tags in p tag_/ }
|
20
|
+
|
21
|
+
it { should match /\*\*strong tag content\*\*/ }
|
22
|
+
it { should match /before and after empty strong tags/ }
|
23
|
+
it { should match /before and after strong tags containing whitespace/ }
|
24
|
+
it { should match /\*\*double strong tags\*\*/ }
|
25
|
+
it { should match /\*\*double strong tags in p tag\*\*/ }
|
26
|
+
it { should match /before \*\* double strong tags containing whitespace \*\* after/ }
|
27
|
+
|
28
|
+
it { should match /_i tag content_/ }
|
29
|
+
it { should match /\*\*b tag content\*\*/ }
|
30
|
+
|
31
|
+
it { should match /br tags become double space followed by newline \n/ }
|
32
|
+
#it { should match /br tags XXX \n/ }
|
33
|
+
|
34
|
+
it { should match /\nbefore hr ?\n\* \* \*\n ?after hr\n/ }
|
35
|
+
|
36
|
+
it { should match /section 1\n ?\nsection 2/ }
|
18
37
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe ReverseMarkdown::Mapper do
|
4
|
+
|
5
|
+
let(:input) { File.read('spec/assets/code.html') }
|
6
|
+
let(:document) { Nokogiri::HTML(input) }
|
7
|
+
subject { ReverseMarkdown.parse_string(input) }
|
8
|
+
|
9
|
+
it { should match /inline `code` block/ }
|
10
|
+
it { should match /\ var this\;\n this\.is/ }
|
11
|
+
it { should match /block"\)\n console/ }
|
12
|
+
|
13
|
+
context "with github style code blocks" do
|
14
|
+
subject { ReverseMarkdown.parse_string(input, :github_style_code_blocks => true) }
|
15
|
+
it { should match /inline `code` block/ }
|
16
|
+
it { should match /```\nvar this\;\nthis/ }
|
17
|
+
it { should match /it is"\) ?\n\t\n```/ }
|
18
|
+
end
|
19
|
+
|
20
|
+
context "code with indentation" do
|
21
|
+
subject { ReverseMarkdown.parse_string(input) }
|
22
|
+
it { should match(/^ tell application "Foo"\n/) }
|
23
|
+
it { should match(/^ beep\n/) }
|
24
|
+
it { should match(/^ end tell\n/) }
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe ReverseMarkdown::Mapper do
|
4
|
+
|
5
|
+
let(:input) { File.read('spec/assets/escapables.html') }
|
6
|
+
let(:document) { Nokogiri::HTML(input) }
|
7
|
+
subject { ReverseMarkdown.parse_string(input) }
|
8
|
+
|
9
|
+
context "multiple asterisks" do
|
10
|
+
it { should include ' \*\*two asterisks\*\* ' }
|
11
|
+
it { should include ' \*\*\*three asterisks\*\*\* ' }
|
12
|
+
end
|
13
|
+
|
14
|
+
context "multiple underscores" do
|
15
|
+
it { should include ' \_\_two underscores\_\_ ' }
|
16
|
+
it { should include ' \_\_\_three underscores\_\_\_ ' }
|
17
|
+
end
|
18
|
+
|
19
|
+
context "underscores within words in code blocks" do
|
20
|
+
it { should include ' var theoretical_max_infin = 1.0;' }
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe ReverseMarkdown::Mapper do
|
4
|
+
|
5
|
+
let(:input) { File.read('spec/assets/from_the_wild.html') }
|
6
|
+
let(:document) { Nokogiri::HTML(input) }
|
7
|
+
subject { ReverseMarkdown.parse_string(input) }
|
8
|
+
|
9
|
+
it "should make sense of strong-crazy markup (as seen in the wild)" do
|
10
|
+
subject.should ==
|
11
|
+
'** .' + " \n" +
|
12
|
+
'\*\*\* intentcast ** : logo design' + " \n" +
|
13
|
+
'** . **'
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe ReverseMarkdown::Mapper do
|
4
|
+
|
5
|
+
let(:input) { File.read('spec/assets/html_fragment.html') }
|
6
|
+
let(:document) { Nokogiri::HTML(input) }
|
7
|
+
subject { ReverseMarkdown.parse_string(input) }
|
8
|
+
|
9
|
+
it { should == "naked text 1\n\nparagraph text\n\nnaked text 2" }
|
10
|
+
end
|
11
|
+
|
@@ -6,10 +6,40 @@ describe ReverseMarkdown::Mapper do
|
|
6
6
|
let(:document) { Nokogiri::HTML(input) }
|
7
7
|
subject { ReverseMarkdown.parse_string(input) }
|
8
8
|
|
9
|
-
it {
|
10
|
-
it {
|
11
|
-
it {
|
12
|
-
it {
|
13
|
-
it {
|
9
|
+
it { should match /\n- unordered list entry\n/ }
|
10
|
+
it { should match /\n- unordered list entry 2\n/ }
|
11
|
+
it { should match /\n1. ordered list entry\n/ }
|
12
|
+
it { should match /\n2. ordered list entry 2\n/ }
|
13
|
+
it { should match /\n1. list entry 1st hierarchy\n/ }
|
14
|
+
it { should match /\n {2}- nested unsorted list entry\n/ }
|
15
|
+
it { should match /\n {4}1. deep nested list entry\n/ }
|
16
|
+
|
17
|
+
context "nested list with no whitespace" do
|
18
|
+
it { should match /\n- item a\n/ }
|
19
|
+
it { should match /\n- item b\n/ }
|
20
|
+
it { should match /\n {2}- item bb\n/ }
|
21
|
+
it { should match /\n {2}- item bc\n/ }
|
22
|
+
end
|
23
|
+
|
24
|
+
context "nested list with lots of whitespace" do
|
25
|
+
it { should match /\n- item wa\n/ }
|
26
|
+
it { should match /\n- item wb\n/ }
|
27
|
+
it { should match /\n {2}- item wbb\n/ }
|
28
|
+
it { should match /\n {2}- item wbc\n/ }
|
29
|
+
end
|
30
|
+
|
31
|
+
context "lists containing links" do
|
32
|
+
it { should match /\n- \[1 Basic concepts\]\(Basic_concepts\)\n/ }
|
33
|
+
it { should match /\n- \[2 History of the idea\]\(History_of_the_idea\)\n/ }
|
34
|
+
it { should match /\n- \[3 Intelligence explosion\]\(Intelligence_explosion\)\n/ }
|
35
|
+
end
|
36
|
+
|
37
|
+
context "lists containing embedded <p> tags" do
|
38
|
+
xit { should match /\n- I want to have a party at my house!\n/ }
|
39
|
+
end
|
40
|
+
|
41
|
+
context "list item containing multiple <p> tags" do
|
42
|
+
xit { should match /\n- li 1, p 1\n\n- li 1, p 2\n/ }
|
43
|
+
end
|
14
44
|
|
15
45
|
end
|
@@ -6,6 +6,7 @@ describe ReverseMarkdown::Mapper do
|
|
6
6
|
let(:document) { Nokogiri::HTML(input) }
|
7
7
|
subject { ReverseMarkdown.parse_string(input) }
|
8
8
|
|
9
|
-
it {
|
10
|
-
it {
|
11
|
-
|
9
|
+
it { should_not start_with "\n\n" }
|
10
|
+
it { should start_with "First content\n\nSecond content\n\n" }
|
11
|
+
it { should include "\n\n_Complex_\n\n Content" }
|
12
|
+
end
|
@@ -6,7 +6,7 @@ describe ReverseMarkdown::Mapper do
|
|
6
6
|
let(:document) { Nokogiri::HTML(input) }
|
7
7
|
subject { ReverseMarkdown.parse_string(input) }
|
8
8
|
|
9
|
-
it {
|
10
|
-
it {
|
9
|
+
it { should include "\n Block of code" }
|
10
|
+
it { should include "\n> First quoted paragraph\n\n> Second quoted paragraph" }
|
11
11
|
|
12
12
|
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# coding:utf-8
|
2
|
+
|
3
|
+
require 'redcarpet'
|
4
|
+
require 'spec_helper'
|
5
|
+
|
6
|
+
describe 'Round trip: HTML to markdown (via reverse_markdown) to HTML (via redcarpet)' do
|
7
|
+
|
8
|
+
# helpers
|
9
|
+
|
10
|
+
def roundtrip_should_preserve(orig_html)
|
11
|
+
normalize_html(html2markdown2html orig_html).should == normalize_html(orig_html)
|
12
|
+
end
|
13
|
+
|
14
|
+
def html2markdown2html(orig_html)
|
15
|
+
markdown = ReverseMarkdown.parse_string orig_html
|
16
|
+
new_html = Redcarpet::Markdown.new(Redcarpet::Render::HTML).render(markdown)
|
17
|
+
new_html
|
18
|
+
end
|
19
|
+
|
20
|
+
def normalize_html(html)
|
21
|
+
squeeze_whitespace(html).gsub('> <', '><').strip
|
22
|
+
end
|
23
|
+
|
24
|
+
def squeeze_whitespace(string)
|
25
|
+
string.tr("\n\t", ' ').squeeze(' ').gsub(/\A \z/, '')
|
26
|
+
end
|
27
|
+
|
28
|
+
# specs
|
29
|
+
|
30
|
+
it "should preserve <blockquote> blocks" do
|
31
|
+
roundtrip_should_preserve('<blockquote><p>some text</p></blockquote>')
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should preserve unordered lists" do
|
35
|
+
roundtrip_should_preserve("
|
36
|
+
<ol>
|
37
|
+
<li>Bird</li>
|
38
|
+
<li>McHale</li>
|
39
|
+
<li>Parish</li>
|
40
|
+
</ol>
|
41
|
+
")
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should preserve ordered lists" do
|
45
|
+
roundtrip_should_preserve("
|
46
|
+
<ul>
|
47
|
+
<li>Bird</li>
|
48
|
+
<li>McHale</li>
|
49
|
+
<li>Parish</li>
|
50
|
+
</ul>
|
51
|
+
")
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should preserve <hr> tags" do
|
55
|
+
roundtrip_should_preserve("<hr>")
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should preserve <em> tags" do
|
59
|
+
roundtrip_should_preserve("<p><em>yes!</em></p>")
|
60
|
+
end
|
61
|
+
|
62
|
+
it "should preserve links inside <strong> tags" do
|
63
|
+
pending
|
64
|
+
roundtrip_should_preserve(%{<p><strong><a href="/wiki/Western_philosophy" title="Western philosophy">Western philosophy</a></strong></p>})
|
65
|
+
end
|
66
|
+
|
67
|
+
it "should preserve <strong> tags" do
|
68
|
+
roundtrip_should_preserve("<p><strong>yes!</strong></p>")
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should preserve <br> tags" do
|
72
|
+
roundtrip_should_preserve("<p>yes!<br>\n we can!</p>")
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should preserve <a> tags" do
|
76
|
+
roundtrip_should_preserve(%{<p>This is <a href="http://example.com/" title="Title">an example</a> inline link.</p>})
|
77
|
+
roundtrip_should_preserve(%{<p><a href="http://example.net/">This link</a> has no title attribute.</p>})
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should preserve <img> tags" do
|
81
|
+
roundtrip_should_preserve(%{<p><img src="http://foo.bar/dog.png" alt="My Dog" title="Ralph"></p>})
|
82
|
+
roundtrip_should_preserve(%{<p><img src="http://foo.bar/dog.png" alt="My Dog"></p>})
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should preserve code blocks" do
|
86
|
+
roundtrip_should_preserve(%{
|
87
|
+
<p>This is a normal paragraph:</p>
|
88
|
+
|
89
|
+
<pre><code>This is a code block. </code></pre>
|
90
|
+
})
|
91
|
+
end
|
92
|
+
|
93
|
+
it "should preserve code blocks with embedded whitespace" do
|
94
|
+
roundtrip_should_preserve(%{
|
95
|
+
<p>Here is an example of AppleScript:</p>
|
96
|
+
|
97
|
+
<pre><code>tell application Foo
|
98
|
+
beep
|
99
|
+
end tell
|
100
|
+
</code></pre>
|
101
|
+
})
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: reverse_markdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-11-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -75,6 +75,22 @@ dependencies:
|
|
75
75
|
- - ! '>='
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: redcarpet
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
78
94
|
description: Map simple html back into markdown, e.g. if you want to import existing
|
79
95
|
html data in your application.
|
80
96
|
email:
|
@@ -86,6 +102,7 @@ files:
|
|
86
102
|
- .gitignore
|
87
103
|
- .travis.yml
|
88
104
|
- Gemfile
|
105
|
+
- License-MIT
|
89
106
|
- README.md
|
90
107
|
- Rakefile
|
91
108
|
- lib/reverse_markdown.rb
|
@@ -95,16 +112,25 @@ files:
|
|
95
112
|
- reverse_markdown.gemspec
|
96
113
|
- spec/assets/anchors.html
|
97
114
|
- spec/assets/basic.html
|
115
|
+
- spec/assets/code.html
|
116
|
+
- spec/assets/escapables.html
|
117
|
+
- spec/assets/from_the_wild.html
|
98
118
|
- spec/assets/full_example.html
|
119
|
+
- spec/assets/html_fragment.html
|
99
120
|
- spec/assets/lists.html
|
100
121
|
- spec/assets/minimum.html
|
101
122
|
- spec/assets/paragraphs.html
|
102
123
|
- spec/assets/quotation.html
|
103
124
|
- spec/components/anchors_spec.rb
|
104
125
|
- spec/components/basic_spec.rb
|
126
|
+
- spec/components/code_spec.rb
|
127
|
+
- spec/components/escapables_spec.rb
|
128
|
+
- spec/components/from_the_wild_spec.rb
|
129
|
+
- spec/components/html_fragment_spec.rb
|
105
130
|
- spec/components/lists_spec.rb
|
106
131
|
- spec/components/paragraphs_spec.rb
|
107
132
|
- spec/components/quotation_spec.rb
|
133
|
+
- spec/html_to_markdown_to_html_spec.rb
|
108
134
|
- spec/mapper_spec.rb
|
109
135
|
- spec/reverse_markdown_spec.rb
|
110
136
|
- spec/spec_helper.rb
|
@@ -122,7 +148,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
122
148
|
version: '0'
|
123
149
|
segments:
|
124
150
|
- 0
|
125
|
-
hash: -
|
151
|
+
hash: -1957963003780104262
|
126
152
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
153
|
none: false
|
128
154
|
requirements:
|
@@ -131,7 +157,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
131
157
|
version: '0'
|
132
158
|
segments:
|
133
159
|
- 0
|
134
|
-
hash: -
|
160
|
+
hash: -1957963003780104262
|
135
161
|
requirements: []
|
136
162
|
rubyforge_project: reverse_markdown
|
137
163
|
rubygems_version: 1.8.24
|
@@ -141,16 +167,25 @@ summary: Transform html code into markdown.
|
|
141
167
|
test_files:
|
142
168
|
- spec/assets/anchors.html
|
143
169
|
- spec/assets/basic.html
|
170
|
+
- spec/assets/code.html
|
171
|
+
- spec/assets/escapables.html
|
172
|
+
- spec/assets/from_the_wild.html
|
144
173
|
- spec/assets/full_example.html
|
174
|
+
- spec/assets/html_fragment.html
|
145
175
|
- spec/assets/lists.html
|
146
176
|
- spec/assets/minimum.html
|
147
177
|
- spec/assets/paragraphs.html
|
148
178
|
- spec/assets/quotation.html
|
149
179
|
- spec/components/anchors_spec.rb
|
150
180
|
- spec/components/basic_spec.rb
|
181
|
+
- spec/components/code_spec.rb
|
182
|
+
- spec/components/escapables_spec.rb
|
183
|
+
- spec/components/from_the_wild_spec.rb
|
184
|
+
- spec/components/html_fragment_spec.rb
|
151
185
|
- spec/components/lists_spec.rb
|
152
186
|
- spec/components/paragraphs_spec.rb
|
153
187
|
- spec/components/quotation_spec.rb
|
188
|
+
- spec/html_to_markdown_to_html_spec.rb
|
154
189
|
- spec/mapper_spec.rb
|
155
190
|
- spec/reverse_markdown_spec.rb
|
156
191
|
- spec/spec_helper.rb
|