reverse_markdown 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/.travis.yml CHANGED
@@ -10,3 +10,4 @@ notifications:
10
10
  disabled: false
11
11
  recipients:
12
12
  - xijo@gmx.de
13
+ - code@harlantwood.net
data/License-MIT ADDED
@@ -0,0 +1,7 @@
1
+ Copyright (c) 2012 Johannes Opper
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Transform existing html into markdown in a simple way, for example if you want to import existing tags into your markdown-based application.
4
4
 
5
- [![reverse_markdown build status](http://travis-ci.org/xijo/reverse_markdown.png)](http://travis-ci.org/#!/xijo/reverse_markdown)
5
+ [![Build Status](https://secure.travis-ci.org/xijo/reverse_markdown.png?branch=master)](https://travis-ci.org/xijo/reverse_markdown)
6
6
 
7
7
  # Installation
8
8
 
@@ -46,4 +46,4 @@ Only basic html tags are supported right now. However, it should not be to diffi
46
46
 
47
47
  # Thanks
48
48
 
49
- ..to Ben Woosley for his improvements to the first version.
49
+ ..to Ben Woosley for his improvements to the first version.
@@ -5,13 +5,13 @@ require 'nokogiri'
5
5
 
6
6
  module ReverseMarkdown
7
7
 
8
- def self.parse(input)
8
+ def self.parse(input, opts={})
9
9
  root = case input
10
10
  when String then Nokogiri::HTML(input).root
11
11
  when Nokogiri::XML::Document then input.root
12
12
  when Nokogiri::XML::Node then input
13
13
  end
14
- ReverseMarkdown::Mapper.new.process_element(root)
14
+ ReverseMarkdown::Mapper.new(opts).process_root(root)
15
15
  end
16
16
 
17
17
  # 2012/08/11 joe: possibly deprecate in favour of #parse
@@ -3,29 +3,76 @@ module ReverseMarkdown
3
3
  attr_accessor :raise_errors
4
4
  attr_accessor :log_enabled, :log_level
5
5
  attr_accessor :li_counter
6
+ attr_accessor :github_style_code_blocks
6
7
 
7
- def initialize
8
+ def initialize(opts={})
8
9
  self.log_level = :info
9
10
  self.log_enabled = true
10
11
  self.li_counter = 0
12
+ self.github_style_code_blocks = opts[:github_style_code_blocks] || false
13
+ end
14
+
15
+ def process_root(element)
16
+ markdown = process_element(element) # recursively process all elements to get full markdown
17
+
18
+ # Extract github style code blocks
19
+ extractions = {}
20
+ markdown.gsub!(%r{```.*?```}m) do |match|
21
+ md5 = Digest::MD5.hexdigest(match)
22
+ extractions[md5] = match
23
+ "{code-block-extraction-#{md5}}"
24
+ end
25
+
26
+ markdown = markdown.split("\n").map do |line|
27
+ if line.match(/^( {4}|\t)/)
28
+ line
29
+ else
30
+ "#{ ' ' if line.match(/^ {2,3}/) }" +
31
+ normalize_whitespace(line).strip +
32
+ "#{ ' ' if line.match(/ {2}$/) }"
33
+ end
34
+ end.join("\n")
35
+
36
+ markdown.gsub!(/\n{3,}/, "\n\n")
37
+
38
+ # Insert pre block extractions
39
+ markdown.gsub!(/\{code-block-extraction-([0-9a-f]{32})\}/){ extractions[$1] }
40
+
41
+ markdown
11
42
  end
12
43
 
13
44
  def process_element(element)
14
45
  output = ''
15
- output << if element.text?
16
- element.text.strip
46
+ if element.text?
47
+ text = process_text(element)
48
+ if output.end_with?(' ') && text.start_with?(' ')
49
+ output << text.lstrip
50
+ else
51
+ output << text
52
+ end
17
53
  else
18
- opening(element)
19
- end
20
- element.children.each do |child|
21
- output << process_element(child)
54
+ output << opening(element).to_s
55
+
56
+ markdown_chunks = element.children.map { |child| process_element(child) }
57
+ remove_adjacent_whitespace!(markdown_chunks)
58
+ output << markdown_chunks.join
59
+
60
+ output << ending(element).to_s
22
61
  end
23
- output << ending(element) unless element.text?
24
62
  output
25
63
  end
26
64
 
27
65
  private
28
66
 
67
+ # removes whitespace-only chunk if the previous chunk ends with whitespace
68
+ def remove_adjacent_whitespace!(chunks)
69
+ (chunks.size - 1).downto(1).each do |i|
70
+ chunk = chunks[i]
71
+ previous_chunk = chunks[i-1]
72
+ chunks.delete_at(i) if chunk == ' ' && previous_chunk.end_with?(' ')
73
+ end
74
+ end
75
+
29
76
  def opening(element)
30
77
  parent = element.parent ? element.parent.name.to_sym : nil
31
78
  case element.name.to_sym
@@ -45,29 +92,49 @@ module ReverseMarkdown
45
92
  "\n"
46
93
  when :ul, :root#, :p
47
94
  "\n"
95
+ when :div
96
+ "\n"
48
97
  when :p
49
98
  if element.ancestors.map(&:name).include?('blockquote')
50
99
  "\n\n> "
100
+ elsif [nil, :body].include? parent
101
+ is_first = true
102
+ previous = element.previous
103
+ while is_first == true and previous do
104
+ is_first = false unless previous.content.strip == "" || previous.text?
105
+ previous = previous.previous
106
+ end
107
+ is_first ? "" : "\n\n"
51
108
  else
52
109
  "\n\n"
53
110
  end
54
111
  when :h1, :h2, :h3, :h4 # /h(\d)/ for 1.9
55
112
  element.name =~ /h(\d)/
56
- '#' * $1.to_i + ' '
57
- when :em
58
- "*"
59
- when :strong
60
- "**"
113
+ "\n" + ('#' * $1.to_i) + ' '
114
+ when :em, :i
115
+ element.text.strip.empty? ? '' : '_' if (element.ancestors('em') + element.ancestors('i')).empty?
116
+ when :strong, :b
117
+ element.text.strip.empty? ? '' : '**' if (element.ancestors('strong') + element.ancestors('b')).empty?
61
118
  when :blockquote
62
119
  "> "
63
120
  when :code
64
- parent == :pre ? " " : "`"
121
+ if parent == :pre
122
+ self.github_style_code_blocks ? "\n```\n" : "\n "
123
+ else
124
+ " `"
125
+ end
65
126
  when :a
66
- "["
127
+ if !element.text.strip.empty? && element['href'] && !element['href'].start_with?('#')
128
+ " ["
129
+ else
130
+ " "
131
+ end
67
132
  when :img
68
- "!["
133
+ " !["
69
134
  when :hr
70
- "----------\n\n"
135
+ "\n* * *\n"
136
+ when :br
137
+ " \n"
71
138
  else
72
139
  handle_error "unknown start tag: #{element.name.to_s}"
73
140
  ""
@@ -77,32 +144,69 @@ module ReverseMarkdown
77
144
  def ending(element)
78
145
  parent = element.parent ? element.parent.name.to_sym : nil
79
146
  case element.name.to_sym
80
- when :html, :body, :pre, :hr, :p
147
+ when :html, :body, :pre, :hr
81
148
  ""
149
+ when :p
150
+ "\n\n"
151
+ when :div
152
+ "\n"
82
153
  when :h1, :h2, :h3, :h4 # /h(\d)/ for 1.9
83
154
  "\n"
84
- when :em
85
- '*'
86
- when :strong
87
- '**'
155
+ when :em, :i
156
+ element.text.strip.empty? ? '' : '_' if (element.ancestors('em') + element.ancestors('i')).empty?
157
+ when :strong, :b
158
+ element.text.strip.empty? ? '' : '**' if (element.ancestors('strong') + element.ancestors('b')).empty?
88
159
  when :li, :blockquote, :root, :ol, :ul
89
160
  "\n"
90
161
  when :code
91
- parent == :pre ? '' : '`'
162
+ if parent == :pre
163
+ self.github_style_code_blocks ? "\n```" : "\n"
164
+ else
165
+ '` '
166
+ end
92
167
  when :a
93
- "](#{element.attribute('href').to_s}) "
94
- when :img
95
- if element.has_attribute?('alt')
96
- "#{element.attribute('alt')}][#{element.attribute('src')}] "
168
+ if !element.text.strip.empty? && element['href'] && !element['href'].start_with?('#')
169
+ "](#{element['href']}#{title_markdown(element)}) "
97
170
  else
98
- "#{element.attribute('src')}] "
171
+ ""
99
172
  end
173
+ when :img
174
+ "#{element['alt']}](#{element['src']}#{title_markdown(element)}) "
100
175
  else
101
176
  handle_error "unknown end tag: #{element.name}"
102
177
  ""
103
178
  end
104
179
  end
105
180
 
181
+ def title_markdown(element)
182
+ title = element['title']
183
+ title ? %[ "#{title}"] : ''
184
+ end
185
+
186
+ def process_text(element)
187
+ parent = element.parent ? element.parent.name.to_sym : nil
188
+ case
189
+ when parent == :code
190
+ if self.github_style_code_blocks
191
+ element.text
192
+ else
193
+ element.text.strip.gsub(/\n/,"\n ")
194
+ end
195
+ else
196
+ normalize_whitespace(escape_text(element.text))
197
+ end
198
+ end
199
+
200
+ def normalize_whitespace(text)
201
+ text.tr("\n\t", ' ').squeeze(' ')
202
+ end
203
+
204
+ def escape_text(text)
205
+ text.
206
+ gsub('*', '\*').
207
+ gsub('_', '\_')
208
+ end
209
+
106
210
  def handle_error(message)
107
211
  if raise_errors
108
212
  raise ReverseMarkdown::ParserError, message
@@ -1,3 +1,3 @@
1
1
  module ReverseMarkdown
2
- VERSION = "0.3.0"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -23,5 +23,5 @@ Gem::Specification.new do |s|
23
23
  s.add_development_dependency 'rspec'
24
24
  s.add_development_dependency 'simplecov'
25
25
  s.add_development_dependency 'rake'
26
-
26
+ s.add_development_dependency 'redcarpet'
27
27
  end
@@ -1,10 +1,19 @@
1
1
  <html>
2
2
  <body>
3
+ some text...
3
4
  <a href="http://foobar.com">Foobar</a>
4
- <a href="http://strong.foobar.com">
5
- <strong>Strong foobar</strong>
6
- </a>
5
+ <a href="http://foobar.com" title="f***** up beyond all recognition">Fubar</a>
6
+ <a href="http://strong.foobar.com"><strong>Strong foobar</strong></a>
7
+
8
+ ignore <a href="foo.html"> </a> anchor tags with no link text
9
+ pass through the text of <a href="#content">internal jumplinks</a> without treating them as links
10
+ pass through the text of <a id="content">anchor tags with no href</a> without treating them as links
11
+
12
+ some text...
13
+
7
14
  <img src="http://foobar.com/logo.png">
8
15
  <img alt="foobar image" src="http://foobar.com/foobar.png">
16
+ <img alt="foobar image 2" title="this is the foobar image 2" src="http://foobar.com/foobar2.png">
17
+ some text...
9
18
  </body>
10
19
  </html>
@@ -1,12 +1,42 @@
1
1
  <html>
2
2
  <body>
3
+ plain text
3
4
  <h1>h1</h1>
4
5
  <h2>h2</h2>
5
6
  <h3>h3</h3>
6
7
  <h4>h4</h4>
7
- <em>em</em>
8
- <strong>strong</strong>
9
- <code>code</code>
8
+
9
+ <em>em tag content</em>
10
+ before <em></em> and after empty em tags
11
+ before <em> </em> and after em tags containing whitespace
12
+ before <em> <em> <br /> </em> </em> and after em tags containing whitespace
13
+ <em><em>double em tags</em></em>
14
+ <p><em><em>double em tags in p tag</em></em></p>
15
+
16
+ <strong>strong tag content</strong>
17
+ before <strong></strong> and after empty strong tags
18
+ before <strong> </strong> and after strong tags containing whitespace
19
+ before <strong> <strong> <br /> </strong> </strong> and after strong tags containing whitespace
20
+ <strong><strong>double strong tags</strong></strong>
21
+ <p><strong><strong>double strong tags in p tag</strong></strong></p>
22
+ before
23
+ <strong>
24
+ <strong>
25
+ double strong tags containing whitespace
26
+ </strong>
27
+ </strong> after
28
+
29
+ <b>b tag content</b>
30
+ <i>i tag content</i>
31
+
32
+ br tags become double space followed by newline<br/>
33
+
34
+ before hr
10
35
  <hr/>
36
+ after hr
37
+
38
+ <div>section 1</div>
39
+ <div>section 2</div>
40
+
11
41
  </body>
12
42
  </html>
@@ -0,0 +1,22 @@
1
+ <html>
2
+ <body>
3
+ <pre>pre block</pre>
4
+ <code>code block</code>
5
+ <pre><code>pre code block</code></pre>
6
+
7
+ <p>Paragraph with inline <code>code</code> block</p>
8
+
9
+ <pre><code>var this;
10
+ this.is("A multi line code block")
11
+ console.log("Yup, it is")
12
+ </code></pre>
13
+
14
+ Code with indentation:
15
+ <pre><code>tell application "Foo"
16
+ beep
17
+ end tell
18
+ </code></pre>
19
+
20
+ </body>
21
+ </html>
22
+
@@ -0,0 +1,15 @@
1
+ <html>
2
+ <body>
3
+ some text...
4
+
5
+ **two asterisks**
6
+ ***three asterisks***
7
+ __two underscores__
8
+ ___three underscores___
9
+
10
+ some text...
11
+
12
+ <pre><code>var theoretical_max_infin = 1.0;</code></pre>
13
+
14
+ </body>
15
+ </html>
@@ -0,0 +1,19 @@
1
+ <p>
2
+ <strong>
3
+ <strong>
4
+ .<br />
5
+ </strong>
6
+ *** intentcast
7
+ </strong>
8
+ : logo design
9
+ <strong>
10
+ <strong>
11
+ <br />
12
+ </strong>
13
+ </strong>
14
+ <strong>
15
+ <strong>
16
+ .
17
+ </strong>
18
+ </strong>
19
+ </p>
@@ -0,0 +1,3 @@
1
+ naked text 1
2
+ <p>paragraph text</p>
3
+ naked text 2
@@ -1,13 +1,15 @@
1
1
  <html>
2
2
  <body>
3
+ some text...
4
+
3
5
  <ul>
4
6
  <li>unordered list entry</li>
5
- <li>unordered list entry</li>
7
+ <li>unordered list entry 2</li>
6
8
  </ul>
7
9
 
8
10
  <ol>
9
11
  <li>ordered list entry</li>
10
- <li>ordered list entry</li>
12
+ <li>ordered list entry 2</li>
11
13
  </ol>
12
14
 
13
15
  <ol>
@@ -18,10 +20,37 @@
18
20
  <li>
19
21
  <ol>
20
22
  <li>deep nested list entry</li>
21
- <ol>
23
+ </ol>
22
24
  </li>
23
25
  </ul>
24
26
  </li>
25
27
  </ol>
28
+
29
+ a nested list with no whitespace:
30
+ <ul><li>item a</li><li>item b<ul><li>item bb</li><li>item bc</li></ul></li></ul>
31
+
32
+ a nested list with lots of whitespace:
33
+ <ul> <li> item wa </li> <li> item wb <ul> <li> item wbb </li> <li> item wbc </li> </ul> </li> </ul>
34
+
35
+ <ul>
36
+ <li class="toclevel-1 tocsection-1"><a href="Basic_concepts"><span class="tocnumber">1</span> <span class="toctext">Basic concepts</span></a></li>
37
+ <li class="toclevel-1 tocsection-2"><a href="History_of_the_idea"><span class="tocnumber">2</span> <span class="toctext">History of the idea</span></a></li>
38
+ <li class="toclevel-1 tocsection-3"><a href="Intelligence_explosion"><span class="tocnumber">3</span> <span class="toctext">Intelligence explosion</span></a>
39
+ </ul>
40
+
41
+ <ul>
42
+ <li>
43
+ <p dir="ltr">I want to have a party at my house!</p>
44
+ </li>
45
+ </ul>
46
+
47
+ <ul>
48
+ <li>
49
+ <p>li 1, p 1</p>
50
+ <p>li 1, p 2</p>
51
+ </li>
52
+ <li><p>li 2, p 1</p></li>
53
+ </ul>
54
+
26
55
  </body>
27
56
  </html>
@@ -1,7 +1,10 @@
1
1
  <html>
2
2
  <body>
3
3
  <p>First content</p>
4
- <p>Second content</p>
4
+ <p>
5
+ Second
6
+ content
7
+ </p>
5
8
  <p>
6
9
  <em>Complex</em>
7
10
  <pre>
@@ -6,9 +6,18 @@ describe ReverseMarkdown::Mapper do
6
6
  let(:document) { Nokogiri::HTML(input) }
7
7
  subject { ReverseMarkdown.parse_string(input) }
8
8
 
9
- it { subject.should include '[Foobar](http://foobar.com)' }
10
- it { subject.should include '[**Strong foobar**](http://strong.foobar.com)' }
11
- it { subject.should include '![http://foobar.com/logo.png]' }
12
- it { subject.should include '![foobar image][http://foobar.com/foobar.png]' }
9
+ it { should include ' [Foobar](http://foobar.com) ' }
10
+ it { should include ' [Fubar](http://foobar.com "f***** up beyond all recognition") ' }
11
+ it { should include ' [**Strong foobar**](http://strong.foobar.com) ' }
12
+
13
+ it { should include ' ![](http://foobar.com/logo.png) ' }
14
+ it { should include ' ![foobar image](http://foobar.com/foobar.png) ' }
15
+ it { should include ' ![foobar image 2](http://foobar.com/foobar2.png "this is the foobar image 2") ' }
16
+
17
+ context "links to ignore" do
18
+ it { should include ' ignore anchor tags with no link text ' }
19
+ it { should include ' pass through the text of internal jumplinks without treating them as links ' }
20
+ it { should include ' pass through the text of anchor tags with no href without treating them as links ' }
21
+ end
13
22
 
14
23
  end
@@ -6,13 +6,32 @@ describe ReverseMarkdown::Mapper do
6
6
  let(:document) { Nokogiri::HTML(input) }
7
7
  subject { ReverseMarkdown.parse_string(input) }
8
8
 
9
- it { subject.should match /# h1\n/ }
10
- it { subject.should match /## h2\n/ }
11
- it { subject.should match /### h3\n/ }
12
- it { subject.should match /#### h4\n/ }
13
- it { subject.should match /\*em\*/ }
14
- it { subject.should match /\*\*strong\*\*/ }
15
- it { subject.should match /`code`/ }
16
- it { subject.should match /---/ }
9
+ it { should match /plain text ?\n/ }
10
+ it { should match /# h1\n/ }
11
+ it { should match /## h2\n/ }
12
+ it { should match /### h3\n/ }
13
+ it { should match /#### h4\n/ }
17
14
 
15
+ it { should match /_em tag content_/ }
16
+ it { should match /before and after empty em tags/ }
17
+ it { should match /before and after em tags containing whitespace/ }
18
+ it { should match /_double em tags_/ }
19
+ it { should match /_double em tags in p tag_/ }
20
+
21
+ it { should match /\*\*strong tag content\*\*/ }
22
+ it { should match /before and after empty strong tags/ }
23
+ it { should match /before and after strong tags containing whitespace/ }
24
+ it { should match /\*\*double strong tags\*\*/ }
25
+ it { should match /\*\*double strong tags in p tag\*\*/ }
26
+ it { should match /before \*\* double strong tags containing whitespace \*\* after/ }
27
+
28
+ it { should match /_i tag content_/ }
29
+ it { should match /\*\*b tag content\*\*/ }
30
+
31
+ it { should match /br tags become double space followed by newline \n/ }
32
+ #it { should match /br tags XXX \n/ }
33
+
34
+ it { should match /\nbefore hr ?\n\* \* \*\n ?after hr\n/ }
35
+
36
+ it { should match /section 1\n ?\nsection 2/ }
18
37
  end
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
+ describe ReverseMarkdown::Mapper do
4
+
5
+ let(:input) { File.read('spec/assets/code.html') }
6
+ let(:document) { Nokogiri::HTML(input) }
7
+ subject { ReverseMarkdown.parse_string(input) }
8
+
9
+ it { should match /inline `code` block/ }
10
+ it { should match /\ var this\;\n this\.is/ }
11
+ it { should match /block"\)\n console/ }
12
+
13
+ context "with github style code blocks" do
14
+ subject { ReverseMarkdown.parse_string(input, :github_style_code_blocks => true) }
15
+ it { should match /inline `code` block/ }
16
+ it { should match /```\nvar this\;\nthis/ }
17
+ it { should match /it is"\) ?\n\t\n```/ }
18
+ end
19
+
20
+ context "code with indentation" do
21
+ subject { ReverseMarkdown.parse_string(input) }
22
+ it { should match(/^ tell application "Foo"\n/) }
23
+ it { should match(/^ beep\n/) }
24
+ it { should match(/^ end tell\n/) }
25
+ end
26
+
27
+ end
28
+
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
+ describe ReverseMarkdown::Mapper do
4
+
5
+ let(:input) { File.read('spec/assets/escapables.html') }
6
+ let(:document) { Nokogiri::HTML(input) }
7
+ subject { ReverseMarkdown.parse_string(input) }
8
+
9
+ context "multiple asterisks" do
10
+ it { should include ' \*\*two asterisks\*\* ' }
11
+ it { should include ' \*\*\*three asterisks\*\*\* ' }
12
+ end
13
+
14
+ context "multiple underscores" do
15
+ it { should include ' \_\_two underscores\_\_ ' }
16
+ it { should include ' \_\_\_three underscores\_\_\_ ' }
17
+ end
18
+
19
+ context "underscores within words in code blocks" do
20
+ it { should include ' var theoretical_max_infin = 1.0;' }
21
+ end
22
+ end
@@ -0,0 +1,16 @@
1
+ require 'spec_helper'
2
+
3
+ describe ReverseMarkdown::Mapper do
4
+
5
+ let(:input) { File.read('spec/assets/from_the_wild.html') }
6
+ let(:document) { Nokogiri::HTML(input) }
7
+ subject { ReverseMarkdown.parse_string(input) }
8
+
9
+ it "should make sense of strong-crazy markup (as seen in the wild)" do
10
+ subject.should ==
11
+ '** .' + " \n" +
12
+ '\*\*\* intentcast ** : logo design' + " \n" +
13
+ '** . **'
14
+ end
15
+
16
+ end
@@ -0,0 +1,11 @@
1
+ require 'spec_helper'
2
+
3
+ describe ReverseMarkdown::Mapper do
4
+
5
+ let(:input) { File.read('spec/assets/html_fragment.html') }
6
+ let(:document) { Nokogiri::HTML(input) }
7
+ subject { ReverseMarkdown.parse_string(input) }
8
+
9
+ it { should == "naked text 1\n\nparagraph text\n\nnaked text 2" }
10
+ end
11
+
@@ -6,10 +6,40 @@ describe ReverseMarkdown::Mapper do
6
6
  let(:document) { Nokogiri::HTML(input) }
7
7
  subject { ReverseMarkdown.parse_string(input) }
8
8
 
9
- it { subject.should match /- unordered list entry\n/ }
10
- it { subject.should match /1. ordered list entry\n/ }
11
- it { subject.should match /1. list entry 1st hierarchy\n/ }
12
- it { subject.should match /\s{2}- nested unsorted list entry/ }
13
- it { subject.should match /\s{4}1. deep nested list entry/ }
9
+ it { should match /\n- unordered list entry\n/ }
10
+ it { should match /\n- unordered list entry 2\n/ }
11
+ it { should match /\n1. ordered list entry\n/ }
12
+ it { should match /\n2. ordered list entry 2\n/ }
13
+ it { should match /\n1. list entry 1st hierarchy\n/ }
14
+ it { should match /\n {2}- nested unsorted list entry\n/ }
15
+ it { should match /\n {4}1. deep nested list entry\n/ }
16
+
17
+ context "nested list with no whitespace" do
18
+ it { should match /\n- item a\n/ }
19
+ it { should match /\n- item b\n/ }
20
+ it { should match /\n {2}- item bb\n/ }
21
+ it { should match /\n {2}- item bc\n/ }
22
+ end
23
+
24
+ context "nested list with lots of whitespace" do
25
+ it { should match /\n- item wa\n/ }
26
+ it { should match /\n- item wb\n/ }
27
+ it { should match /\n {2}- item wbb\n/ }
28
+ it { should match /\n {2}- item wbc\n/ }
29
+ end
30
+
31
+ context "lists containing links" do
32
+ it { should match /\n- \[1 Basic concepts\]\(Basic_concepts\)\n/ }
33
+ it { should match /\n- \[2 History of the idea\]\(History_of_the_idea\)\n/ }
34
+ it { should match /\n- \[3 Intelligence explosion\]\(Intelligence_explosion\)\n/ }
35
+ end
36
+
37
+ context "lists containing embedded <p> tags" do
38
+ xit { should match /\n- I want to have a party at my house!\n/ }
39
+ end
40
+
41
+ context "list item containing multiple <p> tags" do
42
+ xit { should match /\n- li 1, p 1\n\n- li 1, p 2\n/ }
43
+ end
14
44
 
15
45
  end
@@ -6,6 +6,7 @@ describe ReverseMarkdown::Mapper do
6
6
  let(:document) { Nokogiri::HTML(input) }
7
7
  subject { ReverseMarkdown.parse_string(input) }
8
8
 
9
- it { subject.should match /First content\n\nSecond content\n\n/ }
10
- it { subject.should include "\n\n*Complex*\n Content" }
11
- end
9
+ it { should_not start_with "\n\n" }
10
+ it { should start_with "First content\n\nSecond content\n\n" }
11
+ it { should include "\n\n_Complex_\n\n Content" }
12
+ end
@@ -6,7 +6,7 @@ describe ReverseMarkdown::Mapper do
6
6
  let(:document) { Nokogiri::HTML(input) }
7
7
  subject { ReverseMarkdown.parse_string(input) }
8
8
 
9
- it { subject.should include "\n Block of code" }
10
- it { subject.should include "\n> First quoted paragraph\n\n> Second quoted paragraph" }
9
+ it { should include "\n Block of code" }
10
+ it { should include "\n> First quoted paragraph\n\n> Second quoted paragraph" }
11
11
 
12
12
  end
@@ -0,0 +1,104 @@
1
+ # coding:utf-8
2
+
3
+ require 'redcarpet'
4
+ require 'spec_helper'
5
+
6
+ describe 'Round trip: HTML to markdown (via reverse_markdown) to HTML (via redcarpet)' do
7
+
8
+ # helpers
9
+
10
+ def roundtrip_should_preserve(orig_html)
11
+ normalize_html(html2markdown2html orig_html).should == normalize_html(orig_html)
12
+ end
13
+
14
+ def html2markdown2html(orig_html)
15
+ markdown = ReverseMarkdown.parse_string orig_html
16
+ new_html = Redcarpet::Markdown.new(Redcarpet::Render::HTML).render(markdown)
17
+ new_html
18
+ end
19
+
20
+ def normalize_html(html)
21
+ squeeze_whitespace(html).gsub('> <', '><').strip
22
+ end
23
+
24
+ def squeeze_whitespace(string)
25
+ string.tr("\n\t", ' ').squeeze(' ').gsub(/\A \z/, '')
26
+ end
27
+
28
+ # specs
29
+
30
+ it "should preserve <blockquote> blocks" do
31
+ roundtrip_should_preserve('<blockquote><p>some text</p></blockquote>')
32
+ end
33
+
34
+ it "should preserve unordered lists" do
35
+ roundtrip_should_preserve("
36
+ <ol>
37
+ <li>Bird</li>
38
+ <li>McHale</li>
39
+ <li>Parish</li>
40
+ </ol>
41
+ ")
42
+ end
43
+
44
+ it "should preserve ordered lists" do
45
+ roundtrip_should_preserve("
46
+ <ul>
47
+ <li>Bird</li>
48
+ <li>McHale</li>
49
+ <li>Parish</li>
50
+ </ul>
51
+ ")
52
+ end
53
+
54
+ it "should preserve <hr> tags" do
55
+ roundtrip_should_preserve("<hr>")
56
+ end
57
+
58
+ it "should preserve <em> tags" do
59
+ roundtrip_should_preserve("<p><em>yes!</em></p>")
60
+ end
61
+
62
+ it "should preserve links inside <strong> tags" do
63
+ pending
64
+ roundtrip_should_preserve(%{<p><strong><a href="/wiki/Western_philosophy" title="Western philosophy">Western philosophy</a></strong></p>})
65
+ end
66
+
67
+ it "should preserve <strong> tags" do
68
+ roundtrip_should_preserve("<p><strong>yes!</strong></p>")
69
+ end
70
+
71
+ it "should preserve <br> tags" do
72
+ roundtrip_should_preserve("<p>yes!<br>\n we can!</p>")
73
+ end
74
+
75
+ it "should preserve <a> tags" do
76
+ roundtrip_should_preserve(%{<p>This is <a href="http://example.com/" title="Title">an example</a> inline link.</p>})
77
+ roundtrip_should_preserve(%{<p><a href="http://example.net/">This link</a> has no title attribute.</p>})
78
+ end
79
+
80
+ it "should preserve <img> tags" do
81
+ roundtrip_should_preserve(%{<p><img src="http://foo.bar/dog.png" alt="My Dog" title="Ralph"></p>})
82
+ roundtrip_should_preserve(%{<p><img src="http://foo.bar/dog.png" alt="My Dog"></p>})
83
+ end
84
+
85
+ it "should preserve code blocks" do
86
+ roundtrip_should_preserve(%{
87
+ <p>This is a normal paragraph:</p>
88
+
89
+ <pre><code>This is a code block. </code></pre>
90
+ })
91
+ end
92
+
93
+ it "should preserve code blocks with embedded whitespace" do
94
+ roundtrip_should_preserve(%{
95
+ <p>Here is an example of AppleScript:</p>
96
+
97
+ <pre><code>tell application Foo
98
+ beep
99
+ end tell
100
+ </code></pre>
101
+ })
102
+ end
103
+ end
104
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: reverse_markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-13 00:00:00.000000000 Z
12
+ date: 2012-11-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -75,6 +75,22 @@ dependencies:
75
75
  - - ! '>='
76
76
  - !ruby/object:Gem::Version
77
77
  version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: redcarpet
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
78
94
  description: Map simple html back into markdown, e.g. if you want to import existing
79
95
  html data in your application.
80
96
  email:
@@ -86,6 +102,7 @@ files:
86
102
  - .gitignore
87
103
  - .travis.yml
88
104
  - Gemfile
105
+ - License-MIT
89
106
  - README.md
90
107
  - Rakefile
91
108
  - lib/reverse_markdown.rb
@@ -95,16 +112,25 @@ files:
95
112
  - reverse_markdown.gemspec
96
113
  - spec/assets/anchors.html
97
114
  - spec/assets/basic.html
115
+ - spec/assets/code.html
116
+ - spec/assets/escapables.html
117
+ - spec/assets/from_the_wild.html
98
118
  - spec/assets/full_example.html
119
+ - spec/assets/html_fragment.html
99
120
  - spec/assets/lists.html
100
121
  - spec/assets/minimum.html
101
122
  - spec/assets/paragraphs.html
102
123
  - spec/assets/quotation.html
103
124
  - spec/components/anchors_spec.rb
104
125
  - spec/components/basic_spec.rb
126
+ - spec/components/code_spec.rb
127
+ - spec/components/escapables_spec.rb
128
+ - spec/components/from_the_wild_spec.rb
129
+ - spec/components/html_fragment_spec.rb
105
130
  - spec/components/lists_spec.rb
106
131
  - spec/components/paragraphs_spec.rb
107
132
  - spec/components/quotation_spec.rb
133
+ - spec/html_to_markdown_to_html_spec.rb
108
134
  - spec/mapper_spec.rb
109
135
  - spec/reverse_markdown_spec.rb
110
136
  - spec/spec_helper.rb
@@ -122,7 +148,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
122
148
  version: '0'
123
149
  segments:
124
150
  - 0
125
- hash: -1059485323114314033
151
+ hash: -1957963003780104262
126
152
  required_rubygems_version: !ruby/object:Gem::Requirement
127
153
  none: false
128
154
  requirements:
@@ -131,7 +157,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
131
157
  version: '0'
132
158
  segments:
133
159
  - 0
134
- hash: -1059485323114314033
160
+ hash: -1957963003780104262
135
161
  requirements: []
136
162
  rubyforge_project: reverse_markdown
137
163
  rubygems_version: 1.8.24
@@ -141,16 +167,25 @@ summary: Transform html code into markdown.
141
167
  test_files:
142
168
  - spec/assets/anchors.html
143
169
  - spec/assets/basic.html
170
+ - spec/assets/code.html
171
+ - spec/assets/escapables.html
172
+ - spec/assets/from_the_wild.html
144
173
  - spec/assets/full_example.html
174
+ - spec/assets/html_fragment.html
145
175
  - spec/assets/lists.html
146
176
  - spec/assets/minimum.html
147
177
  - spec/assets/paragraphs.html
148
178
  - spec/assets/quotation.html
149
179
  - spec/components/anchors_spec.rb
150
180
  - spec/components/basic_spec.rb
181
+ - spec/components/code_spec.rb
182
+ - spec/components/escapables_spec.rb
183
+ - spec/components/from_the_wild_spec.rb
184
+ - spec/components/html_fragment_spec.rb
151
185
  - spec/components/lists_spec.rb
152
186
  - spec/components/paragraphs_spec.rb
153
187
  - spec/components/quotation_spec.rb
188
+ - spec/html_to_markdown_to_html_spec.rb
154
189
  - spec/mapper_spec.rb
155
190
  - spec/reverse_markdown_spec.rb
156
191
  - spec/spec_helper.rb