RubyGems - reverse_markdown - Versions diffs - 0.3.0 → 0.4.0 - Mend

reverse_markdown 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

data/.travis.yml +1 -0
data/License-MIT +7 -0
data/README.md +2 -2
data/lib/reverse_markdown.rb +2 -2
data/lib/reverse_markdown/mapper.rb +132 -28
data/lib/reverse_markdown/version.rb +1 -1
data/reverse_markdown.gemspec +1 -1
data/spec/assets/anchors.html +12 -3
data/spec/assets/basic.html +33 -3
data/spec/assets/code.html +22 -0
data/spec/assets/escapables.html +15 -0
data/spec/assets/from_the_wild.html +19 -0
data/spec/assets/html_fragment.html +3 -0
data/spec/assets/lists.html +32 -3
data/spec/assets/paragraphs.html +4 -1
data/spec/components/anchors_spec.rb +13 -4
data/spec/components/basic_spec.rb +27 -8
data/spec/components/code_spec.rb +28 -0
data/spec/components/escapables_spec.rb +22 -0
data/spec/components/from_the_wild_spec.rb +16 -0
data/spec/components/html_fragment_spec.rb +11 -0
data/spec/components/lists_spec.rb +35 -5
data/spec/components/paragraphs_spec.rb +4 -3
data/spec/components/quotation_spec.rb +2 -2
data/spec/html_to_markdown_to_html_spec.rb +104 -0
metadata +39 -4

data/.travis.yml CHANGED Viewed

@@ -10,3 +10,4 @@ notifications:
   disabled: false
   recipients:
     - xijo@gmx.de
+    - code@harlantwood.net

data/License-MIT ADDED Viewed

@@ -0,0 +1,7 @@
+Copyright (c) 2012 Johannes Opper
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 Transform existing html into markdown in a simple way, for example if you want to import existings tags into your markdown based application.
-[![reverse_markdown build status](http://travis-ci.org/xijo/reverse_markdown.png)](http://travis-ci.org/#!/xijo/reverse_markdown)
+[![Build Status](https://secure.travis-ci.org/xijo/reverse_markdown.png?branch=master)](https://travis-ci.org/xijo/reverse_markdown)
 # Installation
@@ -46,4 +46,4 @@ Only basic html tags are supported right now. However, it should not be to diffi
 # Thanks
-..to Ben Woosley for his improvements to the first version.
+..to Ben Woosley for his improvements to the first version.

data/lib/reverse_markdown.rb CHANGED Viewed

@@ -5,13 +5,13 @@ require 'nokogiri'
 module ReverseMarkdown
-  def self.parse(input)
+  def self.parse(input, opts={})
     root = case input
       when String                  then Nokogiri::HTML(input).root
       when Nokogiri::XML::Document then input.root
       when Nokogiri::XML::Node     then input
     end
-    ReverseMarkdown::Mapper.new.process_element(root)
+    ReverseMarkdown::Mapper.new(opts).process_root(root)
   end
   # 2012/08/11 joe: possibly deprecate in favour of #parse

data/lib/reverse_markdown/mapper.rb CHANGED Viewed

@@ -3,29 +3,76 @@ module ReverseMarkdown
     attr_accessor :raise_errors
     attr_accessor :log_enabled, :log_level
     attr_accessor :li_counter
+    attr_accessor :github_style_code_blocks
-    def initialize
+    def initialize(opts={})
       self.log_level   = :info
       self.log_enabled = true
       self.li_counter  = 0
+      self.github_style_code_blocks = opts[:github_style_code_blocks] || false
+    end
+    def process_root(element)
+      markdown = process_element(element)  # recursively process all elements to get full markdown
+      # Extract github style code blocks
+      extractions = {}
+      markdown.gsub!(%r{```.*?```}m) do |match|
+        md5 = Digest::MD5.hexdigest(match)
+        extractions[md5] = match
+        "{code-block-extraction-#{md5}}"
+      end
+      markdown = markdown.split("\n").map do |line|
+        if line.match(/^( {4}|\t)/)
+          line
+        else
+          "#{ '  ' if line.match(/^ {2,3}/) }" +
+          normalize_whitespace(line).strip +
+          "#{ '  ' if line.match(/ {2}$/) }"
+        end
+      end.join("\n")
+      markdown.gsub!(/\n{3,}/, "\n\n")
+      # Insert pre block extractions
+      markdown.gsub!(/\{code-block-extraction-([0-9a-f]{32})\}/){ extractions[$1] }
+      markdown
     end
     def process_element(element)
       output = ''
-      output << if element.text?
-        element.text.strip
+      if element.text?
+        text = process_text(element)
+        if output.end_with?(' ') && text.start_with?(' ')
+          output << text.lstrip
+        else
+          output << text
+        end
       else
-        opening(element)
-      end
-      element.children.each do |child|
-        output << process_element(child)
+        output << opening(element).to_s
+        markdown_chunks = element.children.map { |child| process_element(child) }
+        remove_adjacent_whitespace!(markdown_chunks)
+        output << markdown_chunks.join
+        output << ending(element).to_s
       end
-      output << ending(element) unless element.text?
       output
     end
     private
+    # removes whitespace-only chunk if the previous chunk ends with whitespace
+    def remove_adjacent_whitespace!(chunks)
+      (chunks.size - 1).downto(1).each do |i|
+        chunk = chunks[i]
+        previous_chunk = chunks[i-1]
+        chunks.delete_at(i) if chunk == ' ' && previous_chunk.end_with?(' ')
+      end
+    end
     def opening(element)
       parent = element.parent ? element.parent.name.to_sym : nil
       case element.name.to_sym
@@ -45,29 +92,49 @@ module ReverseMarkdown
           "\n"
         when :ul, :root#, :p
           "\n"
+        when :div
+          "\n"
         when :p
           if element.ancestors.map(&:name).include?('blockquote')
             "\n\n> "
+          elsif [nil, :body].include? parent
+            is_first = true
+            previous = element.previous
+            while is_first == true and previous do
+              is_first = false unless previous.content.strip == "" || previous.text?
+              previous = previous.previous
+            end
+            is_first ? "" : "\n\n"
           else
             "\n\n"
           end
         when :h1, :h2, :h3, :h4 # /h(\d)/ for 1.9
           element.name =~ /h(\d)/
-          '#' * $1.to_i + ' '
-        when :em
-          "*"
-        when :strong
-          "**"
+          "\n" + ('#' * $1.to_i) + ' '
+        when :em, :i
+          element.text.strip.empty? ? '' : '_' if (element.ancestors('em') + element.ancestors('i')).empty?
+        when :strong, :b
+          element.text.strip.empty? ? '' : '**' if (element.ancestors('strong') + element.ancestors('b')).empty?
         when :blockquote
           "> "
         when :code
-          parent == :pre ? "    " : "`"
+          if parent == :pre
+            self.github_style_code_blocks ? "\n```\n" : "\n    "
+          else
+            " `"
+          end
         when :a
-          "["
+          if !element.text.strip.empty? && element['href'] && !element['href'].start_with?('#')
+            " ["
+          else
+            " "
+          end
         when :img
-          "!["
+          " !["
         when :hr
-          "----------\n\n"
+          "\n* * *\n"
+        when :br
+          "  \n"
         else
           handle_error "unknown start tag: #{element.name.to_s}"
           ""
@@ -77,32 +144,69 @@ module ReverseMarkdown
     def ending(element)
       parent = element.parent ? element.parent.name.to_sym : nil
       case element.name.to_sym
-        when :html, :body, :pre, :hr, :p
+        when :html, :body, :pre, :hr
           ""
+        when :p
+          "\n\n"
+        when :div
+          "\n"
         when :h1, :h2, :h3, :h4 # /h(\d)/ for 1.9
           "\n"
-        when :em
-          '*'
-        when :strong
-          '**'
+        when :em, :i
+          element.text.strip.empty? ? '' : '_' if (element.ancestors('em') + element.ancestors('i')).empty?
+        when :strong, :b
+          element.text.strip.empty? ? '' : '**' if (element.ancestors('strong') + element.ancestors('b')).empty?
         when :li, :blockquote, :root, :ol, :ul
           "\n"
         when :code
-          parent == :pre ? '' : '`'
+          if parent == :pre
+            self.github_style_code_blocks ? "\n```" : "\n"
+          else
+           '` '
+          end
         when :a
-          "](#{element.attribute('href').to_s}) "
-        when :img
-          if element.has_attribute?('alt')
-            "#{element.attribute('alt')}][#{element.attribute('src')}] "
+          if !element.text.strip.empty? && element['href'] && !element['href'].start_with?('#')
+            "](#{element['href']}#{title_markdown(element)}) "
           else
-            "#{element.attribute('src')}] "
+            ""
           end
+        when :img
+          "#{element['alt']}](#{element['src']}#{title_markdown(element)}) "
         else
           handle_error "unknown end tag: #{element.name}"
           ""
       end
     end
+    def title_markdown(element)
+      title = element['title']
+      title ? %[ "#{title}"] : ''
+    end
+    def process_text(element)
+      parent = element.parent ? element.parent.name.to_sym : nil
+      case
+        when parent == :code
+          if self.github_style_code_blocks
+            element.text
+          else
+            element.text.strip.gsub(/\n/,"\n    ")
+          end
+        else
+          normalize_whitespace(escape_text(element.text))
+      end
+    end
+    def normalize_whitespace(text)
+      text.tr("\n\t", ' ').squeeze(' ')
+    end
+    def escape_text(text)
+      text.
+        gsub('*', '\*').
+        gsub('_', '\_')
+    end
     def handle_error(message)
       if raise_errors
         raise ReverseMarkdown::ParserError, message

data/lib/reverse_markdown/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module ReverseMarkdown
-  VERSION = "0.3.0"
+  VERSION = "0.4.0"
 end

data/reverse_markdown.gemspec CHANGED Viewed

@@ -23,5 +23,5 @@ Gem::Specification.new do |s|
   s.add_development_dependency 'rspec'
   s.add_development_dependency 'simplecov'
   s.add_development_dependency 'rake'
+  s.add_development_dependency 'redcarpet'
 end

data/spec/assets/anchors.html CHANGED Viewed

@@ -1,10 +1,19 @@
 <html>
   <body>
+    some text...
     <a href="http://foobar.com">Foobar</a>
-    <a href="http://strong.foobar.com">
-      <strong>Strong foobar</strong>
-    </a>
+    <a href="http://foobar.com" title="f***** up beyond all recognition">Fubar</a>
+    <a href="http://strong.foobar.com"><strong>Strong foobar</strong></a>
+    ignore <a href="foo.html">   </a> anchor tags with no link text
+    pass through the text of <a href="#content">internal jumplinks</a> without treating them as links
+    pass through the text of <a id="content">anchor tags with no href</a> without treating them as links
+    some text...
     <img src="http://foobar.com/logo.png">
     <img alt="foobar image" src="http://foobar.com/foobar.png">
+    <img alt="foobar image 2" title="this is the foobar image 2" src="http://foobar.com/foobar2.png">
+    some text...
   </body>
 </html>

data/spec/assets/basic.html CHANGED Viewed

@@ -1,12 +1,42 @@
 <html>
   <body>
+    plain text
     <h1>h1</h1>
     <h2>h2</h2>
     <h3>h3</h3>
     <h4>h4</h4>
-    <em>em</em>
-    <strong>strong</strong>
-    <code>code</code>
+    <em>em tag content</em>
+    before <em></em> and after empty em tags
+    before <em> </em> and after em tags containing whitespace
+    before <em> <em> <br /> </em> </em> and after em tags containing whitespace
+    <em><em>double em tags</em></em>
+    <p><em><em>double em tags in p tag</em></em></p>
+    <strong>strong tag content</strong>
+    before <strong></strong> and after empty strong tags
+    before <strong> </strong> and after strong tags containing whitespace
+    before <strong> <strong> <br /> </strong> </strong> and after strong tags containing whitespace
+    <strong><strong>double strong tags</strong></strong>
+    <p><strong><strong>double strong tags in p tag</strong></strong></p>
+    before
+    <strong>
+        <strong>
+          double strong tags containing whitespace
+        </strong>
+      </strong> after
+    <b>b tag content</b>
+    <i>i tag content</i>
+    br tags become double space followed by newline<br/>
+    before hr
     <hr/>
+    after hr
+    <div>section 1</div>
+    <div>section 2</div>
   </body>
 </html>

data/spec/assets/code.html ADDED Viewed

@@ -0,0 +1,22 @@
+<html>
+  <body>
+  <pre>pre block</pre>
+  <code>code block</code>
+  <pre><code>pre code block</code></pre>
+	<p>Paragraph with inline <code>code</code> block</p>
+	<pre><code>var this;
+this.is("A multi line code block")
+console.log("Yup, it is")
+	</code></pre>
+Code with indentation:
+<pre><code>tell application "Foo"
+    beep
+end tell
+</code></pre>
+  </body>
+</html>

data/spec/assets/escapables.html ADDED Viewed

@@ -0,0 +1,15 @@
+<html>
+  <body>
+    some text...
+    **two asterisks**
+    ***three asterisks***
+    __two underscores__
+    ___three underscores___
+    some text...
+    <pre><code>var theoretical_max_infin = 1.0;</code></pre>
+  </body>
+</html>

data/spec/assets/from_the_wild.html ADDED Viewed

@@ -0,0 +1,19 @@
+<p>
+  <strong>
+    <strong>
+      .<br />
+    </strong>
+    *** intentcast
+  </strong>
+  : logo design
+  <strong>
+    <strong>
+      <br />
+    </strong>
+  </strong>
+  <strong>
+    <strong>
+      .
+    </strong>
+  </strong>
+</p>

data/spec/assets/html_fragment.html ADDED Viewed

@@ -0,0 +1,3 @@
+naked text 1
+<p>paragraph text</p>
+naked text 2

data/spec/assets/lists.html CHANGED Viewed

@@ -1,13 +1,15 @@
 <html>
   <body>
+    some text...
     <ul>
       <li>unordered list entry</li>
-      <li>unordered list entry</li>
+      <li>unordered list entry 2</li>
     </ul>
     <ol>
       <li>ordered list entry</li>
-      <li>ordered list entry</li>
+      <li>ordered list entry 2</li>
     </ol>
     <ol>
@@ -18,10 +20,37 @@
           <li>
             <ol>
               <li>deep nested list entry</li>
-            <ol>
+            </ol>
           </li>
         </ul>
       </li>
     </ol>
+    a nested list with no whitespace:
+    <ul><li>item a</li><li>item b<ul><li>item bb</li><li>item bc</li></ul></li></ul>
+    a nested list with lots of whitespace:
+    <ul>  <li>  item   wa  </li>  <li> item wb <ul> <li> item wbb </li> <li> item wbc </li> </ul> </li> </ul>
+    <ul>
+      <li class="toclevel-1 tocsection-1"><a href="Basic_concepts"><span class="tocnumber">1</span> <span class="toctext">Basic concepts</span></a></li>
+      <li class="toclevel-1 tocsection-2"><a href="History_of_the_idea"><span class="tocnumber">2</span> <span class="toctext">History of the idea</span></a></li>
+      <li class="toclevel-1 tocsection-3"><a href="Intelligence_explosion"><span class="tocnumber">3</span> <span class="toctext">Intelligence explosion</span></a>
+    </ul>
+    <ul>
+      <li>
+        <p dir="ltr">I want to have a party at my house!</p>
+      </li>
+    </ul>
+    <ul>
+      <li>
+        <p>li 1, p 1</p>
+        <p>li 1, p 2</p>
+      </li>
+      <li><p>li 2, p 1</p></li>
+    </ul>
   </body>
 </html>

data/spec/assets/paragraphs.html CHANGED Viewed

@@ -1,7 +1,10 @@
 <html>
   <body>
     <p>First content</p>
-    <p>Second content</p>
+    <p>
+      Second
+      content
+    </p>
     <p>
       <em>Complex</em>
       <pre>

data/spec/components/anchors_spec.rb CHANGED Viewed

@@ -6,9 +6,18 @@ describe ReverseMarkdown::Mapper do
   let(:document) { Nokogiri::HTML(input) }
   subject { ReverseMarkdown.parse_string(input) }
-  it { subject.should include '[Foobar](http://foobar.com)' }
-  it { subject.should include '[**Strong foobar**](http://strong.foobar.com)' }
-  it { subject.should include '![http://foobar.com/logo.png]' }
-  it { subject.should include '![foobar image][http://foobar.com/foobar.png]' }
+  it { should include ' [Foobar](http://foobar.com) ' }
+  it { should include ' [Fubar](http://foobar.com "f***** up beyond all recognition") ' }
+  it { should include ' [**Strong foobar**](http://strong.foobar.com) ' }
+  it { should include ' ![](http://foobar.com/logo.png) ' }
+  it { should include ' ![foobar image](http://foobar.com/foobar.png) ' }
+  it { should include ' ![foobar image 2](http://foobar.com/foobar2.png "this is the foobar image 2") ' }
+  context "links to ignore" do
+    it { should include ' ignore anchor tags with no link text ' }
+    it { should include ' pass through the text of internal jumplinks without treating them as links ' }
+    it { should include ' pass through the text of anchor tags with no href without treating them as links ' }
+  end
 end

data/spec/components/basic_spec.rb CHANGED Viewed

@@ -6,13 +6,32 @@ describe ReverseMarkdown::Mapper do
   let(:document) { Nokogiri::HTML(input) }
   subject { ReverseMarkdown.parse_string(input) }
-  it { subject.should match /# h1\n/ }
-  it { subject.should match /## h2\n/ }
-  it { subject.should match /### h3\n/ }
-  it { subject.should match /#### h4\n/ }
-  it { subject.should match /\*em\*/ }
-  it { subject.should match /\*\*strong\*\*/ }
-  it { subject.should match /`code`/ }
-  it { subject.should match /---/ }
+  it { should match /plain text ?\n/ }
+  it { should match /# h1\n/ }
+  it { should match /## h2\n/ }
+  it { should match /### h3\n/ }
+  it { should match /#### h4\n/ }
+  it { should match /_em tag content_/ }
+  it { should match /before and after empty em tags/ }
+  it { should match /before and after em tags containing whitespace/ }
+  it { should match /_double em tags_/ }
+  it { should match /_double em tags in p tag_/ }
+  it { should match /\*\*strong tag content\*\*/ }
+  it { should match /before and after empty strong tags/ }
+  it { should match /before and after strong tags containing whitespace/ }
+  it { should match /\*\*double strong tags\*\*/ }
+  it { should match /\*\*double strong tags in p tag\*\*/ }
+  it { should match /before \*\* double strong tags containing whitespace \*\* after/ }
+  it { should match /_i tag content_/ }
+  it { should match /\*\*b tag content\*\*/ }
+  it { should match /br tags become double space followed by newline  \n/ }
+  #it { should match /br tags XXX  \n/ }
+  it { should match /\nbefore hr ?\n\* \* \*\n ?after hr\n/ }
+  it { should match /section 1\n ?\nsection 2/ }
 end

data/spec/components/code_spec.rb ADDED Viewed

@@ -0,0 +1,28 @@
+require 'spec_helper'
+describe ReverseMarkdown::Mapper do
+  let(:input)    { File.read('spec/assets/code.html') }
+  let(:document) { Nokogiri::HTML(input) }
+  subject { ReverseMarkdown.parse_string(input) }
+  it { should match /inline `code` block/ }
+  it { should match /\    var this\;\n    this\.is/ }
+  it { should match /block"\)\n    console/ }
+  context "with github style code blocks" do
+    subject { ReverseMarkdown.parse_string(input, :github_style_code_blocks => true) }
+    it { should match /inline `code` block/ }
+    it { should match /```\nvar this\;\nthis/ }
+    it { should match /it is"\) ?\n\t\n```/ }
+  end
+  context "code with indentation" do
+    subject { ReverseMarkdown.parse_string(input) }
+    it { should match(/^    tell application "Foo"\n/) }
+    it { should match(/^        beep\n/) }
+    it { should match(/^    end tell\n/) }
+  end
+end

data/spec/components/escapables_spec.rb ADDED Viewed

@@ -0,0 +1,22 @@
+require 'spec_helper'
+describe ReverseMarkdown::Mapper do
+  let(:input)    { File.read('spec/assets/escapables.html') }
+  let(:document) { Nokogiri::HTML(input) }
+  subject { ReverseMarkdown.parse_string(input) }
+  context "multiple asterisks" do
+    it { should include ' \*\*two asterisks\*\* ' }
+    it { should include ' \*\*\*three asterisks\*\*\* ' }
+  end
+  context "multiple underscores" do
+    it { should include ' \_\_two underscores\_\_ ' }
+    it { should include ' \_\_\_three underscores\_\_\_ ' }
+  end
+  context "underscores within words in code blocks" do
+    it { should include '    var theoretical_max_infin = 1.0;' }
+  end
+end

data/spec/components/from_the_wild_spec.rb ADDED Viewed

@@ -0,0 +1,16 @@
+require 'spec_helper'
+describe ReverseMarkdown::Mapper do
+  let(:input)    { File.read('spec/assets/from_the_wild.html') }
+  let(:document) { Nokogiri::HTML(input) }
+  subject { ReverseMarkdown.parse_string(input) }
+  it "should make sense of strong-crazy markup (as seen in the wild)" do
+    subject.should ==
+      '** .' + "  \n" +
+      '\*\*\* intentcast ** : logo design' + "  \n" +
+      '** . **'
+  end
+end

data/spec/components/html_fragment_spec.rb ADDED Viewed

@@ -0,0 +1,11 @@
+require 'spec_helper'
+describe ReverseMarkdown::Mapper do
+  let(:input)    { File.read('spec/assets/html_fragment.html') }
+  let(:document) { Nokogiri::HTML(input) }
+  subject { ReverseMarkdown.parse_string(input) }
+  it { should == "naked text 1\n\nparagraph text\n\nnaked text 2" }
+end

data/spec/components/lists_spec.rb CHANGED Viewed

@@ -6,10 +6,40 @@ describe ReverseMarkdown::Mapper do
   let(:document) { Nokogiri::HTML(input) }
   subject { ReverseMarkdown.parse_string(input) }
-  it { subject.should match /- unordered list entry\n/ }
-  it { subject.should match /1. ordered list entry\n/ }
-  it { subject.should match /1. list entry 1st hierarchy\n/ }
-  it { subject.should match /\s{2}- nested unsorted list entry/ }
-  it { subject.should match /\s{4}1. deep nested list entry/ }
+  it { should match /\n- unordered list entry\n/ }
+  it { should match /\n- unordered list entry 2\n/ }
+  it { should match /\n1. ordered list entry\n/ }
+  it { should match /\n2. ordered list entry 2\n/ }
+  it { should match /\n1. list entry 1st hierarchy\n/ }
+  it { should match /\n {2}- nested unsorted list entry\n/ }
+  it { should match /\n {4}1. deep nested list entry\n/ }
+  context "nested list with no whitespace" do
+    it { should match /\n- item a\n/ }
+    it { should match /\n- item b\n/ }
+    it { should match /\n {2}- item bb\n/ }
+    it { should match /\n {2}- item bc\n/ }
+  end
+  context "nested list with lots of whitespace" do
+    it { should match /\n- item wa\n/ }
+    it { should match /\n- item wb\n/ }
+    it { should match /\n {2}- item wbb\n/ }
+    it { should match /\n {2}- item wbc\n/ }
+  end
+  context "lists containing links" do
+    it { should match /\n- \[1 Basic concepts\]\(Basic_concepts\)\n/ }
+    it { should match /\n- \[2 History of the idea\]\(History_of_the_idea\)\n/ }
+    it { should match /\n- \[3 Intelligence explosion\]\(Intelligence_explosion\)\n/ }
+  end
+  context "lists containing embedded <p> tags" do
+    xit { should match /\n- I want to have a party at my house!\n/ }
+  end
+  context "list item containing multiple <p> tags" do
+    xit { should match /\n- li 1, p 1\n\n- li 1, p 2\n/ }
+  end
 end

data/spec/components/paragraphs_spec.rb CHANGED Viewed

@@ -6,6 +6,7 @@ describe ReverseMarkdown::Mapper do
   let(:document) { Nokogiri::HTML(input) }
   subject { ReverseMarkdown.parse_string(input) }
-  it { subject.should match /First content\n\nSecond content\n\n/ }
-  it { subject.should include "\n\n*Complex*\n    Content" }
-end
+  it { should_not start_with "\n\n" }
+  it { should start_with "First content\n\nSecond content\n\n" }
+  it { should include "\n\n_Complex_\n\n    Content" }
+end

data/spec/components/quotation_spec.rb CHANGED Viewed

@@ -6,7 +6,7 @@ describe ReverseMarkdown::Mapper do
   let(:document) { Nokogiri::HTML(input) }
   subject { ReverseMarkdown.parse_string(input) }
-  it { subject.should include "\n    Block of code" }
-  it { subject.should include "\n> First quoted paragraph\n\n> Second quoted paragraph" }
+  it { should include "\n    Block of code" }
+  it { should include "\n> First quoted paragraph\n\n> Second quoted paragraph" }
 end

data/spec/html_to_markdown_to_html_spec.rb ADDED Viewed

@@ -0,0 +1,104 @@
+# coding:utf-8
+require 'redcarpet'
+require 'spec_helper'
+describe 'Round trip: HTML to markdown (via reverse_markdown) to HTML (via redcarpet)' do
+  # helpers
+  def roundtrip_should_preserve(orig_html)
+    normalize_html(html2markdown2html orig_html).should == normalize_html(orig_html)
+  end
+  def html2markdown2html(orig_html)
+    markdown = ReverseMarkdown.parse_string orig_html
+    new_html = Redcarpet::Markdown.new(Redcarpet::Render::HTML).render(markdown)
+    new_html
+  end
+  def normalize_html(html)
+    squeeze_whitespace(html).gsub('> <', '><').strip
+  end
+  def squeeze_whitespace(string)
+    string.tr("\n\t", ' ').squeeze(' ').gsub(/\A \z/, '')
+  end
+  # specs
+  it "should preserve <blockquote> blocks" do
+    roundtrip_should_preserve('<blockquote><p>some text</p></blockquote>')
+  end
+  it "should preserve unordered lists" do
+    roundtrip_should_preserve("
+      <ol>
+        <li>Bird</li>
+        <li>McHale</li>
+        <li>Parish</li>
+      </ol>
+    ")
+  end
+  it "should preserve ordered lists" do
+    roundtrip_should_preserve("
+      <ul>
+        <li>Bird</li>
+        <li>McHale</li>
+        <li>Parish</li>
+      </ul>
+    ")
+  end
+  it "should preserve <hr> tags" do
+    roundtrip_should_preserve("<hr>")
+  end
+  it "should preserve <em> tags" do
+    roundtrip_should_preserve("<p><em>yes!</em></p>")
+  end
+  it "should preserve links inside <strong> tags" do
+    pending
+    roundtrip_should_preserve(%{<p><strong><a href="/wiki/Western_philosophy" title="Western philosophy">Western philosophy</a></strong></p>})
+  end
+  it "should preserve <strong> tags" do
+    roundtrip_should_preserve("<p><strong>yes!</strong></p>")
+  end
+  it "should preserve <br> tags" do
+    roundtrip_should_preserve("<p>yes!<br>\n we can!</p>")
+  end
+  it "should preserve <a> tags" do
+    roundtrip_should_preserve(%{<p>This is <a href="http://example.com/" title="Title">an example</a> inline link.</p>})
+    roundtrip_should_preserve(%{<p><a href="http://example.net/">This link</a> has no title attribute.</p>})
+  end
+  it "should preserve <img> tags" do
+    roundtrip_should_preserve(%{<p><img src="http://foo.bar/dog.png" alt="My Dog" title="Ralph"></p>})
+    roundtrip_should_preserve(%{<p><img src="http://foo.bar/dog.png" alt="My Dog"></p>})
+  end
+  it "should preserve code blocks" do
+    roundtrip_should_preserve(%{
+      <p>This is a normal paragraph:</p>
+      <pre><code>This is a code block. </code></pre>
+    })
+  end
+  it "should preserve code blocks with embedded whitespace" do
+    roundtrip_should_preserve(%{
+      <p>Here is an example of AppleScript:</p>
+      <pre><code>tell application Foo
+          beep
+      end tell
+      </code></pre>
+    })
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: reverse_markdown
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.4.0
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-08-13 00:00:00.000000000 Z
+date: 2012-11-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -75,6 +75,22 @@ dependencies:
     - - ! '>='
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: redcarpet
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 description: Map simple html back into markdown, e.g. if you want to import existing
   html data in your application.
 email:
@@ -86,6 +102,7 @@ files:
 - .gitignore
 - .travis.yml
 - Gemfile
+- License-MIT
 - README.md
 - Rakefile
 - lib/reverse_markdown.rb
@@ -95,16 +112,25 @@ files:
 - reverse_markdown.gemspec
 - spec/assets/anchors.html
 - spec/assets/basic.html
+- spec/assets/code.html
+- spec/assets/escapables.html
+- spec/assets/from_the_wild.html
 - spec/assets/full_example.html
+- spec/assets/html_fragment.html
 - spec/assets/lists.html
 - spec/assets/minimum.html
 - spec/assets/paragraphs.html
 - spec/assets/quotation.html
 - spec/components/anchors_spec.rb
 - spec/components/basic_spec.rb
+- spec/components/code_spec.rb
+- spec/components/escapables_spec.rb
+- spec/components/from_the_wild_spec.rb
+- spec/components/html_fragment_spec.rb
 - spec/components/lists_spec.rb
 - spec/components/paragraphs_spec.rb
 - spec/components/quotation_spec.rb
+- spec/html_to_markdown_to_html_spec.rb
 - spec/mapper_spec.rb
 - spec/reverse_markdown_spec.rb
 - spec/spec_helper.rb
@@ -122,7 +148,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -1059485323114314033
+      hash: -1957963003780104262
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
@@ -131,7 +157,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -1059485323114314033
+      hash: -1957963003780104262
 requirements: []
 rubyforge_project: reverse_markdown
 rubygems_version: 1.8.24
@@ -141,16 +167,25 @@ summary: Transform html code into markdown.
 test_files:
 - spec/assets/anchors.html
 - spec/assets/basic.html
+- spec/assets/code.html
+- spec/assets/escapables.html
+- spec/assets/from_the_wild.html
 - spec/assets/full_example.html
+- spec/assets/html_fragment.html
 - spec/assets/lists.html
 - spec/assets/minimum.html
 - spec/assets/paragraphs.html
 - spec/assets/quotation.html
 - spec/components/anchors_spec.rb
 - spec/components/basic_spec.rb
+- spec/components/code_spec.rb
+- spec/components/escapables_spec.rb
+- spec/components/from_the_wild_spec.rb
+- spec/components/html_fragment_spec.rb
 - spec/components/lists_spec.rb
 - spec/components/paragraphs_spec.rb
 - spec/components/quotation_spec.rb
+- spec/html_to_markdown_to_html_spec.rb
 - spec/mapper_spec.rb
 - spec/reverse_markdown_spec.rb
 - spec/spec_helper.rb