RubyGems - zevarito-undress - Versions diffs - 0.2.2 → 0.2.3 - Mend

zevarito-undress 0.2.2 → 0.2.3

Files changed (8) hide show

data/lib/undress.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-require "hpricot"
+require File.expand_path(File.dirname(__FILE__) + "/hpricot_ext")
 require File.expand_path(File.dirname(__FILE__) + "/core_ext/object")
 require File.expand_path(File.dirname(__FILE__) + "/undress/grammar")
@@ -12,9 +12,7 @@ end
 module Undress
-  # if this array is empty we allow all tags
-  # if the processed node name not exist in this array we drop it
-  ALLOWED_TAGS = []
+  INLINE_ELEMENTS = ['span', 'b', 'strong', 'i', 'em', 'ins', 'del','strike', 'abbr', 'acronym', 'cite', 'code', 'label', 'sub', 'sup']
   # Register a markup language. The name will become the method used to convert
   # HTML to this markup language: for example registering the name +:textile+
@@ -58,7 +56,8 @@ module Undress
         if e.elem? && e.inner_html != "" && e.name !~ (/pre|code/) && e.children.size == 0
           e.inner_html = e.inner_html.gsub(/\n|\t/,"").gsub(/\s+/," ")
         elsif e.text? && e.parent.name !~ /pre|code/
-          e.content = e.content.gsub(/\n|\t/,"").gsub(/\s+/," ").gsub(/^\s$/, "")
+          e.content = e.content.gsub(/\n|\t/,"").gsub(/\s+/," ")
+          e.content = e.content.gsub(/^\s+$/, "") if e.next_node && ! INLINE_ELEMENTS.include?(e.next_node.name)
         end
       end
     end
@@ -67,12 +66,29 @@ module Undress
     # such as those used in WYSIWYG editors; we remove them after converting so
     # they are not used in the final conversion.
     def fixup_span_with_styles(e)
-        return if !e.has_attribute?("style")
+      return if !e.has_attribute?("style")
-        if e["style"] =~ /italic/        then e.inner_html = "<em>#{e.inner_html}</em>"          end
-        if e["style"] =~ /underline/     then e.inner_html = "<ins>#{e.inner_html}</ins>"        end
-        if e["style"] =~ /line-through/  then e.inner_html = "<del>#{e.inner_html}</del>"        end
-        if e["style"] =~ /bold/          then e.inner_html = "<strong>#{e.inner_html}</strong>"  end
+      if e.get_style("font-style") == "italic"
+        e.inner_html = "<em>#{e.inner_html}</em>"
+        e.del_style("font-style")
+      end
+      if e.get_style("text-decoration") == "underline"
+        e.inner_html = "<ins>#{e.inner_html}</ins>"
+        e.del_style("text-decoration")
+      end
+      if e.get_style("text-decoration") == "line-through"
+        e.inner_html = "<del>#{e.inner_html}</del>"
+        e.del_style("text-decoration")
+      end
+      if e.get_style("font-weight") == "bold"
+        e.inner_html = "<strong>#{e.inner_html}</strong>"
+        e.del_style("font-weight")
+      end
+      e.swap e.inner_html if e.styles.empty? && e.name == "span"
     end
     # Fixup a badly nested list such as <ul> sibling to <li> instead inside of <li>.
@@ -90,17 +106,4 @@ module Undress
       end
     end
   end
-  module ::Hpricot #:nodoc:
-    class Elem #:nodoc:
-      def ancestors
-        node, ancestors = parent, Elements[]
-        while node.respond_to?(:parent) && node.parent
-          ancestors << node
-          node = node.parent
-        end
-        ancestors
-      end
-    end
-  end
 end

data/lib/undress/grammar.rb CHANGED Viewed

@@ -89,7 +89,7 @@ module Undress
         if node.text?
           node.to_html
         elsif node.elem?
-          send node.name.to_sym, node if ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)
+          send node.name.to_sym, node if ! defined?(ALLOWED_TAGS) || ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)
         else
           ""
         end
@@ -116,8 +116,27 @@ module Undress
     # Helper method that tells you if the given DOM node is immediately
     # surrounded by whitespace.
     def surrounded_by_whitespace?(node)
-      (node.previous.text? && node.previous.to_s =~ /\s+$/) ||
-        (node.next.text? && node.next.to_s =~ /^\s+/)
+      (node.previous && node.previous.text? && node.previous.to_s =~ /\s+$/) ||
+        (node.next && node.next.text? && node.next.to_s =~ /^\s+/)
+    end
+    # Helper to determine whether a node contains a whole word;
+    # useful when converting, for example, a single italic letter inside a word.
+    def complete_word?(node)
+      return true if ! node.previous_node || ! node.next_node
+      p, n = node.previous_node, node.next_node
+      if p.respond_to?(:content)
+        return false if p.content       !~ /\s$/
+      elsif p.respond_to?(:inner_html)
+        return false if p.inner_html    !~ /\s$/
+      elsif n.respond_to?(:content)
+        return false if n.content       !~ /^\s/
+      elsif n.respond_to?(:inner_html)
+        return false if n.content       !~ /^\s/
+      end
+      true
     end
     def method_missing(tag, node, *args) #:nodoc:

data/lib/undress/greencloth.rb CHANGED Viewed

@@ -5,9 +5,9 @@ module Undress
     Undress::ALLOWED_TAGS = [
       'div', 'a', 'img', 'br', 'i', 'u', 'b', 'pre', 'kbd', 'code', 'cite', 'strong', 'em',
-      'ins', 'sup', 'sub', 'del', 'table', 'tr', 'td', 'th', 'ol', 'ul', 'li', 'p', 'span',
-      'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'notextile', 'blockquote', 'object', 'embed',
-      'param', 'acronym', 'dd', 'dl', 'dt'
+      'ins', 'sup', 'sub', 'del', 'table', 'tbody', 'thead', 'tr', 'td', 'th', 'ol', 'ul',
+      'li', 'p', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'notextile', 'blockquote',
+      'object', 'embed', 'param', 'acronym', 'dd', 'dl', 'dt'
     ]
     # table of contents

data/lib/undress/textile.rb CHANGED Viewed

@@ -2,6 +2,8 @@ require File.expand_path(File.dirname(__FILE__) + "/../undress")
 module Undress
   class Textile < Grammar
+    # entities
+    post_processing(/&nbsp;/, " ")
     # whitespace handling
     post_processing(/\n\n+/, "\n\n")
@@ -28,14 +30,15 @@ module Undress
       alt = e.has_attribute?("alt") ? "(#{e["alt"]})" : ""
       "!#{e["src"]}#{alt}!"
     }
-    rule_for(:strong)  {|e| "*#{content_of(e)}*" }
-    rule_for(:em)      {|e| "_#{content_of(e)}_" }
+    rule_for(:strong)  {|e| complete_word?(e) ? "*#{content_of(e)}*" : "[*#{content_of(e)}*]"}
+    rule_for(:em)      {|e| complete_word?(e) ? "_#{content_of(e)}_" : "[_#{content_of(e)}_]"}
     rule_for(:code)    {|e| "@#{content_of(e)}@" }
     rule_for(:cite)    {|e| "??#{content_of(e)}??" }
     rule_for(:sup)     {|e| surrounded_by_whitespace?(e) ? "^#{content_of(e)}^" : "[^#{content_of(e)}^]" }
     rule_for(:sub)     {|e| surrounded_by_whitespace?(e) ? "~#{content_of(e)}~" : "[~#{content_of(e)}~]" }
-    rule_for(:ins)     {|e| "+#{content_of(e)}+" }
-    rule_for(:del)     {|e| "-#{content_of(e)}-" }
+    rule_for(:ins)     {|e| complete_word?(e) ? "+#{content_of(e)}+" : "[+#{content_of(e)}+]"}
+    rule_for(:del)     {|e| complete_word?(e) ? "-#{content_of(e)}-" : "[-#{content_of(e)}-]"}
     rule_for(:acronym) {|e| e.has_attribute?("title") ? "#{content_of(e)}(#{e["title"]})" : content_of(e) }
     # text formatting and layout

data/test/test_greencloth.rb CHANGED Viewed

@@ -21,9 +21,15 @@ class Undress::GreenClothTest < Test::Unit::TestCase
   # this is ok to ensure invalid html -> to greencloth but xhtmlize! must have
   # tests on test_undress or something too
   context "parsing not valid xhtml documents" do
+    test "space between 2 spans with styles" do
+      html = "<p><span style='font-weight: bold;'>bold</span> <span style='font-style: italic;'>italic</span></p>"
+      greencloth = "*bold* _italic_\n"
+      assert_renders_greencloth greencloth, html
+    end
     test "a <span> bold, italic, underline, line-through at the same time" do
-      html = "<p>some text <span style='font-weight:bold; font-style:italic; text-decoration:underline; text-decoration:line-through'>bold</span> with style</p>"
-      greencloth = "some text *-+_bold_+-* with style\n"
+      html = "<p>some text <span style='font-weight:bold; font-style:italic; text-decoration:underline;'>bold</span> with style</p>"
+      greencloth = "some text *+_bold_+* with style\n"
       assert_renders_greencloth greencloth, html
     end
@@ -37,7 +43,7 @@ class Undress::GreenClothTest < Test::Unit::TestCase
     end
     test "style 'line-through' should be converted to <del> in <span> elements" do
-	    html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration-: line-through;'>paragraph</span></p>"
+	    html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration: line-through;'>paragraph</span></p>"
       greencloth = "with -some- in the -paragraph-\n"
       assert_renders_greencloth greencloth, html
 	    html = "<p style='text-decoration: line-through;'>with some in the paragraph</p>"

data/test/test_textile.rb CHANGED Viewed

@@ -11,6 +11,54 @@ module Undress
         assert_renders_textile "h2. _this is *very* important_\n", "<h2><em>this is <strong>very</strong> important</em></h2>"
       end
+      context "some troubles" do
+        test "with sup" do
+          html = "<p>e = mc<sup>2</sup></p>"
+          textile = "e = mc[^2^]\n"
+          assert_renders_textile textile, html
+        end
+      end
+      context "convert enetities" do
+        test "&nbsp;" do
+          textile = "some word\n"
+          html = "<p>some&nbsp;word</p>"
+          assert_renders_textile textile, html
+        end
+      end
+      context "convert parts of a word" do
+        test "some" do
+          textile = "s[*o*]me\n"
+          html = "<p>s<span style='font-weight:bold;'>o</span>me</p>"
+          assert_renders_textile textile, html
+        end
+        test "italics" do
+          textile = "a perfect wo[_r_]ld\n"
+          html = "<p>a perfect wo<em>r</em>ld</p>"
+          assert_renders_textile textile, html
+        end
+        test "bolds" do
+          textile = "a perfect wo[*r*]ld\n"
+          html = "<p>a perfect wo<strong>r</strong>ld</p>"
+          assert_renders_textile textile, html
+        end
+        test "underlines" do
+          textile = "a perfect wo[+r+]ld\n"
+          html = "<p>a perfect wo<ins>r</ins>ld</p>"
+          assert_renders_textile textile, html
+        end
+        test "line through" do
+          textile = "a perfect wo[-r-]ld\n"
+          html = "<p>a perfect wo<del>r</del>ld</p>"
+          assert_renders_textile textile, html
+        end
+      end
       context "inline elements" do
         test "converts <strong> tags" do
           assert_renders_textile "*foo bar*", "<strong>foo bar</strong>"

data/undress.gemspec CHANGED Viewed

@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name    = "undress"
-  s.version = "0.2.2"
+  s.version = "0.2.3"
   s.date    = "2009-07-29"
   s.description = "Simply translate HTML to Textile, Markdown, or whatever other markup format you need"

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: zevarito-undress
 version: !ruby/object:Gem::Version
-  version: 0.2.2
+  version: 0.2.3
 platform: ruby
 authors:
 - "Nicol\xC3\xA1s Sanguinetti"