RubyGems - undress - Versions diffs - 0.1 → 0.1.1 - Mend

undress 0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

data/README.rdoc CHANGED

@@ -3,7 +3,7 @@
 Easily convert back HTML to Textile, Markdown, RDoc or whatever other
 markup language you like.
-    require "undress"
+    require "undress/textile"
     code =<<html
       <h1>Hello world!</h1>
@@ -33,6 +33,10 @@ Will produce
 For now the only language supported is Textile. But I'll be happy to accept
 patches to add more languages :)
+== Get it
+    gem install undress
 == License
 Authors:: Nicolas Sanguinetti (foca[http://github.com/foca])

data/lib/undress/grammar.rb CHANGED

@@ -61,6 +61,21 @@ module Undress
       pre_processing_rules[selector] = handler
     end
+    # Set a list of attributes you wish to whitelist
+    #
+    # Any attribute not in this list at the moment of parsing will be ignored by the
+    # parser. The method Grammar#attributes(node) will return a hash of the filtered
+    # attributes. Read its documentation for more details.
+    #
+    #     whitelist_attributes :id, :class, :lang
+    def self.whitelist_attributes(*attrs)
+      @whitelisted_attributes = attrs
+    end
+    def self.whitelisted_attributes #:nodoc:
+      @whitelisted_attributes || []
+    end
     def self.post_processing_rules #:nodoc:
       @post_processing_rules ||= {}
     end
@@ -75,10 +90,12 @@ module Undress
     attr_reader :pre_processing_rules #:nodoc:
     attr_reader :post_processing_rules #:nodoc:
+    attr_reader :whitelisted_attributes #:nodoc:
     def initialize #:nodoc:
       @pre_processing_rules = self.class.pre_processing_rules.dup
       @post_processing_rules = self.class.post_processing_rules.dup
+      @whitelisted_attributes = self.class.whitelisted_attributes.dup
     end
     # Process a DOM node, converting it to your markup language according to
@@ -120,6 +137,29 @@ module Undress
         (node.next.text? && node.next.to_s =~ /^\s+/)
     end
+    # Hash of attributes, according to the white list. By default, no attributes
+    # are whitelisted, so you must set which ones to whitelist on each grammar.
+    #
+    # Supposing you set <tt>:id</tt> and <tt>:class</tt> as your
+    # <tt>whitelisted_attributes</tt>, and you have a node representing this
+    # HTML:
+    #
+    #     <p lang="en" class="greeting">Hello World</p>
+    #
+    # Then the method would return:
+    #
+    #     { :class => "greeting" }
+    #
+    # You can override this method in each grammar and call +super+ if you
+    # will represent your attributes consistently across all nodes (for
+    # example, +Textile+ always shows class and id inside parentheses).
+    def attributes(node)
+      node.attributes.inject({}) do |attrs,(key,value)|
+        attrs[key.to_sym] = value if whitelisted_attributes.include?(key.to_sym)
+        attrs
+      end
+    end
     def method_missing(tag, node, *args) #:nodoc:
       process(node.children)
     end

data/lib/undress/textile.rb CHANGED

@@ -2,6 +2,8 @@ require File.expand_path(File.dirname(__FILE__) + "/../undress")
 module Undress
   class Textile < Grammar
+    whitelist_attributes :class, :id, :lang, :style, :colspan, :rowspan
     # whitespace handling
     post_processing(/\n\n+/, "\n\n")
     post_processing(/\A\s+/, "")
@@ -27,35 +29,35 @@ module Undress
       alt = e.has_attribute?("alt") ? "(#{e["alt"]})" : ""
       "!#{e["src"]}#{alt}!"
     }
-    rule_for(:strong)  {|e| "*#{content_of(e)}*" }
-    rule_for(:em)      {|e| "_#{content_of(e)}_" }
-    rule_for(:code)    {|e| "@#{content_of(e)}@" }
-    rule_for(:cite)    {|e| "??#{content_of(e)}??" }
-    rule_for(:sup)     {|e| surrounded_by_whitespace?(e) ? "^#{content_of(e)}^" : "[^#{content_of(e)}^]" }
-    rule_for(:sub)     {|e| surrounded_by_whitespace?(e) ? "~#{content_of(e)}~" : "[~#{content_of(e)}~]" }
-    rule_for(:ins)     {|e| "+#{content_of(e)}+" }
-    rule_for(:del)     {|e| "-#{content_of(e)}-" }
+    rule_for(:strong)  {|e| "*#{attributes(e)}#{content_of(e)}*" }
+    rule_for(:em)      {|e| "_#{attributes(e)}#{content_of(e)}_" }
+    rule_for(:code)    {|e| "@#{attributes(e)}#{content_of(e)}@" }
+    rule_for(:cite)    {|e| "??#{attributes(e)}#{content_of(e)}??" }
+    rule_for(:sup)     {|e| surrounded_by_whitespace?(e) ? "^#{attributes(e)}#{content_of(e)}^" : "[^#{attributes(e)}#{content_of(e)}^]" }
+    rule_for(:sub)     {|e| surrounded_by_whitespace?(e) ? "~#{attributes(e)}#{content_of(e)}~" : "[~#{attributes(e)}#{content_of(e)}~]" }
+    rule_for(:ins)     {|e| "+#{attributes(e)}#{content_of(e)}+" }
+    rule_for(:del)     {|e| "-#{attributes(e)}#{content_of(e)}-" }
     rule_for(:acronym) {|e| e.has_attribute?("title") ? "#{content_of(e)}(#{e["title"]})" : content_of(e) }
     # text formatting and layout
-    rule_for(:p)          {|e| "\n\n#{content_of(e)}\n\n" }
+    rule_for(:p)          {|e| "\n\n#{attributes(e) != "" ? "p#{attributes(e)}. " : ""}#{content_of(e)}\n\n" }
     rule_for(:br)         {|e| "\n" }
-    rule_for(:blockquote) {|e| "\n\nbq. #{content_of(e)}\n\n" }
+    rule_for(:blockquote) {|e| "\n\nbq#{attributes(e)}. #{content_of(e)}\n\n" }
     rule_for(:pre)        {|e|
       if e.children.all? {|n| n.text? && n.content =~ /^\s+$/ || n.elem? && n.name == "code" }
-        "\n\npc. #{content_of(e % "code")}\n\n"
+        "\n\npc#{attributes(e)}. #{content_of(e % "code")}\n\n"
       else
         "<pre>#{content_of(e)}</pre>"
       end
     }
     # headings
-    rule_for(:h1) {|e| "\n\nh1. #{content_of(e)}\n\n" }
-    rule_for(:h2) {|e| "\n\nh2. #{content_of(e)}\n\n" }
-    rule_for(:h3) {|e| "\n\nh3. #{content_of(e)}\n\n" }
-    rule_for(:h4) {|e| "\n\nh4. #{content_of(e)}\n\n" }
-    rule_for(:h5) {|e| "\n\nh5. #{content_of(e)}\n\n" }
-    rule_for(:h6) {|e| "\n\nh6. #{content_of(e)}\n\n" }
+    rule_for(:h1) {|e| "\n\nh1#{attributes(e)}. #{content_of(e)}\n\n" }
+    rule_for(:h2) {|e| "\n\nh2#{attributes(e)}. #{content_of(e)}\n\n" }
+    rule_for(:h3) {|e| "\n\nh3#{attributes(e)}. #{content_of(e)}\n\n" }
+    rule_for(:h4) {|e| "\n\nh4#{attributes(e)}. #{content_of(e)}\n\n" }
+    rule_for(:h5) {|e| "\n\nh5#{attributes(e)}. #{content_of(e)}\n\n" }
+    rule_for(:h6) {|e| "\n\nh6#{attributes(e)}. #{content_of(e)}\n\n" }
     # lists
     rule_for(:li) {|e|
@@ -77,19 +79,37 @@ module Undress
     rule_for(:dd) {|e| ":= #{content_of(e)} =:\n" }
     # tables
-    rule_for(:table) {|e| "\n\n#{content_of(e)}\n" }
-    rule_for(:tr) {|e| "#{content_of(e)}|\n" }
-    rule_for(:td, :th) {|e|
-      prefix = if e.name == "th"
-        "_. "
-      elsif e.has_attribute?("colspan")
-        "\\#{e["colspan"]}. "
-      elsif e.has_attribute?("rowspan")
-        "/#{e["rowspan"]}. "
+    rule_for(:table)   {|e| "\n\n#{content_of(e)}\n" }
+    rule_for(:tr)      {|e| "#{content_of(e)}|\n" }
+    rule_for(:td, :th) {|e| "|#{e.name == "th" ? "_. " : attributes(e)}#{content_of(e)}" }
+    def attributes(node) #:nodoc:
+      filtered = super(node)
+      if filtered.has_key?(:colspan)
+        return "\\#{filtered[:colspan]}. "
       end
-      "|#{prefix}#{content_of(e)}"
-    }
+      if filtered.has_key?(:rowspan)
+        return "/#{filtered[:rowspan]}. "
+      end
+      if filtered.has_key?(:lang)
+        return "[#{filtered[:lang]}]"
+      end
+      if filtered.has_key?(:class) || filtered.has_key?(:id)
+        klass = filtered.fetch(:class, "")
+        id = filtered.fetch(:id, false) ? "#" + filtered[:id] : ""
+        return "(#{klass}#{id})"
+      end
+      if filtered.has_key?(:style)
+        return "{#{filtered[:style]}}"
+      end
+      ""
+    end
   end
   add_markup :textile, Textile

data/test/test_grammar.rb CHANGED

@@ -21,6 +21,10 @@ module Undress
       rule_for(:a) {|e| "" }
     end
+    class WithAttributes < Parent
+      whitelist_attributes :id, :class
+    end
     def parse_with(grammar, html)
       grammar.process!(Hpricot(html))
     end
@@ -51,5 +55,21 @@ module Undress
         assert_equal "<this was a div>Cuack</this was a div><this is a paragraph>O hai</this is a paragraph>", output
       end
     end
+    context "handles attributes" do
+      def attributes_for_tag(html)
+        WithAttributes.new.attributes(Hpricot(html).children.first)
+      end
+      test "whitelisted attributes are picked up in the attributes hash" do
+        attributes = attributes_for_tag("<p class='foo bar' id='baz'>Cuack</p>")
+        assert_equal({ :class => "foo bar", :id => "baz" }, attributes)
+      end
+      test "attributes that are not in the whitelist are ignored" do
+        attributes = attributes_for_tag("<p lang='es' id='saludo'>Hola</p>")
+        assert_equal({ :id => "saludo" }, attributes)
+      end
+    end
   end
 end

data/test/test_textile.rb CHANGED

@@ -193,6 +193,28 @@ module Undress
           assert_renders_textile "Trademarked(tm)", "Trademarked&#8482;"
         end
       end
+      context "handling nodes with attributes" do
+        test "converts 'lang' to [_]" do
+          assert_renders_textile "*[es]hola*", "<strong lang='es'>hola</strong>"
+        end
+        test "converts 'class' to (_)" do
+          assert_renders_textile "*(foo)hola*", "<strong class='foo'>hola</strong>"
+        end
+        test "converts 'id' to (#_)" do
+          assert_renders_textile "*(#bar)hola*", "<strong id='bar'>hola</strong>"
+        end
+        test "converts both 'class' and 'id' to (_#_)" do
+          assert_renders_textile "*(foo#bar)hola*", "<strong id='bar' class='foo'>hola</strong>"
+        end
+        test "converts 'style' into {_}" do
+          assert_renders_textile "*{color:blue;}hola*", "<strong style='color:blue;'>hola</strong>"
+        end
+      end
     end
   end
 end

data/undress.gemspec CHANGED

@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
   s.name    = "undress"
-  s.version = "0.1"
-  s.date    = "2009-07-13"
+  s.version = "0.1.1"
+  s.date    = "2009-07-21"
   s.description = "Simply translate HTML to Textile, Markdown, or whatever other markup format you need"
   s.summary     = "Convert HTML into other markup languages"

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: undress
 version: !ruby/object:Gem::Version
-  version: "0.1"
+  version: 0.1.1
 platform: ruby
 authors:
 - "Nicol\xC3\xA1s Sanguinetti"
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-07-13 00:00:00 -03:00
+date: 2009-07-21 00:00:00 -03:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -97,7 +97,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubyforge_project: undress
-rubygems_version: 1.3.4
+rubygems_version: 1.3.5
 signing_key:
 specification_version: 3
 summary: Convert HTML into other markup languages