zevarito-undress 0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ doc
2
+ dist
3
+ tmp
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ (The MIT License)
2
+
3
+ Copyright (c) 2009 Nicolas Sanguinetti, entp.com
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ 'Software'), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,42 @@
1
+ = Undress
2
+
3
+ Easily convert HTML back to Textile or Greencloth.
4
+
5
+ require "undress/textile"
6
+
7
+ code =<<html
8
+ <h1>Hello world!</h1>
9
+ <p><strong>Hey!</strong> How is it going?</p>
10
+ <h2>Supported Markup Languages so far:</h2>
11
+ <ul>
12
+ <li>Textile</li>
13
+ <li>Greencloth</li>
14
+ </ul>
15
+ html
16
+
17
+ Undress(code).to_textile
18
+
19
+ Will produce
20
+
21
+ h1. Hello world!
22
+
23
+ *Hey!* How is it going?
24
+
25
+ h2. Supported Markup Languages so far:
26
+
27
+ * Textile
28
+ * Greencloth
29
+
30
+ == Supported Markup Languages
31
+
32
+ * Textile
33
+ * Greencloth, see [http://we.riseup.net]
34
+
35
+ == Get it
36
+
37
+ gem install undress
38
+
39
+ == License
40
+
41
+ Authors:: Nicolas Sanguinetti (foca[http://github.com/foca]), Alvaro Gil (zevarito[http://github.com/zevarito])
42
+ License:: MIT (Check LICENSE for details)
data/Rakefile ADDED
@@ -0,0 +1,32 @@
1
require "rake/testtask"

# Load an RDoc rake task. Prefer the Hanna template when it is installed,
# then the task historically bundled with Rake, and finally the task that
# ships with RDoc itself (for Rake versions that no longer provide
# rake/rdoctask).
begin
  require "hanna/rdoctask"
rescue LoadError
  begin
    require "rake/rdoctask"
  rescue LoadError
    require "rdoc/task"
  end
end

# Whichever of the two task classes got loaded above.
rdoc_task = defined?(Rake::RDocTask) ? Rake::RDocTask : RDoc::Task

rdoc_task.new do |rd|
  # Must name one of the files included below so it becomes the index page.
  rd.main = "README.rdoc"
  rd.title = "API Documentation for Undress"
  rd.rdoc_files.include("README.rdoc", "LICENSE", "lib/**/*.rb")
  rd.rdoc_dir = "doc"
end

# metric_fu is optional: its tasks are simply absent when it is not installed.
begin
  require "metric_fu"
rescue LoadError
end

# mg (gem packaging tasks) is optional as well.
begin
  require "mg"
  MG.new("undress.gemspec")
rescue LoadError
end

desc "Default: run tests"
task :default => :test

Rake::TestTask.new do |t|
  t.test_files = FileList["test/test_*.rb"]
end
@@ -0,0 +1,6 @@
1
class Object #:nodoc:
  # Backport of Object#tap: yields the receiver to the block and then returns
  # the receiver itself, ignoring the block's result.
  #
  # Only defined when the running Ruby does not already provide +tap+, so a
  # native implementation is never overwritten.
  unless method_defined?(:tap)
    def tap
      yield self
      self
    end
  end
end
data/lib/undress.rb ADDED
@@ -0,0 +1,51 @@
1
+ require "hpricot"
2
+ require File.expand_path(File.dirname(__FILE__) + "/core_ext/object")
3
+ require File.expand_path(File.dirname(__FILE__) + "/undress/grammar")
4
+
5
# Entry point of the library: wrap an HTML document so it can be undressed.
#
# +html+ may be a string or an IO object. +options+ is an optional hash that
# is handed over untouched to Hpricot; check its
# documentation[http://code.whytheluckystiff.net/doc/hpricot] for details.
#
# Returns an Undress::Document built from both arguments.
def Undress(html, options = {})
  Undress::Document.new(html, options)
end
12
+
13
module Undress

  # Whitelist of tag names. While the list is empty every tag is accepted;
  # once it has entries, a processed node whose name is not listed is dropped.
  ALLOWED_TAGS = []

  class Document #:nodoc:
    class << self
      # Defines the <tt>to_<name></tt> converter, which runs +grammar+ over
      # the parsed document.
      def add_markup(name, grammar)
        define_method("to_#{name}") { grammar.process!(@doc) }
      end
    end

    # Parses +html+ with Hpricot, forwarding +options+ as given.
    def initialize(html, options)
      @doc = Hpricot(html, options)
    end
  end

  # Register a markup language. The name becomes the method used to convert
  # HTML to that language: registering +:textile+ gives you
  # <tt>Undress(code).to_textile</tt>, registering +:markdown+ would give you
  # <tt>Undress(code).to_markdown</tt>, and so on.
  def self.add_markup(name, grammar)
    Document.add_markup(name, grammar)
  end

  module ::Hpricot #:nodoc:
    class Elem #:nodoc:
      # Collects the chain of enclosing nodes, nearest first. A node is only
      # included while it has a parent of its own, so the outermost node of
      # the chain is left out.
      def ancestors
        chain = Elements[]
        current = parent
        loop do
          break unless current.respond_to?(:parent) && current.parent
          chain << current
          current = current.parent
        end
        chain
      end
    end
  end
end
@@ -0,0 +1,127 @@
1
module Undress
  # Grammars give you a DSL to declare how to convert an HTML document into a
  # different markup language.
  class Grammar
    # Hands every subclass its own copy of the pre/post processing rules
    # declared so far. Copying (instead of sharing the hash) keeps rules added
    # by a subclass from leaking back into its parent and siblings.
    def self.inherited(base) # :nodoc:
      super
      base.instance_variable_set(:@post_processing_rules, post_processing_rules.dup)
      base.instance_variable_set(:@pre_processing_rules, pre_processing_rules.dup)
    end

    # Add a parsing rule for a group of html tags.
    #
    #     rule_for :p do |element|
    #       "<this was a paragraph>#{content_of(element)}</this was a paragraph>"
    #     end
    #
    # will replace your <tt><p></tt> tags for <tt><this was a paragraph></tt>
    # tags, without altering the contents.
    #
    # The element yielded to the block is an Hpricot element for the given tag.
    # The block becomes an instance method named after the tag, so helpers such
    # as +content_of+ are available inside it.
    def self.rule_for(*tags, &handler) # :yields: element
      tags.each do |tag|
        define_method tag.to_sym, &handler
      end
    end

    # Set a default rule for unrecognized tags.
    #
    # Unless you define a special case, it will ignore the tags and just output
    # the contents of unrecognized tags.
    #
    # Note that the handler is invoked with +call+, i.e. it runs in the context
    # where the block was written, not as an instance method of the grammar.
    def self.default(&handler) # :yields: element
      define_method :method_missing do |tag, node, *args|
        handler.call(node)
      end
    end

    # Add a post-processing rule to your parser.
    #
    # This takes a regular expression that will be applied to the output after
    # processing any nodes. It can take a string as a replacement, or a block
    # that will be passed to String#gsub.
    #
    #     post_processing(/\n\n+/, "\n\n") # compress more than two newlines
    #     post_processing(/whatever/) { ... }
    def self.post_processing(regexp, replacement = nil, &handler) #:yields: matched_string
      post_processing_rules[regexp] = replacement || handler
    end

    # Add a pre-processing rule to your parser.
    #
    # This lets you mutate the DOM before applying any rule defined with
    # +rule_for+. You need to pass a CSS/XPath selector, and a block that
    # takes an Hpricot element to parse it.
    #
    #     pre_processing "ul.toc" do |element|
    #       element.swap("<p>[[toc]]</p>")
    #     end
    #
    # Would replace any unordered lists with the class +toc+ for a
    # paragraph containing the code <tt>[[toc]]</tt>.
    def self.pre_processing(selector, &handler) # :yields: element
      pre_processing_rules[selector] = handler
    end

    def self.post_processing_rules #:nodoc:
      @post_processing_rules ||= {}
    end

    def self.pre_processing_rules #:nodoc:
      @pre_processing_rules ||= {}
    end

    def self.process!(node) #:nodoc:
      new.process!(node)
    end

    attr_reader :pre_processing_rules #:nodoc:
    attr_reader :post_processing_rules #:nodoc:

    # Each instance works on its own copy of the class level rules.
    def initialize #:nodoc:
      @pre_processing_rules = self.class.pre_processing_rules.dup
      @post_processing_rules = self.class.post_processing_rules.dup
    end

    # Process a DOM node, converting it to your markup language according to
    # your defined rules. If the node is a Text node, it will return its
    # string representation. Otherwise it will call the rule defined for it.
    # Nodes that are neither text nor elements contribute nothing.
    def process(nodes)
      Array(nodes).map do |node|
        if node.text?
          node.to_html
        elsif node.elem?
          process_element(node)
        else
          ""
        end
      end.join("")
    end

    # Runs the whole conversion: first every pre-processing rule is applied to
    # the nodes matching its selector, then the children of +node+ are
    # processed, and finally every post-processing rule is applied (in place)
    # to the resulting text, which is returned.
    def process!(node) #:nodoc:
      pre_processing_rules.each do |selector, handler|
        node.search(selector).each(&handler)
      end

      text = process(node.children)
      post_processing_rules.each do |rule, handler|
        handler.is_a?(String) ? text.gsub!(rule, handler) : text.gsub!(rule, &handler)
      end
      text
    end

    # Get the result of parsing the contents of a node.
    def content_of(node)
      process(node.respond_to?(:children) ? node.children : node)
    end

    # Helper method that tells you if the given DOM node is immediately
    # surrounded by whitespace. A missing sibling (first or last child) simply
    # counts as "no whitespace on that side".
    def surrounded_by_whitespace?(node)
      before, after = node.previous, node.next
      (before && before.text? && before.to_s =~ /\s+$/) ||
        (after && after.text? && after.to_s =~ /^\s+/)
    end

    # Default rule: tags without a rule of their own are dropped and only
    # their contents are rendered.
    def method_missing(tag, node, *args) #:nodoc:
      process(node.children)
    end

    private

    # Renders one element: nothing when the tag is not whitelisted, the
    # tag's own rule when there is one, the default rule otherwise.
    def process_element(node)
      return "" unless ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)

      tag = node.name.to_sym
      rule_defined?(tag) ? send(tag, node) : method_missing(tag, node)
    end

    # A tag has a rule when a public method with its name exists on this
    # grammar but not on the base Grammar class. This keeps tag names found in
    # the document (<select>, <p>, <process>, ...) from invoking Kernel, Object
    # or Grammar methods that merely share the name.
    def rule_defined?(tag)
      self.class.public_method_defined?(tag) && !Grammar.public_method_defined?(tag)
    end
  end
end
@@ -0,0 +1,138 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/textile")
2
+
3
module Undress
  # Grammar that converts HTML back into GreenCloth, building on the Textile
  # rules and overriding the ones where GreenCloth syntax differs.
  class GreenCloth < Textile

    # Tags GreenCloth knows how to handle; everything else is dropped.
    # The shared list is filled in place (instead of reassigning the constant,
    # which triggers an "already initialized constant" warning).
    # NOTE(review): the list lives in Undress, so loading this file also
    # restricts the tags seen by every other grammar — confirm that is intended.
    Undress::ALLOWED_TAGS.replace(%w(
      div a img br i u b pre kbd code cite strong em
      ins sup sub del table tr td th ol ul li p span
      h1 h2 h3 h4 h5 h6 notextile blockquote object embed
      param acronym dd dl dt
    ))

    # table of contents
    pre_processing("ul.toc") do |toc|
      toc.swap "[[toc]]"
    end

    # headings
    rule_for(:h1, :h2, :h3, :h4, :h5, :h6) {|e| process_headings(e) }

    # inline elements
    rule_for(:a) {|e|
      "#{process_links_and_anchors(e)}"
    }

    # lists
    # Builds the bullet prefix by walking up from the item: one "*" per
    # enclosing <ul> and one "#" per enclosing <ol>, outermost first. The walk
    # stops at the first list that is not itself nested inside a list item of
    # another list.
    rule_for(:li) {|e|
      offset = ""
      item = e
      while item && (list = item.parent) && list.respond_to?(:name)
        case list.name
        when "ul" then offset = "*#{offset}"
        when "ol" then offset = "##{offset}"
        else break # left the list structure; keep what we have and render the item
        end
        item = list.parent
      end
      "\n#{offset} #{content_of(e)}"
    }

    # text formatting
    rule_for(:pre) {|e|
      if e.children.all? {|n| n.text? && n.content =~ /^\s+$/ || n.elem? && n.name == "code" }
        "\n\n<pre><code>#{content_of(e % "code")}</code></pre>"
      else
        "\n\n<pre>#{content_of(e)}</pre>"
      end
    }

    # Multi-line code keeps its HTML tags (wrapped in <pre> when it is not
    # already inside one); single-line code uses the @...@ syntax.
    rule_for(:code) {|e|
      if e.inner_html.match(/\n/)
        if e.parent && e.parent.name != "pre"
          "<pre><code>#{content_of(e)}</code></pre>"
        else
          "<code>#{content_of(e)}</code>"
        end
      else
        "@#{content_of(e)}@"
      end
    }

    # passing through objects
    rule_for(:embed, :object, :param) {|e|
      e.to_html
    }

    # Renders a heading. Before rendering, every named child node is removed
    # unless it is an <a> that either has no href or whose href starts with "/"
    # or contains a http(s)/(s)ftp URL. h1 and h2 are underlined with "=" and
    # "-" respectively; the other levels use the "hN. " prefix.
    def process_headings(h)
      # Iterate over a snapshot so that removing children cannot disturb the walk.
      Array(h.children).dup.each {|e|
        next if e.text? || !e.respond_to?(:name)
        e.parent.replace_child(e, "") if e.name != "a" || e.has_attribute?("href") && e["href"] !~ /^\/|(https?|s?ftp):\/\//
      }
      case h.name
        when "h1"
          "#{content_of(h)}\n#{'=' * h.inner_text.size}\n\n"
        when "h2"
          "#{content_of(h)}\n#{'-' * h.inner_text.size}\n\n"
        else
          "#{h.name}. #{content_of(h)}\n\n"
      end
    end

    # Renders an <a> element either as an anchor definition (it has a +name+
    # and is not a direct child of a heading) or as a link (it has a non-empty
    # +href+). Empty elements and elements with neither render as "".
    def process_links_and_anchors(e)
      return "" if e.empty?
      inner, name, href = e.inner_html, e.get_attribute("name"), e.get_attribute("href")

      # is an anchor? and cannot be child of any h1..h6
      if name && e.parent.name !~ /\Ah[1-6]\z/
        inner == name || inner == name.tr("-", " ") ? "[# #{inner} #]" : "[# #{inner} -> #{name} #]"
      # is a link?
      elsif href && href != ""
        case href
          when /^\/#/
            "[\"#{inner}\":#{href}]"
          when /^#/
            "[#{inner} -> #{href}]"
          when /^(https?|s?ftp):\/\//
            href.gsub(/^(https?|s?ftp):\/\//, "") == inner ? "[#{href}]" : "[#{inner} -> #{href}]"
          when /^[^\/]/
            "[#{e.inner_text}]"
          when /^\/.[^\/]*\/.[^\/]*\//
            "[#{inner} -> #{href}]"
          when /(?:\/page\/\+)[0-9]+$/
            # link by numeric page id: keep only the trailing number
            "[#{inner} -> +#{href[/[0-9]+$/]}]"
          else
            process_as_wiki_link(e)
        end
      else
        ""
      end
    end

    # Renders a link whose href looks like "/context/page" or "/page" using
    # the wiki link syntax, humanizing dashes in the page name.
    def process_as_wiki_link(e)
      inner, href = e.inner_html, e.get_attribute("href")

      # pages or group pages
      context_name, page_name = href.split("/")[1..2]
      # an href without any path segment (e.g. "/") has nothing to humanize
      return "[#{inner} -> #{href}]" if context_name.nil?
      page_name = context_name if page_name.nil?
      wiki_page_name = page_name.gsub(/[a-z-]*[^\/]$/m) {|m| m.tr('-',' ')}

      # simple page
      if context_name == "page"
        return "[#{inner}]" if wiki_page_name == inner
        return "[#{inner} -> #{wiki_page_name}]"
      end
      # group page
      if context_name != page_name
        return "[#{context_name} / #{wiki_page_name}]" if wiki_page_name == inner
        return "[#{inner} -> #{context_name} / #{wiki_page_name}]"
      end
      if inner == page_name || inner == wiki_page_name || inner == wiki_page_name.gsub(/\s/,"-")
        return "[#{wiki_page_name}]"
      end
      # fall back
      "[#{inner} -> #{href}]"
    end

  end
  add_markup :greencloth, GreenCloth
end
@@ -0,0 +1,104 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/../undress")
2
+
3
module Undress
  # Grammar that converts HTML back into Textile.
  class Textile < Grammar

    # Tag names that open a list. This must be an Array: with a plain string
    # such as %(ul ol), include? is a substring test and would also accept
    # names like "u" or "l".
    LIST_TAGS = %w(ul ol)

    # delete tabs and newlines from inside elements
    pre_processing("*") do |e|
      if e.elem? && e.parent.doc? && e.inner_html != "" && e.name != "pre"
        e.inner_html = e.inner_html.gsub(/\n|\t/,"")
      end
    end

    # whitespace handling
    post_processing(/\n\n+/, "\n\n")
    post_processing(/\A\s+/, "")
    post_processing(/\s+\z/, "\n")

    # special characters introduced by textile
    post_processing(/&#8230;/, "...")
    post_processing(/&#8217;/, "'")
    post_processing(/&#822[01];/, '"')
    post_processing(/&#8212;/, "--")
    post_processing(/&#8211;/, "-")
    post_processing(/(\d+\s*)&#215;(\s*\d+)/, '\1x\2')
    post_processing(/&#174;/, "(r)")
    post_processing(/&#169;/, "(c)")
    post_processing(/&#8482;/, "(tm)")

    # inline elements
    rule_for(:a) {|e|
      title = e.has_attribute?("title") ? " (#{e["title"]})" : ""
      "[#{content_of(e)}#{title}:#{e["href"]}]"
    }
    rule_for(:img) {|e|
      alt = e.has_attribute?("alt") ? "(#{e["alt"]})" : ""
      "!#{e["src"]}#{alt}!"
    }
    rule_for(:strong)  {|e| "*#{content_of(e)}*" }
    rule_for(:em)      {|e| "_#{content_of(e)}_" }
    rule_for(:code)    {|e| "@#{content_of(e)}@" }
    rule_for(:cite)    {|e| "??#{content_of(e)}??" }
    # sup/sub need the bracketed form when they touch the surrounding text
    rule_for(:sup)     {|e| surrounded_by_whitespace?(e) ? "^#{content_of(e)}^" : "[^#{content_of(e)}^]" }
    rule_for(:sub)     {|e| surrounded_by_whitespace?(e) ? "~#{content_of(e)}~" : "[~#{content_of(e)}~]" }
    rule_for(:ins)     {|e| "+#{content_of(e)}+" }
    rule_for(:del)     {|e| "-#{content_of(e)}-" }
    rule_for(:acronym) {|e| e.has_attribute?("title") ? "#{content_of(e)}(#{e["title"]})" : content_of(e) }

    # text formatting and layout
    rule_for(:p)          {|e| "\n\n#{content_of(e)}\n\n" }
    rule_for(:br)         {|e| "\n" }
    rule_for(:blockquote) {|e| "\n\nbq. #{content_of(e)}\n\n" }
    # a <pre> holding nothing but whitespace and <code> becomes a "pc." block
    rule_for(:pre)        {|e|
      if e.children.all? {|n| n.text? && n.content =~ /^\s+$/ || n.elem? && n.name == "code" }
        "\n\npc. #{content_of(e % "code")}\n\n"
      else
        "<pre>#{content_of(e)}</pre>"
      end
    }

    # headings
    rule_for(:h1) {|e| "\n\nh1. #{content_of(e)}\n\n" }
    rule_for(:h2) {|e| "\n\nh2. #{content_of(e)}\n\n" }
    rule_for(:h3) {|e| "\n\nh3. #{content_of(e)}\n\n" }
    rule_for(:h4) {|e| "\n\nh4. #{content_of(e)}\n\n" }
    rule_for(:h5) {|e| "\n\nh5. #{content_of(e)}\n\n" }
    rule_for(:h6) {|e| "\n\nh6. #{content_of(e)}\n\n" }

    # lists
    # The marker comes from the direct parent ("*" for <ul>, "#" otherwise)
    # and is repeated once per enclosing list, with a minimum of one.
    rule_for(:li) {|e|
      token = e.parent.name == "ul" ? "*" : "#"
      depth = e.ancestors.select {|node| LIST_TAGS.include?(node.name) }.size
      nesting = depth < 1 ? 1 : depth
      "\n#{token * nesting} #{content_of(e)}"
    }
    # only the outermost list is padded with blank lines
    rule_for(:ul, :ol) {|e|
      if e.ancestors.detect {|node| LIST_TAGS.include?(node.name) }
        content_of(e)
      else
        "\n#{content_of(e)}\n\n"
      end
    }

    # definition lists
    rule_for(:dl) {|e| "\n\n#{content_of(e)}\n" }
    rule_for(:dt) {|e| "- #{content_of(e)} " }
    rule_for(:dd) {|e| ":= #{content_of(e)} =:\n" }

    # tables
    rule_for(:table)   {|e| "\n\n#{content_of(e)}\n" }
    rule_for(:tr)      {|e| "#{content_of(e)}|\n" }
    # header cells win over colspan, which wins over rowspan; a plain cell
    # gets no prefix at all
    rule_for(:td, :th) {|e|
      prefix = if e.name == "th"
                 "_. "
               elsif e.has_attribute?("colspan")
                 "\\#{e["colspan"]}. "
               elsif e.has_attribute?("rowspan")
                 "/#{e["rowspan"]}. "
               end

      "|#{prefix}#{content_of(e)}"
    }
  end

  add_markup :textile, Textile
end
@@ -0,0 +1,55 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/test_helper")
2
+
3
+ module Undress
4
+ class TestGrammar < Test::Unit::TestCase
5
+ class Parent < Grammar
6
+ rule_for(:p) {|e| "<this is a paragraph>#{content_of(e)}</this is a paragraph>" }
7
+ end
8
+
9
+ class WithPreProcessingRules < Parent
10
+ pre_processing("p.foo") {|e| e.swap("<div>Cuack</div>") }
11
+ rule_for(:div) {|e| "<this was a div>#{content_of(e)}</this was a div>" }
12
+ end
13
+
14
+ class Child < Parent; end
15
+
16
+ class OverWriter < WithPreProcessingRules
17
+ rule_for(:div) {|e| content_of(e) }
18
+ end
19
+
20
+ class TextileExtension < Textile
21
+ rule_for(:a) {|e| "" }
22
+ end
23
+
24
+ def parse_with(grammar, html)
25
+ grammar.process!(Hpricot(html))
26
+ end
27
+
28
+ context "extending a grammar" do
29
+ test "the extended grammar should inherit the rules of the parent" do
30
+ output = parse_with Child, "<p>Foo Bar</p>"
31
+ assert_equal "<this is a paragraph>Foo Bar</this is a paragraph>", output
32
+ end
33
+
34
+ test "extending a grammar doesn't overwrite the parent's rules" do
35
+ output = parse_with OverWriter, "<div>Foo</div>"
36
+ assert_equal "Foo", output
37
+
38
+ output = parse_with WithPreProcessingRules, "<div>Foo</div>"
39
+ assert_equal "<this was a div>Foo</this was a div>", output
40
+ end
41
+
42
+ test "extending textile doesn't blow up" do
43
+ output = parse_with TextileExtension, "<p><a href='/'>Cuack</a></p><p>Foo Bar</p><p>I <a href='/'>work</a></p>"
44
+ assert_equal "Foo Bar\n\nI\n", output
45
+ end
46
+ end
47
+
48
+ context "pre processing rules" do
49
+ test "mutate the DOM before parsing the tags" do
50
+ output = parse_with WithPreProcessingRules, "<p class='foo'>Blah</p><p>O hai</p>"
51
+ assert_equal "<this was a div>Cuack</this was a div><this is a paragraph>O hai</this is a paragraph>", output
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,276 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/test_helper")
2
+
3
+ class Undress::GreenClothTest < Test::Unit::TestCase
4
+ def assert_renders_greencloth(greencloth, html)
5
+ assert_equal greencloth, Undress(html, :xhtml_strict => true).to_greencloth
6
+ end
7
+
8
+ # unallowed tags
9
+ context "remove unallowed tags" do
10
+ test "remove a head tag" do
11
+ html = "<html><head><title>Title</title></head>"
12
+ greencloth = ""
13
+ assert_renders_greencloth greencloth, html
14
+ end
15
+
16
+ test "remove a script tag" do
17
+ html = "<div>Some script inside a<script type='text/javascript'>window.alert('alert')</script> paragraph</div>"
18
+ greencloth = "Some script inside a paragraph"
19
+ assert_renders_greencloth greencloth, html
20
+ end
21
+ end
22
+
23
+ # code
24
+ context "converting code tags" do
25
+ test "a code inside a paragraph" do
26
+ html = "<p>do you like my <code>function</code>?</p>"
27
+ greencloth = "do you like my @function@?\n"
28
+ assert_renders_greencloth greencloth, html
29
+ end
30
+
31
+ test "code tag inside pre tag" do
32
+ html = "<pre><code>def say_hi\n\tputs 'hi'\nend</code></pre>"
33
+ greencloth = "<pre><code>def say_hi\n\tputs 'hi'\nend</code></pre>"
34
+ assert_renders_greencloth greencloth, html
35
+ end
36
+
37
+ test "code inside list items" do
38
+ html = "<ul><li><code>foo</code></li><li><code>bar</code></li><li>and <code>x</code> is also.</li></ul>"
39
+ greencloth = "* @foo@\n* @bar@\n* and @x@ is also.\n"
40
+ assert_renders_greencloth greencloth, html
41
+ end
42
+
43
+ test "code tag not inside a pre and without new lines inside" do
44
+ html = "<code>some code inside</code>"
45
+ greencloth = "@some code inside@"
46
+ assert_renders_greencloth greencloth, html
47
+ end
48
+ end
49
+
50
+ # embed and object
51
+ # the elements pass through, but the order of their attributes changes
52
+ context "embed and object" do
53
+ test "embed" do
54
+ html = "<p>do you like my embedded blip.tv <embed src='http://blip.tv/play/Ac3GfI+2HA' allowfullscreen='true' type='application/x-shockwave-flash' allowscriptaccess='always' height='510' width='720' />?</p>"
55
+ greencloth = "do you like my embedded blip.tv <embed src=\"http://blip.tv/play/Ac3GfI+2HA\" allowfullscreen=\"true\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" height=\"510\" width=\"720\" />?\n"
56
+ assert_renders_greencloth greencloth, html
57
+ end
58
+
59
+ test "object" do
60
+ html = "<p>do you like my embedded youtube <object width='425' height='344'><param name='movie' value='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' /><param name='allowFullScreen' value='true' /><embed src='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' type='application/x-shockwave-flash' width='425' height='344' allowfullscreen='true' /></object>?</p>"
61
+ greencloth = "do you like my embedded youtube <object height=\"344\" width=\"425\"><param name=\"movie\" value=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" /><param name=\"allowFullScreen\" value=\"true\" /><embed src=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" allowfullscreen=\"true\" type=\"application/x-shockwave-flash\" height=\"344\" width=\"425\" /></object>?\n"
62
+ assert_renders_greencloth greencloth, html
63
+ end
64
+ end
65
+
66
+ # outline
67
+ # don't allow links to anchors or anchor definitions inside hx; the greencloth -> html
68
+ # conversion takes care of them, so we only allow plain links inside hx elements for now
69
+ context "outline" do
70
+ test "table of contents toc" do
71
+ html = "<ul class='toc'><li class='toc1'><a href='#fruits'><span>1</span> Fruits</a></li><ul><li class='toc2'><a href='#tasty-apples'><span>1.1</span> Tasty Apples</a></li><ul><li class='toc3'><a href='green'><span>1.1.1</span> Green</a></li><li class='toc3'><a href='#red'><span>1.1.2</span> Red</a></li></ul>"
72
+ greencloth = "[[toc]]"
73
+ assert_renders_greencloth greencloth, html
74
+ end
75
+
76
+ test "headings with links, anchors and links to anchors" do
77
+ html = "<h1 class='first'><a name='russian-anarchists'></a>Russian Anarchists<a class='anchor' href='#russian-anarchists'>&para;</a></h1><h2><a name='michel-bakunin'></a>Michel <a href='http://en.wikipedia.org/wiki/Mikhail_Bakunin'>Bakunin</a><a class='anchor' href='#michel-bakunin'>&para;</a></h2><h2><a name='peter-kropotkin'></a><a href='http://en.wikipedia.org/wiki/Peter_Kropotkin'>Peter</a> Kropotkin<a class='anchor' href='#peter-kropotkin'>&para;</a></h2><h1><a name='russian-american-anarchists'></a>Russian-American Anarchists<a class='anchor' href='#russian-american-anarchists'>&para;</a></h1><h2><a name='emma-goldman'></a><a href='http://en.wikipedia.org/wiki/Emma_Goldman'>Emma Goldman</a><a class='anchor' href='#emma-goldman'>&para;</a></h2><h2><a name='alexander-berkman'></a>Alexander <a href='http://en.wikipedia.org/wiki/Alexander_Berkman'>Berkman</a><a class='anchor' href='#alexander-berkman'>&para;</a></h2>"
78
+ greencloth = "Russian Anarchists\n==================\n\nMichel [Bakunin -> http://en.wikipedia.org/wiki/Mikhail_Bakunin]\n--------------\n\n[Peter -> http://en.wikipedia.org/wiki/Peter_Kropotkin] Kropotkin\n---------------\n\nRussian-American Anarchists\n===========================\n\n[Emma Goldman -> http://en.wikipedia.org/wiki/Emma_Goldman]\n------------\n\nAlexander [Berkman -> http://en.wikipedia.org/wiki/Alexander_Berkman]\n-----------------\n"
79
+ assert_renders_greencloth greencloth, html
80
+ end
81
+
82
+ test "double trouble" do
83
+ html = "<h1 class='first'><a name='title'></a>Title<a class='anchor' href='#title'>&para;</a></h1><h3><a name='under-first'></a>Under first<a class='anchor' href='#under-first'>&para;</a></h3><h1><a name='title_2'></a>Title<a class='anchor' href='#title_2'>&para;</a></h1><h3><a name='under-second'></a>Under second<a class='anchor' href='#under-second'>&para;</a></h3>"
84
+ greencloth = "Title\n=====\n\nh3. Under first\n\nTitle\n=====\n\nh3. Under second\n"
85
+ assert_renders_greencloth greencloth, html
86
+ end
87
+ end
88
+
89
+ # basics
90
+ context "basics" do
91
+ test "headers" do
92
+ html = "<h1 class='first'>header one</h1>\n<h2>header two</h2>"
93
+ greencloth = "header one\n==========\n\nheader two\n----------\n"
94
+ assert_renders_greencloth greencloth, html
95
+ end
96
+
97
+ test "headers with paragraph" do
98
+ html = "<p>la la la</p>\n<h1 class='first'>header one</h1>\n<h2>header two</h2>\n<p>la la la</p>"
99
+ greencloth = "la la la\n\nheader one\n==========\n\nheader two\n----------\n\nla la la\n"
100
+ assert_renders_greencloth greencloth, html
101
+ end
102
+ end
103
+
104
+ # sections
105
+ # we always render h1 with ==== and h2 with ----
106
+ context "Convert sections" do
107
+ test "one section no heading" do
108
+ html = "<div class='wiki_section' id='wiki_section-0'><p>start unheaded section</p><p>line line line</p></div>"
109
+ greencloth = "start unheaded section\n\nline line line\n"
110
+ assert_renders_greencloth greencloth, html
111
+ end
112
+
113
+ test "one section with heading" do
114
+ html = "<div class='wiki_section' id='wiki_section-0'><h2 class='first'>are you ready?!!?</h2><p>here we go now!</p></div>"
115
+ greencloth = "are you ready?!!?\n-----------------\n\nhere we go now!\n"
116
+ assert_renders_greencloth greencloth, html
117
+ end
118
+
119
+ test "all headings" do
120
+ html = "<h1>First</h1><h2>Second</h2><h3>Tres</h3><h4>Cuatro</h4><h5>Five</h5><h6>Six</h6>"
121
+ greencloth = "First\n=====\n\nSecond\n------\n\nh3. Tres\n\nh4. Cuatro\n\nh5. Five\n\nh6. Six\n"
122
+ assert_renders_greencloth greencloth, html
123
+ end
124
+
125
+ test "multiple sections with text" do
126
+ html = "<div class='wiki_section' id='wiki_section-0'><h2 class='first'>Section One</h2><p>section one line one is here<br />section one line two is next</p><p>Here is section one still</p></div><div class='wiki_section' id='wiki_section-1'><h1>Section Two</h1><p>Section two first line<br />Section two another line</p></div><div class='wiki_section' id='wiki_section-2'><h2>Section 3 with h2</h2><p>One more line for section 3</p></div><div class='wiki_section' id='wiki_section-3'><h3>final section 4</h3><p>section 4 first non-blank line</p>\n</div>"
127
+ greencloth = "Section One\n-----------\n\nsection one line one is here\nsection one line two is next\n\nHere is section one still\n\nSection Two\n===========\n\nSection two first line\nSection two another line\n\nSection 3 with h2\n-----------------\n\nOne more line for section 3\n\nh3. final section 4\n\nsection 4 first non-blank line\n"
128
+ assert_renders_greencloth greencloth, html
129
+ end
130
+ end
131
+
132
+ # lists
133
+ # TODO: start attribute not implemented
134
+ context "Converting html lists to greencloth" do
135
+ test "hard break in list" do
136
+ html = "<ul>\n\t<li>first line</li>\n\t<li>second<br />\n\tline</li>\n\t<li>third line</li>\n</ul>\n"
137
+ greencloth = "* first line\n* second\nline\n* third line\n"
138
+ assert_renders_greencloth greencloth, html
139
+ end
140
+
141
+ test "mixed nesting" do
142
+ html = "<ul><li>bullet\n<ol>\n<li>number</li>\n<li>number\n<ul>\n\t<li>bullet</li>\n</ul></li>\n<li>number</li>\n<li>number with<br />a break</li>\n</ol></li>\n<li>bullet\n<ul><li>okay</li></ul></li></ul>"
143
+ greencloth = "* bullet\n*# number\n*# number\n*#* bullet\n*# number\n*# number with\na break\n* bullet\n** okay\n"
144
+ assert_renders_greencloth greencloth, html
145
+ end
146
+
147
+ test "list continuation" do # uses start
148
+ html = "<ol><li>one</li><li>two</li><li>three</li></ol><ol><li>one</li><li>two</li><li>three</li></ol><ol start='4'><li>four</li><li>five</li><li>six</li></ol>"
149
+ greencloth = "# one\n# two\n# three\n\n# one\n# two\n# three\n\n# four\n# five\n# six\n"
150
+ assert_renders_greencloth greencloth, html
151
+ end
152
+
153
+ test "continue after break" do # uses start
154
+ html = "<ol><li>one</li><li>two</li><li>three</li></ol><p>test</p><ol><li>one</li><li>two</li><li>three</li></ol><p>test</p><ol start='4'><li>four</li><li>five</li><li>six</li></ol>"
155
+ greencloth = "# one\n# two\n# three\n\ntest\n\n# one\n# two\n# three\n\ntest\n\n# four\n# five\n# six\n"
156
+ assert_renders_greencloth greencloth, html
157
+ end
158
+
159
+ test "continue list when prior list contained nested list" do # uses start
160
+ greencloth = "# one\n# two\n# three\n\n# four\n# five\n## sub-note\n## another sub-note\n# six\n\n# seven\n# eight\n# nine\n"
161
+ html = "<ol><li>one</li><li>two</li><li>three</li></ol><ol start='4'><li>four</li><li>five<ol><li>sub-note</li><li>another sub-note</li></ol></li><li>six</li></ol><ol start='7'><li>seven</li><li>eight</li><li>nine</li></ol>"
162
+ assert_renders_greencloth greencloth, html
163
+ end
164
+
165
+ test "" do
166
+
167
+ end
168
+ end
169
+
170
+ # links
171
+ context "Converting html links to greencloth" do
172
+ test "convert a link to a wiki page inside a paragraph" do
173
+ html = "<p>this is a <a href='/page/plain-link'>plain link</a> in some text</p>"
174
+ greencloth = "this is a [plain link] in some text\n"
175
+ assert_renders_greencloth greencloth, html
176
+ end
177
+
178
+ test "convert a link to a wiki page with namespace" do
179
+ html= "<p>this is a <a href='/namespaced/link'>link</a> in some text</p>"
180
+ greencloth = "this is a [namespaced / link] in some text\n"
181
+ assert_renders_greencloth greencloth, html
182
+ end
183
+
184
+ test "convert a link to a wiki page" do
185
+ html= "<p>this is a <a href='/page/something-else'>link to</a> in some text</p>"
186
+ greencloth = "this is a [link to -> something else] in some text\n"
187
+ assert_renders_greencloth greencloth, html
188
+ end
189
+
190
+ test "convert a link to a wiki page with namespace and text different than link dest" do
191
+ html= "<p>this is a <a href='/namespace/something-else'>link to</a> in some text</p>"
192
+ greencloth = "this is a [link to -> namespace / something else] in some text\n"
193
+ assert_renders_greencloth greencloth, html
194
+ end
195
+
196
+ test "convert a link to an absolute path" do
197
+ html = "<p>this is a <a href='/an/absolute/path'>link to</a> in some text</p>"
198
+ greencloth = "this is a [link to -> /an/absolute/path] in some text\n"
199
+ assert_renders_greencloth greencloth, html
200
+ end
201
+
202
+ test "convert a link to an external domain" do
203
+ html = "<p>this is a <a href='https://riseup.net'>link to</a> a url</p>"
204
+ greencloth = "this is a [link to -> https://riseup.net] a url\n"
205
+ assert_renders_greencloth greencloth, html
206
+ end
207
+
208
+ test "a link to an external domain with the same text as dest" do
209
+ html = "<p>url in brackets <a href='https://riseup.net/'>riseup.net</a></p>"
210
+ greencloth = "url in brackets [riseup.net -> https://riseup.net/]\n"
211
+ assert_renders_greencloth greencloth, html
212
+ end
213
+
214
+ test "a link to a wiki page with the same name as dest" do
215
+ html = "<p>a <a href='/page/name-link'>name link</a> in need of humanizing</p>"
216
+ greencloth = "a [name link] in need of humanizing\n"
217
+ assert_renders_greencloth greencloth, html
218
+ end
219
+
220
+ test "link to a user blue" do
221
+ html = "<p>link to a user <a href='/blue'>blue</a></p>"
222
+ greencloth = "link to a user [blue]\n"
223
+ assert_renders_greencloth greencloth, html
224
+ end
225
+
226
+ test "link with dashes should keep the dashes" do
227
+ html = "<p><a href='/-dashes/in/the/link-'>link to</a></p>"
228
+ greencloth = "[link to -> /-dashes/in/the/link-]\n"
229
+ assert_renders_greencloth greencloth, html
230
+ end
231
+
232
+ test "link with underscores should keep the underscores" do
233
+ html = "<p>links <a href='/page/with_underscores'>with_underscores</a> should keep underscore</p>"
234
+ greencloth = "links [with_underscores] should keep underscore\n"
235
+ assert_renders_greencloth greencloth, html
236
+ end
237
+
238
+ test "a link inside a li element" do
239
+ html ="<ul>\n<li>\n\t\t\n<a href='/page/this'>link to</a></li></ul>"
240
+ greencloth = "* [link to -> this]\n"
241
+ assert_renders_greencloth greencloth, html
242
+ end
243
+
244
+ test "an external link inside a li element" do
245
+ html = "<ul>\n<li><a href='https://riseup.net/'>riseup.net</a></li>\n</ul>"
246
+ greencloth = "* [riseup.net -> https://riseup.net/]\n"
247
+ assert_renders_greencloth greencloth, html
248
+ end
249
+
250
+ test "many anchors inside a paragraph" do
251
+ html = "<p>make anchors <a name='here'>here</a> or <a name='maybe-here'>maybe here</a> or <a name='there'>over</a></p>"
252
+ greencloth = "make anchors [# here #] or [# maybe here #] or [# over -> there #]\n"
253
+ assert_renders_greencloth greencloth, html
254
+ end
255
+
256
+ # TODO: this test differs from how cg supports writing anchors;
257
+ # only a reduced subset of that syntax is supported here
258
+ test "anchors and links" do
259
+ html = "<p>link to <a href='/page/anchors#like-so'>anchors</a> or <a href='/page/like#so'>maybe</a> or <a href='#so'>just</a> or <a href='#so'>so</a></p>"
260
+ greencloth = "link to [anchors -> anchors#like so] or [maybe -> like#so] or [just -> #so] or [so -> #so]\n"
261
+ assert_renders_greencloth greencloth, html
262
+ end
263
+
264
+ test "more anchors" do
265
+ html = "<p><a href='#5'>link</a> to a numeric anchor <a name='5'>5</a></p>"
266
+ greencloth = "[link -> #5] to a numeric anchor [# 5 #]\n"
267
+ assert_renders_greencloth greencloth, html
268
+ end
269
+
270
+ test "3 links without /" do
271
+ html = "<p><a href='some'>some</a> and <a href='other'>other</a> and <a href='one_more'>one_more</a></p>"
272
+ greencloth = "[some] and [other] and [one_more]\n"
273
+ assert_renders_greencloth greencloth, html
274
+ end
275
+ end
276
+ end
@@ -0,0 +1,11 @@
1
+ require "rubygems"
2
+ require "test/unit"
3
+ require "contest"
4
+ Dir[File.expand_path(File.dirname(__FILE__) + "/../lib/**/*.rb")].each do |file|
5
+ require file
6
+ end
7
+
8
+ begin
9
+ require "redgreen"
10
+ rescue LoadError
11
+ end
@@ -0,0 +1,198 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/test_helper")
2
+
3
+ module Undress
4
+ class TextileTest < Test::Unit::TestCase
5
+ def assert_renders_textile(textile, html)
6
+ assert_equal textile, Undress(html).to_textile
7
+ end
8
+
9
+ context "Converting HTML to textile" do
10
+ test "converts nested tags" do
11
+ assert_renders_textile "h2. _this is *very* important_\n", "<h2><em>this is <strong>very</strong> important</em></h2>"
12
+ end
13
+
14
+ context "inline elements" do
15
+ test "converts <strong> tags" do
16
+ assert_renders_textile "*foo bar*", "<strong>foo bar</strong>"
17
+ end
18
+
19
+ test "converts <em> tags" do
20
+ assert_renders_textile "_foo bar_", "<em>foo bar</em>"
21
+ end
22
+
23
+ test "converts <code> tags" do
24
+ assert_renders_textile "@foo bar@", "<code>foo bar</code>"
25
+ end
26
+
27
+ test "converts <cite> tags" do
28
+ assert_renders_textile "??foo bar??", "<cite>foo bar</cite>"
29
+ end
30
+
31
+ test "converts <sup> tags" do
32
+ assert_renders_textile "foo ^sup^ bar", "foo <sup>sup</sup> bar"
33
+ assert_renders_textile "foo[^sup^]bar", "foo<sup>sup</sup>bar"
34
+ end
35
+
36
+ test "converts <sub> tags" do
37
+ assert_renders_textile "foo ~sub~ bar", "foo <sub>sub</sub> bar"
38
+ assert_renders_textile "foo[~sub~]bar", "foo<sub>sub</sub>bar"
39
+ end
40
+
41
+ test "converts <ins> tags" do
42
+ assert_renders_textile "+foo bar+", "<ins>foo bar</ins>"
43
+ end
44
+
45
+ test "converts <del> tags" do
46
+ assert_renders_textile "-foo bar-", "<del>foo bar</del>"
47
+ end
48
+
49
+ test "converts <acronym> tags" do
50
+ assert_renders_textile "EPA(Environmental Protection Agency)", "<acronym title='Environmental Protection Agency'>EPA</acronym>"
51
+ assert_renders_textile "EPA", "<acronym>EPA</acronym>"
52
+ end
53
+ end
54
+
55
+ context "links" do
56
+ test "converts simple links (without title)" do
57
+ assert_renders_textile "[Foo Bar:/cuack]", "<a href='/cuack'>Foo Bar</a>"
58
+ end
59
+
60
+ test "converts links with titles" do
61
+ assert_renders_textile "[Foo Bar (You should see this):/cuack]", "<a href='/cuack' title='You should see this'>Foo Bar</a>"
62
+ end
63
+ end
64
+
65
+ context "images" do
66
+ test "converts images without alt attributes" do
67
+ assert_renders_textile "!http://example.com/image.png!", "<img src='http://example.com/image.png'/>"
68
+ end
69
+
70
+ test "converts images with alt attributes" do
71
+ assert_renders_textile "!http://example.com/image.png(Awesome Pic)!", "<img src='http://example.com/image.png' alt='Awesome Pic'/>"
72
+ end
73
+ end
74
+
75
+ context "text formatting" do
76
+ test "converts paragraphs" do
77
+ assert_renders_textile "foo\n\nbar\n", "<p>foo</p><p>bar</p>"
78
+ end
79
+
80
+ test "converts <pre> tags which only contain a <code> child" do
81
+ assert_renders_textile "pc. var foo = 1;\n", "<pre><code>var foo = 1;</code></pre>"
82
+ assert_renders_textile "pc. var foo = 1;\n", "<pre> <code>var foo = 1;</code> </pre>"
83
+ end
84
+
85
+ test "leaves <pre> tags which contain mixed content as HTML" do
86
+ assert_renders_textile "<pre> foo bar</pre>", "<pre> foo bar</pre>"
87
+ end
88
+
89
+ test "converts <br> into a new line" do
90
+ assert_renders_textile "Foo\nBar", "Foo<br/>Bar"
91
+ end
92
+
93
+ test "converts blockquotes" do
94
+ assert_renders_textile "bq. foo bar\n", "<blockquote><div>foo bar</div></blockquote>"
95
+ end
96
+ end
97
+
98
+ context "headers" do
99
+ test "converts <h1> tags" do
100
+ assert_renders_textile "h1. foo bar\n", "<h1>foo bar</h1>"
101
+ end
102
+
103
+ test "converts <h2> tags" do
104
+ assert_renders_textile "h2. foo bar\n", "<h2>foo bar</h2>"
105
+ end
106
+
107
+ test "converts <h3> tags" do
108
+ assert_renders_textile "h3. foo bar\n", "<h3>foo bar</h3>"
109
+ end
110
+
111
+ test "converts <h4> tags" do
112
+ assert_renders_textile "h4. foo bar\n", "<h4>foo bar</h4>"
113
+ end
114
+
115
+ test "converts <h5> tags" do
116
+ assert_renders_textile "h5. foo bar\n", "<h5>foo bar</h5>"
117
+ end
118
+
119
+ test "converts <h6> tags" do
120
+ assert_renders_textile "h6. foo bar\n", "<h6>foo bar</h6>"
121
+ end
122
+ end
123
+
124
+ context "lists" do
125
+ test "converts bullet lists" do
126
+ assert_renders_textile "* foo\n* bar\n", "<ul><li>foo</li><li>bar</li></ul>"
127
+ end
128
+
129
+ test "converts numbered lists" do
130
+ assert_renders_textile "# foo\n# bar\n", "<ol><li>foo</li><li>bar</li></ol>"
131
+ end
132
+
133
+ test "converts nested bullet lists" do
134
+ assert_renders_textile "* foo\n** bar\n* baz\n", "<ul><li>foo<ul><li>bar</li></ul></li><li>baz</li></ul>"
135
+ end
136
+
137
+ test "converts nested numbered lists" do
138
+ assert_renders_textile "# foo\n## bar\n# baz\n", "<ol><li>foo<ol><li>bar</li></ol></li><li>baz</li></ol>"
139
+ end
140
+
141
+ test "converts nested mixed lists" do
142
+ assert_renders_textile "* foo\n## bar\n## baz\n*** quux\n* cuack\n",
143
+ "<ul><li>foo<ol><li>bar</li><li>baz<ul><li>quux</li></ul></li></ol></li><li>cuack</li></ul>"
144
+ end
145
+
146
+ test "converts a definition list" do
147
+ assert_renders_textile "- foo := defining foo =:\n- bar := defining bar =:\n",
148
+ "<dl><dt>foo</dt><dd>defining foo</dd><dt>bar</dt><dd>defining bar</dd></dl>"
149
+ end
150
+ end
151
+
152
+ context "tables" do
153
+ test "converts a simple table" do
154
+ assert_renders_textile "|foo|bar|baz|\n|1|2|3|\n",
155
+ "<table><tr><td>foo</td><td>bar</td><td>baz</td></tr><tr><td>1</td><td>2</td><td>3</td></tr></table>"
156
+ end
157
+
158
+ test "converts a table with headers" do
159
+ assert_renders_textile "|_. foo|_. bar|_. baz|\n|1|2|3|\n",
160
+ "<table><tr><th>foo</th><th>bar</th><th>baz</th></tr><tr><td>1</td><td>2</td><td>3</td></tr></table>"
161
+ end
162
+
163
+ test "converts a table with cells that span multiple columns" do
164
+ assert_renders_textile "|foo|bar|baz|\n|\\2. 1|2|\n",
165
+ "<table><tr><td>foo</td><td>bar</td><td>baz</td></tr><tr><td colspan='2'>1</td><td>2</td></tr></table>"
166
+ end
167
+
168
+ test "converts a table with cells that span multiple rows" do
169
+ assert_renders_textile "|/2. foo|bar|baz|\n|1|2|\n",
170
+ "<table><tr><td rowspan='2'>foo</td><td>bar</td><td>baz</td></tr><tr><td>1</td><td>2</td></tr></table>"
171
+ end
172
+ end
173
+
174
+ context "applying post processing rules" do
175
+ test "compresses newlines to a maximum of two consecutive newlines" do
176
+ assert_renders_textile "Foo\n\nBar\n\nBaz\n\n* Quux 1\n* Quux 2\n", "<p>Foo</p><p>Bar</p><p>Baz</p><ul><li>Quux 1</li><li>Quux 2</li></p>"
177
+ end
178
+
179
+ test "strips trailing newlines from the start and end of the output string" do
180
+ assert_renders_textile "Foo\n", "<p>Foo</p>"
181
+ end
182
+
183
+ test "converts all fancy characters introduced by textile back into their 'source code'" do
184
+ assert_renders_textile "What the ... hell?", "What the &#8230; hell?"
185
+ assert_renders_textile "It's mine", "It&#8217;s mine"
186
+ assert_renders_textile "\"Fancy quoting\"", "&#8220;Fancy quoting&#8221;"
187
+ assert_renders_textile "How dashing--right?", "How dashing&#8212;right?"
188
+ assert_renders_textile "How dashing - right?", "How dashing &#8211; right?"
189
+ assert_renders_textile "2 x 2 = 4", "2 &#215; 2 = 4"
190
+ assert_renders_textile "2x2 = 4", "2&#215;2 = 4"
191
+ assert_renders_textile "Registered(r)", "Registered&#174;"
192
+ assert_renders_textile "Copyrighted(c)", "Copyrighted&#169;"
193
+ assert_renders_textile "Trademarked(tm)", "Trademarked&#8482;"
194
+ end
195
+ end
196
+ end
197
+ end
198
+ end
data/undress.gemspec ADDED
@@ -0,0 +1,42 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = "undress"
3
+ s.version = "0.1"
4
+ s.date = "2009-07-13"
5
+
6
+ s.description = "Simply translate HTML to Textile, Markdown, or whatever other markup format you need"
7
+ s.summary = "Convert HTML into other markup languages"
8
+ s.homepage = "http://undress.rubyforge.org"
9
+
10
+ s.authors = "Nicolás Sanguinetti"
11
+ s.email = "contacto@nicolassanguinetti.info"
12
+
13
+ s.require_paths = ["lib"]
14
+ s.rubyforge_project = "undress"
15
+ s.has_rdoc = true
16
+ s.rubygems_version = "1.3.1"
17
+
18
+ s.add_dependency "hpricot"
19
+
20
+ if s.respond_to?(:add_development_dependency)
21
+ s.add_development_dependency "sr-mg"
22
+ s.add_development_dependency "contest"
23
+ s.add_development_dependency "redgreen"
24
+ end
25
+
26
+ s.files = %w[
27
+ .gitignore
28
+ LICENSE
29
+ README.rdoc
30
+ Rakefile
31
+ undress.gemspec
32
+ lib/undress.rb
33
+ lib/undress/grammar.rb
34
+ lib/undress/textile.rb
35
+ lib/undress/greencloth.rb
36
+ lib/core_ext/object.rb
37
+ test/test_helper.rb
38
+ test/test_grammar.rb
39
+ test/test_textile.rb
40
+ test/test_greencloth.rb
41
+ ]
42
+ end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: zevarito-undress
3
+ version: !ruby/object:Gem::Version
4
+ version: "0.1"
5
+ platform: ruby
6
+ authors:
7
+ - "Nicol\xC3\xA1s Sanguinetti"
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-07-13 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: sr-mg
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: contest
37
+ type: :development
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: redgreen
47
+ type: :development
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: "0"
54
+ version:
55
+ description: Simply translate HTML to Textile, Markdown, or whatever other markup format you need
56
+ email: contacto@nicolassanguinetti.info
57
+ executables: []
58
+
59
+ extensions: []
60
+
61
+ extra_rdoc_files: []
62
+
63
+ files:
64
+ - .gitignore
65
+ - LICENSE
66
+ - README.rdoc
67
+ - Rakefile
68
+ - undress.gemspec
69
+ - lib/undress.rb
70
+ - lib/undress/grammar.rb
71
+ - lib/undress/textile.rb
72
+ - lib/undress/greencloth.rb
73
+ - lib/core_ext/object.rb
74
+ - test/test_helper.rb
75
+ - test/test_grammar.rb
76
+ - test/test_textile.rb
77
+ - test/test_greencloth.rb
78
+ has_rdoc: true
79
+ homepage: http://undress.rubyforge.org
80
+ post_install_message:
81
+ rdoc_options: []
82
+
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: "0"
90
+ version:
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: "0"
96
+ version:
97
+ requirements: []
98
+
99
+ rubyforge_project: undress
100
+ rubygems_version: 1.2.0
101
+ signing_key:
102
+ specification_version: 2
103
+ summary: Convert HTML into other markup languages
104
+ test_files: []
105
+