zevarito-undress 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ doc
2
+ dist
3
+ tmp
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ (The MIT License)
2
+
3
+ Copyright (c) 2009 Nicolas Sanguinetti, entp.com
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ 'Software'), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,42 @@
1
+ = Undress
2
+
3
+ Easily convert HTML back to Textile or Greencloth.
4
+
5
+ require "undress/textile"
6
+
7
+ code =<<html
8
+ <h1>Hello world!</h1>
9
+ <p><strong>Hey!</strong> How is it going?</p>
10
+ <h2>Supported Markup Languages so far:</h2>
11
+ <ul>
12
+ <li>Textile</li>
13
+ <li>Greencloth</li>
14
+ </ul>
15
+ html
16
+
17
+ Undress(code).to_textile
18
+
19
+ Will produce
20
+
21
+ h1. Hello world!
22
+
23
+ *Hey!* How is it going?
24
+
25
+ h2. Supported Markup Languages so far:
26
+
27
+ * Textile
28
+ * Greencloth
29
+
30
+ == Supported Markup Languages
31
+
32
+ * Textile
33
+ * Greencloth, see [http://we.riseup.net]
34
+
35
+ == Get it
36
+
37
+ gem install undress
38
+
39
+ == License
40
+
41
+ Authors:: Nicolas Sanguinetti (foca[http://github.com/foca]), Alvaro Gil (zevarito[http://github.com/zevarito])
42
+ License:: MIT (Check LICENSE for details)
data/Rakefile ADDED
@@ -0,0 +1,32 @@
require "rake/testtask"

# Prefer hanna's RDoc task when it can be loaded; otherwise fall back to the
# RDoc task provided by Rake.
begin
  require "hanna/rdoctask"
rescue LoadError
  require "rake/rdoctask"
end

Rake::RDocTask.new do |rd|
  # The main page has to name one of the files included below, so it needs
  # the ".rdoc" extension (there is no plain "README" file in the project).
  rd.main = "README.rdoc"
  rd.title = "API Documentation for Undress"
  rd.rdoc_files.include("README.rdoc", "LICENSE", "lib/**/*.rb")
  rd.rdoc_dir = "doc"
end

# metric_fu is optional: when it is not installed its tasks are simply absent.
begin
  require "metric_fu"
rescue LoadError
end

# mg is optional as well; it is only set up when the library can be loaded.
begin
  require "mg"
  MG.new("undress.gemspec")
rescue LoadError
end

desc "Default: run tests"
task :default => :test

Rake::TestTask.new do |t|
  t.test_files = FileList["test/test_*.rb"]
end
@@ -0,0 +1,6 @@
# Backport of Object#tap for Ruby versions that do not provide it.
#
# The method is only defined when the running Ruby has no +tap+ of its own,
# so the core implementation is never overridden.
class Object #:nodoc:
  unless method_defined?(:tap)
    # Yields the receiver to the block and returns the receiver.
    def tap
      yield self
      self
    end
  end
end
data/lib/undress.rb ADDED
@@ -0,0 +1,51 @@
1
+ require "hpricot"
2
+ require File.expand_path(File.dirname(__FILE__) + "/core_ext/object")
3
+ require File.expand_path(File.dirname(__FILE__) + "/undress/grammar")
4
+
5
+ # Load an HTML document so you can undress it. Pass it either a string or an IO
6
+ # object. You can pass an optional hash of options, which will be forwarded
7
+ # straight to Hpricot. Check its
8
+ # documentation[http://code.whytheluckystiff.net/doc/hpricot] for details.
9
+ def Undress(html, options={})
10
+ Undress::Document.new(html, options)
11
+ end
12
+
13
+ module Undress
14
+
15
+ # If this array is empty, all tags are allowed.
16
+ # Otherwise, any node whose name is not in this array is dropped.
17
+ ALLOWED_TAGS = []
18
+
19
+ # Register a markup language. The name will become the method used to convert
20
+ # HTML to this markup language: for example registering the name +:textile+
21
+ # gives you <tt>Undress(code).to_textile</tt>, registering +:markdown+ would
22
+ # give you <tt>Undress(code).to_markdown</tt>, etc.
23
+ def self.add_markup(name, grammar)
24
+ Document.add_markup(name, grammar)
25
+ end
26
+
27
+ class Document #:nodoc:
28
+ def initialize(html, options)
29
+ @doc = Hpricot(html, options)
30
+ end
31
+
32
+ def self.add_markup(name, grammar)
33
+ define_method "to_#{name}" do
34
+ grammar.process!(@doc)
35
+ end
36
+ end
37
+ end
38
+
39
+ module ::Hpricot #:nodoc:
40
+ class Elem #:nodoc:
41
+ def ancestors
42
+ node, ancestors = parent, Elements[]
43
+ while node.respond_to?(:parent) && node.parent
44
+ ancestors << node
45
+ node = node.parent
46
+ end
47
+ ancestors
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,127 @@
1
+ module Undress
2
+ # Grammars give you a DSL to declare how to convert an HTML document into a
3
+ # different markup language.
4
+ class Grammar
5
+ def self.inherited(base) # :nodoc:
6
+ base.instance_variable_set(:@post_processing_rules, post_processing_rules)
7
+ base.instance_variable_set(:@pre_processing_rules, pre_processing_rules)
8
+ end
9
+
10
+ # Add a parsing rule for a group of html tags.
11
+ #
12
+ # rule_for :p do |element|
13
+ # "<this was a paragraph>#{content_of(element)}</this was a paragraph>"
14
+ # end
15
+ #
16
+ # will replace your <tt><p></tt> tags for <tt><this was a paragraph></tt>
17
+ # tags, without altering the contents.
18
+ #
19
+ # The element yielded to the block is an Hpricot element for the given tag.
20
+ def self.rule_for(*tags, &handler) # :yields: element
21
+ tags.each do |tag|
22
+ define_method tag.to_sym, &handler
23
+ end
24
+ end
25
+
26
+ # Set a default rule for unrecognized tags.
27
+ #
28
+ # Unless you define a special case, it will ignore the tags and just output
29
+ # the contents of unrecognized tags.
30
+ def self.default(&handler) # :yields: element
31
+ define_method :method_missing do |tag, node, *args|
32
+ handler.call(node)
33
+ end
34
+ end
35
+
36
+ # Add a post-processing rule to your parser.
37
+ #
38
+ # This takes a regular expression that will be applied to the output after
39
+ # processing any nodes. It can take a string as a replacement, or a block
40
+ # that will be passed to String#gsub.
41
+ #
42
+ # post_processing(/\n\n+/, "\n\n") # compress more than two newlines
43
+ # post_processing(/whatever/) { ... }
44
+ def self.post_processing(regexp, replacement = nil, &handler) #:yields: matched_string
45
+ post_processing_rules[regexp] = replacement || handler
46
+ end
47
+
48
+ # Add a pre-processing rule to your parser.
49
+ #
50
+ # This lets you mutate the DOM before applying any rule defined with
51
+ # +rule_for+. You need to pass a CSS/XPath selector, and a block that
52
+ # takes an Hpricot element to parse it.
53
+ #
54
+ # pre_processing "ul.toc" do |element|
55
+ # element.swap("<p>[[toc]]</p>")
56
+ # end
57
+ #
58
+ # Would replace any unordered lists with the class +toc+ for a
59
+ # paragraph containing the code <tt>[[toc]]</tt>.
60
+ def self.pre_processing(selector, &handler) # :yields: element
61
+ pre_processing_rules[selector] = handler
62
+ end
63
+
64
+ def self.post_processing_rules #:nodoc:
65
+ @post_processing_rules ||= {}
66
+ end
67
+
68
+ def self.pre_processing_rules #:nodoc:
69
+ @pre_processing_rules ||= {}
70
+ end
71
+
72
+ def self.process!(node) #:nodoc:
73
+ new.process!(node)
74
+ end
75
+
76
+ attr_reader :pre_processing_rules #:nodoc:
77
+ attr_reader :post_processing_rules #:nodoc:
78
+
79
+ def initialize #:nodoc:
80
+ @pre_processing_rules = self.class.pre_processing_rules.dup
81
+ @post_processing_rules = self.class.post_processing_rules.dup
82
+ end
83
+
84
+ # Process a DOM node, converting it to your markup language according to
85
+ # your defined rules. If the node is a Text node, it will return it's
86
+ # string representation. Otherwise it will call the rule defined for it.
87
+ def process(nodes)
88
+ Array(nodes).map do |node|
89
+ if node.text?
90
+ node.to_html
91
+ elsif node.elem?
92
+ send node.name.to_sym, node if ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)
93
+ else
94
+ ""
95
+ end
96
+ end.join("")
97
+ end
98
+
99
+ def process!(node) #:nodoc:
100
+ pre_processing_rules.each do |selector, handler|
101
+ node.search(selector).each(&handler)
102
+ end
103
+
104
+ process(node.children).tap do |text|
105
+ post_processing_rules.each do |rule, handler|
106
+ handler.is_a?(String) ? text.gsub!(rule, handler) : text.gsub!(rule, &handler)
107
+ end
108
+ end
109
+ end
110
+
111
+ # Get the result of parsing the contents of a node.
112
+ def content_of(node)
113
+ process(node.respond_to?(:children) ? node.children : node)
114
+ end
115
+
116
+ # Helper method that tells you if the given DOM node is immediately
117
+ # surrounded by whitespace.
118
+ def surrounded_by_whitespace?(node)
119
+ (node.previous.text? && node.previous.to_s =~ /\s+$/) ||
120
+ (node.next.text? && node.next.to_s =~ /^\s+/)
121
+ end
122
+
123
+ def method_missing(tag, node, *args) #:nodoc:
124
+ process(node.children)
125
+ end
126
+ end
127
+ end
@@ -0,0 +1,138 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/textile")
2
+
module Undress
  # Grammar that converts HTML into GreenCloth markup. It builds on the
  # Textile grammar and overrides the rules that differ.
  class GreenCloth < Textile

    # Restrict processing to the tags listed here. The shared array is
    # updated in place: assigning the constant again would make Ruby warn
    # about an already initialized constant.
    Undress::ALLOWED_TAGS.replace([
      'div', 'a', 'img', 'br', 'i', 'u', 'b', 'pre', 'kbd', 'code', 'cite', 'strong', 'em',
      'ins', 'sup', 'sub', 'del', 'table', 'tr', 'td', 'th', 'ol', 'ul', 'li', 'p', 'span',
      'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'notextile', 'blockquote', 'object', 'embed',
      'param', 'acronym', 'dd', 'dl', 'dt'
    ])

    # table of contents
    pre_processing("ul.toc") do |toc|
      toc.swap "[[toc]]"
    end

    # headings
    rule_for(:h1, :h2, :h3, :h4, :h5, :h6) {|e| process_headings(e) }

    # inline elements
    rule_for(:a) {|e| process_links_and_anchors(e).to_s }

    # lists
    rule_for(:li) {|e| "\n#{list_offset(e)} #{content_of(e)}" }

    # text formatting
    rule_for(:pre) {|e|
      if e.children.all? {|n| n.text? && n.content =~ /^\s+$/ || n.elem? && n.name == "code" }
        "\n\n<pre><code>#{content_of(e % "code")}</code></pre>"
      else
        "\n\n<pre>#{content_of(e)}</pre>"
      end
    }

    rule_for(:code) {|e|
      if e.inner_html.match(/\n/)
        if e.parent && e.parent.name != "pre"
          "<pre><code>#{content_of(e)}</code></pre>"
        else
          "<code>#{content_of(e)}</code>"
        end
      else
        "@#{content_of(e)}@"
      end
    }

    # passing through objects
    rule_for(:embed, :object, :param) {|e|
      e.to_html
    }

    # Builds the list marker for an <li>: one "*" per enclosing <ul> and one
    # "#" per enclosing <ol>, outermost first. Walking stops at the first
    # ancestor that is not a list, so the item's content is always kept.
    def list_offset(li)
      offset = ""
      item = li
      while item && (list = item.parent) && list.respond_to?(:name)
        case list.name
        when "ul" then offset = "*#{offset}"
        when "ol" then offset = "##{offset}"
        else break
        end
        # the next candidate is whatever contains this list (normally an <li>)
        item = list.parent
      end
      offset
    end

    # Renders a heading. Child elements are blanked unless they are <a> tags
    # that either have no href or whose href satisfies the pattern below.
    # h1 and h2 are underlined with "=" and "-"; other levels use the
    # "hN. " prefix.
    def process_headings(h)
      h.children.each {|e|
        next if e.class == Hpricot::Text
        e.parent.replace_child(e, "") if e.name != "a" || e.has_attribute?("href") && e["href"] !~ /^\/|(https?|s?ftp):\/\//
      }
      case h.name
      when "h1"
        "#{content_of(h)}\n#{'=' * h.inner_text.size}\n\n"
      when "h2"
        "#{content_of(h)}\n#{'-' * h.inner_text.size}\n\n"
      else
        "#{h.name}. #{content_of(h)}\n\n"
      end
    end

    # Renders an <a> element either as an anchor definition (it has a +name+
    # and is not inside a heading) or as a link (it has a non-empty +href+).
    # Anything else renders as an empty string.
    def process_links_and_anchors(e)
      return "" if e.empty?
      inner, name, href = e.inner_html, e.get_attribute("name"), e.get_attribute("href")

      # is an anchor? and cannot be child of any h1..h6
      if name && !e.parent.name.match(/^h[1-6]$/)
        inner == name || inner == name.gsub(/-/,"\s") ? "[# #{inner} #]" : "[# #{inner} -> #{name} #]"
      # is a link?
      elsif href && href != ""
        case href
        when /^\/#/
          "[\"#{inner}\":#{href}]"
        when /^#/
          "[#{inner} -> #{href}]"
        when /^(https?|s?ftp):\/\//
          href.gsub(/^(https?|s?ftp):\/\//, "") == inner ? "[#{href}]" : "[#{inner} -> #{href}]"
        when /^[^\/]/
          "[#{e.inner_text}]"
        when /^\/.[^\/]*\/.[^\/]*\//
          "[#{inner} -> #{href}]"
        when /(?:\/page\/\+)[0-9]+$/
          # NOTE(review): assumes the intended output is the numeric page id
          # ("+123"); the previous code interpolated an Enumerator here.
          "[#{inner} -> +#{href[/[0-9]+$/]}]"
        else
          process_as_wiki_link(e)
        end
      else
        ""
      end
    end

    # Renders a link whose href looks like "/page/name", "/context/name" or
    # "/name" as a wiki link.
    def process_as_wiki_link(e)
      inner, href = e.inner_html, e.get_attribute("href")

      # pages or group pages
      context_name, page_name = href.split("/")[1..2]
      # an href such as "/" has no path segment to build a wiki link from
      return "[#{inner} -> #{href}]" if context_name.nil?
      page_name = context_name if page_name.nil?
      wiki_page_name = page_name.gsub(/[a-z-]*[^\/]$/m) {|m| m.tr('-',' ')}

      # simple page
      if context_name == "page"
        return "[#{inner}]" if wiki_page_name == inner
        return "[#{inner} -> #{wiki_page_name}]"
      end
      # group page
      if context_name != page_name
        return "[#{context_name} / #{wiki_page_name}]" if wiki_page_name == inner
        return "[#{inner} -> #{context_name} / #{wiki_page_name}]"
      end
      if inner == page_name || inner == wiki_page_name || inner == wiki_page_name.gsub(/\s/,"-")
        return "[#{wiki_page_name}]"
      end
      # fall back
      return "[#{inner} -> #{href}]"
    end

  end
  add_markup :greencloth, GreenCloth
end
@@ -0,0 +1,104 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/../undress")
2
+
module Undress
  # Grammar that converts HTML into Textile markup.
  class Textile < Grammar

    # Names of the elements that open a list level. This must be an Array:
    # the former <tt>%(ul ol)</tt> literal was the String "ul ol", so the
    # check was a substring match and also accepted names such as "u".
    LIST_TAGS = %w(ul ol).freeze

    # delete tabs and newlines from inside elements
    pre_processing("*") do |e|
      if e.elem? && e.parent.doc? && e.inner_html != "" && e.name != "pre"
        e.inner_html = e.inner_html.gsub(/\n|\t/,"")
      end
    end

    # whitespace handling
    post_processing(/\n\n+/, "\n\n")
    post_processing(/\A\s+/, "")
    post_processing(/\s+\z/, "\n")

    # special characters introduced by textile
    post_processing(/&#8230;/, "...")
    post_processing(/&#8217;/, "'")
    post_processing(/&#822[01];/, '"')
    post_processing(/&#8212;/, "--")
    post_processing(/&#8211;/, "-")
    post_processing(/(\d+\s*)&#215;(\s*\d+)/, '\1x\2')
    post_processing(/&#174;/, "(r)")
    post_processing(/&#169;/, "(c)")
    post_processing(/&#8482;/, "(tm)")

    # inline elements
    rule_for(:a) {|e|
      title = e.has_attribute?("title") ? " (#{e["title"]})" : ""
      "[#{content_of(e)}#{title}:#{e["href"]}]"
    }
    rule_for(:img) {|e|
      alt = e.has_attribute?("alt") ? "(#{e["alt"]})" : ""
      "!#{e["src"]}#{alt}!"
    }
    rule_for(:strong) {|e| "*#{content_of(e)}*" }
    rule_for(:em) {|e| "_#{content_of(e)}_" }
    rule_for(:code) {|e| "@#{content_of(e)}@" }
    rule_for(:cite) {|e| "??#{content_of(e)}??" }
    rule_for(:sup) {|e| surrounded_by_whitespace?(e) ? "^#{content_of(e)}^" : "[^#{content_of(e)}^]" }
    rule_for(:sub) {|e| surrounded_by_whitespace?(e) ? "~#{content_of(e)}~" : "[~#{content_of(e)}~]" }
    rule_for(:ins) {|e| "+#{content_of(e)}+" }
    rule_for(:del) {|e| "-#{content_of(e)}-" }
    rule_for(:acronym) {|e| e.has_attribute?("title") ? "#{content_of(e)}(#{e["title"]})" : content_of(e) }

    # text formatting and layout
    rule_for(:p) {|e| "\n\n#{content_of(e)}\n\n" }
    rule_for(:br) {|e| "\n" }
    rule_for(:blockquote) {|e| "\n\nbq. #{content_of(e)}\n\n" }
    rule_for(:pre) {|e|
      if e.children.all? {|n| n.text? && n.content =~ /^\s+$/ || n.elem? && n.name == "code" }
        "\n\npc. #{content_of(e % "code")}\n\n"
      else
        "<pre>#{content_of(e)}</pre>"
      end
    }

    # headings
    rule_for(:h1) {|e| "\n\nh1. #{content_of(e)}\n\n" }
    rule_for(:h2) {|e| "\n\nh2. #{content_of(e)}\n\n" }
    rule_for(:h3) {|e| "\n\nh3. #{content_of(e)}\n\n" }
    rule_for(:h4) {|e| "\n\nh4. #{content_of(e)}\n\n" }
    rule_for(:h5) {|e| "\n\nh5. #{content_of(e)}\n\n" }
    rule_for(:h6) {|e| "\n\nh6. #{content_of(e)}\n\n" }

    # lists
    rule_for(:li) {|e|
      token = e.parent.name == "ul" ? "*" : "#"
      # The nesting level is the number of enclosing lists. Wrappers that are
      # not lists (a <div> around the list, for instance) must not deepen it.
      # An item without any list ancestor still gets a single marker.
      depth = e.ancestors.select {|node| LIST_TAGS.include?(node.name) }.size
      nesting = depth < 1 ? 1 : depth
      "\n#{token * nesting} #{content_of(e)}"
    }
    rule_for(:ul, :ol) {|e|
      # only the outermost list is padded with blank lines
      if e.ancestors.detect {|node| LIST_TAGS.include?(node.name) }
        content_of(e)
      else
        "\n#{content_of(e)}\n\n"
      end
    }

    # definition lists
    rule_for(:dl) {|e| "\n\n#{content_of(e)}\n" }
    rule_for(:dt) {|e| "- #{content_of(e)} " }
    rule_for(:dd) {|e| ":= #{content_of(e)} =:\n" }

    # tables
    rule_for(:table) {|e| "\n\n#{content_of(e)}\n" }
    rule_for(:tr) {|e| "#{content_of(e)}|\n" }
    rule_for(:td, :th) {|e|
      # header cells win over colspan, which wins over rowspan; a plain cell
      # gets no prefix (nil interpolates as an empty string)
      prefix = if e.name == "th"
        "_. "
      elsif e.has_attribute?("colspan")
        "\\#{e["colspan"]}. "
      elsif e.has_attribute?("rowspan")
        "/#{e["rowspan"]}. "
      end

      "|#{prefix}#{content_of(e)}"
    }
  end

  add_markup :textile, Textile
end
@@ -0,0 +1,55 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/test_helper")
2
+
3
+ module Undress
4
+ class TestGrammar < Test::Unit::TestCase
5
+ class Parent < Grammar
6
+ rule_for(:p) {|e| "<this is a paragraph>#{content_of(e)}</this is a paragraph>" }
7
+ end
8
+
9
+ class WithPreProcessingRules < Parent
10
+ pre_processing("p.foo") {|e| e.swap("<div>Cuack</div>") }
11
+ rule_for(:div) {|e| "<this was a div>#{content_of(e)}</this was a div>" }
12
+ end
13
+
14
+ class Child < Parent; end
15
+
16
+ class OverWriter < WithPreProcessingRules
17
+ rule_for(:div) {|e| content_of(e) }
18
+ end
19
+
20
+ class TextileExtension < Textile
21
+ rule_for(:a) {|e| "" }
22
+ end
23
+
24
+ def parse_with(grammar, html)
25
+ grammar.process!(Hpricot(html))
26
+ end
27
+
28
+ context "extending a grammar" do
29
+ test "the extended grammar should inherit the rules of the parent" do
30
+ output = parse_with Child, "<p>Foo Bar</p>"
31
+ assert_equal "<this is a paragraph>Foo Bar</this is a paragraph>", output
32
+ end
33
+
34
+ test "extending a grammar doesn't overwrite the parent's rules" do
35
+ output = parse_with OverWriter, "<div>Foo</div>"
36
+ assert_equal "Foo", output
37
+
38
+ output = parse_with WithPreProcessingRules, "<div>Foo</div>"
39
+ assert_equal "<this was a div>Foo</this was a div>", output
40
+ end
41
+
42
+ test "extending textile doesn't blow up" do
43
+ output = parse_with TextileExtension, "<p><a href='/'>Cuack</a></p><p>Foo Bar</p><p>I <a href='/'>work</a></p>"
44
+ assert_equal "Foo Bar\n\nI\n", output
45
+ end
46
+ end
47
+
48
+ context "pre processing rules" do
49
+ test "mutate the DOM before parsing the tags" do
50
+ output = parse_with WithPreProcessingRules, "<p class='foo'>Blah</p><p>O hai</p>"
51
+ assert_equal "<this was a div>Cuack</this was a div><this is a paragraph>O hai</this is a paragraph>", output
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,276 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/test_helper")
2
+
3
+ class Undress::GreenClothTest < Test::Unit::TestCase
4
+ def assert_renders_greencloth(greencloth, html)
5
+ assert_equal greencloth, Undress(html, :xhtml_strict => true).to_greencloth
6
+ end
7
+
8
+ # unallowed tags
9
+ context "remove unallowed tags" do
10
+ test "remove a head tag" do
11
+ html = "<html><head><title>Title</title></head>"
12
+ greencloth = ""
13
+ assert_renders_greencloth greencloth, html
14
+ end
15
+
16
+ test "remove a script tag" do
17
+ html = "<div>Some script inside a<script type='text/javascript'>window.alert('alert')</script> paragraph</div>"
18
+ greencloth = "Some script inside a paragraph"
19
+ assert_renders_greencloth greencloth, html
20
+ end
21
+ end
22
+
23
+ # code
24
+ context "converting code tags" do
25
+ test "a code inside a paragraph" do
26
+ html = "<p>do you like my <code>function</code>?</p>"
27
+ greencloth = "do you like my @function@?\n"
28
+ assert_renders_greencloth greencloth, html
29
+ end
30
+
31
+ test "code tag inside pre tag" do
32
+ html = "<pre><code>def say_hi\n\tputs 'hi'\nend</code></pre>"
33
+ greencloth = "<pre><code>def say_hi\n\tputs 'hi'\nend</code></pre>"
34
+ assert_renders_greencloth greencloth, html
35
+ end
36
+
37
+ test "code inside list items" do
38
+ html = "<ul><li><code>foo</code></li><li><code>bar</code></li><li>and <code>x</code> is also.</li></ul>"
39
+ greencloth = "* @foo@\n* @bar@\n* and @x@ is also.\n"
40
+ assert_renders_greencloth greencloth, html
41
+ end
42
+
43
+ test "code tag not inside a pre and without new lines inside" do
44
+ html = "<code>some code inside</code>"
45
+ greencloth = "@some code inside@"
46
+ assert_renders_greencloth greencloth, html
47
+ end
48
+ end
49
+
50
+ # embed and object
51
+ # the elements pass through, but the order of their attributes changes
52
+ context "embed and object" do
53
+ test "embed" do
54
+ html = "<p>do you like my embedded blip.tv <embed src='http://blip.tv/play/Ac3GfI+2HA' allowfullscreen='true' type='application/x-shockwave-flash' allowscriptaccess='always' height='510' width='720' />?</p>"
55
+ greencloth = "do you like my embedded blip.tv <embed src=\"http://blip.tv/play/Ac3GfI+2HA\" allowfullscreen=\"true\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" height=\"510\" width=\"720\" />?\n"
56
+ assert_renders_greencloth greencloth, html
57
+ end
58
+
59
+ test "object" do
60
+ html = "<p>do you like my embedded youtube <object width='425' height='344'><param name='movie' value='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' /><param name='allowFullScreen' value='true' /><embed src='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' type='application/x-shockwave-flash' width='425' height='344' allowfullscreen='true' /></object>?</p>"
61
+ greencloth = "do you like my embedded youtube <object height=\"344\" width=\"425\"><param name=\"movie\" value=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" /><param name=\"allowFullScreen\" value=\"true\" /><embed src=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" allowfullscreen=\"true\" type=\"application/x-shockwave-flash\" height=\"344\" width=\"425\" /></object>?\n"
62
+ assert_renders_greencloth greencloth, html
63
+ end
64
+ end
65
+
66
+ # outline
67
+ # don't allow links to anchors or anchor definitions inside hx; the greencloth -> html
68
+ # conversion takes care of them, so for now only links are allowed inside hx elements
69
+ context "outline" do
70
+ test "table of contents toc" do
71
+ html = "<ul class='toc'><li class='toc1'><a href='#fruits'><span>1</span> Fruits</a></li><ul><li class='toc2'><a href='#tasty-apples'><span>1.1</span> Tasty Apples</a></li><ul><li class='toc3'><a href='green'><span>1.1.1</span> Green</a></li><li class='toc3'><a href='#red'><span>1.1.2</span> Red</a></li></ul>"
72
+ greencloth = "[[toc]]"
73
+ assert_renders_greencloth greencloth, html
74
+ end
75
+
76
+ test "headings with links, anchors and links to anchors" do
77
+ html = "<h1 class='first'><a name='russian-anarchists'></a>Russian Anarchists<a class='anchor' href='#russian-anarchists'>&para;</a></h1><h2><a name='michel-bakunin'></a>Michel <a href='http://en.wikipedia.org/wiki/Mikhail_Bakunin'>Bakunin</a><a class='anchor' href='#michel-bakunin'>&para;</a></h2><h2><a name='peter-kropotkin'></a><a href='http://en.wikipedia.org/wiki/Peter_Kropotkin'>Peter</a> Kropotkin<a class='anchor' href='#peter-kropotkin'>&para;</a></h2><h1><a name='russian-american-anarchists'></a>Russian-American Anarchists<a class='anchor' href='#russian-american-anarchists'>&para;</a></h1><h2><a name='emma-goldman'></a><a href='http://en.wikipedia.org/wiki/Emma_Goldman'>Emma Goldman</a><a class='anchor' href='#emma-goldman'>&para;</a></h2><h2><a name='alexander-berkman'></a>Alexander <a href='http://en.wikipedia.org/wiki/Alexander_Berkman'>Berkman</a><a class='anchor' href='#alexander-berkman'>&para;</a></h2>"
78
+ greencloth = "Russian Anarchists\n==================\n\nMichel [Bakunin -> http://en.wikipedia.org/wiki/Mikhail_Bakunin]\n--------------\n\n[Peter -> http://en.wikipedia.org/wiki/Peter_Kropotkin] Kropotkin\n---------------\n\nRussian-American Anarchists\n===========================\n\n[Emma Goldman -> http://en.wikipedia.org/wiki/Emma_Goldman]\n------------\n\nAlexander [Berkman -> http://en.wikipedia.org/wiki/Alexander_Berkman]\n-----------------\n"
79
+ assert_renders_greencloth greencloth, html
80
+ end
81
+
82
+ test "double trouble" do
83
+ html = "<h1 class='first'><a name='title'></a>Title<a class='anchor' href='#title'>&para;</a></h1><h3><a name='under-first'></a>Under first<a class='anchor' href='#under-first'>&para;</a></h3><h1><a name='title_2'></a>Title<a class='anchor' href='#title_2'>&para;</a></h1><h3><a name='under-second'></a>Under second<a class='anchor' href='#under-second'>&para;</a></h3>"
84
+ greencloth = "Title\n=====\n\nh3. Under first\n\nTitle\n=====\n\nh3. Under second\n"
85
+ assert_renders_greencloth greencloth, html
86
+ end
87
+ end
88
+
89
+ # basics
90
+ context "basics" do
91
+ test "headers" do
92
+ html = "<h1 class='first'>header one</h1>\n<h2>header two</h2>"
93
+ greencloth = "header one\n==========\n\nheader two\n----------\n"
94
+ assert_renders_greencloth greencloth, html
95
+ end
96
+
97
+ test "headers with paragraph" do
98
+ html = "<p>la la la</p>\n<h1 class='first'>header one</h1>\n<h2>header two</h2>\n<p>la la la</p>"
99
+ greencloth = "la la la\n\nheader one\n==========\n\nheader two\n----------\n\nla la la\n"
100
+ assert_renders_greencloth greencloth, html
101
+ end
102
+ end
103
+
104
+ # sections
105
+ # we always render h1 with ==== and h2 with ----
106
+ context "Convert sections" do
107
+ test "one section no heading" do
108
+ html = "<div class='wiki_section' id='wiki_section-0'><p>start unheaded section</p><p>line line line</p></div>"
109
+ greencloth = "start unheaded section\n\nline line line\n"
110
+ assert_renders_greencloth greencloth, html
111
+ end
112
+
113
+ test "one section with heading" do
114
+ html = "<div class='wiki_section' id='wiki_section-0'><h2 class='first'>are you ready?!!?</h2><p>here we go now!</p></div>"
115
+ greencloth = "are you ready?!!?\n-----------------\n\nhere we go now!\n"
116
+ assert_renders_greencloth greencloth, html
117
+ end
118
+
119
+ test "all headings" do
120
+ html = "<h1>First</h1><h2>Second</h2><h3>Tres</h3><h4>Cuatro</h4><h5>Five</h5><h6>Six</h6>"
121
+ greencloth = "First\n=====\n\nSecond\n------\n\nh3. Tres\n\nh4. Cuatro\n\nh5. Five\n\nh6. Six\n"
122
+ assert_renders_greencloth greencloth, html
123
+ end
124
+
125
+ test "multiple sections with text" do
126
+ html = "<div class='wiki_section' id='wiki_section-0'><h2 class='first'>Section One</h2><p>section one line one is here<br />section one line two is next</p><p>Here is section one still</p></div><div class='wiki_section' id='wiki_section-1'><h1>Section Two</h1><p>Section two first line<br />Section two another line</p></div><div class='wiki_section' id='wiki_section-2'><h2>Section 3 with h2</h2><p>One more line for section 3</p></div><div class='wiki_section' id='wiki_section-3'><h3>final section 4</h3><p>section 4 first non-blank line</p>\n</div>"
127
+ greencloth = "Section One\n-----------\n\nsection one line one is here\nsection one line two is next\n\nHere is section one still\n\nSection Two\n===========\n\nSection two first line\nSection two another line\n\nSection 3 with h2\n-----------------\n\nOne more line for section 3\n\nh3. final section 4\n\nsection 4 first non-blank line\n"
128
+ assert_renders_greencloth greencloth, html
129
+ end
130
+ end
131
+
132
+ # lists
133
+ # TODO: start attribute not implemented
134
+ context "Converting html lists to greencloth" do
135
+ test "hard break in list" do
136
+ html = "<ul>\n\t<li>first line</li>\n\t<li>second<br />\n\tline</li>\n\t<li>third line</li>\n</ul>\n"
137
+ greencloth = "* first line\n* second\nline\n* third line\n"
138
+ assert_renders_greencloth greencloth, html
139
+ end
140
+
141
+ test "mixed nesting" do
142
+ html = "<ul><li>bullet\n<ol>\n<li>number</li>\n<li>number\n<ul>\n\t<li>bullet</li>\n</ul></li>\n<li>number</li>\n<li>number with<br />a break</li>\n</ol></li>\n<li>bullet\n<ul><li>okay</li></ul></li></ul>"
143
+ greencloth = "* bullet\n*# number\n*# number\n*#* bullet\n*# number\n*# number with\na break\n* bullet\n** okay\n"
144
+ assert_renders_greencloth greencloth, html
145
+ end
146
+
147
+ test "list continuation" do # uses start
148
+ html = "<ol><li>one</li><li>two</li><li>three</li></ol><ol><li>one</li><li>two</li><li>three</li></ol><ol start='4'><li>four</li><li>five</li><li>six</li></ol>"
149
+ greencloth = "# one\n# two\n# three\n\n# one\n# two\n# three\n\n# four\n# five\n# six\n"
150
+ assert_renders_greencloth greencloth, html
151
+ end
152
+
153
+ test "continue after break" do # uses start
154
+ html = "<ol><li>one</li><li>two</li><li>three</li></ol><p>test</p><ol><li>one</li><li>two</li><li>three</li></ol><p>test</p><ol start='4'><li>four</li><li>five</li><li>six</li></ol>"
155
+ greencloth = "# one\n# two\n# three\n\ntest\n\n# one\n# two\n# three\n\ntest\n\n# four\n# five\n# six\n"
156
+ assert_renders_greencloth greencloth, html
157
+ end
158
+
159
+ test "continue list when prior list contained nested list" do # uses start
160
+ greencloth = "# one\n# two\n# three\n\n# four\n# five\n## sub-note\n## another sub-note\n# six\n\n# seven\n# eight\n# nine\n"
161
+ html = "<ol><li>one</li><li>two</li><li>three</li></ol><ol start='4'><li>four</li><li>five<ol><li>sub-note</li><li>another sub-note</li></ol></li><li>six</li></ol><ol start='7'><li>seven</li><li>eight</li><li>nine</li></ol>"
162
+ assert_renders_greencloth greencloth, html
163
+ end
164
+
165
+ test "" do
166
+
167
+ end
168
+ end
169
+
170
+ # links
171
+ context "Converting html links to greencloth" do
172
+ test "convert a link to a wiki page inside a paragraph" do
173
+ html = "<p>this is a <a href='/page/plain-link'>plain link</a> in some text</p>"
174
+ greencloth = "this is a [plain link] in some text\n"
175
+ assert_renders_greencloth greencloth, html
176
+ end
177
+
178
+ test "convert a link to a wiki page with namespace" do
179
+ html= "<p>this is a <a href='/namespaced/link'>link</a> in some text</p>"
180
+ greencloth = "this is a [namespaced / link] in some text\n"
181
+ assert_renders_greencloth greencloth, html
182
+ end
183
+
184
+ test "convert a link to a wiki page" do
185
+ html= "<p>this is a <a href='/page/something-else'>link to</a> in some text</p>"
186
+ greencloth = "this is a [link to -> something else] in some text\n"
187
+ assert_renders_greencloth greencloth, html
188
+ end
189
+
190
+ test "convert a link to a wiki page with namespace and text different than link dest" do
191
+ html= "<p>this is a <a href='/namespace/something-else'>link to</a> in some text</p>"
192
+ greencloth = "this is a [link to -> namespace / something else] in some text\n"
193
+ assert_renders_greencloth greencloth, html
194
+ end
195
+
196
+ test "convert a link to an absolute path" do
197
+ html = "<p>this is a <a href='/an/absolute/path'>link to</a> in some text</p>"
198
+ greencloth = "this is a [link to -> /an/absolute/path] in some text\n"
199
+ assert_renders_greencloth greencloth, html
200
+ end
201
+
202
+ test "convert a link to an external domain" do
203
+ html = "<p>this is a <a href='https://riseup.net'>link to</a> a url</p>"
204
+ greencloth = "this is a [link to -> https://riseup.net] a url\n"
205
+ assert_renders_greencloth greencloth, html
206
+ end
207
+
208
+ test "a link to an external domain with the same text as dest" do
209
+ html = "<p>url in brackets <a href='https://riseup.net/'>riseup.net</a></p>"
210
+ greencloth = "url in brackets [riseup.net -> https://riseup.net/]\n"
211
+ assert_renders_greencloth greencloth, html
212
+ end
213
+
214
+ test "a link to a wiki page with the same name as dest" do
215
+ html = "<p>a <a href='/page/name-link'>name link</a> in need of humanizing</p>"
216
+ greencloth = "a [name link] in need of humanizing\n"
217
+ assert_renders_greencloth greencloth, html
218
+ end
219
+
220
+ test "link to a user blue" do
221
+ html = "<p>link to a user <a href='/blue'>blue</a></p>"
222
+ greencloth = "link to a user [blue]\n"
223
+ assert_renders_greencloth greencloth, html
224
+ end
225
+
226
+ test "link with dashes should keep the dashes" do
227
+ html = "<p><a href='/-dashes/in/the/link-'>link to</a></p>"
228
+ greencloth = "[link to -> /-dashes/in/the/link-]\n"
229
+ assert_renders_greencloth greencloth, html
230
+ end
231
+
232
+ test "link with underscores should keep the underscores" do
233
+ html = "<p>links <a href='/page/with_underscores'>with_underscores</a> should keep underscore</p>"
234
+ greencloth = "links [with_underscores] should keep underscore\n"
235
+ assert_renders_greencloth greencloth, html
236
+ end
237
+
238
+ test "a link inside a li element" do
239
+ html ="<ul>\n<li>\n\t\t\n<a href='/page/this'>link to</a></li></ul>"
240
+ greencloth = "* [link to -> this]\n"
241
+ assert_renders_greencloth greencloth, html
242
+ end
243
+
244
+ test "an external link inside a li element" do
245
+ html = "<ul>\n<li><a href='https://riseup.net/'>riseup.net</a></li>\n</ul>"
246
+ greencloth = "* [riseup.net -> https://riseup.net/]\n"
247
+ assert_renders_greencloth greencloth, html
248
+ end
249
+
250
+ test "many anchors inside a paragraph" do
251
+ html = "<p>make anchors <a name='here'>here</a> or <a name='maybe-here'>maybe here</a> or <a name='there'>over</a></p>"
252
+ greencloth = "make anchors [# here #] or [# maybe here #] or [# over -> there #]\n"
253
+ assert_renders_greencloth greencloth, html
254
+ end
255
+
256
+ # TODO: this test differs from how cg supports writing anchors;
257
+ # only a reduced form of that syntax is supported here
258
+ test "anchors and links" do
259
+ html = "<p>link to <a href='/page/anchors#like-so'>anchors</a> or <a href='/page/like#so'>maybe</a> or <a href='#so'>just</a> or <a href='#so'>so</a></p>"
260
+ greencloth = "link to [anchors -> anchors#like so] or [maybe -> like#so] or [just -> #so] or [so -> #so]\n"
261
+ assert_renders_greencloth greencloth, html
262
+ end
263
+
264
+ test "more anchors" do
265
+ html = "<p><a href='#5'>link</a> to a numeric anchor <a name='5'>5</a></p>"
266
+ greencloth = "[link -> #5] to a numeric anchor [# 5 #]\n"
267
+ assert_renders_greencloth greencloth, html
268
+ end
269
+
270
+ test "3 links without /" do
271
+ html = "<p><a href='some'>some</a> and <a href='other'>other</a> and <a href='one_more'>one_more</a></p>"
272
+ greencloth = "[some] and [other] and [one_more]\n"
273
+ assert_renders_greencloth greencloth, html
274
+ end
275
+ end
276
+ end
@@ -0,0 +1,11 @@
1
+ require "rubygems"
2
+ require "test/unit"
3
+ require "contest"
4
+ Dir[File.expand_path(File.dirname(__FILE__) + "/../lib/**/*.rb")].each do |file|
5
+ require file
6
+ end
7
+
8
+ begin
9
+ require "redgreen"
10
+ rescue LoadError
11
+ end
@@ -0,0 +1,198 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/test_helper")
2
+
3
+ module Undress
4
+ class TextileTest < Test::Unit::TestCase
5
+ def assert_renders_textile(textile, html) # asserts that converting `html` via Undress(html).to_textile yields exactly `textile`
6
+ assert_equal textile, Undress(html).to_textile
7
+ end
8
+
9
+ context "Converting HTML to textile" do
10
+ test "converts nested tags" do # inline markers (_ and *) nest inside a block-level "h2. " line
11
+ assert_renders_textile "h2. _this is *very* important_\n", "<h2><em>this is <strong>very</strong> important</em></h2>"
12
+ end
13
+
14
+ context "inline elements" do # one test per inline HTML tag and its expected Textile marker
15
+ test "converts <strong> tags" do
16
+ assert_renders_textile "*foo bar*", "<strong>foo bar</strong>"
17
+ end
18
+
19
+ test "converts <em> tags" do
20
+ assert_renders_textile "_foo bar_", "<em>foo bar</em>"
21
+ end
22
+
23
+ test "converts <code> tags" do
24
+ assert_renders_textile "@foo bar@", "<code>foo bar</code>"
25
+ end
26
+
27
+ test "converts <cite> tags" do
28
+ assert_renders_textile "??foo bar??", "<cite>foo bar</cite>"
29
+ end
30
+
31
+ test "converts <sup> tags" do
32
+ assert_renders_textile "foo ^sup^ bar", "foo <sup>sup</sup> bar"
33
+ assert_renders_textile "foo[^sup^]bar", "foo<sup>sup</sup>bar" # no surrounding whitespace: the marker is expected wrapped in [ ]
34
+ end
35
+
36
+ test "converts <sub> tags" do
37
+ assert_renders_textile "foo ~sub~ bar", "foo <sub>sub</sub> bar"
38
+ assert_renders_textile "foo[~sub~]bar", "foo<sub>sub</sub>bar" # no surrounding whitespace: the marker is expected wrapped in [ ]
39
+ end
40
+
41
+ test "converts <ins> tags" do
42
+ assert_renders_textile "+foo bar+", "<ins>foo bar</ins>"
43
+ end
44
+
45
+ test "converts <del> tags" do
46
+ assert_renders_textile "-foo bar-", "<del>foo bar</del>"
47
+ end
48
+
49
+ test "converts <acronym> tags" do
50
+ assert_renders_textile "EPA(Environmental Protection Agency)", "<acronym title='Environmental Protection Agency'>EPA</acronym>" # the title attribute becomes the parenthesised expansion
51
+ assert_renders_textile "EPA", "<acronym>EPA</acronym>" # without a title only the text is kept
52
+ end
53
+ end
54
+
55
+ context "links" do # links are expected as [text:href], with an optional " (title)" after the text
56
+ test "converts simple links (without title)" do
57
+ assert_renders_textile "[Foo Bar:/cuack]", "<a href='/cuack'>Foo Bar</a>"
58
+ end
59
+
60
+ test "converts links with titles" do
61
+ assert_renders_textile "[Foo Bar (You should see this):/cuack]", "<a href='/cuack' title='You should see this'>Foo Bar</a>"
62
+ end
63
+ end
64
+
65
+ context "images" do # images are expected as !src!, with the alt text appended in parentheses when present
66
+ test "converts images without alt attributes" do
67
+ assert_renders_textile "!http://example.com/image.png!", "<img src='http://example.com/image.png'/>"
68
+ end
69
+
70
+ test "converts images with alt attributes" do
71
+ assert_renders_textile "!http://example.com/image.png(Awesome Pic)!", "<img src='http://example.com/image.png' alt='Awesome Pic'/>"
72
+ end
73
+ end
74
+
75
+ context "text formatting" do # block-level text: paragraphs, preformatted code, line breaks and blockquotes
76
+ test "converts paragraphs" do
77
+ assert_renders_textile "foo\n\nbar\n", "<p>foo</p><p>bar</p>" # consecutive paragraphs are separated by one blank line
78
+ end
79
+
80
+ test "converts <pre> tags which only contain a <code> child" do
81
+ assert_renders_textile "pc. var foo = 1;\n", "<pre><code>var foo = 1;</code></pre>"
82
+ assert_renders_textile "pc. var foo = 1;\n", "<pre> <code>var foo = 1;</code> </pre>" # whitespace around the <code> child does not change the result
83
+ end
84
+
85
+ test "leaves <pre> tags which contain mixed content as HTML" do
86
+ assert_renders_textile "<pre> foo bar</pre>", "<pre> foo bar</pre>" # output equals input: no Textile form is used here
87
+ end
88
+
89
+ test "converts <br> into a new line" do
90
+ assert_renders_textile "Foo\nBar", "Foo<br/>Bar"
91
+ end
92
+
93
+ test "converts blockquotes" do
94
+ assert_renders_textile "bq. foo bar\n", "<blockquote><div>foo bar</div></blockquote>" # the inner <div> wrapper leaves no trace in the output
95
+ end
96
+ end
97
+
98
+ context "headers" do # each <hN> is expected as an "hN. " prefixed line ending in a newline
99
+ test "converts <h1> tags" do
100
+ assert_renders_textile "h1. foo bar\n", "<h1>foo bar</h1>"
101
+ end
102
+
103
+ test "converts <h2> tags" do
104
+ assert_renders_textile "h2. foo bar\n", "<h2>foo bar</h2>"
105
+ end
106
+
107
+ test "converts <h3> tags" do
108
+ assert_renders_textile "h3. foo bar\n", "<h3>foo bar</h3>"
109
+ end
110
+
111
+ test "converts <h4> tags" do
112
+ assert_renders_textile "h4. foo bar\n", "<h4>foo bar</h4>"
113
+ end
114
+
115
+ test "converts <h5> tags" do
116
+ assert_renders_textile "h5. foo bar\n", "<h5>foo bar</h5>"
117
+ end
118
+
119
+ test "converts <h6> tags" do
120
+ assert_renders_textile "h6. foo bar\n", "<h6>foo bar</h6>"
121
+ end
122
+ end
123
+
124
+ context "lists" do # in the expectations, nesting depth is the number of repeated markers and the marker type follows the innermost list
125
+ test "converts bullet lists" do
126
+ assert_renders_textile "* foo\n* bar\n", "<ul><li>foo</li><li>bar</li></ul>"
127
+ end
128
+
129
+ test "converts numbered lists" do
130
+ assert_renders_textile "# foo\n# bar\n", "<ol><li>foo</li><li>bar</li></ol>"
131
+ end
132
+
133
+ test "converts nested bullet lists" do
134
+ assert_renders_textile "* foo\n** bar\n* baz\n", "<ul><li>foo<ul><li>bar</li></ul></li><li>baz</li></ul>"
135
+ end
136
+
137
+ test "converts nested numbered lists" do
138
+ assert_renders_textile "# foo\n## bar\n# baz\n", "<ol><li>foo<ol><li>bar</li></ol></li><li>baz</li></ol>"
139
+ end
140
+
141
+ test "converts nested mixed lists" do # ul > ol > ul: expected markers are *, ## and ***
142
+ assert_renders_textile "* foo\n## bar\n## baz\n*** quux\n* cuack\n",
143
+ "<ul><li>foo<ol><li>bar</li><li>baz<ul><li>quux</li></ul></li></ol></li><li>cuack</li></ul>"
144
+ end
145
+
146
+ test "converts a definition list" do # each <dt>/<dd> pair is expected as "- term := definition =:"
147
+ assert_renders_textile "- foo := defining foo =:\n- bar := defining bar =:\n",
148
+ "<dl><dt>foo</dt><dd>defining foo</dd><dt>bar</dt><dd>defining bar</dd></dl>"
149
+ end
150
+ end
151
+
152
+ context "tables" do # rows are expected as |cell|cell| lines; header and span attributes become cell prefixes
153
+ test "converts a simple table" do
154
+ assert_renders_textile "|foo|bar|baz|\n|1|2|3|\n",
155
+ "<table><tr><td>foo</td><td>bar</td><td>baz</td></tr><tr><td>1</td><td>2</td><td>3</td></tr></table>"
156
+ end
157
+
158
+ test "converts a table with headers" do # <th> cells carry the "_. " prefix
159
+ assert_renders_textile "|_. foo|_. bar|_. baz|\n|1|2|3|\n",
160
+ "<table><tr><th>foo</th><th>bar</th><th>baz</th></tr><tr><td>1</td><td>2</td><td>3</td></tr></table>"
161
+ end
162
+
163
+ test "converts a table with cells that span multiple columns" do # colspan='2' is expected as a backslash followed by "2. " (escaped as \\ in the Ruby literal)
164
+ assert_renders_textile "|foo|bar|baz|\n|\\2. 1|2|\n",
165
+ "<table><tr><td>foo</td><td>bar</td><td>baz</td></tr><tr><td colspan='2'>1</td><td>2</td></tr></table>"
166
+ end
167
+
168
+ test "converts a table with cells that span multiple rows" do # rowspan='2' is expected as the "/2. " prefix
169
+ assert_renders_textile "|/2. foo|bar|baz|\n|1|2|\n",
170
+ "<table><tr><td rowspan='2'>foo</td><td>bar</td><td>baz</td></tr><tr><td>1</td><td>2</td></tr></table>"
171
+ end
172
+ end
173
+
174
+ context "applying post processing rules" do
175
+ test "compresses newlines to a maximum of two consecutive newlines" do
176
+ assert_renders_textile "Foo\n\nBar\n\nBaz\n\n* Quux 1\n* Quux 2\n", "<p>Foo</p><p>Bar</p><p>Baz</p><ul><li>Quux 1</li><li>Quux 2</li></p>"
177
+ end
178
+
179
+ test "strips trailing newlines from the start and end of the output string" do
180
+ assert_renders_textile "Foo\n", "<p>Foo</p>"
181
+ end
182
+
183
+ test "converts all fancy characters introduced by textile back into their 'source code'" do
184
+ assert_renders_textile "What the ... hell?", "What the &#8230; hell?"
185
+ assert_renders_textile "It's mine", "It&#8217;s mine"
186
+ assert_renders_textile "\"Fancy quoting\"", "&#8220;Fancy quoting&#8221;"
187
+ assert_renders_textile "How dashing--right?", "How dashing&#8212;right?"
188
+ assert_renders_textile "How dashing - right?", "How dashing &#8211; right?"
189
+ assert_renders_textile "2 x 2 = 4", "2 &#215; 2 = 4"
190
+ assert_renders_textile "2x2 = 4", "2&#215;2 = 4"
191
+ assert_renders_textile "Registered(r)", "Registered&#174;"
192
+ assert_renders_textile "Copyrighted(c)", "Copyrighted&#169;"
193
+ assert_renders_textile "Trademarked(tm)", "Trademarked&#8482;"
194
+ end
195
+ end
196
+ end
197
+ end
198
+ end
data/undress.gemspec ADDED
@@ -0,0 +1,42 @@
1
+ Gem::Specification.new do |s| # gem specification for undress 0.1
2
+ s.name = "undress"
3
+ s.version = "0.1"
4
+ s.date = "2009-07-13"
5
+
6
+ s.description = "Simply translate HTML to Textile, Markdown, or whatever other markup format you need" # NOTE(review): mentions Markdown, but the file list below only ships textile and greencloth grammars
7
+ s.summary = "Convert HTML into other markup languages"
8
+ s.homepage = "http://undress.rubyforge.org"
9
+
10
+ s.authors = "Nicolás Sanguinetti" # NOTE(review): non-ASCII literal and no encoding magic comment is visible — confirm it loads on the targeted Ruby
11
+ s.email = "contacto@nicolassanguinetti.info"
12
+
13
+ s.require_paths = ["lib"]
14
+ s.rubyforge_project = "undress" # NOTE(review): believed deprecated in later RubyGems releases — confirm before reusing this spec
15
+ s.has_rdoc = true # NOTE(review): believed deprecated in later RubyGems releases — confirm before reusing this spec
16
+ s.rubygems_version = "1.3.1" # NOTE(review): presumably overwritten by whichever RubyGems builds the gem — confirm
17
+
18
+ s.add_dependency "hpricot" # the only runtime dependency declared
19
+
20
+ if s.respond_to?(:add_development_dependency) # guard: development dependencies are declared only when this RubyGems supports them
21
+ s.add_development_dependency "sr-mg"
22
+ s.add_development_dependency "contest"
23
+ s.add_development_dependency "redgreen"
24
+ end
25
+
26
+ s.files = %w[
27
+ .gitignore
28
+ LICENSE
29
+ README.rdoc
30
+ Rakefile
31
+ undress.gemspec
32
+ lib/undress.rb
33
+ lib/undress/grammar.rb
34
+ lib/undress/textile.rb
35
+ lib/undress/greencloth.rb
36
+ lib/core_ext/object.rb
37
+ test/test_helper.rb
38
+ test/test_grammar.rb
39
+ test/test_textile.rb
40
+ test/test_greencloth.rb
41
+ ] # explicit file list; new files must be added here by hand
42
+ end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: zevarito-undress
3
+ version: !ruby/object:Gem::Version
4
+ version: "0.1"
5
+ platform: ruby
6
+ authors:
7
+ - "Nicol\xC3\xA1s Sanguinetti"
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-07-13 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: sr-mg
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: contest
37
+ type: :development
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: redgreen
47
+ type: :development
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: "0"
54
+ version:
55
+ description: Simply translate HTML to Textile, Markdown, or whatever other markup format you need
56
+ email: contacto@nicolassanguinetti.info
57
+ executables: []
58
+
59
+ extensions: []
60
+
61
+ extra_rdoc_files: []
62
+
63
+ files:
64
+ - .gitignore
65
+ - LICENSE
66
+ - README.rdoc
67
+ - Rakefile
68
+ - undress.gemspec
69
+ - lib/undress.rb
70
+ - lib/undress/grammar.rb
71
+ - lib/undress/textile.rb
72
+ - lib/undress/greencloth.rb
73
+ - lib/core_ext/object.rb
74
+ - test/test_helper.rb
75
+ - test/test_grammar.rb
76
+ - test/test_textile.rb
77
+ - test/test_greencloth.rb
78
+ has_rdoc: true
79
+ homepage: http://undress.rubyforge.org
80
+ post_install_message:
81
+ rdoc_options: []
82
+
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: "0"
90
+ version:
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: "0"
96
+ version:
97
+ requirements: []
98
+
99
+ rubyforge_project: undress
100
+ rubygems_version: 1.2.0
101
+ signing_key:
102
+ specification_version: 2
103
+ summary: Convert HTML into other markup languages
104
+ test_files: []
105
+