undress 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ doc
2
+ dist
3
+ tmp
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ (The MIT License)
2
+
3
+ Copyright (c) 2009 Nicolas Sanguinetti, entp.com
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ 'Software'), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,39 @@
1
+ = Undress
2
+
3
+ Easily convert HTML back to Textile, Markdown, RDoc or whatever other
4
+ markup language you like.
5
+
6
+ require "undress"
7
+
8
+ code =<<html
9
+ <h1>Hello world!</h1>
10
+ <p><strong>Hey!</strong> How is it going?</p>
11
+ <h2>Supported Markup Languages so far:</h2>
12
+ <ul>
13
+ <li>Textile</li>
14
+ <li>And more to come :P</li>
15
+ </ul>
16
+ html
17
+
18
+ Undress(code).to_textile
19
+
20
+ Will produce
21
+
22
+ h1. Hello world!
23
+
24
+ *Hey!* How is it going?
25
+
26
+ h2. Supported Markup Languages so far:
27
+
28
+ * Textile
29
+ * And more to come :P
30
+
31
+ == Supported Markup Languages
32
+
33
+ For now the only language supported is Textile. But I'll be happy to accept
34
+ patches to add more languages :)
35
+
36
+ == License
37
+
38
+ Authors:: Nicolas Sanguinetti (foca[http://github.com/foca])
39
+ License:: MIT (Check LICENSE for details)
@@ -0,0 +1,32 @@
1
require "rake/testtask"

# Prefer the Hanna RDoc task when it can be loaded; otherwise fall back to
# the plain rake/rdoctask.
begin
  require "hanna/rdoctask"
rescue LoadError
  require "rake/rdoctask"
end

# Generate the API documentation into doc/.
Rake::RDocTask.new do |rd|
  # The main page must name the file exactly as it is listed in rdoc_files
  # below (README.rdoc, not README).
  rd.main = "README.rdoc"
  rd.title = "API Documentation for Undress"
  rd.rdoc_files.include("README.rdoc", "LICENSE", "lib/**/*.rb")
  rd.rdoc_dir = "doc"
end

# metric_fu is optional: when it is not installed we simply go without it.
begin
  require "metric_fu"
rescue LoadError
end

# mg is optional as well: when it is not installed, MG.new is never reached.
begin
  require "mg"
  MG.new("undress.gemspec")
rescue LoadError
end

desc "Default: run tests"
task :default => :test

# Run every test/test_*.rb file.
Rake::TestTask.new do |t|
  t.test_files = FileList["test/test_*.rb"]
end
@@ -0,0 +1,6 @@
1
# Fallback definition of Object#tap for Rubies that do not provide one.
class Object #:nodoc:
  # Only define the fallback when no +tap+ exists yet, so a built-in
  # implementation is never replaced.
  unless method_defined?(:tap)
    # Yields the receiver to the block and returns the receiver, whatever the
    # block evaluates to.
    def tap
      yield self
      self
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require "hpricot"
2
+ require File.expand_path(File.dirname(__FILE__) + "/core_ext/object")
3
+ require File.expand_path(File.dirname(__FILE__) + "/undress/grammar")
4
+
5
# Load an HTML document so you can undress it. Pass it either a string or an IO
# object. You can pass an optional hash of options, which will be forwarded
# straight to Hpricot. Check its
# documentation[http://code.whytheluckystiff.net/doc/hpricot] for details.
def Undress(html, options={})
  Undress::Document.new(html, options)
end

module Undress
  # Register a markup language. The name will become the method used to convert
  # HTML to this markup language: for example registering the name +:textile+
  # gives you <tt>Undress(code).to_textile</tt>, registering +:markdown+ would
  # give you <tt>Undress(code).to_markdown</tt>, etc.
  def self.add_markup(name, grammar)
    Document.add_markup(name, grammar)
  end

  # Wraps the parsed document and gains one <tt>to_*</tt> method per
  # registered markup language.
  class Document #:nodoc:
    # Parse +html+ with Hpricot, forwarding +options+ untouched. +options+
    # defaults to an empty hash, matching the Undress() entry point.
    def initialize(html, options = {})
      @doc = Hpricot(html, options)
    end

    # Define <tt>to_#{name}</tt>, which hands the parsed document to
    # +grammar.process!+ and returns its result.
    def self.add_markup(name, grammar)
      define_method "to_#{name}" do
        grammar.process!(@doc)
      end
    end
  end

  module ::Hpricot #:nodoc:
    class Elem #:nodoc:
      # Collect the chain of parents of this element, nearest first. A node
      # that has no parent of its own (the top of the tree) is not included.
      def ancestors
        chain = Elements[]
        current = parent
        while current.respond_to?(:parent) && current.parent
          chain << current
          current = current.parent
        end
        chain
      end
    end
  end
end
@@ -0,0 +1,127 @@
1
module Undress
  # Grammars give you a DSL to declare how to convert an HTML document into a
  # different markup language.
  #
  # Rules are declared on the class (+rule_for+, +default+, +pre_processing+,
  # +post_processing+); a fresh instance is created for every conversion.
  class Grammar
    # Give each subclass its own copy of the parent's pre/post processing
    # rules. Copying (rather than sharing the same Hash) keeps rules that are
    # declared on a subclass from leaking back into its parent.
    def self.inherited(base) # :nodoc:
      base.instance_variable_set(:@post_processing_rules, post_processing_rules.dup)
      base.instance_variable_set(:@pre_processing_rules, pre_processing_rules.dup)
    end

    # Add a parsing rule for a group of html tags.
    #
    #     rule_for :p do |element|
    #       "<this was a paragraph>#{content_of(element)}</this was a paragraph>"
    #     end
    #
    # will replace your <tt><p></tt> tags for <tt><this was a paragraph></tt>
    # tags, without altering the contents.
    #
    # The element yielded to the block is an Hpricot element for the given tag.
    # Each tag becomes an instance method of the grammar, so the block runs
    # with the grammar instance as +self+.
    def self.rule_for(*tags, &handler) # :yields: element
      tags.each do |tag|
        define_method tag.to_sym, &handler
      end
    end

    # Set a default rule for unrecognized tags.
    #
    # Unless you define a special case, it will ignore the tags and just output
    # the contents of unrecognized tags.
    #
    # The handler is evaluated on the grammar instance, just like the blocks
    # given to +rule_for+, so helpers such as +content_of+ are available.
    def self.default(&handler) # :yields: element
      define_method :method_missing do |tag, node, *args|
        instance_exec(node, &handler)
      end
    end

    # Add a post-processing rule to your parser.
    #
    # This takes a regular expression that will be applied to the output after
    # processing any nodes. It can take a string as a replacement, or a block
    # that will be passed to String#gsub.
    #
    #     post_processing(/\n\n+/, "\n\n") # compress more than two newlines
    #     post_processing(/whatever/) { ... }
    #
    # Raises ArgumentError when neither a replacement nor a block is given.
    def self.post_processing(regexp, replacement = nil, &handler) #:yields: matched_string
      substitution = replacement || handler
      if substitution.nil?
        raise ArgumentError, "post_processing needs a replacement string or a block"
      end
      post_processing_rules[regexp] = substitution
    end

    # Add a pre-processing rule to your parser.
    #
    # This lets you mutate the DOM before applying any rule defined with
    # +rule_for+. You need to pass a CSS/XPath selector, and a block that
    # takes an Hpricot element to parse it.
    #
    #     pre_processing "ul.toc" do |element|
    #       element.swap("<p>[[toc]]</p>")
    #     end
    #
    # Would replace any unordered lists with the class +toc+ for a
    # paragraph containing the code <tt>[[toc]]</tt>.
    def self.pre_processing(selector, &handler) # :yields: element
      pre_processing_rules[selector] = handler
    end

    # Post-processing rules declared on this class: regexp => String or block.
    def self.post_processing_rules #:nodoc:
      @post_processing_rules ||= {}
    end

    # Pre-processing rules declared on this class: selector => block.
    def self.pre_processing_rules #:nodoc:
      @pre_processing_rules ||= {}
    end

    # Convert +node+ with a fresh instance of this grammar.
    def self.process!(node) #:nodoc:
      new.process!(node)
    end

    attr_reader :pre_processing_rules #:nodoc:
    attr_reader :post_processing_rules #:nodoc:

    # Each instance works on its own copy of the class-level rules.
    def initialize #:nodoc:
      @pre_processing_rules  = self.class.pre_processing_rules.dup
      @post_processing_rules = self.class.post_processing_rules.dup
    end

    # Process a DOM node, converting it to your markup language according to
    # your defined rules. If the node is a Text node, it will return its
    # string representation. Otherwise it will call the rule defined for it.
    # Nodes that are neither text nor elements contribute nothing.
    #
    # Accepts a single node or a list of nodes and returns one String.
    #
    # NOTE(review): dispatch uses +send+ with the tag name taken from the
    # document, so a tag whose name matches an existing method (for example
    # Kernel#select) reaches that method instead of a rule or the default
    # handler. Consider restricting dispatch to declared rules.
    def process(nodes)
      Array(nodes).map do |node|
        if node.text?
          node.to_html
        elsif node.elem?
          send node.name.to_sym, node
        else
          ""
        end
      end.join("")
    end

    # Run the whole conversion for +node+: apply the pre-processing rules to
    # the DOM, convert the children, then apply the post-processing rules to
    # the resulting text. Returns the converted String.
    def process!(node) #:nodoc:
      pre_processing_rules.each do |selector, handler|
        node.search(selector).each(&handler)
      end

      text = process(node.children)
      post_processing_rules.each do |rule, handler|
        handler.is_a?(String) ? text.gsub!(rule, handler) : text.gsub!(rule, &handler)
      end
      text
    end

    # Get the result of parsing the contents of a node.
    def content_of(node)
      process(node.respond_to?(:children) ? node.children : node)
    end

    # Helper method that tells you if the given DOM node is immediately
    # preceded or followed by whitespace. A missing sibling (the node is the
    # first or last child) counts as "no whitespace" on that side.
    def surrounded_by_whitespace?(node)
      before = node.previous
      after  = node.next

      (before && before.text? && before.to_s =~ /\s+$/) ||
        (after && after.text? && after.to_s =~ /^\s+/)
    end

    # Fallback for tags without a rule: drop the tag and keep its contents.
    # Calls that do not carry a node (an ordinary misspelt method call) are
    # handed to the regular NoMethodError machinery.
    def method_missing(tag, node = nil, *args) #:nodoc:
      return super unless node.respond_to?(:children)
      process(node.children)
    end
  end
end
@@ -0,0 +1,96 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/../undress")
2
+
3
module Undress
  # Grammar that converts HTML into Textile markup. Registered at the bottom
  # of this module as +:textile+, which gives you
  # <tt>Undress(html).to_textile</tt>.
  class Textile < Grammar
    # Tags that open a list. Kept as an Array so membership is tested against
    # whole tag names (a String would match any substring, e.g. "u").
    LIST_TAGS = %w(ul ol).freeze

    # whitespace handling
    post_processing(/\n\n+/, "\n\n")
    post_processing(/\A\s+/, "")
    post_processing(/\s+\z/, "\n")

    # special characters introduced by textile
    post_processing(/&#8230;/, "...")
    post_processing(/&#8217;/, "'")
    post_processing(/&#822[01];/, '"')
    post_processing(/&#8212;/, "--")
    post_processing(/&#8211;/, "-")
    post_processing(/(\d+\s*)&#215;(\s*\d+)/, '\1x\2')
    post_processing(/&#174;/, "(r)")
    post_processing(/&#169;/, "(c)")
    post_processing(/&#8482;/, "(tm)")

    # inline elements
    rule_for(:a) {|e|
      title = e.has_attribute?("title") ? " (#{e["title"]})" : ""
      "[#{content_of(e)}#{title}:#{e["href"]}]"
    }
    rule_for(:img) {|e|
      alt = e.has_attribute?("alt") ? "(#{e["alt"]})" : ""
      "!#{e["src"]}#{alt}!"
    }
    rule_for(:strong)  {|e| "*#{content_of(e)}*" }
    rule_for(:em)      {|e| "_#{content_of(e)}_" }
    rule_for(:code)    {|e| "@#{content_of(e)}@" }
    rule_for(:cite)    {|e| "??#{content_of(e)}??" }
    # sup/sub get the bracketed form when they touch the surrounding text
    rule_for(:sup)     {|e| surrounded_by_whitespace?(e) ? "^#{content_of(e)}^" : "[^#{content_of(e)}^]" }
    rule_for(:sub)     {|e| surrounded_by_whitespace?(e) ? "~#{content_of(e)}~" : "[~#{content_of(e)}~]" }
    rule_for(:ins)     {|e| "+#{content_of(e)}+" }
    rule_for(:del)     {|e| "-#{content_of(e)}-" }
    rule_for(:acronym) {|e| e.has_attribute?("title") ? "#{content_of(e)}(#{e["title"]})" : content_of(e) }

    # text formatting and layout
    rule_for(:p)          {|e| "\n\n#{content_of(e)}\n\n" }
    rule_for(:br)         {|e| "\n" }
    rule_for(:blockquote) {|e| "\n\nbq. #{content_of(e)}\n\n" }
    rule_for(:pre) {|e|
      # A <pre> holding nothing but <code> (plus whitespace) becomes a "pc."
      # block; anything else is left as raw HTML.
      if e.children.all? {|n| n.text? && n.content =~ /^\s+$/ || n.elem? && n.name == "code" }
        "\n\npc. #{content_of(e % "code")}\n\n"
      else
        "<pre>#{content_of(e)}</pre>"
      end
    }

    # headings
    rule_for(:h1) {|e| "\n\nh1. #{content_of(e)}\n\n" }
    rule_for(:h2) {|e| "\n\nh2. #{content_of(e)}\n\n" }
    rule_for(:h3) {|e| "\n\nh3. #{content_of(e)}\n\n" }
    rule_for(:h4) {|e| "\n\nh4. #{content_of(e)}\n\n" }
    rule_for(:h5) {|e| "\n\nh5. #{content_of(e)}\n\n" }
    rule_for(:h6) {|e| "\n\nh6. #{content_of(e)}\n\n" }

    # lists
    rule_for(:li) {|e|
      token = e.parent.name == "ul" ? "*" : "#"
      # Depth is the number of enclosing lists. Wrappers that are not lists
      # (html, body, div, td, ...) must not add levels. An item without any
      # list ancestor still gets a single marker.
      depth = e.ancestors.select {|node| LIST_TAGS.include?(node.name) }.size
      "\n#{token * [depth, 1].max} #{content_of(e)}"
    }
    rule_for(:ul, :ol) {|e|
      # Nested lists are emitted inline; only the outermost list is padded
      # with blank lines.
      if e.ancestors.any? {|node| LIST_TAGS.include?(node.name) }
        content_of(e)
      else
        "\n#{content_of(e)}\n\n"
      end
    }

    # definition lists
    rule_for(:dl) {|e| "\n\n#{content_of(e)}\n" }
    rule_for(:dt) {|e| "- #{content_of(e)} " }
    rule_for(:dd) {|e| ":= #{content_of(e)} =:\n" }

    # tables
    rule_for(:table)   {|e| "\n\n#{content_of(e)}\n" }
    rule_for(:tr)      {|e| "#{content_of(e)}|\n" }
    rule_for(:td, :th) {|e|
      # Header cells win over spans; a plain cell has no prefix at all.
      prefix = if e.name == "th"
        "_. "
      elsif e.has_attribute?("colspan")
        "\\#{e["colspan"]}. "
      elsif e.has_attribute?("rowspan")
        "/#{e["rowspan"]}. "
      end

      "|#{prefix}#{content_of(e)}"
    }
  end

  add_markup :textile, Textile
end
@@ -0,0 +1,55 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/test_helper")
2
+
3
+ module Undress
4
+ class TestGrammar < Test::Unit::TestCase
5
+ class Parent < Grammar
6
+ rule_for(:p) {|e| "<this is a paragraph>#{content_of(e)}</this is a paragraph>" }
7
+ end
8
+
9
+ class WithPreProcessingRules < Parent
10
+ pre_processing("p.foo") {|e| e.swap("<div>Cuack</div>") }
11
+ rule_for(:div) {|e| "<this was a div>#{content_of(e)}</this was a div>" }
12
+ end
13
+
14
+ class Child < Parent; end
15
+
16
+ class OverWriter < WithPreProcessingRules
17
+ rule_for(:div) {|e| content_of(e) }
18
+ end
19
+
20
+ class TextileExtension < Textile
21
+ rule_for(:a) {|e| "" }
22
+ end
23
+
24
+ def parse_with(grammar, html)
25
+ grammar.process!(Hpricot(html))
26
+ end
27
+
28
+ context "extending a grammar" do
29
+ test "the extended grammar should inherit the rules of the parent" do
30
+ output = parse_with Child, "<p>Foo Bar</p>"
31
+ assert_equal "<this is a paragraph>Foo Bar</this is a paragraph>", output
32
+ end
33
+
34
+ test "extending a grammar doesn't overwrite the parent's rules" do
35
+ output = parse_with OverWriter, "<div>Foo</div>"
36
+ assert_equal "Foo", output
37
+
38
+ output = parse_with WithPreProcessingRules, "<div>Foo</div>"
39
+ assert_equal "<this was a div>Foo</this was a div>", output
40
+ end
41
+
42
+ test "extending textile doesn't blow up" do
43
+ output = parse_with TextileExtension, "<p><a href='/'>Cuack</a></p><p>Foo Bar</p><p>I <a href='/'>work</a></p>"
44
+ assert_equal "Foo Bar\n\nI\n", output
45
+ end
46
+ end
47
+
48
+ context "pre processing rules" do
49
+ test "mutate the DOM before parsing the tags" do
50
+ output = parse_with WithPreProcessingRules, "<p class='foo'>Blah</p><p>O hai</p>"
51
+ assert_equal "<this was a div>Cuack</this was a div><this is a paragraph>O hai</this is a paragraph>", output
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,10 @@
1
+ require "test/unit"
2
+ require "contest"
3
+ Dir[File.expand_path(File.dirname(__FILE__) + "/../lib/**/*.rb")].each do |file|
4
+ require file
5
+ end
6
+
7
+ begin
8
+ require "redgreen"
9
+ rescue LoadError
10
+ end
@@ -0,0 +1,198 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/test_helper")
2
+
3
+ module Undress
4
+ class TextileTest < Test::Unit::TestCase
5
+ def assert_renders_textile(textile, html)
6
+ assert_equal textile, Undress(html).to_textile
7
+ end
8
+
9
+ context "Converting HTML to textile" do
10
+ test "converts nested tags" do
11
+ assert_renders_textile "h2. _this is *very* important_\n", "<h2><em>this is <strong>very</strong> important</em></h2>"
12
+ end
13
+
14
+ context "inline elements" do
15
+ test "converts <strong> tags" do
16
+ assert_renders_textile "*foo bar*", "<strong>foo bar</strong>"
17
+ end
18
+
19
+ test "converts <em> tags" do
20
+ assert_renders_textile "_foo bar_", "<em>foo bar</em>"
21
+ end
22
+
23
+ test "converts <code> tags" do
24
+ assert_renders_textile "@foo bar@", "<code>foo bar</code>"
25
+ end
26
+
27
+ test "converts <cite> tags" do
28
+ assert_renders_textile "??foo bar??", "<cite>foo bar</cite>"
29
+ end
30
+
31
+ test "converts <sup> tags" do
32
+ assert_renders_textile "foo ^sup^ bar", "foo <sup>sup</sup> bar"
33
+ assert_renders_textile "foo[^sup^]bar", "foo<sup>sup</sup>bar"
34
+ end
35
+
36
+ test "converts <sub> tags" do
37
+ assert_renders_textile "foo ~sub~ bar", "foo <sub>sub</sub> bar"
38
+ assert_renders_textile "foo[~sub~]bar", "foo<sub>sub</sub>bar"
39
+ end
40
+
41
+ test "converts <ins> tags" do
42
+ assert_renders_textile "+foo bar+", "<ins>foo bar</ins>"
43
+ end
44
+
45
+ test "converts <del> tags" do
46
+ assert_renders_textile "-foo bar-", "<del>foo bar</del>"
47
+ end
48
+
49
+ test "converts <acronym> tags" do
50
+ assert_renders_textile "EPA(Environmental Protection Agency)", "<acronym title='Environmental Protection Agency'>EPA</acronym>"
51
+ assert_renders_textile "EPA", "<acronym>EPA</acronym>"
52
+ end
53
+ end
54
+
55
+ context "links" do
56
+ test "converts simple links (without title)" do
57
+ assert_renders_textile "[Foo Bar:/cuack]", "<a href='/cuack'>Foo Bar</a>"
58
+ end
59
+
60
+ test "converts links with titles" do
61
+ assert_renders_textile "[Foo Bar (You should see this):/cuack]", "<a href='/cuack' title='You should see this'>Foo Bar</a>"
62
+ end
63
+ end
64
+
65
+ context "images" do
66
+ test "converts images without alt attributes" do
67
+ assert_renders_textile "!http://example.com/image.png!", "<img src='http://example.com/image.png'/>"
68
+ end
69
+
70
+ test "converts images with alt attributes" do
71
+ assert_renders_textile "!http://example.com/image.png(Awesome Pic)!", "<img src='http://example.com/image.png' alt='Awesome Pic'/>"
72
+ end
73
+ end
74
+
75
+ context "text formatting" do
76
+ test "converts paragraphs" do
77
+ assert_renders_textile "foo\n\nbar\n", "<p>foo</p><p>bar</p>"
78
+ end
79
+
80
+ test "converts <pre> tags which only contain a <code> child" do
81
+ assert_renders_textile "pc. var foo = 1;\n", "<pre><code>var foo = 1;</code></pre>"
82
+ assert_renders_textile "pc. var foo = 1;\n", "<pre> <code>var foo = 1;</code> </pre>"
83
+ end
84
+
85
+ test "leaves <pre> tags which contain mixed content as HTML" do
86
+ assert_renders_textile "<pre> foo bar</pre>", "<pre> foo bar</pre>"
87
+ end
88
+
89
+ test "converts <br> into a new line" do
90
+ assert_renders_textile "Foo\nBar", "Foo<br/>Bar"
91
+ end
92
+
93
+ test "converts blockquotes" do
94
+ assert_renders_textile "bq. foo bar\n", "<blockquote><div>foo bar</div></blockquote>"
95
+ end
96
+ end
97
+
98
+ context "headers" do
99
+ test "converts <h1> tags" do
100
+ assert_renders_textile "h1. foo bar\n", "<h1>foo bar</h1>"
101
+ end
102
+
103
+ test "converts <h2> tags" do
104
+ assert_renders_textile "h2. foo bar\n", "<h2>foo bar</h2>"
105
+ end
106
+
107
+ test "converts <h3> tags" do
108
+ assert_renders_textile "h3. foo bar\n", "<h3>foo bar</h3>"
109
+ end
110
+
111
+ test "converts <h4> tags" do
112
+ assert_renders_textile "h4. foo bar\n", "<h4>foo bar</h4>"
113
+ end
114
+
115
+ test "converts <h5> tags" do
116
+ assert_renders_textile "h5. foo bar\n", "<h5>foo bar</h5>"
117
+ end
118
+
119
+ test "converts <h6> tags" do
120
+ assert_renders_textile "h6. foo bar\n", "<h6>foo bar</h6>"
121
+ end
122
+ end
123
+
124
+ context "lists" do
125
+ test "converts bullet lists" do
126
+ assert_renders_textile "* foo\n* bar\n", "<ul><li>foo</li><li>bar</li></ul>"
127
+ end
128
+
129
+ test "converts numbered lists" do
130
+ assert_renders_textile "# foo\n# bar\n", "<ol><li>foo</li><li>bar</li></ol>"
131
+ end
132
+
133
+ test "converts nested bullet lists" do
134
+ assert_renders_textile "* foo\n** bar\n* baz\n", "<ul><li>foo<ul><li>bar</li></ul></li><li>baz</li></ul>"
135
+ end
136
+
137
+ test "converts nested numbered lists" do
138
+ assert_renders_textile "# foo\n## bar\n# baz\n", "<ol><li>foo<ol><li>bar</li></ol></li><li>baz</li></ol>"
139
+ end
140
+
141
+ test "converts nested mixed lists" do
142
+ assert_renders_textile "* foo\n## bar\n## baz\n*** quux\n* cuack\n",
143
+ "<ul><li>foo<ol><li>bar</li><li>baz<ul><li>quux</li></ul></li></ol></li><li>cuack</li></ul>"
144
+ end
145
+
146
+ test "converts a definition list" do
147
+ assert_renders_textile "- foo := defining foo =:\n- bar := defining bar =:\n",
148
+ "<dl><dt>foo</dt><dd>defining foo</dd><dt>bar</dt><dd>defining bar</dd></dl>"
149
+ end
150
+ end
151
+
152
+ context "tables" do
153
+ test "converts a simple table" do
154
+ assert_renders_textile "|foo|bar|baz|\n|1|2|3|\n",
155
+ "<table><tr><td>foo</td><td>bar</td><td>baz</td></tr><tr><td>1</td><td>2</td><td>3</td></tr></table>"
156
+ end
157
+
158
+ test "converts a table with headers" do
159
+ assert_renders_textile "|_. foo|_. bar|_. baz|\n|1|2|3|\n",
160
+ "<table><tr><th>foo</th><th>bar</th><th>baz</th></tr><tr><td>1</td><td>2</td><td>3</td></tr></table>"
161
+ end
162
+
163
+ test "converts a table with cells that span multiple columns" do
164
+ assert_renders_textile "|foo|bar|baz|\n|\\2. 1|2|\n",
165
+ "<table><tr><td>foo</td><td>bar</td><td>baz</td></tr><tr><td colspan='2'>1</td><td>2</td></tr></table>"
166
+ end
167
+
168
+ test "converts a table with cells that span multiple rows" do
169
+ assert_renders_textile "|/2. foo|bar|baz|\n|1|2|\n",
170
+ "<table><tr><td rowspan='2'>foo</td><td>bar</td><td>baz</td></tr><tr><td>1</td><td>2</td></tr></table>"
171
+ end
172
+ end
173
+
174
+ context "applying post processing rules" do
175
+ test "compresses newlines to a maximum of two consecutive newlines" do
176
+ assert_renders_textile "Foo\n\nBar\n\nBaz\n\n* Quux 1\n* Quux 2\n", "<p>Foo</p><p>Bar</p><p>Baz</p><ul><li>Quux 1</li><li>Quux 2</li></p>"
177
+ end
178
+
179
+ test "strips trailing newlines from the start and end of the output string" do
180
+ assert_renders_textile "Foo\n", "<p>Foo</p>"
181
+ end
182
+
183
+ test "converts all fancy characters introduced by textile back into their 'source code'" do
184
+ assert_renders_textile "What the ... hell?", "What the &#8230; hell?"
185
+ assert_renders_textile "It's mine", "It&#8217;s mine"
186
+ assert_renders_textile "\"Fancy quoting\"", "&#8220;Fancy quoting&#8221;"
187
+ assert_renders_textile "How dashing--right?", "How dashing&#8212;right?"
188
+ assert_renders_textile "How dashing - right?", "How dashing &#8211; right?"
189
+ assert_renders_textile "2 x 2 = 4", "2 &#215; 2 = 4"
190
+ assert_renders_textile "2x2 = 4", "2&#215;2 = 4"
191
+ assert_renders_textile "Registered(r)", "Registered&#174;"
192
+ assert_renders_textile "Copyrighted(c)", "Copyrighted&#169;"
193
+ assert_renders_textile "Trademarked(tm)", "Trademarked&#8482;"
194
+ end
195
+ end
196
+ end
197
+ end
198
+ end
@@ -0,0 +1,40 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = "undress"
3
+ s.version = "0.1"
4
+ s.date = "2009-07-13"
5
+
6
+ s.description = "Simply translate HTML to Textile, Markdown, or whatever other markup format you need"
7
+ s.summary = "Convert HTML into other markup languages"
8
+ s.homepage = "http://undress.rubyforge.org"
9
+
10
+ s.authors = "Nicolás Sanguinetti"
11
+ s.email = "contacto@nicolassanguinetti.info"
12
+
13
+ s.require_paths = ["lib"]
14
+ s.rubyforge_project = "undress"
15
+ s.has_rdoc = true
16
+ s.rubygems_version = "1.3.1"
17
+
18
+ s.add_dependency "hpricot"
19
+
20
+ if s.respond_to?(:add_development_dependency)
21
+ s.add_development_dependency "sr-mg"
22
+ s.add_development_dependency "contest"
23
+ s.add_development_dependency "redgreen"
24
+ end
25
+
26
+ s.files = %w[
27
+ .gitignore
28
+ LICENSE
29
+ README.rdoc
30
+ Rakefile
31
+ undress.gemspec
32
+ lib/undress.rb
33
+ lib/undress/grammar.rb
34
+ lib/undress/textile.rb
35
+ lib/core_ext/object.rb
36
+ test/test_helper.rb
37
+ test/test_grammar.rb
38
+ test/test_textile.rb
39
+ ]
40
+ end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: undress
3
+ version: !ruby/object:Gem::Version
4
+ version: "0.1"
5
+ platform: ruby
6
+ authors:
7
+ - "Nicol\xC3\xA1s Sanguinetti"
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-07-13 00:00:00 -03:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: sr-mg
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: contest
37
+ type: :development
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: redgreen
47
+ type: :development
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: "0"
54
+ version:
55
+ description: Simply translate HTML to Textile, Markdown, or whatever other markup format you need
56
+ email: contacto@nicolassanguinetti.info
57
+ executables: []
58
+
59
+ extensions: []
60
+
61
+ extra_rdoc_files: []
62
+
63
+ files:
64
+ - .gitignore
65
+ - LICENSE
66
+ - README.rdoc
67
+ - Rakefile
68
+ - undress.gemspec
69
+ - lib/undress.rb
70
+ - lib/undress/grammar.rb
71
+ - lib/undress/textile.rb
72
+ - lib/core_ext/object.rb
73
+ - test/test_helper.rb
74
+ - test/test_grammar.rb
75
+ - test/test_textile.rb
76
+ has_rdoc: true
77
+ homepage: http://undress.rubyforge.org
78
+ licenses: []
79
+
80
+ post_install_message:
81
+ rdoc_options: []
82
+
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: "0"
90
+ version:
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: "0"
96
+ version:
97
+ requirements: []
98
+
99
+ rubyforge_project: undress
100
+ rubygems_version: 1.3.4
101
+ signing_key:
102
+ specification_version: 3
103
+ summary: Convert HTML into other markup languages
104
+ test_files: []
105
+