suung-undress 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,142 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/textile")
2
+
3
+ module Undress
4
+ class GreenCloth < Textile
5
+
6
+ Undress::ALLOWED_TAGS = [
7
+ 'div', 'a', 'img', 'br', 'i', 'u', 'b', 'pre', 'kbd', 'code', 'cite', 'strong', 'em',
8
+ 'ins', 'sup', 'sub', 'del', 'table', 'tbody', 'thead', 'tr', 'td', 'th', 'ol', 'ul',
9
+ 'li', 'p', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'notextile', 'blockquote',
10
+ 'object', 'embed', 'param', 'acronym', 'dd', 'dl', 'dt'
11
+ ]
12
+
13
+ # table of contents
14
+ pre_processing("ul.toc") do |toc|
15
+ toc.swap "[[toc]]\n"
16
+ end
17
+
18
+ # headings
19
+ rule_for(:h1, :h2, :h3, :h4, :h5, :h6) {|e| process_headings(e) }
20
+
21
+ # inline elements
22
+ rule_for(:a) {|e|
23
+ "#{process_links_and_anchors(e)}"
24
+ }
25
+
26
+ # lists
27
+ rule_for(:li) {|e|
28
+ offset = ""
29
+ li = e
30
+ while li.parent
31
+ if li.parent.name == "ul" then offset = "*#{offset}"
32
+ elsif li.parent.name == "ol" then offset = "##{offset}"
33
+ else return offset end
34
+ li = li.parent.parent ? li.parent.parent : nil
35
+ end
36
+ "\n#{offset} #{content_of(e)}"
37
+ }
38
+
39
+ # text formatting
40
+ rule_for(:pre) {|e|
41
+ if e.children && e.children.all? {|n| n.text? && n.content =~ /^\s+$/ || n.elem? && n.name == "code" }
42
+ "\n\n<pre><code>#{content_of(e % "code")}</code></pre>"
43
+ else
44
+ "\n\n<pre>#{content_of(e)}</pre>"
45
+ end
46
+ }
47
+
48
+ rule_for(:code) {|e|
49
+ if e.inner_html.match(/\n/)
50
+ if e.parent && e.parent.name != "pre"
51
+ "<pre><code>#{content_of(e)}</code></pre>"
52
+ else
53
+ "<code>#{content_of(e)}</code>"
54
+ end
55
+ else
56
+ "@#{content_of(e)}@"
57
+ end
58
+ }
59
+
60
+ # passing trough objects
61
+ rule_for(:embed, :object, :param) {|e|
62
+ e.to_html
63
+ }
64
+
65
+ def process_headings(h)
66
+ h.children.each {|e|
67
+ next if e.class == Hpricot::Text
68
+ e.parent.replace_child(e, "") if e.has_attribute?("href") && e["href"] !~ /^\/|(https?|s?ftp):\/\//
69
+ }
70
+ case h.name
71
+ when "h1"
72
+ "#{content_of(h)}\n#{'=' * h.inner_text.size}\n\n" if h.name == "h1"
73
+ when "h2"
74
+ "#{content_of(h)}\n#{'-' * h.inner_text.size}\n\n" if h.name == "h2"
75
+ else
76
+ "#{h.name}. #{content_of(h)}\n\n"
77
+ end
78
+ end
79
+
80
+ def process_links_and_anchors(e)
81
+ return "" if e.empty?
82
+ inner, name, href = e.inner_html, e.get_attribute("name"), e.get_attribute("href")
83
+
84
+ # is an anchor? and cannot be child of any h1..h6
85
+ if name && !e.parent.name.match(/^h1|2|3|4|5|6$/)
86
+ inner == name || inner == name.gsub(/-/,"\s") ? "[# #{inner} #]" : "[# #{inner} -> #{name} #]"
87
+ # is a link?
88
+ elsif href && href != ""
89
+ case href
90
+ when /^\/#/
91
+ "[\"#{inner}\":#{href}"
92
+ when /^#/
93
+ "[#{inner} -> #{href}]"
94
+ when /^(https?|s?ftp):\/\//
95
+ href.gsub(/^(https?|s?ftp):\/\//, "") == inner ? "[#{href}]" : "[#{inner} -> #{href}]"
96
+ when /^[^\/]/
97
+ if inner != href
98
+ "[#{e.inner_text} -> #{href}]"
99
+ else
100
+ "[#{e.inner_text}]"
101
+ end
102
+ when /^\/.[^\/]*\/.[^\/]*\//
103
+ "[#{inner} -> #{href}]"
104
+ when /(?:\/page\/\+)[0-9]+$/
105
+ "[#{inner} -> +#{href.gsub(/\+[0-9]+$/)}]"
106
+ else
107
+ process_as_wiki_link(e)
108
+ end
109
+ else
110
+ ""
111
+ end
112
+ end
113
+
114
+ def process_as_wiki_link(e)
115
+ inner, name, href = e.inner_html, e.get_attribute("name"), e.get_attribute("href")
116
+
117
+ # pages or group pages
118
+ context_name, page_name = href.split("/")[1..2]
119
+ page_name = context_name if page_name.nil?
120
+ wiki_page_name = page_name.gsub(/[a-z-]*[^\/]$/m) {|m| m.tr('-',' ')}
121
+
122
+ # simple page
123
+ if context_name == "page"
124
+ return "[#{inner}]" if wiki_page_name == inner
125
+ return "[#{inner} -> #{wiki_page_name}]"
126
+ end
127
+ # group page
128
+ if context_name != page_name
129
+ return "[#{context_name} / #{wiki_page_name}]" if wiki_page_name == inner
130
+ return "[#{inner} -> #{wiki_page_name}]" if context_name == "page"
131
+ return "[#{inner} -> #{context_name} / #{wiki_page_name}]"
132
+ end
133
+ if inner == page_name || inner == wiki_page_name || inner == wiki_page_name.gsub(/\s/,"-")
134
+ return "[#{wiki_page_name}]"
135
+ end
136
+ # fall back
137
+ return "[#{inner} -> #{href}]"
138
+ end
139
+
140
+ end
141
+ add_markup :greencloth, GreenCloth
142
+ end
@@ -0,0 +1,126 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/../undress")
2
+
3
+ module Undress
4
+ class Textile < Grammar
5
+ whitelist_attributes :class, :id, :lang, :style, :colspan, :rowspan
6
+
7
+ # entities
8
+ post_processing(/&nbsp;/, " ")
9
+
10
+ # whitespace handling
11
+ post_processing(/\n\n+/, "\n\n")
12
+ post_processing(/\A\s+/, "")
13
+ post_processing(/\s+\z/, "\n")
14
+
15
+ # special characters introduced by textile
16
+ post_processing(/&#8230;/, "...")
17
+ post_processing(/&#8217;/, "'")
18
+ post_processing(/&#822[01];/, '"')
19
+ post_processing(/&#8212;/, "--")
20
+ post_processing(/&#8211;/, "-")
21
+ post_processing(/(\d+\s*)&#215;(\s*\d+)/, '\1x\2')
22
+ post_processing(/&#174;/, "(r)")
23
+ post_processing(/&#169;/, "(c)")
24
+ post_processing(/&#8482;/, "(tm)")
25
+
26
+ # inline elements
27
+ rule_for(:a) {|e|
28
+ title = e.has_attribute?("title") ? " (#{e["title"]})" : ""
29
+ "[#{content_of(e)}#{title}:#{e["href"]}]"
30
+ }
31
+ rule_for(:img) {|e|
32
+ alt = e.has_attribute?("alt") ? "(#{e["alt"]})" : ""
33
+ "!#{e["src"]}#{alt}!"
34
+ }
35
+ rule_for(:strong) {|e| complete_word?(e) ? "*#{attributes(e)}#{content_of(e)}*" : "[*#{attributes(e)}#{content_of(e)}*]"}
36
+ rule_for(:em) {|e| complete_word?(e) ? "_#{attributes(e)}#{content_of(e)}_" : "[_#{attributes(e)}#{content_of(e)}_]"}
37
+ rule_for(:code) {|e| "@#{attributes(e)}#{content_of(e)}@" }
38
+ rule_for(:cite) {|e| "??#{attributes(e)}#{content_of(e)}??" }
39
+ rule_for(:sup) {|e| surrounded_by_whitespace?(e) ? "^#{attributes(e)}#{content_of(e)}^" : "[^#{attributes(e)}#{content_of(e)}^]" }
40
+ rule_for(:sub) {|e| surrounded_by_whitespace?(e) ? "~#{attributes(e)}#{content_of(e)}~" : "[~#{attributes(e)}#{content_of(e)}~]" }
41
+ rule_for(:ins) {|e| complete_word?(e) ? "+#{attributes(e)}#{content_of(e)}+" : "[+#{attributes(e)}#{content_of(e)}+]"}
42
+ rule_for(:del) {|e| complete_word?(e) ? "-#{attributes(e)}#{content_of(e)}-" : "[-#{attributes(e)}#{content_of(e)}-]"}
43
+ rule_for(:acronym) {|e| e.has_attribute?("title") ? "#{content_of(e)}(#{e["title"]})" : content_of(e) }
44
+
45
+
46
+ # text formatting and layout
47
+ rule_for(:p) do |e|
48
+ at = attributes(e) != "" ? "p#{at}#{attributes(e)}. " : ""
49
+ e.parent && e.parent.name == "blockquote" ? "#{at}#{content_of(e)}\n\n" : "\n\n#{at}#{content_of(e)}\n\n"
50
+ end
51
+ rule_for(:br) {|e| "\n" }
52
+ rule_for(:blockquote) {|e| "\n\nbq#{attributes(e)}. #{content_of(e)}\n\n" }
53
+ rule_for(:pre) {|e|
54
+ if e.children && e.children.all? {|n| n.text? && n.content =~ /^\s+$/ || n.elem? && n.name == "code" }
55
+ "\n\npc#{attributes(e)}. #{content_of(e % "code")}\n\n"
56
+ else
57
+ "<pre>#{content_of(e)}</pre>"
58
+ end
59
+ }
60
+
61
+ # headings
62
+ rule_for(:h1) {|e| "\n\nh1#{attributes(e)}. #{content_of(e)}\n\n" }
63
+ rule_for(:h2) {|e| "\n\nh2#{attributes(e)}. #{content_of(e)}\n\n" }
64
+ rule_for(:h3) {|e| "\n\nh3#{attributes(e)}. #{content_of(e)}\n\n" }
65
+ rule_for(:h4) {|e| "\n\nh4#{attributes(e)}. #{content_of(e)}\n\n" }
66
+ rule_for(:h5) {|e| "\n\nh5#{attributes(e)}. #{content_of(e)}\n\n" }
67
+ rule_for(:h6) {|e| "\n\nh6#{attributes(e)}. #{content_of(e)}\n\n" }
68
+
69
+ # lists
70
+ rule_for(:li) {|e|
71
+ token = e.parent.name == "ul" ? "*" : "#"
72
+ nesting = e.ancestors.inject(1) {|total,node| total + (%(ul ol).include?(node.name) ? 0 : 1) }
73
+ "\n#{token * nesting} #{content_of(e)}"
74
+ }
75
+ rule_for(:ul, :ol) {|e|
76
+ if e.ancestors.detect {|node| %(ul ol).include?(node.name) }
77
+ content_of(e)
78
+ else
79
+ "\n#{content_of(e)}\n\n"
80
+ end
81
+ }
82
+
83
+ # definition lists
84
+ rule_for(:dl) {|e| "\n\n#{content_of(e)}\n" }
85
+ rule_for(:dt) {|e| "- #{content_of(e)} " }
86
+ rule_for(:dd) {|e| ":= #{content_of(e)} =:\n" }
87
+
88
+ # tables
89
+ rule_for(:table) {|e| "\n\n#{content_of(e)}\n" }
90
+ rule_for(:tr) {|e| "#{content_of(e)}|\n" }
91
+ rule_for(:td, :th) {|e| "|#{e.name == "th" ? "_. " : attributes(e)}#{content_of(e)}" }
92
+
93
+ def attributes(node) #:nodoc:
94
+ filtered = super(node)
95
+
96
+ if filtered
97
+
98
+ if filtered.has_key?(:colspan)
99
+ return "\\#{filtered[:colspan]}. "
100
+ end
101
+
102
+ if filtered.has_key?(:rowspan)
103
+ return "/#{filtered[:rowspan]}. "
104
+ end
105
+
106
+ if filtered.has_key?(:lang)
107
+ return "[#{filtered[:lang]}]"
108
+ end
109
+
110
+ if filtered.has_key?(:class) || filtered.has_key?(:id)
111
+ klass = filtered.fetch(:class, "")
112
+ id = filtered.fetch(:id, false) ? "#" + filtered[:id] : ""
113
+ return "(#{klass}#{id})"
114
+ end
115
+
116
+ if filtered.has_key?(:style)
117
+ return "{#{filtered[:style]}}"
118
+ end
119
+ ""
120
+ end
121
+ ""
122
+ end
123
+ end
124
+
125
+ add_markup :textile, Textile
126
+ end
@@ -0,0 +1,75 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/test_helper")
2
+
3
+ module Undress
4
+ class TestGrammar < Test::Unit::TestCase
5
+ class Parent < Grammar
6
+ rule_for(:p) {|e| "<this is a paragraph>#{content_of(e)}</this is a paragraph>" }
7
+ end
8
+
9
+ class WithPreProcessingRules < Parent
10
+ pre_processing("p.foo") {|e| e.swap("<div>Cuack</div>") }
11
+ rule_for(:div) {|e| "<this was a div>#{content_of(e)}</this was a div>" }
12
+ end
13
+
14
+ class Child < Parent; end
15
+
16
+ class OverWriter < WithPreProcessingRules
17
+ rule_for(:div) {|e| content_of(e) }
18
+ end
19
+
20
+ class TextileExtension < Textile
21
+ rule_for(:a) {|e| "" }
22
+ end
23
+
24
+ class WithAttributes < Parent
25
+ whitelist_attributes :id, :class
26
+ end
27
+
28
+ def parse_with(grammar, html)
29
+ grammar.process!(Hpricot(html))
30
+ end
31
+
32
+ context "extending a grammar" do
33
+ test "the extended grammar should inherit the rules of the parent" do
34
+ output = parse_with Child, "<p>Foo Bar</p>"
35
+ assert_equal "<this is a paragraph>Foo Bar</this is a paragraph>", output
36
+ end
37
+
38
+ test "extending a grammar doesn't overwrite the parent's rules" do
39
+ output = parse_with OverWriter, "<div>Foo</div>"
40
+ assert_equal "Foo", output
41
+
42
+ output = parse_with WithPreProcessingRules, "<div>Foo</div>"
43
+ assert_equal "<this was a div>Foo</this was a div>", output
44
+ end
45
+
46
+ test "extending textile doesn't blow up" do
47
+ output = parse_with TextileExtension, "<p><a href='/'>Cuack</a></p><p>Foo Bar</p><p>I <a href='/'>work</a></p>"
48
+ assert_equal "Foo Bar\n\nI\n", output
49
+ end
50
+ end
51
+
52
+ context "pre processing rules" do
53
+ test "mutate the DOM before parsing the tags" do
54
+ output = parse_with WithPreProcessingRules, "<p class='foo'>Blah</p><p>O hai</p>"
55
+ assert_equal "<this was a div>Cuack</this was a div><this is a paragraph>O hai</this is a paragraph>", output
56
+ end
57
+ end
58
+
59
+ context "handles attributes" do
60
+ def attributes_for_tag(html)
61
+ WithAttributes.new.attributes(Hpricot(html).children.first)
62
+ end
63
+
64
+ test "whitelisted attributes are picked up in the attributes hash" do
65
+ attributes = attributes_for_tag("<p class='foo bar' id='baz'>Cuack</p>")
66
+ assert_equal({ :class => "foo bar", :id => "baz" }, attributes)
67
+ end
68
+
69
+ test "attributes that are not in the whitelist are ignored" do
70
+ attributes = attributes_for_tag("<p lang='es' id='saludo'>Hola</p>")
71
+ assert_equal({ :id => "saludo" }, attributes)
72
+ end
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,435 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/test_helper")
2
+
3
+ class Undress::GreenClothTest < Test::Unit::TestCase
4
+ def assert_renders_greencloth(greencloth, html)
5
+ assert_equal greencloth, Undress(html).to_greencloth
6
+ end
7
+
8
+ def assert_not_renders_greencloth(greencloth, html)
9
+ assert_not_equal greencloth, Undress(html).to_greencloth
10
+ end
11
+
12
+ context "parsing badly indented documents" do
13
+ test "badly indent doc" do
14
+ html = "<ul>
15
+ <li>foo</li>
16
+ <li>bar</li>
17
+ <li>and x is also.</li>
18
+ </ul>"
19
+ greencloth = "* foo\n* bar\n* and x is also.\n"
20
+ assert_renders_greencloth greencloth, html
21
+ end
22
+ end
23
+
24
+ context "some troubles with empty tags" do
25
+ test "with pre" do
26
+ html = "<pre></pre>"
27
+ greencloth = "<pre></pre>"
28
+ assert_renders_greencloth greencloth, html
29
+ end
30
+
31
+ test "with p" do
32
+ html = "<p></p>"
33
+ greencloth = ""
34
+ assert_renders_greencloth greencloth, html
35
+ end
36
+ end
37
+
38
+ # TODO:
39
+ # this is ok to ensure invalid html -> to greencloth but xhtmlize! must have
40
+ # tests on test_undress or something too
41
+ context "parsing not valid xhtml documents" do
42
+ context "with tables" do
43
+ test "cells should not have spaces at the start/end inside" do
44
+ html = "<table> <tbody> <tr class='odd'> <th>&nbsp;1&nbsp;<br></th> <th>2<br/>&nbsp;</th> </tr> <tr class='even'> <td>&nbsp;11<br/></td> <td>22</td> </tr> </tbody> </table>"
45
+ greencloth = "|_. 1|_. 2|\n|11|22|\n"
46
+ assert_renders_greencloth greencloth, html
47
+ end
48
+
49
+ test "tables should not have <br> inside <td> and <th>" do
50
+ html = "<table> <tbody> <tr class='odd'> <th>1<br></th> <th>2<br/></th> </tr> <tr class='even'> <td>11<br/></td> <td>22</td> </tr> </tbody> </table>"
51
+ greencloth = "|_. 1|_. 2|\n|11|22|\n"
52
+ assert_renders_greencloth greencloth, html
53
+ end
54
+
55
+ test "tables should not have spases beetween <td> inside" do
56
+ html = "<table> <tbody> <tr class='odd'> <td>1</td> <td>2</td> </tr> <tr class='even'> <td>11</td> <td>22</td> </tr> </tbody> </table>"
57
+ greencloth = "|1|2|\n|11|22|\n"
58
+ assert_renders_greencloth greencloth, html
59
+ end
60
+ end
61
+
62
+ test "with <u> tags" do
63
+ html = "<u>underline</u>"
64
+ greencloth = "+underline+"
65
+ assert_renders_greencloth greencloth, html
66
+ end
67
+
68
+ test "with<strike> tags" do
69
+ html = "some <strike>strike</strike> text"
70
+ greencloth = "some -strike- text"
71
+ assert_renders_greencloth greencloth, html
72
+ end
73
+
74
+ test "space between 2 spans with styles" do
75
+ html = "<p><span style='font-weight: bold;'>bold</span> <span style='font-style: italic;'>italic</span></p>"
76
+ greencloth = "*bold* _italic_\n"
77
+ assert_renders_greencloth greencloth, html
78
+ end
79
+
80
+ test "a <span> bold, italic, underline, line-through at the same time" do
81
+ html = "<p>some text <span style='font-weight:bold; font-style:italic; text-decoration:underline;'>bold</span> with style</p>"
82
+ greencloth = "some text *+_bold_+* with style\n"
83
+ assert_renders_greencloth greencloth, html
84
+ end
85
+
86
+ test "font-weight:bold styles in <span> elements should be <strong>" do
87
+ html = "<p>some text <span style='font-weight:bold'>bold</span> with style</p>"
88
+ greencloth = "some text *bold* with style\n"
89
+ assert_renders_greencloth greencloth, html
90
+ html = "<p style='font-weight:bold'>some text bold with style</p>"
91
+ greencloth = "*some text bold with style*\n"
92
+ assert_renders_greencloth greencloth, html
93
+ end
94
+
95
+ test "style 'line-through' should be converted to <del> in <span> elements" do
96
+ html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration: line-through;'>paragraph</span></p>"
97
+ greencloth = "with -some- in the -paragraph-\n"
98
+ assert_renders_greencloth greencloth, html
99
+ html = "<p style='text-decoration: line-through;'>with some in the paragraph</p>"
100
+ greencloth = "-with some in the paragraph-\n"
101
+ assert_renders_greencloth greencloth, html
102
+ end
103
+
104
+ test "style 'underline' should be converted to <ins> in <span> elements" do
105
+ html = "<p>with <span style='text-decoration: underline;'>some</span> in the <span style='text-decoration: underline;'>paragraph</span></p>"
106
+ greencloth = "with +some+ in the +paragraph+\n"
107
+ assert_renders_greencloth greencloth, html
108
+ html = "<p style='text-decoration: underline;'>with some in the paragraph</p>"
109
+ greencloth = "+with some in the paragraph+\n"
110
+ assert_renders_greencloth greencloth, html
111
+ end
112
+
113
+ test "style 'italic' should be converted to <em> in <span> elements" do
114
+ html = "<p>with <span style='font-style: italic;'>some</span> in the <span style='font-style: italic;'>paragraph</span></p>"
115
+ greencloth = "with _some_ in the _paragraph_\n"
116
+ assert_renders_greencloth greencloth, html
117
+ html = "<p style='font-style: italic;'>with some in the paragraph</p>"
118
+ greencloth = "_with some in the paragraph_\n"
119
+ assert_renders_greencloth greencloth, html
120
+ end
121
+
122
+ test "a nested invalid unordered list" do
123
+ html = "<ul><li>item 1</li><li>item 2</li><ul><li>nested 1</li><li>nested 2</li></ul><li>item 3</li></ul>"
124
+ greencloth = "* item 1\n* item 2\n** nested 1\n** nested 2\n* item 3\n"
125
+ assert_renders_greencloth greencloth, html
126
+ end
127
+
128
+ test "a nested invalid ordered list" do
129
+ html = "<ol><li>item 1</li><li>item 2</li><ol><li>nested 1</li><li>nested 2</li></ol><li>item 3</li></ol>"
130
+ greencloth = "# item 1\n# item 2\n## nested 1\n## nested 2\n# item 3\n"
131
+ assert_renders_greencloth greencloth, html
132
+ end
133
+
134
+ test "a nested invalid mixed list with 3 levels" do
135
+ html = "<ul><li>item 1</li><li>item 2</li><ol><li>nested 1</li><li>nested 2</li><ul><li>nested2 1</li><li>nested2 2</li></ul></ol><li>item 3</li></ul>"
136
+ greencloth = "* item 1\n* item 2\n*# nested 1\n*# nested 2\n*#* nested2 1\n*#* nested2 2\n* item 3\n"
137
+ assert_renders_greencloth greencloth, html
138
+ end
139
+
140
+ test "a nested invalid mixed list" do
141
+ html = "<ul><li>item 1</li><li>item 2</li><ol><li>nested 1</li><li>nested 2</li></ol><li>item 3</li></ul>"
142
+ greencloth = "* item 1\n* item 2\n*# nested 1\n*# nested 2\n* item 3\n"
143
+ assert_renders_greencloth greencloth, html
144
+ end
145
+
146
+ test "2 badly nested list inside" do
147
+ html = "<ul><li>item 1</li><li>item 2</li><ul><li>nested 1</li><ul><li>item 1x</li><li>item 2x</li></ul><li>nested 2</li></ul><li>item 3</li></ul>"
148
+ greencloth = "* item 1\n* item 2\n** nested 1\n*** item 1x\n*** item 2x\n** nested 2\n* item 3\n"
149
+ assert_renders_greencloth greencloth, html
150
+ end
151
+ end
152
+
153
+ # unallowed tags
154
+ context "remove unallowed tags" do
155
+ test "remove a head tag" do
156
+ html = "<html><head><title>Title</title></head>"
157
+ greencloth = ""
158
+ assert_renders_greencloth greencloth, html
159
+ end
160
+
161
+ test "remove a script tag" do
162
+ html = "<div>Some script inside a<script type='text/javascript'>window.alert('alert')</script> paragraph</div>"
163
+ greencloth = "Some script inside a paragraph"
164
+ assert_renders_greencloth greencloth, html
165
+ end
166
+ end
167
+
168
+ # code
169
+ context "converting code tags" do
170
+ test "a code inside a paragraph" do
171
+ html = "<p>do you like my <code>function</code>?</p>"
172
+ greencloth = "do you like my @function@?\n"
173
+ assert_renders_greencloth greencloth, html
174
+ end
175
+
176
+ test "code tag inside pre tag" do
177
+ html = "<pre><code>def say_hi\n\tputs 'hi'\nend</code></pre>"
178
+ greencloth = "<pre><code>def say_hi\n\tputs 'hi'\nend</code></pre>"
179
+ assert_renders_greencloth greencloth, html
180
+ end
181
+
182
+ test "code inside list items" do
183
+ html = "<ul><li><code>foo</code></li><li><code>bar</code></li><li>and <code>x</code> is also.</li></ul>"
184
+ greencloth = "* @foo@\n* @bar@\n* and @x@ is also.\n"
185
+ assert_renders_greencloth greencloth, html
186
+ end
187
+
188
+ test "code tag not inside a pre and without new lines inside" do
189
+ html = "<code>some code inside</code>"
190
+ greencloth = "@some code inside@"
191
+ assert_renders_greencloth greencloth, html
192
+ end
193
+ end
194
+
195
+ # embed and object
196
+ # the elements pass trough but the order of the attributes change
197
+ context "embed and object" do
198
+ test "embed" do
199
+ html = "<p>do you like my embedded blip.tv <embed src='http://blip.tv/play/Ac3GfI+2HA' allowfullscreen='true' type='application/x-shockwave-flash' allowscriptaccess='always' height='510' width='720' />?</p>"
200
+ greencloth = "do you like my embedded blip.tv <embed allowfullscreen=\"true\" src=\"http://blip.tv/play/Ac3GfI+2HA\" allowscriptaccess=\"always\" type=\"application/x-shockwave-flash\" height=\"510\" width=\"720\" />?\n"
201
+ assert_renders_greencloth greencloth, html
202
+ end
203
+
204
+ test "object" do
205
+ html = "<p>do you like my embedded youtube <object width='425' height='344'><param name='movie' value='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' /><param name='allowFullScreen' value='true' /><embed src='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' type='application/x-shockwave-flash' width='425' height='344' allowfullscreen='true' /></object>?</p>"
206
+ greencloth = "do you like my embedded youtube <object height=\"344\" width=\"425\"><param name=\"movie\" value=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" /><param name=\"allowFullScreen\" value=\"true\" /><embed allowfullscreen=\"true\" src=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" type=\"application/x-shockwave-flash\" height=\"344\" width=\"425\" /></object>?\n"
207
+ assert_renders_greencloth greencloth, html
208
+ end
209
+ end
210
+
211
+ # outline
212
+ # don't allow link to anchors or anchor defs inside hx, greencloth -> html
213
+ # take cares of it, so we are only allowing links inside hx elements for now
214
+ context "outline" do
215
+ test "table of contents toc" do
216
+ html = "<ul class='toc'><li class='toc1'><a href='#fruits'><span>1</span> Fruits</a></li><ul><li class='toc2'><a href='#tasty-apples'><span>1.1</span> Tasty Apples</a></li><ul><li class='toc3'><a href='green'><span>1.1.1</span> Green</a></li><li class='toc3'><a href='#red'><span>1.1.2</span> Red</a></li></ul>"
217
+ greencloth = "[[toc]]\n"
218
+ assert_renders_greencloth greencloth, html
219
+ end
220
+
221
+ test "headings with links, anchors and links to anchors" do
222
+ html = "<h1 class='first'><a name='russian-anarchists'></a>Russian Anarchists<a class='anchor' href='#russian-anarchists'>&para;</a></h1><h2><a name='michel-bakunin'></a>Michel <a href='http://en.wikipedia.org/wiki/Mikhail_Bakunin'>Bakunin</a><a class='anchor' href='#michel-bakunin'>&para;</a></h2><h2><a name='peter-kropotkin'></a><a href='http://en.wikipedia.org/wiki/Peter_Kropotkin'>Peter</a> Kropotkin<a class='anchor' href='#peter-kropotkin'>&para;</a></h2><h1><a name='russian-american-anarchists'></a>Russian-American Anarchists<a class='anchor' href='#russian-american-anarchists'>&para;</a></h1><h2><a name='emma-goldman'></a><a href='http://en.wikipedia.org/wiki/Emma_Goldman'>Emma Goldman</a><a class='anchor' href='#emma-goldman'>&para;</a></h2><h2><a name='alexander-berkman'></a>Alexander <a href='http://en.wikipedia.org/wiki/Alexander_Berkman'>Berkman</a><a class='anchor' href='#alexander-berkman'>&para;</a></h2>"
223
+ greencloth = "Russian Anarchists\n==================\n\nMichel [Bakunin -> http://en.wikipedia.org/wiki/Mikhail_Bakunin]\n--------------\n\n[Peter -> http://en.wikipedia.org/wiki/Peter_Kropotkin] Kropotkin\n---------------\n\nRussian-American Anarchists\n===========================\n\n[Emma Goldman -> http://en.wikipedia.org/wiki/Emma_Goldman]\n------------\n\nAlexander [Berkman -> http://en.wikipedia.org/wiki/Alexander_Berkman]\n-----------------\n"
224
+ assert_renders_greencloth greencloth, html
225
+ end
226
+
227
+ test "double trouble" do
228
+ html = "<h1 class='first'><a name='title'></a>Title<a class='anchor' href='#title'>&para;</a></h1><h3><a name='under-first'></a>Under first<a class='anchor' href='#under-first'>&para;</a></h3><h1><a name='title_2'></a>Title<a class='anchor' href='#title_2'>&para;</a></h1><h3><a name='under-second'></a>Under second<a class='anchor' href='#under-second'>&para;</a></h3>"
229
+ greencloth = "Title\n=====\n\nh3. Under first\n\nTitle\n=====\n\nh3. Under second\n"
230
+ assert_renders_greencloth greencloth, html
231
+ end
232
+ end
233
+
234
+ # basics
235
+ context "basics" do
236
+ test "headers" do
237
+ html = "<h1 class='first'>header one</h1>\n<h2>header two</h2>"
238
+ greencloth = "header one\n==========\n\nheader two\n----------\n"
239
+ assert_renders_greencloth greencloth, html
240
+ end
241
+
242
+ test "headers with paragraph" do
243
+ html = "<p>la la la</p>\n<h1 class='first'>header one</h1>\n<h2>header two</h2>\n<p>la la la</p>"
244
+ greencloth = "la la la\n\nheader one\n==========\n\nheader two\n----------\n\nla la la\n"
245
+ assert_renders_greencloth greencloth, html
246
+ end
247
+ end
248
+
249
+ # sections
250
+ # allways we render h1 with ==== and h2 with ----
251
+ context "Convert sections" do
252
+ test "one section no heading" do
253
+ html = "<div class='wiki_section' id='wiki_section-0'><p>start unheaded section</p><p>line line line</p></div>"
254
+ greencloth = "start unheaded section\n\nline line line\n"
255
+ assert_renders_greencloth greencloth, html
256
+ end
257
+
258
+ test "one section with heading" do
259
+ html = "<div class='wiki_section' id='wiki_section-0'><h2 class='first'>are you ready?!!?</h2><p>here we go now!</p></div>"
260
+ greencloth = "are you ready?!!?\n-----------------\n\nhere we go now!\n"
261
+ assert_renders_greencloth greencloth, html
262
+ end
263
+
264
+ test "all headings" do
265
+ html = "<h1>First</h1><h2>Second</h2><h3>Tres</h3><h4>Cuatro</h4><h5>Five</h5><h6>Six</h6>"
266
+ greencloth = "First\n=====\n\nSecond\n------\n\nh3. Tres\n\nh4. Cuatro\n\nh5. Five\n\nh6. Six\n"
267
+ assert_renders_greencloth greencloth, html
268
+ end
269
+
270
+ test "multiple sections with text" do
271
+ html = "<div class='wiki_section' id='wiki_section-0'><h2 class='first'>Section One</h2><p>section one line one is here<br />section one line two is next</p><p>Here is section one still</p></div><div class='wiki_section' id='wiki_section-1'><h1>Section Two</h1><p>Section two first line<br />Section two another line</p></div><div class='wiki_section' id='wiki_section-2'><h2>Section 3 with h2</h2><p>One more line for section 3</p></div><div class='wiki_section' id='wiki_section-3'><h3>final section 4</h3><p>section 4 first non-blank line</p>\n</div>"
272
+ greencloth = "Section One\n-----------\n\nsection one line one is here\nsection one line two is next\n\nHere is section one still\n\nSection Two\n===========\n\nSection two first line\nSection two another line\n\nSection 3 with h2\n-----------------\n\nOne more line for section 3\n\nh3. final section 4\n\nsection 4 first non-blank line\n"
273
+ assert_renders_greencloth greencloth, html
274
+ end
275
+ end
276
+
277
+ # lists
278
+ # TODO: start attribute not implemented
279
+ context "Converting html lists to greencloth" do
280
+ test "hard break in list" do
281
+ html = "<ul>\n\t<li>first line</li>\n\t<li>second<br />\n\tline</li>\n\t<li>third line</li>\n</ul>\n"
282
+ greencloth = "* first line\n* second\nline\n* third line\n"
283
+ assert_renders_greencloth greencloth, html
284
+ end
285
+
286
+ test "mixed nesting" do
287
+ html = "<ul><li>bullet\n<ol>\n<li>number</li>\n<li>number\n<ul>\n\t<li>bullet</li>\n</ul></li>\n<li>number</li>\n<li>number with<br />a break</li>\n</ol></li>\n<li>bullet\n<ul><li>okay</li></ul></li></ul>"
288
+ greencloth = "* bullet\n*# number\n*# number\n*#* bullet\n*# number\n*# number with\na break\n* bullet\n** okay\n"
289
+ assert_renders_greencloth greencloth, html
290
+ end
291
+
292
+ test "list continuation" do # uses start
293
+ html = "<ol><li>one</li><li>two</li><li>three</li></ol><ol><li>one</li><li>two</li><li>three</li></ol><ol start='4'><li>four</li><li>five</li><li>six</li></ol>"
294
+ greencloth = "# one\n# two\n# three\n\n# one\n# two\n# three\n\n# four\n# five\n# six\n"
295
+ assert_renders_greencloth greencloth, html
296
+ end
297
+
298
+ test "continue after break" do # uses start
299
+ html = "<ol><li>one</li><li>two</li><li>three</li></ol><p>test</p><ol><li>one</li><li>two</li><li>three</li></ol><p>test</p><ol start='4'><li>four</li><li>five</li><li>six</li></ol>"
300
+ greencloth = "# one\n# two\n# three\n\ntest\n\n# one\n# two\n# three\n\ntest\n\n# four\n# five\n# six\n"
301
+ assert_renders_greencloth greencloth, html
302
+ end
303
+
304
+ test "continue list when prior list contained nested list" do # uses start
305
+ greencloth = "# one\n# two\n# three\n\n# four\n# five\n## sub-note\n## another sub-note\n# six\n\n# seven\n# eight\n# nine\n"
306
+ html = "<ol><li>one</li><li>two</li><li>three</li></ol><ol start='4'><li>four</li><li>five<ol><li>sub-note</li><li>another sub-note</li></ol></li><li>six</li></ol><ol start='7'><li>seven</li><li>eight</li><li>nine</li></ol>"
307
+ assert_renders_greencloth greencloth, html
308
+ end
309
+
310
+ test "" do
311
+
312
+ end
313
+ end
314
+
315
+ # links
316
+ context "Converting html links to greencloth" do
317
+ test "simple link test" do
318
+ html = "<a href='url'>text</a>"
319
+ greencloth = "[text]"
320
+ assert_not_renders_greencloth greencloth,html
321
+ end
322
+
323
+ test "convert a link to a wiki page inside a paragraph" do
324
+ html = "<p>this is a <a href='/page/plain-link'>plain link</a> in some text</p>"
325
+ greencloth = "this is a [plain link] in some text\n"
326
+ assert_renders_greencloth greencloth, html
327
+ end
328
+
329
+ test "convert a link to a wiki page with namespace" do
330
+ html= "<p>this is a <a href='/namespaced/link'>link</a> in some text</p>"
331
+ greencloth = "this is a [namespaced / link] in some text\n"
332
+ assert_renders_greencloth greencloth, html
333
+ end
334
+
335
+ test "convert a link to a wiki page" do
336
+ html= "<p>this is a <a href='/page/something-else'>link to</a> in some text</p>"
337
+ greencloth = "this is a [link to -> something else] in some text\n"
338
+ assert_renders_greencloth greencloth, html
339
+ end
340
+
341
+ test "convert a link to a wiki page with namespace and text different than link dest" do
342
+ html= "<p>this is a <a href='/namespace/something-else'>link to</a> in some text</p>"
343
+ greencloth = "this is a [link to -> namespace / something else] in some text\n"
344
+ assert_renders_greencloth greencloth, html
345
+ end
346
+
347
+ test "convert a link to an absolute path" do
348
+ html = "<p>this is a <a href='/an/absolute/path'>link to</a> in some text</p>"
349
+ greencloth = "this is a [link to -> /an/absolute/path] in some text\n"
350
+ assert_renders_greencloth greencloth, html
351
+ end
352
+
353
+ test "convert a link to an external domain" do
354
+ html = "<p>this is a <a href='https://riseup.net'>link to</a> a url</p>"
355
+ greencloth = "this is a [link to -> https://riseup.net] a url\n"
356
+ assert_renders_greencloth greencloth, html
357
+ end
358
+
359
+ test "a link to an external domain with the same text as dest" do
360
+ html = "<p>url in brackets <a href='https://riseup.net/'>riseup.net</a></p>"
361
+ greencloth = "url in brackets [riseup.net -> https://riseup.net/]\n"
362
+ assert_renders_greencloth greencloth, html
363
+ end
364
+
365
+ test "a link to a wiki page with the same name as dest" do
366
+ html = "<p>a <a href='/page/name-link'>name link</a> in need of humanizing</p>"
367
+ greencloth = "a [name link] in need of humanizing\n"
368
+ assert_renders_greencloth greencloth, html
369
+ end
370
+
371
+ test "link to a user blue" do
372
+ html = "<p>link to a user <a href='/blue'>blue</a></p>"
373
+ greencloth = "link to a user [blue]\n"
374
+ assert_renders_greencloth greencloth, html
375
+ end
376
+
377
+ test "link with dashes should keep the dashes" do
378
+ html = "<p><a href='/-dashes/in/the/link-'>link to</a></p>"
379
+ greencloth = "[link to -> /-dashes/in/the/link-]\n"
380
+ assert_renders_greencloth greencloth, html
381
+ end
382
+
383
+ test "link with underscores should keep the underscores" do
384
+ html = "<p>links <a href='/page/with_underscores'>with_underscores</a> should keep underscore</p>"
385
+ greencloth = "links [with_underscores] should keep underscore\n"
386
+ assert_renders_greencloth greencloth, html
387
+ end
388
+
389
+ test "a link inside a li element" do
390
+ html ="<ul>\n<li>\n\t\t\n<a href='/page/this'>link to</a></li></ul>"
391
+ greencloth = "* [link to -> this]\n"
392
+ assert_renders_greencloth greencloth, html
393
+ end
394
+
395
+ test "an external link inside a li element" do
396
+ html = "<ul>\n<li><a href='https://riseup.net/'>riseup.net</a></li>\n</ul>"
397
+ greencloth = "* [riseup.net -> https://riseup.net/]\n"
398
+ assert_renders_greencloth greencloth, html
399
+ end
400
+
401
+ test "many anchors inside a paragraph" do
402
+ html = "<p>make anchors <a name='here'>here</a> or <a name='maybe-here'>maybe here</a> or <a name='there'>over</a></p>"
403
+ greencloth = "make anchors [# here #] or [# maybe here #] or [# over -> there #]\n"
404
+ assert_renders_greencloth greencloth, html
405
+ end
406
+
407
+ # TODO: there are differents in this test about how cg support writing anchors
408
+ # this is a reduced support of it
409
+ test "anchors and links" do
410
+ html = "<p>link to <a href='/page/anchors#like-so'>anchors</a> or <a href='/page/like#so'>maybe</a> or <a href='#so'>just</a> or <a href='#so'>so</a></p>"
411
+ greencloth = "link to [anchors -> anchors#like so] or [maybe -> like#so] or [just -> #so] or [so -> #so]\n"
412
+ assert_renders_greencloth greencloth, html
413
+ end
414
+
415
+ test "more anchors" do
416
+ html = "<p><a href='#5'>link</a> to a numeric anchor <a name='5'>5</a></p>"
417
+ greencloth = "[link -> #5] to a numeric anchor [# 5 #]\n"
418
+ assert_renders_greencloth greencloth, html
419
+ end
420
+
421
+ test "3 links without /" do
422
+ html = "<p><a href='some'>some</a> and <a href='other'>other</a> and <a href='one_more'>one_more</a></p>"
423
+ greencloth = "[some] and [other] and [one_more]\n"
424
+ assert_renders_greencloth greencloth, html
425
+ end
426
+ end
427
+
428
+ context "troubles with headings" do
429
+ test "with h1" do
430
+ html = "<h1 class='first'><a name='this-is-h1-text---this-is-h1-text'></a><span class='caps'>THIS</span> IS H1 <span class='caps'>TEXT</span> - this is h1 text<a class='anchor' href='#this-is-h1-text---this-is-h1-text'>¶</a></h1>"
431
+ greencloth = "THIS IS H1 TEXT - this is h1 text\n=================================\n"
432
+ assert_renders_greencloth greencloth, html
433
+ end
434
+ end
435
+ end