zevarito-undress 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/LICENSE +22 -0
- data/README.rdoc +42 -0
- data/Rakefile +32 -0
- data/lib/core_ext/object.rb +6 -0
- data/lib/undress.rb +51 -0
- data/lib/undress/grammar.rb +127 -0
- data/lib/undress/greencloth.rb +138 -0
- data/lib/undress/textile.rb +104 -0
- data/test/test_grammar.rb +55 -0
- data/test/test_greencloth.rb +276 -0
- data/test/test_helper.rb +11 -0
- data/test/test_textile.rb +198 -0
- data/undress.gemspec +42 -0
- metadata +105 -0
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
(The MIT License)
|
2
|
+
|
3
|
+
Copyright (c) 2009 Nicolas Sanguinetti, entp.com
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
'Software'), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
20
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
21
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
22
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
= Undress
|
2
|
+
|
3
|
+
Easily convert HTML back to Textile or Greencloth.
|
4
|
+
|
5
|
+
require "undress/textile"
|
6
|
+
|
7
|
+
code =<<html
|
8
|
+
<h1>Hello world!</h1>
|
9
|
+
<p><strong>Hey!</strong> How is it going?</p>
|
10
|
+
<h2>Supported Markup Languages so far:</h2>
|
11
|
+
<ul>
|
12
|
+
<li>Textile</li>
|
13
|
+
<li>Greencloth</li>
|
14
|
+
</ul>
|
15
|
+
html
|
16
|
+
|
17
|
+
Undress(code).to_textile
|
18
|
+
|
19
|
+
Will produce
|
20
|
+
|
21
|
+
h1. Hello world!
|
22
|
+
|
23
|
+
*Hey!* How is it going?
|
24
|
+
|
25
|
+
h2. Supported Markup Languages so far:
|
26
|
+
|
27
|
+
* Textile
|
28
|
+
* Greencloth
|
29
|
+
|
30
|
+
== Supported Markup Languages
|
31
|
+
|
32
|
+
* Textile
|
33
|
+
* Greencloth, see [http://we.riseup.net]
|
34
|
+
|
35
|
+
== Get it
|
36
|
+
|
37
|
+
gem install undress
|
38
|
+
|
39
|
+
== License
|
40
|
+
|
41
|
+
Authors:: Nicolas Sanguinetti (foca[http://github.com/foca]), Alvaro Gil (zevarito[http://github.com/zevarito])
|
42
|
+
License:: MIT (Check LICENSE for details)
|
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require "rake/testtask"
|
2
|
+
|
3
|
+
begin
|
4
|
+
require "hanna/rdoctask"
|
5
|
+
rescue LoadError
|
6
|
+
require "rake/rdoctask"
|
7
|
+
end
|
8
|
+
|
9
|
+
# Generates the API documentation into doc/.
Rake::RDocTask.new do |rd|
  # The main page must name a file that is actually part of rdoc_files;
  # the README shipped with the gem is README.rdoc.
  rd.main = "README.rdoc"
  rd.title = "API Documentation for Undress"
  rd.rdoc_files.include("README.rdoc", "LICENSE", "lib/**/*.rb")
  rd.rdoc_dir = "doc"
end
|
15
|
+
|
16
|
+
begin
|
17
|
+
require "metric_fu"
|
18
|
+
rescue LoadError
|
19
|
+
end
|
20
|
+
|
21
|
+
begin
|
22
|
+
require "mg"
|
23
|
+
MG.new("undress.gemspec")
|
24
|
+
rescue LoadError
|
25
|
+
end
|
26
|
+
|
27
|
+
desc "Default: run tests"
|
28
|
+
task :default => :test
|
29
|
+
|
30
|
+
Rake::TestTask.new do |t|
|
31
|
+
t.test_files = FileList["test/test_*.rb"]
|
32
|
+
end
|
data/lib/undress.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require "hpricot"
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + "/core_ext/object")
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + "/undress/grammar")
|
4
|
+
|
5
|
+
# Wrap an HTML document so that it can be undressed into another markup
# language. +html+ may be a string or an IO object. The optional +options+
# hash is forwarded untouched to Hpricot; see its
# documentation[http://code.whytheluckystiff.net/doc/hpricot] for details.
#
# Returns an Undress::Document.
def Undress(html, options = {})
  document_class = Undress::Document
  document_class.new(html, options)
end
|
12
|
+
|
13
|
+
module Undress

  # Whitelist of tag names that grammars are allowed to process.
  # An empty list means "every tag is allowed"; otherwise any element whose
  # name is missing from the list is dropped from the output.
  ALLOWED_TAGS = []

  class << self
    # Register a markup language under +name+. The name becomes the suffix of
    # the conversion method: registering +:textile+ gives you
    # <tt>Undress(code).to_textile</tt>, registering +:markdown+ would give
    # you <tt>Undress(code).to_markdown</tt>, and so on. +grammar+ must
    # respond to <tt>process!</tt>.
    def add_markup(name, grammar)
      Document.add_markup(name, grammar)
    end
  end

  class Document #:nodoc:
    class << self
      # Defines <tt>to_&lt;name&gt;</tt>, which hands the parsed document to
      # +grammar+.
      def add_markup(name, grammar)
        define_method("to_#{name}") { grammar.process!(@doc) }
      end
    end

    # Parses +html+ with Hpricot, passing +options+ straight through.
    def initialize(html, options)
      @doc = Hpricot(html, options)
    end
  end

  module ::Hpricot #:nodoc:
    class Elem #:nodoc:
      # Collects the chain of enclosing nodes, nearest first. A node is only
      # included while it has a parent of its own, so the topmost node of the
      # tree is left out.
      def ancestors
        chain = Elements[]
        current = parent
        until !current.respond_to?(:parent) || !current.parent
          chain << current
          current = current.parent
        end
        chain
      end
    end
  end
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
module Undress
  # Grammars give you a DSL to declare how to convert an HTML document into a
  # different markup language.
  class Grammar
    # Give each subclass its own copy of the rules declared so far. Copying
    # (instead of sharing the hashes) keeps rules added to a subclass from
    # leaking into its parent and siblings.
    def self.inherited(base) # :nodoc:
      base.instance_variable_set(:@post_processing_rules, post_processing_rules.dup)
      base.instance_variable_set(:@pre_processing_rules, pre_processing_rules.dup)
    end

    # Add a parsing rule for a group of html tags.
    #
    #     rule_for :p do |element|
    #       "<this was a paragraph>#{content_of(element)}</this was a paragraph>"
    #     end
    #
    # will replace your <tt><p></tt> tags for <tt><this was a paragraph></tt>
    # tags, without altering the contents.
    #
    # The element yielded to the block is an Hpricot element for the given tag.
    # The block is turned into an instance method named after the tag, so it
    # can call helpers such as +content_of+.
    def self.rule_for(*tags, &handler) # :yields: element
      tags.each do |tag|
        define_method tag.to_sym, &handler
      end
    end

    # Set a default rule for unrecognized tags.
    #
    # Unless you define a special case, it will ignore the tags and just output
    # the contents of unrecognized tags.
    #
    # The handler is evaluated in the context of the grammar instance, exactly
    # like blocks given to +rule_for+, so +content_of+ and friends work in it.
    def self.default(&handler) # :yields: element
      define_method :method_missing do |tag, node, *args|
        instance_exec(node, &handler)
      end
    end

    # Add a post-processing rule to your parser.
    #
    # This takes a regular expression that will be applied to the output after
    # processing any nodes. It can take a string as a replacement, or a block
    # that will be passed to String#gsub.
    #
    #     post_processing(/\n\n+/, "\n\n") # compress more than two newlines
    #     post_processing(/whatever/) { ... }
    def self.post_processing(regexp, replacement = nil, &handler) #:yields: matched_string
      post_processing_rules[regexp] = replacement || handler
    end

    # Add a pre-processing rule to your parser.
    #
    # This lets you mutate the DOM before applying any rule defined with
    # +rule_for+. You need to pass a CSS/XPath selector, and a block that
    # takes an Hpricot element to parse it.
    #
    #     pre_processing "ul.toc" do |element|
    #       element.swap("<p>[[toc]]</p>")
    #     end
    #
    # Would replace any unordered lists with the class +toc+ for a
    # paragraph containing the code <tt>[[toc]]</tt>.
    def self.pre_processing(selector, &handler) # :yields: element
      pre_processing_rules[selector] = handler
    end

    def self.post_processing_rules #:nodoc:
      @post_processing_rules ||= {}
    end

    def self.pre_processing_rules #:nodoc:
      @pre_processing_rules ||= {}
    end

    # Convenience entry point: converts +node+ with a fresh grammar instance.
    def self.process!(node) #:nodoc:
      new.process!(node)
    end

    attr_reader :pre_processing_rules #:nodoc:
    attr_reader :post_processing_rules #:nodoc:

    def initialize #:nodoc:
      # Work on copies so per-instance changes never touch the class rules.
      @pre_processing_rules = self.class.pre_processing_rules.dup
      @post_processing_rules = self.class.post_processing_rules.dup
    end

    # Process one DOM node or a list of them, converting each to your markup
    # language according to your defined rules, and return the joined string.
    # A Text node contributes its string representation; an element is handed
    # to the rule named after its tag; anything else contributes nothing.
    #
    # Elements whose name is not in ALLOWED_TAGS (when that list is non-empty)
    # are dropped together with their contents.
    def process(nodes)
      Array(nodes).map do |node|
        if node.text?
          node.to_html
        elsif node.elem?
          # NOTE(review): dispatch is by tag name, so a tag that shares its
          # name with an existing method (e.g. <process>) invokes that method
          # instead of falling through to method_missing. Consider restricting
          # this when converting untrusted HTML.
          send node.name.to_sym, node if ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)
        else
          ""
        end
      end.join("")
    end

    # Run the whole conversion for +node+: apply every pre-processing rule to
    # the elements matching its selector, convert the children, then apply
    # the post-processing substitutions to the resulting text.
    def process!(node) #:nodoc:
      pre_processing_rules.each do |selector, handler|
        node.search(selector).each(&handler)
      end

      process(node.children).tap do |text|
        post_processing_rules.each do |rule, handler|
          handler.is_a?(String) ? text.gsub!(rule, handler) : text.gsub!(rule, &handler)
        end
      end
    end

    # Get the result of parsing the contents of a node.
    def content_of(node)
      process(node.respond_to?(:children) ? node.children : node)
    end

    # Helper method that tells you if the given DOM node has whitespace
    # immediately before or immediately after it. A missing neighbour (the
    # node is the first or last child) counts as "no whitespace" on that side
    # instead of raising.
    #
    # Returns a truthy value (the match position) or a falsy one.
    def surrounded_by_whitespace?(node)
      before, after = node.previous, node.next
      (before && before.text? && before.to_s =~ /\s+$/) ||
        (after && after.text? && after.to_s =~ /^\s+/)
    end

    # Fallback rule for tags without a +rule_for+: output only the contents.
    # Calls that do not carry a node are genuine missing methods and are
    # reported as such.
    def method_missing(tag, node = nil, *args) #:nodoc:
      return super unless node.respond_to?(:children)
      process(node.children)
    end
  end
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/textile")
|
2
|
+
|
3
|
+
module Undress
|
4
|
+
class GreenCloth < Textile
|
5
|
+
|
6
|
+
# Tags GreenCloth keeps; elements with any other name are dropped by
# Grammar#process. The shared array is updated in place rather than
# reassigned, which avoids Ruby's "already initialized constant" warning.
# NOTE(review): the list is global, so loading this file also restricts every
# other grammar that goes through Grammar#process -- confirm that is intended.
Undress::ALLOWED_TAGS.replace(%w[
  div a img br i u b pre kbd code cite strong em
  ins sup sub del table tr td th ol ul li p span
  h1 h2 h3 h4 h5 h6 notextile blockquote object embed
  param acronym dd dl dt
])
|
12
|
+
|
13
|
+
# table of contents
|
14
|
+
pre_processing("ul.toc") do |toc|
|
15
|
+
toc.swap "[[toc]]"
|
16
|
+
end
|
17
|
+
|
18
|
+
# headings
|
19
|
+
rule_for(:h1, :h2, :h3, :h4, :h5, :h6) {|e| process_headings(e) }
|
20
|
+
|
21
|
+
# inline elements
|
22
|
+
rule_for(:a) {|e|
|
23
|
+
"#{process_links_and_anchors(e)}"
|
24
|
+
}
|
25
|
+
|
26
|
+
# lists
|
27
|
+
# Builds the list marker by walking from the item up through its enclosing
# lists: "*" for every <ul>, "#" for every <ol>, outermost first.
rule_for(:li) {|e|
  offset = ""
  li = e
  while li && li.parent
    container = li.parent
    case container.respond_to?(:name) ? container.name : nil
    when "ul" then offset = "*#{offset}"
    when "ol" then offset = "##{offset}"
    else
      # Reached something that is not a list: stop climbing but still emit
      # the item, instead of returning the bare marker and losing its text.
      break
    end
    # Jump to whatever contains the list (the outer <li> when nested).
    li = container.parent
  end
  "\n#{offset} #{content_of(e)}"
}
|
38
|
+
|
39
|
+
# text formatting
|
40
|
+
rule_for(:pre) {|e|
|
41
|
+
if e.children.all? {|n| n.text? && n.content =~ /^\s+$/ || n.elem? && n.name == "code" }
|
42
|
+
"\n\n<pre><code>#{content_of(e % "code")}</code></pre>"
|
43
|
+
else
|
44
|
+
"\n\n<pre>#{content_of(e)}</pre>"
|
45
|
+
end
|
46
|
+
}
|
47
|
+
|
48
|
+
rule_for(:code) {|e|
|
49
|
+
if e.inner_html.match(/\n/)
|
50
|
+
if e.parent && e.parent.name != "pre"
|
51
|
+
"<pre><code>#{content_of(e)}</code></pre>"
|
52
|
+
else
|
53
|
+
"<code>#{content_of(e)}</code>"
|
54
|
+
end
|
55
|
+
else
|
56
|
+
"@#{content_of(e)}@"
|
57
|
+
end
|
58
|
+
}
|
59
|
+
|
60
|
+
# pass objects through unchanged
|
61
|
+
rule_for(:embed, :object, :param) {|e|
|
62
|
+
e.to_html
|
63
|
+
}
|
64
|
+
|
65
|
+
# Render a heading element in GreenCloth syntax.
#
# Non-text children are pruned first: every element that is not an <a> is
# replaced with an empty string, and so is any <a> whose href neither starts
# with "/" nor contains an http(s)/(s)ftp scheme. <a> elements without an
# href are kept.
#
# h1 and h2 are rendered as the text underlined with "=" or "-" (one
# character per character of the heading's inner text); h3..h6 use the
# "hN. text" form.
def process_headings(h)
  h.children.each do |child|
    next if child.class == Hpricot::Text
    droppable = child.name != "a" ||
                (child.has_attribute?("href") && child["href"] !~ /^\/|(https?|s?ftp):\/\//)
    child.parent.replace_child(child, "") if droppable
  end

  underline = { "h1" => "=", "h2" => "-" }[h.name]
  if underline
    "#{content_of(h)}\n#{underline * h.inner_text.size}\n\n"
  else
    "#{h.name}. #{content_of(h)}\n\n"
  end
end
|
79
|
+
|
80
|
+
# Convert an <a> element into a GreenCloth anchor or link.
#
# * An element with no content yields "".
# * An <a name="..."> outside of h1..h6 becomes an anchor definition:
#   "[# text #]" when the text equals the name (dashes may stand for spaces),
#   otherwise "[# text -> name #]".
# * An <a href="..."> is rendered according to the shape of the href (see
#   the branches below); hrefs that look like wiki paths are delegated to
#   +process_as_wiki_link+.
# * Anything else (no usable name or href) yields "".
def process_links_and_anchors(e)
  return "" if e.empty?
  inner, name, href = e.inner_html, e.get_attribute("name"), e.get_attribute("href")

  # is an anchor? and cannot be child of any h1..h6
  # (the alternation is grouped so that only the exact names h1..h6 match)
  if name && e.parent.name !~ /\Ah[1-6]\z/
    if inner == name || inner == name.tr("-", " ")
      "[# #{inner} #]"
    else
      "[# #{inner} -> #{name} #]"
    end
  # is a link?
  elsif href && href != ""
    case href
    when /^\/#/
      # anchor on the site root; closing bracket added so the markup is balanced
      "[\"#{inner}\":#{href}]"
    when /^#/
      "[#{inner} -> #{href}]"
    when /^(https?|s?ftp):\/\//
      href.gsub(/^(https?|s?ftp):\/\//, "") == inner ? "[#{href}]" : "[#{inner} -> #{href}]"
    when /^[^\/]/
      "[#{e.inner_text}]"
    when /^\/.[^\/]*\/.[^\/]*\//
      "[#{inner} -> #{href}]"
    when /(?:\/page\/\+)[0-9]+$/
      # link by page id: keep only the trailing digits after "/page/+"
      "[#{inner} -> +#{href[/[0-9]+$/]}]"
    else
      process_as_wiki_link(e)
    end
  else
    ""
  end
end
|
109
|
+
|
110
|
+
# Render an <a> whose href looks like a wiki path ("/context/page" or
# "/page") as a GreenCloth wiki link.
#
# The first two path segments are read as context and page name; a single
# segment is used for both. Dashes in the page name are shown as spaces.
# Returns one of:
#   "[text]" / "[text -> page name]"                      for "/page/..." hrefs
#   "[context / page name]" / "[text -> context / page name]" for group pages
#   "[page name]"                     when the text already names the page
#   "[text -> href]"                  as the fall back
def process_as_wiki_link(e)
  inner, href = e.inner_html, e.get_attribute("href")

  # pages or group pages
  context_name, page_name = href.split("/")[1..2]
  # An href such as "/" has no path segments at all; there is nothing to
  # interpret, so use the generic form instead of failing on nil.
  return "[#{inner} -> #{href}]" if context_name.nil?
  page_name = context_name if page_name.nil?
  wiki_page_name = page_name.gsub(/[a-z-]*[^\/]$/m) {|m| m.tr('-',' ')}

  if context_name == "page"
    # simple page
    wiki_page_name == inner ? "[#{inner}]" : "[#{inner} -> #{wiki_page_name}]"
  elsif context_name != page_name
    # group page
    if wiki_page_name == inner
      "[#{context_name} / #{wiki_page_name}]"
    else
      "[#{inner} -> #{context_name} / #{wiki_page_name}]"
    end
  elsif inner == page_name || inner == wiki_page_name || inner == wiki_page_name.gsub(/\s/,"-")
    "[#{wiki_page_name}]"
  else
    # fall back
    "[#{inner} -> #{href}]"
  end
end
|
135
|
+
|
136
|
+
end
|
137
|
+
add_markup :greencloth, GreenCloth
|
138
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/../undress")
|
2
|
+
|
3
|
+
module Undress
|
4
|
+
class Textile < Grammar
|
5
|
+
|
6
|
+
# delete tabs and newlines from inside elements
|
7
|
+
pre_processing("*") do |e|
|
8
|
+
if e.elem? && e.parent.doc? && e.inner_html != "" && e.name != "pre"
|
9
|
+
e.inner_html = e.inner_html.gsub(/\n|\t/,"")
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
# whitespace handling
|
14
|
+
post_processing(/\n\n+/, "\n\n")
|
15
|
+
post_processing(/\A\s+/, "")
|
16
|
+
post_processing(/\s+\z/, "\n")
|
17
|
+
|
18
|
+
# special characters introduced by textile
|
19
|
+
# The patterns are numeric character references written out literally:
# 8230 ellipsis, 8217 right single quote, 8220/8221 curly double quotes,
# 8212 em dash, 8211 en dash, 215 multiplication sign, 174 (r), 169 (c),
# 8482 (tm).
post_processing(/&#8230;/, "...")
post_processing(/&#8217;/, "'")
post_processing(/&#822[01];/, '"')
post_processing(/&#8212;/, "--")
post_processing(/&#8211;/, "-")
post_processing(/(\d+\s*)&#215;(\s*\d+)/, '\1x\2')
post_processing(/&#174;/, "(r)")
post_processing(/&#169;/, "(c)")
post_processing(/&#8482;/, "(tm)")
|
28
|
+
|
29
|
+
# inline elements
|
30
|
+
rule_for(:a) {|e|
|
31
|
+
title = e.has_attribute?("title") ? " (#{e["title"]})" : ""
|
32
|
+
"[#{content_of(e)}#{title}:#{e["href"]}]"
|
33
|
+
}
|
34
|
+
rule_for(:img) {|e|
|
35
|
+
alt = e.has_attribute?("alt") ? "(#{e["alt"]})" : ""
|
36
|
+
"!#{e["src"]}#{alt}!"
|
37
|
+
}
|
38
|
+
rule_for(:strong) {|e| "*#{content_of(e)}*" }
|
39
|
+
rule_for(:em) {|e| "_#{content_of(e)}_" }
|
40
|
+
rule_for(:code) {|e| "@#{content_of(e)}@" }
|
41
|
+
rule_for(:cite) {|e| "??#{content_of(e)}??" }
|
42
|
+
rule_for(:sup) {|e| surrounded_by_whitespace?(e) ? "^#{content_of(e)}^" : "[^#{content_of(e)}^]" }
|
43
|
+
rule_for(:sub) {|e| surrounded_by_whitespace?(e) ? "~#{content_of(e)}~" : "[~#{content_of(e)}~]" }
|
44
|
+
rule_for(:ins) {|e| "+#{content_of(e)}+" }
|
45
|
+
rule_for(:del) {|e| "-#{content_of(e)}-" }
|
46
|
+
rule_for(:acronym) {|e| e.has_attribute?("title") ? "#{content_of(e)}(#{e["title"]})" : content_of(e) }
|
47
|
+
|
48
|
+
# text formatting and layout
|
49
|
+
rule_for(:p) {|e| "\n\n#{content_of(e)}\n\n" }
|
50
|
+
rule_for(:br) {|e| "\n" }
|
51
|
+
rule_for(:blockquote) {|e| "\n\nbq. #{content_of(e)}\n\n" }
|
52
|
+
rule_for(:pre) {|e|
|
53
|
+
if e.children.all? {|n| n.text? && n.content =~ /^\s+$/ || n.elem? && n.name == "code" }
|
54
|
+
"\n\npc. #{content_of(e % "code")}\n\n"
|
55
|
+
else
|
56
|
+
"<pre>#{content_of(e)}</pre>"
|
57
|
+
end
|
58
|
+
}
|
59
|
+
|
60
|
+
# headings
|
61
|
+
rule_for(:h1) {|e| "\n\nh1. #{content_of(e)}\n\n" }
|
62
|
+
rule_for(:h2) {|e| "\n\nh2. #{content_of(e)}\n\n" }
|
63
|
+
rule_for(:h3) {|e| "\n\nh3. #{content_of(e)}\n\n" }
|
64
|
+
rule_for(:h4) {|e| "\n\nh4. #{content_of(e)}\n\n" }
|
65
|
+
rule_for(:h5) {|e| "\n\nh5. #{content_of(e)}\n\n" }
|
66
|
+
rule_for(:h6) {|e| "\n\nh6. #{content_of(e)}\n\n" }
|
67
|
+
|
68
|
+
# lists
|
69
|
+
# A list item is prefixed with one marker per enclosing list, so wrapper
# elements around the list (div, body, ...) do not change the depth.
# The marker character comes from the immediate parent: "*" inside <ul>,
# "#" otherwise.
rule_for(:li) {|e|
  token = e.parent.name == "ul" ? "*" : "#"
  # %w builds an Array; with a plain string, include? would be a substring
  # test and tags such as <u> would be mistaken for lists.
  depth = e.ancestors.select {|node| %w(ul ol).include?(node.name) }.size
  nesting = depth < 1 ? 1 : depth # an item outside any list still gets one marker
  "\n#{token * nesting} #{content_of(e)}"
}
# Only the outermost list is padded with blank lines; a list nested inside
# another list just contributes its items.
rule_for(:ul, :ol) {|e|
  if e.ancestors.detect {|node| %w(ul ol).include?(node.name) }
    content_of(e)
  else
    "\n#{content_of(e)}\n\n"
  end
}
|
81
|
+
|
82
|
+
# definition lists
|
83
|
+
rule_for(:dl) {|e| "\n\n#{content_of(e)}\n" }
|
84
|
+
rule_for(:dt) {|e| "- #{content_of(e)} " }
|
85
|
+
rule_for(:dd) {|e| ":= #{content_of(e)} =:\n" }
|
86
|
+
|
87
|
+
# tables
|
88
|
+
rule_for(:table) {|e| "\n\n#{content_of(e)}\n" }
|
89
|
+
rule_for(:tr) {|e| "#{content_of(e)}|\n" }
|
90
|
+
rule_for(:td, :th) {|e|
|
91
|
+
prefix = if e.name == "th"
|
92
|
+
"_. "
|
93
|
+
elsif e.has_attribute?("colspan")
|
94
|
+
"\\#{e["colspan"]}. "
|
95
|
+
elsif e.has_attribute?("rowspan")
|
96
|
+
"/#{e["rowspan"]}. "
|
97
|
+
end
|
98
|
+
|
99
|
+
"|#{prefix}#{content_of(e)}"
|
100
|
+
}
|
101
|
+
end
|
102
|
+
|
103
|
+
add_markup :textile, Textile
|
104
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/test_helper")
|
2
|
+
|
3
|
+
module Undress
|
4
|
+
class TestGrammar < Test::Unit::TestCase
|
5
|
+
class Parent < Grammar
|
6
|
+
rule_for(:p) {|e| "<this is a paragraph>#{content_of(e)}</this is a paragraph>" }
|
7
|
+
end
|
8
|
+
|
9
|
+
class WithPreProcessingRules < Parent
|
10
|
+
pre_processing("p.foo") {|e| e.swap("<div>Cuack</div>") }
|
11
|
+
rule_for(:div) {|e| "<this was a div>#{content_of(e)}</this was a div>" }
|
12
|
+
end
|
13
|
+
|
14
|
+
class Child < Parent; end
|
15
|
+
|
16
|
+
class OverWriter < WithPreProcessingRules
|
17
|
+
rule_for(:div) {|e| content_of(e) }
|
18
|
+
end
|
19
|
+
|
20
|
+
class TextileExtension < Textile
|
21
|
+
rule_for(:a) {|e| "" }
|
22
|
+
end
|
23
|
+
|
24
|
+
def parse_with(grammar, html)
|
25
|
+
grammar.process!(Hpricot(html))
|
26
|
+
end
|
27
|
+
|
28
|
+
context "extending a grammar" do
|
29
|
+
test "the extended grammar should inherit the rules of the parent" do
|
30
|
+
output = parse_with Child, "<p>Foo Bar</p>"
|
31
|
+
assert_equal "<this is a paragraph>Foo Bar</this is a paragraph>", output
|
32
|
+
end
|
33
|
+
|
34
|
+
test "extending a grammar doesn't overwrite the parent's rules" do
|
35
|
+
output = parse_with OverWriter, "<div>Foo</div>"
|
36
|
+
assert_equal "Foo", output
|
37
|
+
|
38
|
+
output = parse_with WithPreProcessingRules, "<div>Foo</div>"
|
39
|
+
assert_equal "<this was a div>Foo</this was a div>", output
|
40
|
+
end
|
41
|
+
|
42
|
+
test "extending textile doesn't blow up" do
|
43
|
+
output = parse_with TextileExtension, "<p><a href='/'>Cuack</a></p><p>Foo Bar</p><p>I <a href='/'>work</a></p>"
|
44
|
+
assert_equal "Foo Bar\n\nI\n", output
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
context "pre processing rules" do
|
49
|
+
test "mutate the DOM before parsing the tags" do
|
50
|
+
output = parse_with WithPreProcessingRules, "<p class='foo'>Blah</p><p>O hai</p>"
|
51
|
+
assert_equal "<this was a div>Cuack</this was a div><this is a paragraph>O hai</this is a paragraph>", output
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,276 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/test_helper")
|
2
|
+
|
3
|
+
class Undress::GreenClothTest < Test::Unit::TestCase
|
4
|
+
def assert_renders_greencloth(greencloth, html)
|
5
|
+
assert_equal greencloth, Undress(html, :xhtml_strict => true).to_greencloth
|
6
|
+
end
|
7
|
+
|
8
|
+
# unallowed tags
|
9
|
+
context "remove unallowed tags" do
|
10
|
+
test "remove a head tag" do
|
11
|
+
html = "<html><head><title>Title</title></head>"
|
12
|
+
greencloth = ""
|
13
|
+
assert_renders_greencloth greencloth, html
|
14
|
+
end
|
15
|
+
|
16
|
+
test "remove a script tag" do
|
17
|
+
html = "<div>Some script inside a<script type='text/javascript'>window.alert('alert')</script> paragraph</div>"
|
18
|
+
greencloth = "Some script inside a paragraph"
|
19
|
+
assert_renders_greencloth greencloth, html
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# code
|
24
|
+
context "converting code tags" do
|
25
|
+
test "a code inside a paragraph" do
|
26
|
+
html = "<p>do you like my <code>function</code>?</p>"
|
27
|
+
greencloth = "do you like my @function@?\n"
|
28
|
+
assert_renders_greencloth greencloth, html
|
29
|
+
end
|
30
|
+
|
31
|
+
test "code tag inside pre tag" do
|
32
|
+
html = "<pre><code>def say_hi\n\tputs 'hi'\nend</code></pre>"
|
33
|
+
greencloth = "<pre><code>def say_hi\n\tputs 'hi'\nend</code></pre>"
|
34
|
+
assert_renders_greencloth greencloth, html
|
35
|
+
end
|
36
|
+
|
37
|
+
test "code inside list items" do
|
38
|
+
html = "<ul><li><code>foo</code></li><li><code>bar</code></li><li>and <code>x</code> is also.</li></ul>"
|
39
|
+
greencloth = "* @foo@\n* @bar@\n* and @x@ is also.\n"
|
40
|
+
assert_renders_greencloth greencloth, html
|
41
|
+
end
|
42
|
+
|
43
|
+
test "code tag not inside a pre and without new lines inside" do
|
44
|
+
html = "<code>some code inside</code>"
|
45
|
+
greencloth = "@some code inside@"
|
46
|
+
assert_renders_greencloth greencloth, html
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
# embed and object
|
51
|
+
# the elements pass trough but the order of the attributes change
|
52
|
+
context "embed and object" do
|
53
|
+
test "embed" do
|
54
|
+
html = "<p>do you like my embedded blip.tv <embed src='http://blip.tv/play/Ac3GfI+2HA' allowfullscreen='true' type='application/x-shockwave-flash' allowscriptaccess='always' height='510' width='720' />?</p>"
|
55
|
+
greencloth = "do you like my embedded blip.tv <embed src=\"http://blip.tv/play/Ac3GfI+2HA\" allowfullscreen=\"true\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" height=\"510\" width=\"720\" />?\n"
|
56
|
+
assert_renders_greencloth greencloth, html
|
57
|
+
end
|
58
|
+
|
59
|
+
test "object" do
|
60
|
+
html = "<p>do you like my embedded youtube <object width='425' height='344'><param name='movie' value='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' /><param name='allowFullScreen' value='true' /><embed src='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' type='application/x-shockwave-flash' width='425' height='344' allowfullscreen='true' /></object>?</p>"
|
61
|
+
greencloth = "do you like my embedded youtube <object height=\"344\" width=\"425\"><param name=\"movie\" value=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" /><param name=\"allowFullScreen\" value=\"true\" /><embed src=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" allowfullscreen=\"true\" type=\"application/x-shockwave-flash\" height=\"344\" width=\"425\" /></object>?\n"
|
62
|
+
assert_renders_greencloth greencloth, html
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# outline
|
67
|
+
# don't allow link to anchors or anchor defs inside hx, greencloth -> html
|
68
|
+
# take cares of it, so we are only allowing links inside hx elements for now
|
69
|
+
context "outline" do
|
70
|
+
test "table of contents toc" do
|
71
|
+
html = "<ul class='toc'><li class='toc1'><a href='#fruits'><span>1</span> Fruits</a></li><ul><li class='toc2'><a href='#tasty-apples'><span>1.1</span> Tasty Apples</a></li><ul><li class='toc3'><a href='green'><span>1.1.1</span> Green</a></li><li class='toc3'><a href='#red'><span>1.1.2</span> Red</a></li></ul>"
|
72
|
+
greencloth = "[[toc]]"
|
73
|
+
assert_renders_greencloth greencloth, html
|
74
|
+
end
|
75
|
+
|
76
|
+
test "headings with links, anchors and links to anchors" do
|
77
|
+
html = "<h1 class='first'><a name='russian-anarchists'></a>Russian Anarchists<a class='anchor' href='#russian-anarchists'>¶</a></h1><h2><a name='michel-bakunin'></a>Michel <a href='http://en.wikipedia.org/wiki/Mikhail_Bakunin'>Bakunin</a><a class='anchor' href='#michel-bakunin'>¶</a></h2><h2><a name='peter-kropotkin'></a><a href='http://en.wikipedia.org/wiki/Peter_Kropotkin'>Peter</a> Kropotkin<a class='anchor' href='#peter-kropotkin'>¶</a></h2><h1><a name='russian-american-anarchists'></a>Russian-American Anarchists<a class='anchor' href='#russian-american-anarchists'>¶</a></h1><h2><a name='emma-goldman'></a><a href='http://en.wikipedia.org/wiki/Emma_Goldman'>Emma Goldman</a><a class='anchor' href='#emma-goldman'>¶</a></h2><h2><a name='alexander-berkman'></a>Alexander <a href='http://en.wikipedia.org/wiki/Alexander_Berkman'>Berkman</a><a class='anchor' href='#alexander-berkman'>¶</a></h2>"
|
78
|
+
greencloth = "Russian Anarchists\n==================\n\nMichel [Bakunin -> http://en.wikipedia.org/wiki/Mikhail_Bakunin]\n--------------\n\n[Peter -> http://en.wikipedia.org/wiki/Peter_Kropotkin] Kropotkin\n---------------\n\nRussian-American Anarchists\n===========================\n\n[Emma Goldman -> http://en.wikipedia.org/wiki/Emma_Goldman]\n------------\n\nAlexander [Berkman -> http://en.wikipedia.org/wiki/Alexander_Berkman]\n-----------------\n"
|
79
|
+
assert_renders_greencloth greencloth, html
|
80
|
+
end
|
81
|
+
|
82
|
+
test "double trouble" do
|
83
|
+
html = "<h1 class='first'><a name='title'></a>Title<a class='anchor' href='#title'>¶</a></h1><h3><a name='under-first'></a>Under first<a class='anchor' href='#under-first'>¶</a></h3><h1><a name='title_2'></a>Title<a class='anchor' href='#title_2'>¶</a></h1><h3><a name='under-second'></a>Under second<a class='anchor' href='#under-second'>¶</a></h3>"
|
84
|
+
greencloth = "Title\n=====\n\nh3. Under first\n\nTitle\n=====\n\nh3. Under second\n"
|
85
|
+
assert_renders_greencloth greencloth, html
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
# basics
|
90
|
+
context "basics" do
|
91
|
+
test "headers" do
|
92
|
+
html = "<h1 class='first'>header one</h1>\n<h2>header two</h2>"
|
93
|
+
greencloth = "header one\n==========\n\nheader two\n----------\n"
|
94
|
+
assert_renders_greencloth greencloth, html
|
95
|
+
end
|
96
|
+
|
97
|
+
test "headers with paragraph" do
|
98
|
+
html = "<p>la la la</p>\n<h1 class='first'>header one</h1>\n<h2>header two</h2>\n<p>la la la</p>"
|
99
|
+
greencloth = "la la la\n\nheader one\n==========\n\nheader two\n----------\n\nla la la\n"
|
100
|
+
assert_renders_greencloth greencloth, html
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
# sections
|
105
|
+
# allways we render h1 with ==== and h2 with ----
|
106
|
+
context "Convert sections" do
|
107
|
+
test "one section no heading" do
|
108
|
+
html = "<div class='wiki_section' id='wiki_section-0'><p>start unheaded section</p><p>line line line</p></div>"
|
109
|
+
greencloth = "start unheaded section\n\nline line line\n"
|
110
|
+
assert_renders_greencloth greencloth, html
|
111
|
+
end
|
112
|
+
|
113
|
+
test "one section with heading" do
|
114
|
+
html = "<div class='wiki_section' id='wiki_section-0'><h2 class='first'>are you ready?!!?</h2><p>here we go now!</p></div>"
|
115
|
+
greencloth = "are you ready?!!?\n-----------------\n\nhere we go now!\n"
|
116
|
+
assert_renders_greencloth greencloth, html
|
117
|
+
end
|
118
|
+
|
119
|
+
test "all headings" do
|
120
|
+
html = "<h1>First</h1><h2>Second</h2><h3>Tres</h3><h4>Cuatro</h4><h5>Five</h5><h6>Six</h6>"
|
121
|
+
greencloth = "First\n=====\n\nSecond\n------\n\nh3. Tres\n\nh4. Cuatro\n\nh5. Five\n\nh6. Six\n"
|
122
|
+
assert_renders_greencloth greencloth, html
|
123
|
+
end
|
124
|
+
|
125
|
+
test "multiple sections with text" do
|
126
|
+
html = "<div class='wiki_section' id='wiki_section-0'><h2 class='first'>Section One</h2><p>section one line one is here<br />section one line two is next</p><p>Here is section one still</p></div><div class='wiki_section' id='wiki_section-1'><h1>Section Two</h1><p>Section two first line<br />Section two another line</p></div><div class='wiki_section' id='wiki_section-2'><h2>Section 3 with h2</h2><p>One more line for section 3</p></div><div class='wiki_section' id='wiki_section-3'><h3>final section 4</h3><p>section 4 first non-blank line</p>\n</div>"
|
127
|
+
greencloth = "Section One\n-----------\n\nsection one line one is here\nsection one line two is next\n\nHere is section one still\n\nSection Two\n===========\n\nSection two first line\nSection two another line\n\nSection 3 with h2\n-----------------\n\nOne more line for section 3\n\nh3. final section 4\n\nsection 4 first non-blank line\n"
|
128
|
+
assert_renders_greencloth greencloth, html
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# lists
|
133
|
+
# TODO: start attribute not implemented
|
134
|
+
context "Converting html lists to greencloth" do
  # Each test feeds an HTML fragment to assert_renders_greencloth and
  # compares the rendered greencloth against the expected markup.

  # A <br /> inside a list item is expected to come out as a plain newline.
  test "hard break in list" do
    source = "<ul>\n\t<li>first line</li>\n\t<li>second<br />\n\tline</li>\n\t<li>third line</li>\n</ul>\n"
    expected = "* first line\n* second\nline\n* third line\n"
    assert_renders_greencloth(expected, source)
  end

  # Bulleted and numbered lists nested inside each other; the expected
  # prefix accumulates one marker per nesting level ("*#*").
  test "mixed nesting" do
    source = "<ul><li>bullet\n<ol>\n<li>number</li>\n<li>number\n<ul>\n\t<li>bullet</li>\n</ul></li>\n<li>number</li>\n<li>number with<br />a break</li>\n</ol></li>\n<li>bullet\n<ul><li>okay</li></ul></li></ul>"
    expected = "* bullet\n*# number\n*# number\n*#* bullet\n*# number\n*# number with\na break\n* bullet\n** okay\n"
    assert_renders_greencloth(expected, source)
  end

  # The third <ol> carries start='4'; the expected output has no marker
  # for it, only a blank line between consecutive lists.
  test "list continuation" do # uses start
    source = "<ol><li>one</li><li>two</li><li>three</li></ol><ol><li>one</li><li>two</li><li>three</li></ol><ol start='4'><li>four</li><li>five</li><li>six</li></ol>"
    expected = "# one\n# two\n# three\n\n# one\n# two\n# three\n\n# four\n# five\n# six\n"
    assert_renders_greencloth(expected, source)
  end

  # Same lists as above, separated by paragraphs.
  test "continue after break" do # uses start
    source = "<ol><li>one</li><li>two</li><li>three</li></ol><p>test</p><ol><li>one</li><li>two</li><li>three</li></ol><p>test</p><ol start='4'><li>four</li><li>five</li><li>six</li></ol>"
    expected = "# one\n# two\n# three\n\ntest\n\n# one\n# two\n# three\n\ntest\n\n# four\n# five\n# six\n"
    assert_renders_greencloth(expected, source)
  end

  # A continued list (start='4') that itself contains a nested <ol>,
  # followed by another continued list (start='7').
  test "continue list when prior list contained nested list" do # uses start
    source = "<ol><li>one</li><li>two</li><li>three</li></ol><ol start='4'><li>four</li><li>five<ol><li>sub-note</li><li>another sub-note</li></ol></li><li>six</li></ol><ol start='7'><li>seven</li><li>eight</li><li>nine</li></ol>"
    expected = "# one\n# two\n# three\n\n# four\n# five\n## sub-note\n## another sub-note\n# six\n\n# seven\n# eight\n# nine\n"
    assert_renders_greencloth(expected, source)
  end

  # Unnamed test with an empty body: it asserts nothing.
  test "" do
  end
end
|
169
|
+
|
170
|
+
# links
|
171
|
+
context "Converting html links to greencloth" do
  # Each test feeds an HTML fragment to assert_renders_greencloth and
  # compares the rendered greencloth against the expected markup.

  test "convert a link to a wiki page inside a paragraph" do
    source = "<p>this is a <a href='/page/plain-link'>plain link</a> in some text</p>"
    expected = "this is a [plain link] in some text\n"
    assert_renders_greencloth(expected, source)
  end

  test "convert a link to a wiki page with namespace" do
    source = "<p>this is a <a href='/namespaced/link'>link</a> in some text</p>"
    expected = "this is a [namespaced / link] in some text\n"
    assert_renders_greencloth(expected, source)
  end

  test "convert a link to a wiki page" do
    source = "<p>this is a <a href='/page/something-else'>link to</a> in some text</p>"
    expected = "this is a [link to -> something else] in some text\n"
    assert_renders_greencloth(expected, source)
  end

  test "convert a link to a wiki page with namespace and text different than link dest" do
    source = "<p>this is a <a href='/namespace/something-else'>link to</a> in some text</p>"
    expected = "this is a [link to -> namespace / something else] in some text\n"
    assert_renders_greencloth(expected, source)
  end

  test "convert a link to an absolute path" do
    source = "<p>this is a <a href='/an/absolute/path'>link to</a> in some text</p>"
    expected = "this is a [link to -> /an/absolute/path] in some text\n"
    assert_renders_greencloth(expected, source)
  end

  test "convert a link to an external domain" do
    source = "<p>this is a <a href='https://riseup.net'>link to</a> a url</p>"
    expected = "this is a [link to -> https://riseup.net] a url\n"
    assert_renders_greencloth(expected, source)
  end

  test "a link to an external domain with the same text as dest" do
    source = "<p>url in brackets <a href='https://riseup.net/'>riseup.net</a></p>"
    expected = "url in brackets [riseup.net -> https://riseup.net/]\n"
    assert_renders_greencloth(expected, source)
  end

  test "a link to a wiki page with the same name as dest" do
    source = "<p>a <a href='/page/name-link'>name link</a> in need of humanizing</p>"
    expected = "a [name link] in need of humanizing\n"
    assert_renders_greencloth(expected, source)
  end

  test "link to a user blue" do
    source = "<p>link to a user <a href='/blue'>blue</a></p>"
    expected = "link to a user [blue]\n"
    assert_renders_greencloth(expected, source)
  end

  test "link with dashes should keep the dashes" do
    source = "<p><a href='/-dashes/in/the/link-'>link to</a></p>"
    expected = "[link to -> /-dashes/in/the/link-]\n"
    assert_renders_greencloth(expected, source)
  end

  test "link with underscores should keep the underscores" do
    source = "<p>links <a href='/page/with_underscores'>with_underscores</a> should keep underscore</p>"
    expected = "links [with_underscores] should keep underscore\n"
    assert_renders_greencloth(expected, source)
  end

  # The <li> starts with whitespace-only text before the anchor; the
  # expected output contains none of it.
  test "a link inside a li element" do
    source = "<ul>\n<li>\n\t\t\n<a href='/page/this'>link to</a></li></ul>"
    expected = "* [link to -> this]\n"
    assert_renders_greencloth(expected, source)
  end

  test "an external link inside a li element" do
    source = "<ul>\n<li><a href='https://riseup.net/'>riseup.net</a></li>\n</ul>"
    expected = "* [riseup.net -> https://riseup.net/]\n"
    assert_renders_greencloth(expected, source)
  end

  # Named anchors (<a name=...>) are expected in the "[# ... #]" form.
  test "many anchors inside a paragraph" do
    source = "<p>make anchors <a name='here'>here</a> or <a name='maybe-here'>maybe here</a> or <a name='there'>over</a></p>"
    expected = "make anchors [# here #] or [# maybe here #] or [# over -> there #]\n"
    assert_renders_greencloth(expected, source)
  end

  # TODO: this test differs from how cg supports writing anchors;
  # only a reduced form of that support is covered here.
  test "anchors and links" do
    source = "<p>link to <a href='/page/anchors#like-so'>anchors</a> or <a href='/page/like#so'>maybe</a> or <a href='#so'>just</a> or <a href='#so'>so</a></p>"
    expected = "link to [anchors -> anchors#like so] or [maybe -> like#so] or [just -> #so] or [so -> #so]\n"
    assert_renders_greencloth(expected, source)
  end

  test "more anchors" do
    source = "<p><a href='#5'>link</a> to a numeric anchor <a name='5'>5</a></p>"
    expected = "[link -> #5] to a numeric anchor [# 5 #]\n"
    assert_renders_greencloth(expected, source)
  end

  test "3 links without /" do
    source = "<p><a href='some'>some</a> and <a href='other'>other</a> and <a href='one_more'>one_more</a></p>"
    expected = "[some] and [other] and [one_more]\n"
    assert_renders_greencloth(expected, source)
  end
end
|
276
|
+
end
|
data/test/test_textile.rb
ADDED
@@ -0,0 +1,198 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/test_helper")
|
2
|
+
|
3
|
+
module Undress
  # Test suite for the HTML -> Textile conversion. Every test passes an
  # HTML fragment through Undress(html).to_textile and compares the result
  # with the expected Textile source.
  class TextileTest < Test::Unit::TestCase
    # Asserts that +html+ converts to exactly +textile+.
    #
    # textile - the expected Textile output (String).
    # html    - the HTML input handed to Undress (String).
    def assert_renders_textile(textile, html)
      assert_equal textile, Undress(html).to_textile
    end

    context "Converting HTML to textile" do
      test "converts nested tags" do
        assert_renders_textile "h2. _this is *very* important_\n", "<h2><em>this is <strong>very</strong> important</em></h2>"
      end

      context "inline elements" do
        test "converts <strong> tags" do
          assert_renders_textile "*foo bar*", "<strong>foo bar</strong>"
        end

        test "converts <em> tags" do
          assert_renders_textile "_foo bar_", "<em>foo bar</em>"
        end

        test "converts <code> tags" do
          assert_renders_textile "@foo bar@", "<code>foo bar</code>"
        end

        test "converts <cite> tags" do
          assert_renders_textile "??foo bar??", "<cite>foo bar</cite>"
        end

        # When the tag is glued to the surrounding text, the expected
        # output wraps the marker in square brackets.
        test "converts <sup> tags" do
          assert_renders_textile "foo ^sup^ bar", "foo <sup>sup</sup> bar"
          assert_renders_textile "foo[^sup^]bar", "foo<sup>sup</sup>bar"
        end

        test "converts <sub> tags" do
          assert_renders_textile "foo ~sub~ bar", "foo <sub>sub</sub> bar"
          assert_renders_textile "foo[~sub~]bar", "foo<sub>sub</sub>bar"
        end

        test "converts <ins> tags" do
          assert_renders_textile "+foo bar+", "<ins>foo bar</ins>"
        end

        test "converts <del> tags" do
          assert_renders_textile "-foo bar-", "<del>foo bar</del>"
        end

        # With a title the acronym gets a parenthesised expansion; without
        # one only the bare text is expected.
        test "converts <acronym> tags" do
          assert_renders_textile "EPA(Environmental Protection Agency)", "<acronym title='Environmental Protection Agency'>EPA</acronym>"
          assert_renders_textile "EPA", "<acronym>EPA</acronym>"
        end
      end

      context "links" do
        test "converts simple links (without title)" do
          assert_renders_textile "[Foo Bar:/cuack]", "<a href='/cuack'>Foo Bar</a>"
        end

        test "converts links with titles" do
          assert_renders_textile "[Foo Bar (You should see this):/cuack]", "<a href='/cuack' title='You should see this'>Foo Bar</a>"
        end
      end

      context "images" do
        test "converts images without alt attributes" do
          assert_renders_textile "!http://example.com/image.png!", "<img src='http://example.com/image.png'/>"
        end

        test "converts images with alt attributes" do
          assert_renders_textile "!http://example.com/image.png(Awesome Pic)!", "<img src='http://example.com/image.png' alt='Awesome Pic'/>"
        end
      end

      context "text formatting" do
        test "converts paragraphs" do
          assert_renders_textile "foo\n\nbar\n", "<p>foo</p><p>bar</p>"
        end

        test "converts <pre> tags which only contain a <code> child" do
          assert_renders_textile "pc. var foo = 1;\n", "<pre><code>var foo = 1;</code></pre>"
          assert_renders_textile "pc. var foo = 1;\n", "<pre> <code>var foo = 1;</code> </pre>"
        end

        test "leaves <pre> tags which contain mixed content as HTML" do
          assert_renders_textile "<pre> foo bar</pre>", "<pre> foo bar</pre>"
        end

        test "converts <br> into a new line" do
          assert_renders_textile "Foo\nBar", "Foo<br/>Bar"
        end

        test "converts blockquotes" do
          assert_renders_textile "bq. foo bar\n", "<blockquote><div>foo bar</div></blockquote>"
        end
      end

      context "headers" do
        test "converts <h1> tags" do
          assert_renders_textile "h1. foo bar\n", "<h1>foo bar</h1>"
        end

        test "converts <h2> tags" do
          assert_renders_textile "h2. foo bar\n", "<h2>foo bar</h2>"
        end

        test "converts <h3> tags" do
          assert_renders_textile "h3. foo bar\n", "<h3>foo bar</h3>"
        end

        test "converts <h4> tags" do
          assert_renders_textile "h4. foo bar\n", "<h4>foo bar</h4>"
        end

        test "converts <h5> tags" do
          assert_renders_textile "h5. foo bar\n", "<h5>foo bar</h5>"
        end

        test "converts <h6> tags" do
          assert_renders_textile "h6. foo bar\n", "<h6>foo bar</h6>"
        end
      end

      context "lists" do
        test "converts bullet lists" do
          assert_renders_textile "* foo\n* bar\n", "<ul><li>foo</li><li>bar</li></ul>"
        end

        test "converts numbered lists" do
          assert_renders_textile "# foo\n# bar\n", "<ol><li>foo</li><li>bar</li></ol>"
        end

        test "converts nested bullet lists" do
          assert_renders_textile "* foo\n** bar\n* baz\n", "<ul><li>foo<ul><li>bar</li></ul></li><li>baz</li></ul>"
        end

        test "converts nested numbered lists" do
          assert_renders_textile "# foo\n## bar\n# baz\n", "<ol><li>foo<ol><li>bar</li></ol></li><li>baz</li></ol>"
        end

        test "converts nested mixed lists" do
          assert_renders_textile "* foo\n## bar\n## baz\n*** quux\n* cuack\n",
            "<ul><li>foo<ol><li>bar</li><li>baz<ul><li>quux</li></ul></li></ol></li><li>cuack</li></ul>"
        end

        test "converts a definition list" do
          assert_renders_textile "- foo := defining foo =:\n- bar := defining bar =:\n",
            "<dl><dt>foo</dt><dd>defining foo</dd><dt>bar</dt><dd>defining bar</dd></dl>"
        end
      end

      context "tables" do
        test "converts a simple table" do
          assert_renders_textile "|foo|bar|baz|\n|1|2|3|\n",
            "<table><tr><td>foo</td><td>bar</td><td>baz</td></tr><tr><td>1</td><td>2</td><td>3</td></tr></table>"
        end

        test "converts a table with headers" do
          assert_renders_textile "|_. foo|_. bar|_. baz|\n|1|2|3|\n",
            "<table><tr><th>foo</th><th>bar</th><th>baz</th></tr><tr><td>1</td><td>2</td><td>3</td></tr></table>"
        end

        test "converts a table with cells that span multiple columns" do
          assert_renders_textile "|foo|bar|baz|\n|\\2. 1|2|\n",
            "<table><tr><td>foo</td><td>bar</td><td>baz</td></tr><tr><td colspan='2'>1</td><td>2</td></tr></table>"
        end

        test "converts a table with cells that span multiple rows" do
          assert_renders_textile "|/2. foo|bar|baz|\n|1|2|\n",
            "<table><tr><td rowspan='2'>foo</td><td>bar</td><td>baz</td></tr><tr><td>1</td><td>2</td></tr></table>"
        end
      end

      context "applying post processing rules" do
        test "compresses newlines to a maximum of two consecutive newlines" do
          # The fixture is well-formed HTML (the list is closed with </ul>),
          # so the test exercises newline compression rather than the
          # parser's recovery from a mismatched closing tag.
          assert_renders_textile "Foo\n\nBar\n\nBaz\n\n* Quux 1\n* Quux 2\n", "<p>Foo</p><p>Bar</p><p>Baz</p><ul><li>Quux 1</li><li>Quux 2</li></ul>"
        end

        test "strips trailing newlines from the start and end of the output string" do
          assert_renders_textile "Foo\n", "<p>Foo</p>"
        end

        # Typographic characters in the HTML are expected to come back as
        # the plain-text sequences a Textile author would have typed.
        test "converts all fancy characters introduced by textile back into their 'source code'" do
          assert_renders_textile "What the ... hell?", "What the … hell?"
          assert_renders_textile "It's mine", "It’s mine"
          assert_renders_textile "\"Fancy quoting\"", "“Fancy quoting”"
          assert_renders_textile "How dashing--right?", "How dashing—right?"
          assert_renders_textile "How dashing - right?", "How dashing – right?"
          assert_renders_textile "2 x 2 = 4", "2 × 2 = 4"
          assert_renders_textile "2x2 = 4", "2×2 = 4"
          assert_renders_textile "Registered(r)", "Registered®"
          assert_renders_textile "Copyrighted(c)", "Copyrighted©"
          assert_renders_textile "Trademarked(tm)", "Trademarked™"
        end
      end
    end
  end
end
|
data/undress.gemspec
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# Gem specification for undress: name, version, authorship, dependencies
# and the explicit list of files shipped in the package.
Gem::Specification.new do |s|
  s.name    = "undress"
  s.version = "0.1"
  s.date    = "2009-07-13"

  s.description = "Simply translate HTML to Textile, Markdown, or whatever other markup format you need"
  s.summary     = "Convert HTML into other markup languages"
  s.homepage    = "http://undress.rubyforge.org"

  s.authors = ["Nicolás Sanguinetti"]
  s.email   = "contacto@nicolassanguinetti.info"

  s.require_paths = ["lib"]
  s.rubygems_version = "1.3.1"

  # rubyforge_project= and has_rdoc= are legacy setters that RubyGems has
  # deprecated; guard them the same way add_development_dependency is
  # guarded below, so the spec still loads where they are not defined.
  s.rubyforge_project = "undress" if s.respond_to?(:rubyforge_project=)
  s.has_rdoc = true if s.respond_to?(:has_rdoc=)

  s.add_dependency "hpricot"

  # Development dependencies are only declared when the running RubyGems
  # knows about them.
  if s.respond_to?(:add_development_dependency)
    s.add_development_dependency "sr-mg"
    s.add_development_dependency "contest"
    s.add_development_dependency "redgreen"
  end

  s.files = %w[
    .gitignore
    LICENSE
    README.rdoc
    Rakefile
    undress.gemspec
    lib/undress.rb
    lib/undress/grammar.rb
    lib/undress/textile.rb
    lib/undress/greencloth.rb
    lib/core_ext/object.rb
    test/test_helper.rb
    test/test_grammar.rb
    test/test_textile.rb
    test/test_greencloth.rb
  ]
end
|
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: zevarito-undress
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.1"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- "Nicol\xC3\xA1s Sanguinetti"
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-07-13 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: sr-mg
|
27
|
+
type: :development
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: contest
|
37
|
+
type: :development
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
- !ruby/object:Gem::Dependency
|
46
|
+
name: redgreen
|
47
|
+
type: :development
|
48
|
+
version_requirement:
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: "0"
|
54
|
+
version:
|
55
|
+
description: Simply translate HTML to Textile, Markdown, or whatever other markup format you need
|
56
|
+
email: contacto@nicolassanguinetti.info
|
57
|
+
executables: []
|
58
|
+
|
59
|
+
extensions: []
|
60
|
+
|
61
|
+
extra_rdoc_files: []
|
62
|
+
|
63
|
+
files:
|
64
|
+
- .gitignore
|
65
|
+
- LICENSE
|
66
|
+
- README.rdoc
|
67
|
+
- Rakefile
|
68
|
+
- undress.gemspec
|
69
|
+
- lib/undress.rb
|
70
|
+
- lib/undress/grammar.rb
|
71
|
+
- lib/undress/textile.rb
|
72
|
+
- lib/undress/greencloth.rb
|
73
|
+
- lib/core_ext/object.rb
|
74
|
+
- test/test_helper.rb
|
75
|
+
- test/test_grammar.rb
|
76
|
+
- test/test_textile.rb
|
77
|
+
- test/test_greencloth.rb
|
78
|
+
has_rdoc: true
|
79
|
+
homepage: http://undress.rubyforge.org
|
80
|
+
post_install_message:
|
81
|
+
rdoc_options: []
|
82
|
+
|
83
|
+
require_paths:
|
84
|
+
- lib
|
85
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: "0"
|
90
|
+
version:
|
91
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: "0"
|
96
|
+
version:
|
97
|
+
requirements: []
|
98
|
+
|
99
|
+
rubyforge_project: undress
|
100
|
+
rubygems_version: 1.2.0
|
101
|
+
signing_key:
|
102
|
+
specification_version: 2
|
103
|
+
summary: Convert HTML into other markup languages
|
104
|
+
test_files: []
|
105
|
+
|