undress 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/LICENSE +22 -0
- data/README.rdoc +39 -0
- data/Rakefile +32 -0
- data/lib/core_ext/object.rb +6 -0
- data/lib/undress.rb +46 -0
- data/lib/undress/grammar.rb +127 -0
- data/lib/undress/textile.rb +96 -0
- data/test/test_grammar.rb +55 -0
- data/test/test_helper.rb +10 -0
- data/test/test_textile.rb +198 -0
- data/undress.gemspec +40 -0
- metadata +105 -0
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
(The MIT License)
|
2
|
+
|
3
|
+
Copyright (c) 2009 Nicolas Sanguinetti, entp.com
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
'Software'), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
20
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
21
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
22
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
= Undress
|
2
|
+
|
3
|
+
Easily convert HTML back to Textile, Markdown, RDoc or whatever other
|
4
|
+
markup language you like.
|
5
|
+
|
6
|
+
require "undress"
|
7
|
+
|
8
|
+
code =<<html
|
9
|
+
<h1>Hello world!</h1>
|
10
|
+
<p><strong>Hey!</strong> How is it going?</p>
|
11
|
+
<h2>Supported Markup Languages so far:</h2>
|
12
|
+
<ul>
|
13
|
+
<li>Textile</li>
|
14
|
+
<li>And more to come :P</li>
|
15
|
+
</ul>
|
16
|
+
html
|
17
|
+
|
18
|
+
Undress(code).to_textile
|
19
|
+
|
20
|
+
Will produce
|
21
|
+
|
22
|
+
h1. Hello world!
|
23
|
+
|
24
|
+
*Hey!* How is it going?
|
25
|
+
|
26
|
+
h2. Supported Markup Languages so far:
|
27
|
+
|
28
|
+
* Textile
|
29
|
+
* And more to come :P
|
30
|
+
|
31
|
+
== Supported Markup Languages
|
32
|
+
|
33
|
+
For now the only language supported is Textile. But I'll be happy to accept
|
34
|
+
patches to add more languages :)
|
35
|
+
|
36
|
+
== License
|
37
|
+
|
38
|
+
Authors:: Nicolas Sanguinetti (foca[http://github.com/foca])
|
39
|
+
License:: MIT (Check LICENSE for details)
|
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require "rake/testtask"
|
2
|
+
|
3
|
+
begin
|
4
|
+
require "hanna/rdoctask"
|
5
|
+
rescue LoadError
|
6
|
+
require "rake/rdoctask"
|
7
|
+
end
|
8
|
+
|
9
|
+
# Defines the rdoc tasks. The generated API documentation is written to the
# +doc+ directory and covers the README, the LICENSE and everything under lib/.
Rake::RDocTask.new do |rd|
  # The main page has to be named exactly like the file passed to
  # rdoc_files below, extension included, or RDoc cannot resolve it.
  rd.main = "README.rdoc"
  rd.title = "API Documentation for Undress"
  rd.rdoc_files.include("README.rdoc", "LICENSE", "lib/**/*.rb")
  rd.rdoc_dir = "doc"
end
|
15
|
+
|
16
|
+
begin
|
17
|
+
require "metric_fu"
|
18
|
+
rescue LoadError
|
19
|
+
end
|
20
|
+
|
21
|
+
begin
|
22
|
+
require "mg"
|
23
|
+
MG.new("undress.gemspec")
|
24
|
+
rescue LoadError
|
25
|
+
end
|
26
|
+
|
27
|
+
desc "Default: run tests"
|
28
|
+
task :default => :test
|
29
|
+
|
30
|
+
Rake::TestTask.new do |t|
|
31
|
+
t.test_files = FileList["test/test_*.rb"]
|
32
|
+
end
|
data/lib/undress.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require "hpricot"
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + "/core_ext/object")
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + "/undress/grammar")
|
4
|
+
|
5
|
+
# Load an HTML document so you can undress it. Pass it either a string or an IO
|
6
|
+
# object. You can pass an optional hash of options, which will be forwarded
|
7
|
+
# straight to Hpricot. Check its
|
8
|
+
# documentation[http://code.whytheluckystiff.net/doc/hpricot] for details.
|
9
|
+
def Undress(html, options={})
|
10
|
+
Undress::Document.new(html, options)
|
11
|
+
end
|
12
|
+
|
13
|
+
module Undress
|
14
|
+
# Register a markup language. The name will become the method used to convert
|
15
|
+
# HTML to this markup language: for example registering the name +:textile+
|
16
|
+
# gives you <tt>Undress(code).to_textile</tt>, registering +:markdown+ would
|
17
|
+
# give you <tt>Undress(code).to_markdown</tt>, etc.
|
18
|
+
def self.add_markup(name, grammar)
|
19
|
+
Document.add_markup(name, grammar)
|
20
|
+
end
|
21
|
+
|
22
|
+
class Document #:nodoc:
|
23
|
+
def initialize(html, options)
|
24
|
+
@doc = Hpricot(html, options)
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.add_markup(name, grammar)
|
28
|
+
define_method "to_#{name}" do
|
29
|
+
grammar.process!(@doc)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
module ::Hpricot #:nodoc:
|
35
|
+
class Elem #:nodoc:
|
36
|
+
def ancestors
|
37
|
+
node, ancestors = parent, Elements[]
|
38
|
+
while node.respond_to?(:parent) && node.parent
|
39
|
+
ancestors << node
|
40
|
+
node = node.parent
|
41
|
+
end
|
42
|
+
ancestors
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
module Undress
|
2
|
+
# Grammars give you a DSL to declare how to convert an HTML document into a
|
3
|
+
# different markup language.
|
4
|
+
class Grammar
  # Give every subclass its own copy of the rules declared so far.
  #
  # The copies matter: handing the parent's hashes over as-is would make a
  # rule declared in one subclass show up in the parent and in every sibling
  # grammar. Rules added to a parent after a subclass has been defined are
  # therefore not seen by that subclass.
  def self.inherited(base) # :nodoc:
    super
    base.instance_variable_set(:@post_processing_rules, post_processing_rules.dup)
    base.instance_variable_set(:@pre_processing_rules, pre_processing_rules.dup)
  end

  # Add a parsing rule for a group of html tags.
  #
  #     rule_for :p do |element|
  #       "<this was a paragraph>#{content_of(element)}</this was a paragraph>"
  #     end
  #
  # will replace your <tt><p></tt> tags with <tt><this was a paragraph></tt>
  # tags, without altering the contents.
  #
  # The element yielded to the block is an Hpricot element for the given tag.
  # The block is turned into an instance method named after each tag, so it
  # can call helpers such as +content_of+.
  def self.rule_for(*tags, &handler) # :yields: element
    tags.each do |tag|
      define_method tag.to_sym, &handler
    end
  end

  # Set a default rule for unrecognized tags.
  #
  # Unless you define a special case, it will ignore the tags and just output
  # the contents of unrecognized tags.
  #
  # The handler is invoked with +call+, so inside it +self+ is whatever it
  # was where the block was written, not the grammar instance.
  def self.default(&handler) # :yields: element
    define_method :method_missing do |tag, node, *args|
      handler.call(node)
    end
  end

  # Add a post-processing rule to your parser.
  #
  # This takes a regular expression that will be applied to the output after
  # processing any nodes. It can take a string as a replacement, or a block
  # that will be passed to String#gsub!.
  #
  #     post_processing(/\n\n+/, "\n\n") # compress more than two newlines
  #     post_processing(/whatever/) { ... }
  def self.post_processing(regexp, replacement = nil, &handler) #:yields: matched_string
    post_processing_rules[regexp] = replacement || handler
  end

  # Add a pre-processing rule to your parser.
  #
  # This lets you mutate the DOM before applying any rule defined with
  # +rule_for+. You need to pass a CSS/XPath selector, and a block that
  # takes an Hpricot element to parse it.
  #
  #     pre_processing "ul.toc" do |element|
  #       element.swap("<p>[[toc]]</p>")
  #     end
  #
  # Would replace any unordered list that has the class +toc+ with a
  # paragraph containing the code <tt>[[toc]]</tt>.
  def self.pre_processing(selector, &handler) # :yields: element
    pre_processing_rules[selector] = handler
  end

  # Post-processing rules of this grammar class (regexp => replacement).
  def self.post_processing_rules #:nodoc:
    @post_processing_rules ||= {}
  end

  # Pre-processing rules of this grammar class (selector => handler).
  def self.pre_processing_rules #:nodoc:
    @pre_processing_rules ||= {}
  end

  # Convert +node+ with a fresh instance of this grammar.
  def self.process!(node) #:nodoc:
    new.process!(node)
  end

  attr_reader :pre_processing_rules #:nodoc:
  attr_reader :post_processing_rules #:nodoc:

  # Snapshot the class level rules, so changing the rules of one instance
  # does not affect the class or other instances.
  def initialize #:nodoc:
    @pre_processing_rules = self.class.pre_processing_rules.dup
    @post_processing_rules = self.class.post_processing_rules.dup
  end

  # Process a DOM node, converting it to your markup language according to
  # your defined rules. If the node is a Text node, it will return its
  # string representation. Otherwise it will call the rule defined for it.
  # Nodes that are neither text nor elements produce an empty string.
  #
  # Accepts a single node or a list of nodes and returns one String.
  def process(nodes)
    Array(nodes).map do |node|
      if node.text?
        node.to_html
      elsif node.elem?
        tag = node.name.to_sym
        # Only dispatch to public methods. A bare +send+ would also reach
        # private Kernel methods, so a <p> or <select> tag without a rule
        # would end up calling Kernel#p or Kernel#select instead of the
        # default rule.
        respond_to?(tag) ? send(tag, node) : method_missing(tag, node)
      else
        ""
      end
    end.join("")
  end

  # Run the pre-processing rules on +node+, convert its children and then
  # apply every post-processing rule to the resulting string.
  def process!(node) #:nodoc:
    pre_processing_rules.each do |selector, handler|
      node.search(selector).each(&handler)
    end

    text = process(node.children)
    post_processing_rules.each do |rule, handler|
      if handler.is_a?(String)
        text.gsub!(rule, handler)
      else
        text.gsub!(rule, &handler)
      end
    end
    text
  end

  # Get the result of parsing the contents of a node.
  #
  # Anything that does not respond to +children+ is processed as it is.
  def content_of(node)
    process(node.respond_to?(:children) ? node.children : node)
  end

  # Helper method that tells you if the given DOM node is immediately
  # surrounded by whitespace.
  #
  # Returns a truthy value when the previous sibling is a text node ending in
  # whitespace, or the next sibling is a text node starting with whitespace.
  # A missing sibling counts as "no whitespace" instead of raising.
  def surrounded_by_whitespace?(node)
    before = node.previous
    after = node.next

    # \z and \A anchor to the whole string. With $ and ^ a line break in the
    # middle of a text node would wrongly count as adjacent whitespace.
    (before && before.text? && before.to_s =~ /\s\z/) ||
      (after && after.text? && after.to_s =~ /\A\s/)
  end

  # Fallback rule for tags without a rule of their own: drop the tag and
  # keep its converted contents. Grammar.default replaces this method.
  def method_missing(tag, node, *args) #:nodoc:
    process(node.children)
  end
end
|
127
|
+
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/../undress")
|
2
|
+
|
3
|
+
module Undress
|
4
|
+
# Grammar that converts HTML into Textile markup. Every +rule_for+ below
# returns the Textile source for one kind of tag; the +post_processing+ rules
# clean up the whitespace and turn typographic characters back into the plain
# characters they are written as in Textile.
class Textile < Grammar
  # whitespace handling
  post_processing(/\n\n+/, "\n\n")
  post_processing(/\A\s+/, "")
  post_processing(/\s+\z/, "\n")

  # special characters introduced by textile
  post_processing(/…/, "...")
  post_processing(/’/, "'")
  post_processing(/[“”]/, '"') # opening and closing curly double quotes
  post_processing(/—/, "--")
  post_processing(/–/, "-")
  post_processing(/(\d+\s*)×(\s*\d+)/, '\1x\2')
  post_processing(/®/, "(r)")
  post_processing(/©/, "(c)")
  post_processing(/™/, "(tm)")

  # inline elements
  rule_for(:a) {|e|
    title = e.has_attribute?("title") ? " (#{e["title"]})" : ""
    "[#{content_of(e)}#{title}:#{e["href"]}]"
  }
  rule_for(:img) {|e|
    alt = e.has_attribute?("alt") ? "(#{e["alt"]})" : ""
    "!#{e["src"]}#{alt}!"
  }
  rule_for(:strong) {|e| "*#{content_of(e)}*" }
  rule_for(:em) {|e| "_#{content_of(e)}_" }
  rule_for(:code) {|e| "@#{content_of(e)}@" }
  rule_for(:cite) {|e| "??#{content_of(e)}??" }
  # sup/sub need the bracketed form when they touch the surrounding text
  rule_for(:sup) {|e| surrounded_by_whitespace?(e) ? "^#{content_of(e)}^" : "[^#{content_of(e)}^]" }
  rule_for(:sub) {|e| surrounded_by_whitespace?(e) ? "~#{content_of(e)}~" : "[~#{content_of(e)}~]" }
  rule_for(:ins) {|e| "+#{content_of(e)}+" }
  rule_for(:del) {|e| "-#{content_of(e)}-" }
  rule_for(:acronym) {|e| e.has_attribute?("title") ? "#{content_of(e)}(#{e["title"]})" : content_of(e) }

  # text formatting and layout
  rule_for(:p) {|e| "\n\n#{content_of(e)}\n\n" }
  rule_for(:br) {|e| "\n" }
  rule_for(:blockquote) {|e| "\n\nbq. #{content_of(e)}\n\n" }
  rule_for(:pre) {|e|
    # A <pre> holding nothing but <code> elements and blank text becomes a
    # "pc." block; anything else is kept as literal HTML.
    if e.children.all? {|n| n.text? && n.content =~ /^\s+$/ || n.elem? && n.name == "code" }
      "\n\npc. #{content_of(e % "code")}\n\n"
    else
      "<pre>#{content_of(e)}</pre>"
    end
  }

  # headings
  rule_for(:h1) {|e| "\n\nh1. #{content_of(e)}\n\n" }
  rule_for(:h2) {|e| "\n\nh2. #{content_of(e)}\n\n" }
  rule_for(:h3) {|e| "\n\nh3. #{content_of(e)}\n\n" }
  rule_for(:h4) {|e| "\n\nh4. #{content_of(e)}\n\n" }
  rule_for(:h5) {|e| "\n\nh5. #{content_of(e)}\n\n" }
  rule_for(:h6) {|e| "\n\nh6. #{content_of(e)}\n\n" }

  # lists
  rule_for(:li) {|e|
    token = e.parent.name == "ul" ? "*" : "#"
    # Start at one level and add one for every ancestor that is not itself a
    # list. %w gives an Array of tag names; a plain "ul ol" String would do
    # substring matching and treat e.g. a <u> ancestor as a list.
    nesting = e.ancestors.inject(1) {|total, node| total + (%w(ul ol).include?(node.name) ? 0 : 1) }
    "\n#{token * nesting} #{content_of(e)}"
  }
  rule_for(:ul, :ol) {|e|
    # Only the outermost list is padded with blank lines.
    if e.ancestors.detect {|node| %w(ul ol).include?(node.name) }
      content_of(e)
    else
      "\n#{content_of(e)}\n\n"
    end
  }

  # definition lists
  rule_for(:dl) {|e| "\n\n#{content_of(e)}\n" }
  rule_for(:dt) {|e| "- #{content_of(e)} " }
  rule_for(:dd) {|e| ":= #{content_of(e)} =:\n" }

  # tables
  rule_for(:table) {|e| "\n\n#{content_of(e)}\n" }
  rule_for(:tr) {|e| "#{content_of(e)}|\n" }
  rule_for(:td, :th) {|e|
    # Only one modifier is emitted: header first, then colspan, then rowspan.
    prefix = if e.name == "th"
      "_. "
    elsif e.has_attribute?("colspan")
      "\\#{e["colspan"]}. "
    elsif e.has_attribute?("rowspan")
      "/#{e["rowspan"]}. "
    end

    "|#{prefix}#{content_of(e)}"
  }
end
|
94
|
+
|
95
|
+
add_markup :textile, Textile
|
96
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/test_helper")
|
2
|
+
|
3
|
+
module Undress
|
4
|
+
class TestGrammar < Test::Unit::TestCase
|
5
|
+
class Parent < Grammar
|
6
|
+
rule_for(:p) {|e| "<this is a paragraph>#{content_of(e)}</this is a paragraph>" }
|
7
|
+
end
|
8
|
+
|
9
|
+
class WithPreProcessingRules < Parent
|
10
|
+
pre_processing("p.foo") {|e| e.swap("<div>Cuack</div>") }
|
11
|
+
rule_for(:div) {|e| "<this was a div>#{content_of(e)}</this was a div>" }
|
12
|
+
end
|
13
|
+
|
14
|
+
class Child < Parent; end
|
15
|
+
|
16
|
+
class OverWriter < WithPreProcessingRules
|
17
|
+
rule_for(:div) {|e| content_of(e) }
|
18
|
+
end
|
19
|
+
|
20
|
+
class TextileExtension < Textile
|
21
|
+
rule_for(:a) {|e| "" }
|
22
|
+
end
|
23
|
+
|
24
|
+
def parse_with(grammar, html)
|
25
|
+
grammar.process!(Hpricot(html))
|
26
|
+
end
|
27
|
+
|
28
|
+
context "extending a grammar" do
|
29
|
+
test "the extended grammar should inherit the rules of the parent" do
|
30
|
+
output = parse_with Child, "<p>Foo Bar</p>"
|
31
|
+
assert_equal "<this is a paragraph>Foo Bar</this is a paragraph>", output
|
32
|
+
end
|
33
|
+
|
34
|
+
test "extending a grammar doesn't overwrite the parent's rules" do
|
35
|
+
output = parse_with OverWriter, "<div>Foo</div>"
|
36
|
+
assert_equal "Foo", output
|
37
|
+
|
38
|
+
output = parse_with WithPreProcessingRules, "<div>Foo</div>"
|
39
|
+
assert_equal "<this was a div>Foo</this was a div>", output
|
40
|
+
end
|
41
|
+
|
42
|
+
test "extending textile doesn't blow up" do
|
43
|
+
output = parse_with TextileExtension, "<p><a href='/'>Cuack</a></p><p>Foo Bar</p><p>I <a href='/'>work</a></p>"
|
44
|
+
assert_equal "Foo Bar\n\nI\n", output
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
context "pre processing rules" do
|
49
|
+
test "mutate the DOM before parsing the tags" do
|
50
|
+
output = parse_with WithPreProcessingRules, "<p class='foo'>Blah</p><p>O hai</p>"
|
51
|
+
assert_equal "<this was a div>Cuack</this was a div><this is a paragraph>O hai</this is a paragraph>", output
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,198 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/test_helper")
|
2
|
+
|
3
|
+
module Undress
|
4
|
+
class TextileTest < Test::Unit::TestCase
|
5
|
+
def assert_renders_textile(textile, html)
|
6
|
+
assert_equal textile, Undress(html).to_textile
|
7
|
+
end
|
8
|
+
|
9
|
+
context "Converting HTML to textile" do
|
10
|
+
test "converts nested tags" do
|
11
|
+
assert_renders_textile "h2. _this is *very* important_\n", "<h2><em>this is <strong>very</strong> important</em></h2>"
|
12
|
+
end
|
13
|
+
|
14
|
+
context "inline elements" do
|
15
|
+
test "converts <strong> tags" do
|
16
|
+
assert_renders_textile "*foo bar*", "<strong>foo bar</strong>"
|
17
|
+
end
|
18
|
+
|
19
|
+
test "converts <em> tags" do
|
20
|
+
assert_renders_textile "_foo bar_", "<em>foo bar</em>"
|
21
|
+
end
|
22
|
+
|
23
|
+
test "converts <code> tags" do
|
24
|
+
assert_renders_textile "@foo bar@", "<code>foo bar</code>"
|
25
|
+
end
|
26
|
+
|
27
|
+
test "converts <cite> tags" do
|
28
|
+
assert_renders_textile "??foo bar??", "<cite>foo bar</cite>"
|
29
|
+
end
|
30
|
+
|
31
|
+
test "converts <sup> tags" do
|
32
|
+
assert_renders_textile "foo ^sup^ bar", "foo <sup>sup</sup> bar"
|
33
|
+
assert_renders_textile "foo[^sup^]bar", "foo<sup>sup</sup>bar"
|
34
|
+
end
|
35
|
+
|
36
|
+
test "converts <sub> tags" do
|
37
|
+
assert_renders_textile "foo ~sub~ bar", "foo <sub>sub</sub> bar"
|
38
|
+
assert_renders_textile "foo[~sub~]bar", "foo<sub>sub</sub>bar"
|
39
|
+
end
|
40
|
+
|
41
|
+
test "converts <ins> tags" do
|
42
|
+
assert_renders_textile "+foo bar+", "<ins>foo bar</ins>"
|
43
|
+
end
|
44
|
+
|
45
|
+
test "converts <del> tags" do
|
46
|
+
assert_renders_textile "-foo bar-", "<del>foo bar</del>"
|
47
|
+
end
|
48
|
+
|
49
|
+
test "converts <acronym> tags" do
|
50
|
+
assert_renders_textile "EPA(Environmental Protection Agency)", "<acronym title='Environmental Protection Agency'>EPA</acronym>"
|
51
|
+
assert_renders_textile "EPA", "<acronym>EPA</acronym>"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
context "links" do
|
56
|
+
test "converts simple links (without title)" do
|
57
|
+
assert_renders_textile "[Foo Bar:/cuack]", "<a href='/cuack'>Foo Bar</a>"
|
58
|
+
end
|
59
|
+
|
60
|
+
test "converts links with titles" do
|
61
|
+
assert_renders_textile "[Foo Bar (You should see this):/cuack]", "<a href='/cuack' title='You should see this'>Foo Bar</a>"
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
context "images" do
|
66
|
+
test "converts images without alt attributes" do
|
67
|
+
assert_renders_textile "!http://example.com/image.png!", "<img src='http://example.com/image.png'/>"
|
68
|
+
end
|
69
|
+
|
70
|
+
test "converts images with alt attributes" do
|
71
|
+
assert_renders_textile "!http://example.com/image.png(Awesome Pic)!", "<img src='http://example.com/image.png' alt='Awesome Pic'/>"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
context "text formatting" do
|
76
|
+
test "converts paragraphs" do
|
77
|
+
assert_renders_textile "foo\n\nbar\n", "<p>foo</p><p>bar</p>"
|
78
|
+
end
|
79
|
+
|
80
|
+
test "converts <pre> tags which only contain a <code> child" do
|
81
|
+
assert_renders_textile "pc. var foo = 1;\n", "<pre><code>var foo = 1;</code></pre>"
|
82
|
+
assert_renders_textile "pc. var foo = 1;\n", "<pre> <code>var foo = 1;</code> </pre>"
|
83
|
+
end
|
84
|
+
|
85
|
+
test "leaves <pre> tags which contain mixed content as HTML" do
|
86
|
+
assert_renders_textile "<pre> foo bar</pre>", "<pre> foo bar</pre>"
|
87
|
+
end
|
88
|
+
|
89
|
+
test "converts <br> into a new line" do
|
90
|
+
assert_renders_textile "Foo\nBar", "Foo<br/>Bar"
|
91
|
+
end
|
92
|
+
|
93
|
+
test "converts blockquotes" do
|
94
|
+
assert_renders_textile "bq. foo bar\n", "<blockquote><div>foo bar</div></blockquote>"
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
context "headers" do
|
99
|
+
test "converts <h1> tags" do
|
100
|
+
assert_renders_textile "h1. foo bar\n", "<h1>foo bar</h1>"
|
101
|
+
end
|
102
|
+
|
103
|
+
test "converts <h2> tags" do
|
104
|
+
assert_renders_textile "h2. foo bar\n", "<h2>foo bar</h2>"
|
105
|
+
end
|
106
|
+
|
107
|
+
test "converts <h3> tags" do
|
108
|
+
assert_renders_textile "h3. foo bar\n", "<h3>foo bar</h3>"
|
109
|
+
end
|
110
|
+
|
111
|
+
test "converts <h4> tags" do
|
112
|
+
assert_renders_textile "h4. foo bar\n", "<h4>foo bar</h4>"
|
113
|
+
end
|
114
|
+
|
115
|
+
test "converts <h5> tags" do
|
116
|
+
assert_renders_textile "h5. foo bar\n", "<h5>foo bar</h5>"
|
117
|
+
end
|
118
|
+
|
119
|
+
test "converts <h6> tags" do
|
120
|
+
assert_renders_textile "h6. foo bar\n", "<h6>foo bar</h6>"
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
context "lists" do
|
125
|
+
test "converts bullet lists" do
|
126
|
+
assert_renders_textile "* foo\n* bar\n", "<ul><li>foo</li><li>bar</li></ul>"
|
127
|
+
end
|
128
|
+
|
129
|
+
test "converts numbered lists" do
|
130
|
+
assert_renders_textile "# foo\n# bar\n", "<ol><li>foo</li><li>bar</li></ol>"
|
131
|
+
end
|
132
|
+
|
133
|
+
test "converts nested bullet lists" do
|
134
|
+
assert_renders_textile "* foo\n** bar\n* baz\n", "<ul><li>foo<ul><li>bar</li></ul></li><li>baz</li></ul>"
|
135
|
+
end
|
136
|
+
|
137
|
+
test "converts nested numbered lists" do
|
138
|
+
assert_renders_textile "# foo\n## bar\n# baz\n", "<ol><li>foo<ol><li>bar</li></ol></li><li>baz</li></ol>"
|
139
|
+
end
|
140
|
+
|
141
|
+
test "converts nested mixed lists" do
|
142
|
+
assert_renders_textile "* foo\n## bar\n## baz\n*** quux\n* cuack\n",
|
143
|
+
"<ul><li>foo<ol><li>bar</li><li>baz<ul><li>quux</li></ul></li></ol></li><li>cuack</li></ul>"
|
144
|
+
end
|
145
|
+
|
146
|
+
test "converts a definition list" do
|
147
|
+
assert_renders_textile "- foo := defining foo =:\n- bar := defining bar =:\n",
|
148
|
+
"<dl><dt>foo</dt><dd>defining foo</dd><dt>bar</dt><dd>defining bar</dd></dl>"
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
context "tables" do
|
153
|
+
test "converts a simple table" do
|
154
|
+
assert_renders_textile "|foo|bar|baz|\n|1|2|3|\n",
|
155
|
+
"<table><tr><td>foo</td><td>bar</td><td>baz</td></tr><tr><td>1</td><td>2</td><td>3</td></tr></table>"
|
156
|
+
end
|
157
|
+
|
158
|
+
test "converts a table with headers" do
|
159
|
+
assert_renders_textile "|_. foo|_. bar|_. baz|\n|1|2|3|\n",
|
160
|
+
"<table><tr><th>foo</th><th>bar</th><th>baz</th></tr><tr><td>1</td><td>2</td><td>3</td></tr></table>"
|
161
|
+
end
|
162
|
+
|
163
|
+
test "converts a table with cells that span multiple columns" do
|
164
|
+
assert_renders_textile "|foo|bar|baz|\n|\\2. 1|2|\n",
|
165
|
+
"<table><tr><td>foo</td><td>bar</td><td>baz</td></tr><tr><td colspan='2'>1</td><td>2</td></tr></table>"
|
166
|
+
end
|
167
|
+
|
168
|
+
test "converts a table with cells that span multiple rows" do
|
169
|
+
assert_renders_textile "|/2. foo|bar|baz|\n|1|2|\n",
|
170
|
+
"<table><tr><td rowspan='2'>foo</td><td>bar</td><td>baz</td></tr><tr><td>1</td><td>2</td></tr></table>"
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
context "applying post processing rules" do
|
175
|
+
test "compresses newlines to a maximum of two consecutive newlines" do
|
176
|
+
assert_renders_textile "Foo\n\nBar\n\nBaz\n\n* Quux 1\n* Quux 2\n", "<p>Foo</p><p>Bar</p><p>Baz</p><ul><li>Quux 1</li><li>Quux 2</li></p>"
|
177
|
+
end
|
178
|
+
|
179
|
+
test "strips trailing newlines from the start and end of the output string" do
|
180
|
+
assert_renders_textile "Foo\n", "<p>Foo</p>"
|
181
|
+
end
|
182
|
+
|
183
|
+
test "converts all fancy characters introduced by textile back into their 'source code'" do
|
184
|
+
assert_renders_textile "What the ... hell?", "What the … hell?"
|
185
|
+
assert_renders_textile "It's mine", "It’s mine"
|
186
|
+
assert_renders_textile "\"Fancy quoting\"", "“Fancy quoting”"
|
187
|
+
assert_renders_textile "How dashing--right?", "How dashing—right?"
|
188
|
+
assert_renders_textile "How dashing - right?", "How dashing – right?"
|
189
|
+
assert_renders_textile "2 x 2 = 4", "2 × 2 = 4"
|
190
|
+
assert_renders_textile "2x2 = 4", "2×2 = 4"
|
191
|
+
assert_renders_textile "Registered(r)", "Registered®"
|
192
|
+
assert_renders_textile "Copyrighted(c)", "Copyrighted©"
|
193
|
+
assert_renders_textile "Trademarked(tm)", "Trademarked™"
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
data/undress.gemspec
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = "undress"
|
3
|
+
s.version = "0.1"
|
4
|
+
s.date = "2009-07-13"
|
5
|
+
|
6
|
+
s.description = "Simply translate HTML to Textile, Markdown, or whatever other markup format you need"
|
7
|
+
s.summary = "Convert HTML into other markup languages"
|
8
|
+
s.homepage = "http://undress.rubyforge.org"
|
9
|
+
|
10
|
+
s.authors = "Nicolás Sanguinetti"
|
11
|
+
s.email = "contacto@nicolassanguinetti.info"
|
12
|
+
|
13
|
+
s.require_paths = ["lib"]
|
14
|
+
s.rubyforge_project = "undress"
|
15
|
+
s.has_rdoc = true
|
16
|
+
s.rubygems_version = "1.3.1"
|
17
|
+
|
18
|
+
s.add_dependency "hpricot"
|
19
|
+
|
20
|
+
if s.respond_to?(:add_development_dependency)
|
21
|
+
s.add_development_dependency "sr-mg"
|
22
|
+
s.add_development_dependency "contest"
|
23
|
+
s.add_development_dependency "redgreen"
|
24
|
+
end
|
25
|
+
|
26
|
+
s.files = %w[
|
27
|
+
.gitignore
|
28
|
+
LICENSE
|
29
|
+
README.rdoc
|
30
|
+
Rakefile
|
31
|
+
undress.gemspec
|
32
|
+
lib/undress.rb
|
33
|
+
lib/undress/grammar.rb
|
34
|
+
lib/undress/textile.rb
|
35
|
+
lib/core_ext/object.rb
|
36
|
+
test/test_helper.rb
|
37
|
+
test/test_grammar.rb
|
38
|
+
test/test_textile.rb
|
39
|
+
]
|
40
|
+
end
|
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: undress
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.1"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- "Nicol\xC3\xA1s Sanguinetti"
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-07-13 00:00:00 -03:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: sr-mg
|
27
|
+
type: :development
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: contest
|
37
|
+
type: :development
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
- !ruby/object:Gem::Dependency
|
46
|
+
name: redgreen
|
47
|
+
type: :development
|
48
|
+
version_requirement:
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: "0"
|
54
|
+
version:
|
55
|
+
description: Simply translate HTML to Textile, Markdown, or whatever other markup format you need
|
56
|
+
email: contacto@nicolassanguinetti.info
|
57
|
+
executables: []
|
58
|
+
|
59
|
+
extensions: []
|
60
|
+
|
61
|
+
extra_rdoc_files: []
|
62
|
+
|
63
|
+
files:
|
64
|
+
- .gitignore
|
65
|
+
- LICENSE
|
66
|
+
- README.rdoc
|
67
|
+
- Rakefile
|
68
|
+
- undress.gemspec
|
69
|
+
- lib/undress.rb
|
70
|
+
- lib/undress/grammar.rb
|
71
|
+
- lib/undress/textile.rb
|
72
|
+
- lib/core_ext/object.rb
|
73
|
+
- test/test_helper.rb
|
74
|
+
- test/test_grammar.rb
|
75
|
+
- test/test_textile.rb
|
76
|
+
has_rdoc: true
|
77
|
+
homepage: http://undress.rubyforge.org
|
78
|
+
licenses: []
|
79
|
+
|
80
|
+
post_install_message:
|
81
|
+
rdoc_options: []
|
82
|
+
|
83
|
+
require_paths:
|
84
|
+
- lib
|
85
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: "0"
|
90
|
+
version:
|
91
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: "0"
|
96
|
+
version:
|
97
|
+
requirements: []
|
98
|
+
|
99
|
+
rubyforge_project: undress
|
100
|
+
rubygems_version: 1.3.4
|
101
|
+
signing_key:
|
102
|
+
specification_version: 3
|
103
|
+
summary: Convert HTML into other markup languages
|
104
|
+
test_files: []
|
105
|
+
|