undress 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/LICENSE +22 -0
- data/README.rdoc +39 -0
- data/Rakefile +32 -0
- data/lib/core_ext/object.rb +6 -0
- data/lib/undress.rb +46 -0
- data/lib/undress/grammar.rb +127 -0
- data/lib/undress/textile.rb +96 -0
- data/test/test_grammar.rb +55 -0
- data/test/test_helper.rb +10 -0
- data/test/test_textile.rb +198 -0
- data/undress.gemspec +40 -0
- metadata +105 -0
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
(The MIT License)
|
2
|
+
|
3
|
+
Copyright (c) 2009 Nicolas Sanguinetti, entp.com
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
'Software'), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
20
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
21
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
22
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
= Undress
|
2
|
+
|
3
|
+
Easily convert HTML back to Textile, Markdown, RDoc or whatever other
|
4
|
+
markup language you like.
|
5
|
+
|
6
|
+
require "undress"
|
7
|
+
|
8
|
+
code =<<html
|
9
|
+
<h1>Hello world!</h1>
|
10
|
+
<p><strong>Hey!</strong> How is it going?</p>
|
11
|
+
<h2>Supported Markup Languages so far:</h2>
|
12
|
+
<ul>
|
13
|
+
<li>Textile</li>
|
14
|
+
<li>And more to come :P</li>
|
15
|
+
</ul>
|
16
|
+
html
|
17
|
+
|
18
|
+
Undress(code).to_textile
|
19
|
+
|
20
|
+
Will produce
|
21
|
+
|
22
|
+
h1. Hello world!
|
23
|
+
|
24
|
+
*Hey!* How is it going?
|
25
|
+
|
26
|
+
h2. Supported Markup Languages so far:
|
27
|
+
|
28
|
+
* Textile
|
29
|
+
* And more to come :P
|
30
|
+
|
31
|
+
== Supported Markup Languages
|
32
|
+
|
33
|
+
For now the only language supported is Textile. But I'll be happy to accept
|
34
|
+
patches to add more languages :)
|
35
|
+
|
36
|
+
== License
|
37
|
+
|
38
|
+
Authors:: Nicolas Sanguinetti (foca[http://github.com/foca])
|
39
|
+
License:: MIT (Check LICENSE for details)
|
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require "rake/testtask"

# Prefer the Hanna RDoc task when it is installed; otherwise fall back to the
# RDoc task bundled with Rake.
begin
  require "hanna/rdoctask"
rescue LoadError
  require "rake/rdoctask"
end

# Generates the API documentation into doc/.
Rake::RDocTask.new do |rd|
  # The main page must name a file that is part of rdoc_files, and the README
  # ships as README.rdoc.
  rd.main = "README.rdoc"
  rd.title = "API Documentation for Undress"
  rd.rdoc_files.include("README.rdoc", "LICENSE", "lib/**/*.rb")
  rd.rdoc_dir = "doc"
end

# metric_fu is optional: its tasks are only available when it is installed.
begin
  require "metric_fu"
rescue LoadError
end

# mg is optional as well: gem packaging tasks are skipped when it is missing.
begin
  require "mg"
  MG.new("undress.gemspec")
rescue LoadError
end

desc "Default: run tests"
task :default => :test

Rake::TestTask.new do |t|
  t.test_files = FileList["test/test_*.rb"]
end
|
data/lib/undress.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require "hpricot"
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + "/core_ext/object")
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + "/undress/grammar")
|
4
|
+
|
5
|
+
# Entry point of the library: wraps an HTML document so it can be converted
# to another markup language. +html+ may be a string or an IO object. The
# optional +options+ hash is handed to Hpricot unchanged. Check its
# documentation[http://code.whytheluckystiff.net/doc/hpricot] for details.
def Undress(html, options = {})
  document = Undress::Document.new(html, options)
  document
end
|
12
|
+
|
13
|
+
module Undress
  class << self
    # Makes a markup language available under +name+. The name determines the
    # conversion method: registering +:textile+ provides
    # <tt>Undress(code).to_textile</tt>, registering +:markdown+ would provide
    # <tt>Undress(code).to_markdown</tt>, and so on.
    def add_markup(name, grammar)
      Document.add_markup(name, grammar)
    end
  end

  class Document #:nodoc:
    class << self
      # Defines the <tt>to_<name></tt> converter, which runs +grammar+ over
      # the parsed document.
      def add_markup(name, grammar)
        define_method("to_#{name}") { grammar.process!(@doc) }
      end
    end

    # Parses +html+ with Hpricot, passing +options+ through as given.
    def initialize(html, options)
      @doc = Hpricot(html, options)
    end
  end

  module ::Hpricot #:nodoc:
    class Elem #:nodoc:
      # Collects the chain of enclosing nodes, closest first. A node that has
      # no parent of its own (the top of the tree) is not included.
      def ancestors
        lineage = Elements[]
        current = parent
        while current.respond_to?(:parent)
          break unless current.parent
          lineage << current
          current = current.parent
        end
        lineage
      end
    end
  end
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
module Undress
  # Grammars give you a DSL to declare how to convert an HTML document into a
  # different markup language.
  class Grammar
    # Names of Grammar's own instance methods. A tag that happens to share one
    # of these names is always handled by the default rule instead of being
    # dispatched to the method.
    RESERVED_METHODS = [
      :initialize, :process, :process!, :content_of,
      :surrounded_by_whitespace?, :method_missing,
      :pre_processing_rules, :post_processing_rules,
      :rule_defined?, :whitespace_text?
    ].freeze

    # Give every subclass its own copy of the rules declared so far, so that
    # rules added to a subclass never leak into its parent or its siblings.
    def self.inherited(base) # :nodoc:
      super
      base.instance_variable_set(:@post_processing_rules, post_processing_rules.dup)
      base.instance_variable_set(:@pre_processing_rules, pre_processing_rules.dup)
    end

    # Add a parsing rule for a group of html tags.
    #
    #     rule_for :p do |element|
    #       "<this was a paragraph>#{content_of(element)}</this was a paragraph>"
    #     end
    #
    # will replace your <tt><p></tt> tags for <tt><this was a paragraph></tt>
    # tags, without altering the contents.
    #
    # The element yielded to the block is an Hpricot element for the given tag.
    def self.rule_for(*tags, &handler) # :yields: element
      tags.each do |tag|
        define_method tag.to_sym, &handler
      end
    end

    # Set a default rule for unrecognized tags.
    #
    # Unless you define a special case, it will ignore the tags and just output
    # the contents of unrecognized tags.
    def self.default(&handler) # :yields: element
      define_method :method_missing do |tag, node, *args|
        handler.call(node)
      end
    end

    # Add a post-processing rule to your parser.
    #
    # This takes a regular expression that will be applied to the output after
    # processing any nodes. It can take a string as a replacement, or a block
    # that will be passed to String#gsub.
    #
    #     post_processing(/\n\n+/, "\n\n") # compress more than two newlines
    #     post_processing(/whatever/) { ... }
    def self.post_processing(regexp, replacement = nil, &handler) #:yields: matched_string
      post_processing_rules[regexp] = replacement || handler
    end

    # Add a pre-processing rule to your parser.
    #
    # This lets you mutate the DOM before applying any rule defined with
    # +rule_for+. You need to pass a CSS/XPath selector, and a block that
    # takes an Hpricot element to parse it.
    #
    #     pre_processing "ul.toc" do |element|
    #       element.swap("<p>[[toc]]</p>")
    #     end
    #
    # Would replace any unordered lists with the class +toc+ for a
    # paragraph containing the code <tt>[[toc]]</tt>.
    def self.pre_processing(selector, &handler) # :yields: element
      pre_processing_rules[selector] = handler
    end

    def self.post_processing_rules #:nodoc:
      @post_processing_rules ||= {}
    end

    def self.pre_processing_rules #:nodoc:
      @pre_processing_rules ||= {}
    end

    def self.process!(node) #:nodoc:
      new.process!(node)
    end

    attr_reader :pre_processing_rules #:nodoc:
    attr_reader :post_processing_rules #:nodoc:

    def initialize #:nodoc:
      @pre_processing_rules = self.class.pre_processing_rules.dup
      @post_processing_rules = self.class.post_processing_rules.dup
    end

    # Process a DOM node, converting it to your markup language according to
    # your defined rules. If the node is a Text node, it will return its
    # string representation. Otherwise it will call the rule defined for it.
    #
    # Tag names come straight from the document, so a tag is only dispatched
    # to a method defined by a grammar. Anything else (for example a
    # <tt><select></tt> or <tt><p></tt> tag without a rule, which would
    # otherwise reach Kernel#select or Kernel#p) goes to the default rule.
    def process(nodes)
      Array(nodes).map do |node|
        if node.text?
          node.to_html
        elsif node.elem?
          tag = node.name.to_sym
          rule_defined?(tag) ? send(tag, node) : method_missing(tag, node)
        else
          ""
        end
      end.join("")
    end

    # Run the pre-processing rules over +node+, convert its children, and
    # then apply every post-processing rule to the resulting text.
    def process!(node) #:nodoc:
      pre_processing_rules.each do |selector, handler|
        node.search(selector).each(&handler)
      end

      text = process(node.children)
      post_processing_rules.each do |rule, handler|
        # gsub! mutates in place; its return value (nil when nothing matched)
        # is deliberately ignored.
        handler.is_a?(String) ? text.gsub!(rule, handler) : text.gsub!(rule, &handler)
      end
      text
    end

    # Get the result of parsing the contents of a node.
    def content_of(node)
      process(node.respond_to?(:children) ? node.children : node)
    end

    # Helper method that tells you if the given DOM node has whitespace
    # immediately before or immediately after it. A missing sibling counts as
    # "no whitespace" instead of raising.
    def surrounded_by_whitespace?(node)
      whitespace_text?(node.previous, /\s+\z/) || whitespace_text?(node.next, /\A\s+/)
    end

    # Default rule: ignore the tag itself and output its converted contents.
    # Calls that do not carry a node are real missing methods and are reported
    # as such.
    def method_missing(tag, *args) #:nodoc:
      node = args.first
      return super unless node.respond_to?(:children)
      process(node.children)
    end

    private

    # True when +tag+ names a method defined by this grammar's class, one of
    # its ancestors up to Grammar, or a module mixed into them. Methods that
    # come from Object/Kernel and Grammar's own API never count as rules.
    def rule_defined?(tag)
      @rule_lookup ||= {}
      return @rule_lookup[tag] if @rule_lookup.key?(tag)

      klass = self.class
      @rule_lookup[tag] =
        if RESERVED_METHODS.include?(tag)
          false
        elsif klass.method_defined?(tag) || klass.private_method_defined?(tag)
          lineage = klass.ancestors
          lineage = lineage[0..lineage.index(Grammar)]
          lineage.include?(klass.instance_method(tag).owner)
        else
          false
        end
    end

    # True when +node+ exists, is a text node, and its text matches +pattern+.
    def whitespace_text?(node, pattern)
      !node.nil? && node.text? && !(node.to_s =~ pattern).nil?
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/../undress")
|
2
|
+
|
3
|
+
module Undress
  # Grammar that converts HTML back into Textile markup.
  class Textile < Grammar
    # Tags that open a list level.
    LIST_TAGS = %w(ul ol).freeze

    # whitespace handling
    post_processing(/\n\n+/, "\n\n")
    post_processing(/\A\s+/, "")
    post_processing(/\s+\z/, "\n")

    # special characters introduced by textile, accepted both as numeric
    # character references and as the literal characters
    post_processing(/&#8230;|…/, "...")
    post_processing(/&#8217;|’/, "'")
    post_processing(/&#822[01];|[“”]/, '"')
    post_processing(/&#8212;|—/, "--")
    post_processing(/&#8211;|–/, "-")
    post_processing(/(\d+\s*)(?:&#215;|×)(\s*\d+)/, '\1x\2')
    post_processing(/&#174;|®/, "(r)")
    post_processing(/&#169;|©/, "(c)")
    post_processing(/&#8482;|™/, "(tm)")

    # inline elements
    rule_for(:a) {|e|
      title = e.has_attribute?("title") ? " (#{e["title"]})" : ""
      "[#{content_of(e)}#{title}:#{e["href"]}]"
    }
    rule_for(:img) {|e|
      alt = e.has_attribute?("alt") ? "(#{e["alt"]})" : ""
      "!#{e["src"]}#{alt}!"
    }
    rule_for(:strong)  {|e| "*#{content_of(e)}*" }
    rule_for(:em)      {|e| "_#{content_of(e)}_" }
    rule_for(:code)    {|e| "@#{content_of(e)}@" }
    rule_for(:cite)    {|e| "??#{content_of(e)}??" }
    # sup/sub need the bracketed form when they touch the surrounding text
    rule_for(:sup)     {|e| surrounded_by_whitespace?(e) ? "^#{content_of(e)}^" : "[^#{content_of(e)}^]" }
    rule_for(:sub)     {|e| surrounded_by_whitespace?(e) ? "~#{content_of(e)}~" : "[~#{content_of(e)}~]" }
    rule_for(:ins)     {|e| "+#{content_of(e)}+" }
    rule_for(:del)     {|e| "-#{content_of(e)}-" }
    rule_for(:acronym) {|e| e.has_attribute?("title") ? "#{content_of(e)}(#{e["title"]})" : content_of(e) }

    # text formatting and layout
    rule_for(:p)          {|e| "\n\n#{content_of(e)}\n\n" }
    rule_for(:br)         {|e| "\n" }
    rule_for(:blockquote) {|e| "\n\nbq. #{content_of(e)}\n\n" }
    rule_for(:pre) {|e|
      # NOTE(review): guards against +children+ being nil for an empty
      # element; confirm against the Hpricot version in use.
      children = e.children || []
      code = e % "code"
      # \A..\z so that text which merely contains a blank line is not
      # mistaken for pure whitespace (and then silently dropped).
      only_code = children.all? {|n| n.text? && n.content =~ /\A\s+\z/ || n.elem? && n.name == "code" }
      if code && only_code
        "\n\npc. #{content_of(code)}\n\n"
      else
        "<pre>#{content_of(e)}</pre>"
      end
    }

    # headings
    rule_for(:h1) {|e| "\n\nh1. #{content_of(e)}\n\n" }
    rule_for(:h2) {|e| "\n\nh2. #{content_of(e)}\n\n" }
    rule_for(:h3) {|e| "\n\nh3. #{content_of(e)}\n\n" }
    rule_for(:h4) {|e| "\n\nh4. #{content_of(e)}\n\n" }
    rule_for(:h5) {|e| "\n\nh5. #{content_of(e)}\n\n" }
    rule_for(:h6) {|e| "\n\nh6. #{content_of(e)}\n\n" }

    # lists
    rule_for(:li) {|e|
      token = e.parent.name == "ul" ? "*" : "#"
      # The depth is the number of enclosing lists. Other wrappers (div,
      # blockquote, table cells, ...) must not add levels. An item that is
      # not inside any list still gets one marker.
      depth = e.ancestors.select {|node| LIST_TAGS.include?(node.name) }.size
      nesting = [depth, 1].max
      "\n#{token * nesting} #{content_of(e)}"
    }
    rule_for(:ul, :ol) {|e|
      # Only the outermost list is padded with blank lines.
      if e.ancestors.any? {|node| LIST_TAGS.include?(node.name) }
        content_of(e)
      else
        "\n#{content_of(e)}\n\n"
      end
    }

    # definition lists
    rule_for(:dl) {|e| "\n\n#{content_of(e)}\n" }
    rule_for(:dt) {|e| "- #{content_of(e)} " }
    rule_for(:dd) {|e| ":= #{content_of(e)} =:\n" }

    # tables
    rule_for(:table)   {|e| "\n\n#{content_of(e)}\n" }
    rule_for(:tr)      {|e| "#{content_of(e)}|\n" }
    rule_for(:td, :th) {|e|
      prefix = if e.name == "th"
        "_. "
      elsif e.has_attribute?("colspan")
        "\\#{e["colspan"]}. "
      elsif e.has_attribute?("rowspan")
        "/#{e["rowspan"]}. "
      end

      "|#{prefix}#{content_of(e)}"
    }
  end

  add_markup :textile, Textile
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/test_helper")
|
2
|
+
|
3
|
+
module Undress
|
4
|
+
class TestGrammar < Test::Unit::TestCase
|
5
|
+
class Parent < Grammar
|
6
|
+
rule_for(:p) {|e| "<this is a paragraph>#{content_of(e)}</this is a paragraph>" }
|
7
|
+
end
|
8
|
+
|
9
|
+
class WithPreProcessingRules < Parent
|
10
|
+
pre_processing("p.foo") {|e| e.swap("<div>Cuack</div>") }
|
11
|
+
rule_for(:div) {|e| "<this was a div>#{content_of(e)}</this was a div>" }
|
12
|
+
end
|
13
|
+
|
14
|
+
class Child < Parent; end
|
15
|
+
|
16
|
+
class OverWriter < WithPreProcessingRules
|
17
|
+
rule_for(:div) {|e| content_of(e) }
|
18
|
+
end
|
19
|
+
|
20
|
+
class TextileExtension < Textile
|
21
|
+
rule_for(:a) {|e| "" }
|
22
|
+
end
|
23
|
+
|
24
|
+
def parse_with(grammar, html)
|
25
|
+
grammar.process!(Hpricot(html))
|
26
|
+
end
|
27
|
+
|
28
|
+
context "extending a grammar" do
|
29
|
+
test "the extended grammar should inherit the rules of the parent" do
|
30
|
+
output = parse_with Child, "<p>Foo Bar</p>"
|
31
|
+
assert_equal "<this is a paragraph>Foo Bar</this is a paragraph>", output
|
32
|
+
end
|
33
|
+
|
34
|
+
test "extending a grammar doesn't overwrite the parent's rules" do
|
35
|
+
output = parse_with OverWriter, "<div>Foo</div>"
|
36
|
+
assert_equal "Foo", output
|
37
|
+
|
38
|
+
output = parse_with WithPreProcessingRules, "<div>Foo</div>"
|
39
|
+
assert_equal "<this was a div>Foo</this was a div>", output
|
40
|
+
end
|
41
|
+
|
42
|
+
test "extending textile doesn't blow up" do
|
43
|
+
output = parse_with TextileExtension, "<p><a href='/'>Cuack</a></p><p>Foo Bar</p><p>I <a href='/'>work</a></p>"
|
44
|
+
assert_equal "Foo Bar\n\nI\n", output
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
context "pre processing rules" do
|
49
|
+
test "mutate the DOM before parsing the tags" do
|
50
|
+
output = parse_with WithPreProcessingRules, "<p class='foo'>Blah</p><p>O hai</p>"
|
51
|
+
assert_equal "<this was a div>Cuack</this was a div><this is a paragraph>O hai</this is a paragraph>", output
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,198 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/test_helper")
|
2
|
+
|
3
|
+
module Undress
|
4
|
+
class TextileTest < Test::Unit::TestCase
|
5
|
+
def assert_renders_textile(textile, html)
|
6
|
+
assert_equal textile, Undress(html).to_textile
|
7
|
+
end
|
8
|
+
|
9
|
+
context "Converting HTML to textile" do
|
10
|
+
test "converts nested tags" do
|
11
|
+
assert_renders_textile "h2. _this is *very* important_\n", "<h2><em>this is <strong>very</strong> important</em></h2>"
|
12
|
+
end
|
13
|
+
|
14
|
+
context "inline elements" do
|
15
|
+
test "converts <strong> tags" do
|
16
|
+
assert_renders_textile "*foo bar*", "<strong>foo bar</strong>"
|
17
|
+
end
|
18
|
+
|
19
|
+
test "converts <em> tags" do
|
20
|
+
assert_renders_textile "_foo bar_", "<em>foo bar</em>"
|
21
|
+
end
|
22
|
+
|
23
|
+
test "converts <code> tags" do
|
24
|
+
assert_renders_textile "@foo bar@", "<code>foo bar</code>"
|
25
|
+
end
|
26
|
+
|
27
|
+
test "converts <cite> tags" do
|
28
|
+
assert_renders_textile "??foo bar??", "<cite>foo bar</cite>"
|
29
|
+
end
|
30
|
+
|
31
|
+
test "converts <sup> tags" do
|
32
|
+
assert_renders_textile "foo ^sup^ bar", "foo <sup>sup</sup> bar"
|
33
|
+
assert_renders_textile "foo[^sup^]bar", "foo<sup>sup</sup>bar"
|
34
|
+
end
|
35
|
+
|
36
|
+
test "converts <sub> tags" do
|
37
|
+
assert_renders_textile "foo ~sub~ bar", "foo <sub>sub</sub> bar"
|
38
|
+
assert_renders_textile "foo[~sub~]bar", "foo<sub>sub</sub>bar"
|
39
|
+
end
|
40
|
+
|
41
|
+
test "converts <ins> tags" do
|
42
|
+
assert_renders_textile "+foo bar+", "<ins>foo bar</ins>"
|
43
|
+
end
|
44
|
+
|
45
|
+
test "converts <del> tags" do
|
46
|
+
assert_renders_textile "-foo bar-", "<del>foo bar</del>"
|
47
|
+
end
|
48
|
+
|
49
|
+
test "converts <acronym> tags" do
|
50
|
+
assert_renders_textile "EPA(Environmental Protection Agency)", "<acronym title='Environmental Protection Agency'>EPA</acronym>"
|
51
|
+
assert_renders_textile "EPA", "<acronym>EPA</acronym>"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
context "links" do
|
56
|
+
test "converts simple links (without title)" do
|
57
|
+
assert_renders_textile "[Foo Bar:/cuack]", "<a href='/cuack'>Foo Bar</a>"
|
58
|
+
end
|
59
|
+
|
60
|
+
test "converts links with titles" do
|
61
|
+
assert_renders_textile "[Foo Bar (You should see this):/cuack]", "<a href='/cuack' title='You should see this'>Foo Bar</a>"
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
context "images" do
|
66
|
+
test "converts images without alt attributes" do
|
67
|
+
assert_renders_textile "!http://example.com/image.png!", "<img src='http://example.com/image.png'/>"
|
68
|
+
end
|
69
|
+
|
70
|
+
test "converts images with alt attributes" do
|
71
|
+
assert_renders_textile "!http://example.com/image.png(Awesome Pic)!", "<img src='http://example.com/image.png' alt='Awesome Pic'/>"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
context "text formatting" do
|
76
|
+
test "converts paragraphs" do
|
77
|
+
assert_renders_textile "foo\n\nbar\n", "<p>foo</p><p>bar</p>"
|
78
|
+
end
|
79
|
+
|
80
|
+
test "converts <pre> tags which only contain a <code> child" do
|
81
|
+
assert_renders_textile "pc. var foo = 1;\n", "<pre><code>var foo = 1;</code></pre>"
|
82
|
+
assert_renders_textile "pc. var foo = 1;\n", "<pre> <code>var foo = 1;</code> </pre>"
|
83
|
+
end
|
84
|
+
|
85
|
+
test "leaves <pre> tags which contain mixed content as HTML" do
|
86
|
+
assert_renders_textile "<pre> foo bar</pre>", "<pre> foo bar</pre>"
|
87
|
+
end
|
88
|
+
|
89
|
+
test "converts <br> into a new line" do
|
90
|
+
assert_renders_textile "Foo\nBar", "Foo<br/>Bar"
|
91
|
+
end
|
92
|
+
|
93
|
+
test "converts blockquotes" do
|
94
|
+
assert_renders_textile "bq. foo bar\n", "<blockquote><div>foo bar</div></blockquote>"
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
context "headers" do
|
99
|
+
test "converts <h1> tags" do
|
100
|
+
assert_renders_textile "h1. foo bar\n", "<h1>foo bar</h1>"
|
101
|
+
end
|
102
|
+
|
103
|
+
test "converts <h2> tags" do
|
104
|
+
assert_renders_textile "h2. foo bar\n", "<h2>foo bar</h2>"
|
105
|
+
end
|
106
|
+
|
107
|
+
test "converts <h3> tags" do
|
108
|
+
assert_renders_textile "h3. foo bar\n", "<h3>foo bar</h3>"
|
109
|
+
end
|
110
|
+
|
111
|
+
test "converts <h4> tags" do
|
112
|
+
assert_renders_textile "h4. foo bar\n", "<h4>foo bar</h4>"
|
113
|
+
end
|
114
|
+
|
115
|
+
test "converts <h5> tags" do
|
116
|
+
assert_renders_textile "h5. foo bar\n", "<h5>foo bar</h5>"
|
117
|
+
end
|
118
|
+
|
119
|
+
test "converts <h6> tags" do
|
120
|
+
assert_renders_textile "h6. foo bar\n", "<h6>foo bar</h6>"
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
context "lists" do
|
125
|
+
test "converts bullet lists" do
|
126
|
+
assert_renders_textile "* foo\n* bar\n", "<ul><li>foo</li><li>bar</li></ul>"
|
127
|
+
end
|
128
|
+
|
129
|
+
test "converts numbered lists" do
|
130
|
+
assert_renders_textile "# foo\n# bar\n", "<ol><li>foo</li><li>bar</li></ol>"
|
131
|
+
end
|
132
|
+
|
133
|
+
test "converts nested bullet lists" do
|
134
|
+
assert_renders_textile "* foo\n** bar\n* baz\n", "<ul><li>foo<ul><li>bar</li></ul></li><li>baz</li></ul>"
|
135
|
+
end
|
136
|
+
|
137
|
+
test "converts nested numbered lists" do
|
138
|
+
assert_renders_textile "# foo\n## bar\n# baz\n", "<ol><li>foo<ol><li>bar</li></ol></li><li>baz</li></ol>"
|
139
|
+
end
|
140
|
+
|
141
|
+
test "converts nested mixed lists" do
|
142
|
+
assert_renders_textile "* foo\n## bar\n## baz\n*** quux\n* cuack\n",
|
143
|
+
"<ul><li>foo<ol><li>bar</li><li>baz<ul><li>quux</li></ul></li></ol></li><li>cuack</li></ul>"
|
144
|
+
end
|
145
|
+
|
146
|
+
test "converts a definition list" do
|
147
|
+
assert_renders_textile "- foo := defining foo =:\n- bar := defining bar =:\n",
|
148
|
+
"<dl><dt>foo</dt><dd>defining foo</dd><dt>bar</dt><dd>defining bar</dd></dl>"
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
context "tables" do
|
153
|
+
test "converts a simple table" do
|
154
|
+
assert_renders_textile "|foo|bar|baz|\n|1|2|3|\n",
|
155
|
+
"<table><tr><td>foo</td><td>bar</td><td>baz</td></tr><tr><td>1</td><td>2</td><td>3</td></tr></table>"
|
156
|
+
end
|
157
|
+
|
158
|
+
test "converts a table with headers" do
|
159
|
+
assert_renders_textile "|_. foo|_. bar|_. baz|\n|1|2|3|\n",
|
160
|
+
"<table><tr><th>foo</th><th>bar</th><th>baz</th></tr><tr><td>1</td><td>2</td><td>3</td></tr></table>"
|
161
|
+
end
|
162
|
+
|
163
|
+
test "converts a table with cells that span multiple columns" do
|
164
|
+
assert_renders_textile "|foo|bar|baz|\n|\\2. 1|2|\n",
|
165
|
+
"<table><tr><td>foo</td><td>bar</td><td>baz</td></tr><tr><td colspan='2'>1</td><td>2</td></tr></table>"
|
166
|
+
end
|
167
|
+
|
168
|
+
test "converts a table with cells that span multiple rows" do
|
169
|
+
assert_renders_textile "|/2. foo|bar|baz|\n|1|2|\n",
|
170
|
+
"<table><tr><td rowspan='2'>foo</td><td>bar</td><td>baz</td></tr><tr><td>1</td><td>2</td></tr></table>"
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
context "applying post processing rules" do
|
175
|
+
# The fixture's list is closed with </ul>, so the test exercises the newline
# compression rule on well-formed HTML.
test "compresses newlines to a maximum of two consecutive newlines" do
  assert_renders_textile "Foo\n\nBar\n\nBaz\n\n* Quux 1\n* Quux 2\n", "<p>Foo</p><p>Bar</p><p>Baz</p><ul><li>Quux 1</li><li>Quux 2</li></ul>"
end
|
178
|
+
|
179
|
+
test "strips trailing newlines from the start and end of the output string" do
|
180
|
+
assert_renders_textile "Foo\n", "<p>Foo</p>"
|
181
|
+
end
|
182
|
+
|
183
|
+
test "converts all fancy characters introduced by textile back into their 'source code'" do
|
184
|
+
assert_renders_textile "What the ... hell?", "What the … hell?"
|
185
|
+
assert_renders_textile "It's mine", "It’s mine"
|
186
|
+
assert_renders_textile "\"Fancy quoting\"", "“Fancy quoting”"
|
187
|
+
assert_renders_textile "How dashing--right?", "How dashing—right?"
|
188
|
+
assert_renders_textile "How dashing - right?", "How dashing – right?"
|
189
|
+
assert_renders_textile "2 x 2 = 4", "2 × 2 = 4"
|
190
|
+
assert_renders_textile "2x2 = 4", "2×2 = 4"
|
191
|
+
assert_renders_textile "Registered(r)", "Registered®"
|
192
|
+
assert_renders_textile "Copyrighted(c)", "Copyrighted©"
|
193
|
+
assert_renders_textile "Trademarked(tm)", "Trademarked™"
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
data/undress.gemspec
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# (magic comment: the author name below is non-ASCII, which Ruby 1.9 rejects
# in a source file without a declared encoding)
Gem::Specification.new do |s|
  s.name    = "undress"
  s.version = "0.1"
  s.date    = "2009-07-13"

  s.description = "Simply translate HTML to Textile, Markdown, or whatever other markup format you need"
  s.summary     = "Convert HTML into other markup languages"
  s.homepage    = "http://undress.rubyforge.org"

  s.authors = ["Nicolás Sanguinetti"]
  s.email   = "contacto@nicolassanguinetti.info"

  # LICENSE and README both declare MIT; record it in the gem metadata too.
  s.licenses = ["MIT"] if s.respond_to?(:licenses=)

  s.require_paths    = ["lib"]
  s.rubygems_version = "1.3.1"

  # Guarded so the specification still loads on a RubyGems that does not
  # provide these setters.
  s.rubyforge_project = "undress" if s.respond_to?(:rubyforge_project=)
  s.has_rdoc          = true if s.respond_to?(:has_rdoc=)

  s.add_dependency "hpricot"

  if s.respond_to?(:add_development_dependency)
    s.add_development_dependency "sr-mg"
    s.add_development_dependency "contest"
    s.add_development_dependency "redgreen"
  end

  s.files = %w[
    .gitignore
    LICENSE
    README.rdoc
    Rakefile
    undress.gemspec
    lib/undress.rb
    lib/undress/grammar.rb
    lib/undress/textile.rb
    lib/core_ext/object.rb
    test/test_helper.rb
    test/test_grammar.rb
    test/test_textile.rb
  ]
end
|
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: undress
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.1"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- "Nicol\xC3\xA1s Sanguinetti"
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-07-13 00:00:00 -03:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: sr-mg
|
27
|
+
type: :development
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: contest
|
37
|
+
type: :development
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
- !ruby/object:Gem::Dependency
|
46
|
+
name: redgreen
|
47
|
+
type: :development
|
48
|
+
version_requirement:
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: "0"
|
54
|
+
version:
|
55
|
+
description: Simply translate HTML to Textile, Markdown, or whatever other markup format you need
|
56
|
+
email: contacto@nicolassanguinetti.info
|
57
|
+
executables: []
|
58
|
+
|
59
|
+
extensions: []
|
60
|
+
|
61
|
+
extra_rdoc_files: []
|
62
|
+
|
63
|
+
files:
|
64
|
+
- .gitignore
|
65
|
+
- LICENSE
|
66
|
+
- README.rdoc
|
67
|
+
- Rakefile
|
68
|
+
- undress.gemspec
|
69
|
+
- lib/undress.rb
|
70
|
+
- lib/undress/grammar.rb
|
71
|
+
- lib/undress/textile.rb
|
72
|
+
- lib/core_ext/object.rb
|
73
|
+
- test/test_helper.rb
|
74
|
+
- test/test_grammar.rb
|
75
|
+
- test/test_textile.rb
|
76
|
+
has_rdoc: true
|
77
|
+
homepage: http://undress.rubyforge.org
|
78
|
+
licenses: []
|
79
|
+
|
80
|
+
post_install_message:
|
81
|
+
rdoc_options: []
|
82
|
+
|
83
|
+
require_paths:
|
84
|
+
- lib
|
85
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: "0"
|
90
|
+
version:
|
91
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: "0"
|
96
|
+
version:
|
97
|
+
requirements: []
|
98
|
+
|
99
|
+
rubyforge_project: undress
|
100
|
+
rubygems_version: 1.3.4
|
101
|
+
signing_key:
|
102
|
+
specification_version: 3
|
103
|
+
summary: Convert HTML into other markup languages
|
104
|
+
test_files: []
|
105
|
+
|