zevarito-undress 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +1 -1
- data/lib/undress.rb +47 -0
- data/lib/undress/textile.rb +0 -7
- data/test/test_greencloth.rb +83 -3
- data/undress.gemspec +2 -2
- metadata +4 -3
data/README.rdoc
CHANGED
data/lib/undress.rb
CHANGED
@@ -27,6 +27,8 @@ module Undress
|
|
27
27
|
class Document #:nodoc:
|
28
28
|
def initialize(html, options)
|
29
29
|
@doc = Hpricot(html, options)
|
30
|
+
xhtmlize!
|
31
|
+
cleanup_indentation
|
30
32
|
end
|
31
33
|
|
32
34
|
def self.add_markup(name, grammar)
|
@@ -34,6 +36,51 @@ module Undress
|
|
34
36
|
grammar.process!(@doc)
|
35
37
|
end
|
36
38
|
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
# We try to fix those elements which aren't written as standard XHTML and, more
|
43
|
+
# importantly, which we can't parse correctly unless we fix them first.
|
44
|
+
def xhtmlize!
|
45
|
+
(@doc/"ul|ol").each do |list|
|
46
|
+
fixup_list(list) if list.parent != "li" && list.parent.name !~ /ul|ol/
|
47
|
+
end
|
48
|
+
|
49
|
+
(@doc/"span[@style*='italic']").each { |e| e.swap "<em>#{e.inner_html}</em>" }
|
50
|
+
|
51
|
+
(@doc/"span[@style*='underline']").each { |e| e.swap "<ins>#{e.inner_html}</ins>" }
|
52
|
+
|
53
|
+
(@doc/"span[@style*='line-through']").each { |e| e.swap "<del>#{e.inner_html}</del>" }
|
54
|
+
|
55
|
+
(@doc/"span[@style*='bold']").each { |e| e.swap "<strong>#{e.inner_html}</strong>" }
|
56
|
+
end
|
57
|
+
|
58
|
+
# Delete tabs and newlines, and collapse runs of whitespace into a single space, inside elements
|
59
|
+
# except <pre> or <code> elements
|
60
|
+
def cleanup_indentation
|
61
|
+
(@doc/"*").each do |e|
|
62
|
+
if e.elem? && e.inner_html != "" && e.name !~ (/pre|code/) && e.children.size == 0
|
63
|
+
e.inner_html = e.inner_html.gsub(/\n|\t/,"").gsub(/\s+/," ")
|
64
|
+
elsif e.text? && e.parent.name !~ /pre|code/
|
65
|
+
e.content = e.content.gsub(/\n|\t/,"").gsub(/\s+/," ").gsub(/^\s$/, "")
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
# Fix up a badly nested list, such as a <ul> that is a sibling of an <li> instead of being inside the <li>.
|
71
|
+
def fixup_list(list)
|
72
|
+
list.children.each {|e| fixup_list(e) if e.elem? && e.name =~ /ol|ul/}
|
73
|
+
|
74
|
+
if list.parent.name != "li"
|
75
|
+
li_side = list.next_sibling if list.next_sibling && list.next_sibling.name == "li"
|
76
|
+
li_side = list.previous_sibling if list.previous_sibling && list.previous_sibling.name == "li"
|
77
|
+
|
78
|
+
if li_side
|
79
|
+
li_side.inner_html = "#{li_side.inner_html}#{list.to_html}"
|
80
|
+
list.parent.replace_child(list, "")
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
37
84
|
end
|
38
85
|
|
39
86
|
module ::Hpricot #:nodoc:
|
data/lib/undress/textile.rb
CHANGED
@@ -3,13 +3,6 @@ require File.expand_path(File.dirname(__FILE__) + "/../undress")
|
|
3
3
|
module Undress
|
4
4
|
class Textile < Grammar
|
5
5
|
|
6
|
-
# delete tabs and newlines from inside elements
|
7
|
-
pre_processing("*") do |e|
|
8
|
-
if e.elem? && e.parent.doc? && e.inner_html != "" && e.name != "pre"
|
9
|
-
e.inner_html = e.inner_html.gsub(/\n|\t/,"")
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
6
|
# whitespace handling
|
14
7
|
post_processing(/\n\n+/, "\n\n")
|
15
8
|
post_processing(/\A\s+/, "")
|
data/test/test_greencloth.rb
CHANGED
@@ -2,7 +2,87 @@ require File.expand_path(File.dirname(__FILE__) + "/test_helper")
|
|
2
2
|
|
3
3
|
class Undress::GreenClothTest < Test::Unit::TestCase
|
4
4
|
def assert_renders_greencloth(greencloth, html)
|
5
|
-
assert_equal greencloth, Undress(html
|
5
|
+
assert_equal greencloth, Undress(html).to_greencloth
|
6
|
+
end
|
7
|
+
|
8
|
+
context "parsing badly indented documents" do
|
9
|
+
test "badly indent doc" do
|
10
|
+
html = "<ul>
|
11
|
+
<li>foo</li>
|
12
|
+
<li>bar</li>
|
13
|
+
<li>and x is also.</li>
|
14
|
+
</ul>"
|
15
|
+
greencloth = "* foo\n* bar\n* and x is also.\n"
|
16
|
+
assert_renders_greencloth greencloth, html
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# TODO:
|
21
|
+
# this is enough to ensure invalid html -> greencloth works, but xhtmlize! must
|
22
|
+
# also have its own tests in test_undress or somewhere similar
|
23
|
+
context "parsing not valid xhtml documents" do
|
24
|
+
test "font-weight=bold styles in <span> elements should be <strong>" do
|
25
|
+
html = "<p>some text <span style='font-weight=bold'>bold</span> with style</p>"
|
26
|
+
greencloth = "some text *bold* with style\n"
|
27
|
+
assert_renders_greencloth greencloth, html
|
28
|
+
end
|
29
|
+
|
30
|
+
test "style 'line-through' should be converted to <del> in <span> elements" do
|
31
|
+
html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration-: line-through;'>paragraph</span></p>"
|
32
|
+
greencloth = "with -some- in the -paragraph-\n"
|
33
|
+
assert_renders_greencloth greencloth, html
|
34
|
+
html = "<p style='text-decoration: line-through;'>with some in the paragraph</p>"
|
35
|
+
greencloth = "with some in the paragraph\n"
|
36
|
+
assert_renders_greencloth greencloth, html
|
37
|
+
end
|
38
|
+
|
39
|
+
test "style 'underline' should be converted to <ins> in <span> elements" do
|
40
|
+
html = "<p>with <span style='text-decoration: underline;'>some</span> in the <span style='text-decoration: underline;'>paragraph</span></p>"
|
41
|
+
greencloth = "with +some+ in the +paragraph+\n"
|
42
|
+
assert_renders_greencloth greencloth, html
|
43
|
+
html = "<p style='text-decoration: underline;'>with some in the paragraph</p>"
|
44
|
+
greencloth = "with some in the paragraph\n"
|
45
|
+
assert_renders_greencloth greencloth, html
|
46
|
+
end
|
47
|
+
|
48
|
+
test "style 'italic' should be converted to <em> in <span> elements" do
|
49
|
+
html = "<p>with <span style='font-style: italic;'>some</span> in the <span style='font-style: italic;'>paragraph</span></p>"
|
50
|
+
greencloth = "with _some_ in the _paragraph_\n"
|
51
|
+
assert_renders_greencloth greencloth, html
|
52
|
+
html = "<p style='font-style: italic;'>with some in the paragraph</p>"
|
53
|
+
greencloth = "with some in the paragraph\n"
|
54
|
+
assert_renders_greencloth greencloth, html
|
55
|
+
end
|
56
|
+
|
57
|
+
test "a nested invalid unordered list" do
|
58
|
+
html = "<ul><li>item 1</li><li>item 2</li><ul><li>nested 1</li><li>nested 2</li></ul><li>item 3</li></ul>"
|
59
|
+
greencloth = "* item 1\n* item 2\n** nested 1\n** nested 2\n* item 3\n"
|
60
|
+
assert_renders_greencloth greencloth, html
|
61
|
+
end
|
62
|
+
|
63
|
+
test "a nested invalid ordered list" do
|
64
|
+
html = "<ol><li>item 1</li><li>item 2</li><ol><li>nested 1</li><li>nested 2</li></ol><li>item 3</li></ol>"
|
65
|
+
greencloth = "# item 1\n# item 2\n## nested 1\n## nested 2\n# item 3\n"
|
66
|
+
assert_renders_greencloth greencloth, html
|
67
|
+
end
|
68
|
+
|
69
|
+
test "a nested invalid mixed list with 3 levels" do
|
70
|
+
html = "<ul><li>item 1</li><li>item 2</li><ol><li>nested 1</li><li>nested 2</li><ul><li>nested2 1</li><li>nested2 2</li></ul></ol><li>item 3</li></ul>"
|
71
|
+
greencloth = "* item 1\n* item 2\n*# nested 1\n*# nested 2\n*#* nested2 1\n*#* nested2 2\n* item 3\n"
|
72
|
+
assert_renders_greencloth greencloth, html
|
73
|
+
end
|
74
|
+
|
75
|
+
test "a nested invalid mixed list" do
|
76
|
+
html = "<ul><li>item 1</li><li>item 2</li><ol><li>nested 1</li><li>nested 2</li></ol><li>item 3</li></ul>"
|
77
|
+
greencloth = "* item 1\n* item 2\n*# nested 1\n*# nested 2\n* item 3\n"
|
78
|
+
assert_renders_greencloth greencloth, html
|
79
|
+
end
|
80
|
+
|
81
|
+
test "2 badly nested list inside" do
|
82
|
+
html = "<ul><li>item 1</li><li>item 2</li><ul><li>nested 1</li><ul><li>item 1x</li><li>item 2x</li></ul><li>nested 2</li></ul><li>item 3</li></ul>"
|
83
|
+
greencloth = "* item 1\n* item 2\n** nested 1\n*** item 1x\n*** item 2x\n** nested 2\n* item 3\n"
|
84
|
+
assert_renders_greencloth greencloth, html
|
85
|
+
end
|
6
86
|
end
|
7
87
|
|
8
88
|
# unallowed tags
|
@@ -52,13 +132,13 @@ class Undress::GreenClothTest < Test::Unit::TestCase
|
|
52
132
|
context "embed and object" do
|
53
133
|
test "embed" do
|
54
134
|
html = "<p>do you like my embedded blip.tv <embed src='http://blip.tv/play/Ac3GfI+2HA' allowfullscreen='true' type='application/x-shockwave-flash' allowscriptaccess='always' height='510' width='720' />?</p>"
|
55
|
-
greencloth = "do you like my embedded blip.tv <embed src=\"http://blip.tv/play/Ac3GfI+2HA\"
|
135
|
+
greencloth = "do you like my embedded blip.tv <embed allowfullscreen=\"true\" src=\"http://blip.tv/play/Ac3GfI+2HA\" allowscriptaccess=\"always\" type=\"application/x-shockwave-flash\" height=\"510\" width=\"720\" />?\n"
|
56
136
|
assert_renders_greencloth greencloth, html
|
57
137
|
end
|
58
138
|
|
59
139
|
test "object" do
|
60
140
|
html = "<p>do you like my embedded youtube <object width='425' height='344'><param name='movie' value='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' /><param name='allowFullScreen' value='true' /><embed src='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' type='application/x-shockwave-flash' width='425' height='344' allowfullscreen='true' /></object>?</p>"
|
61
|
-
greencloth = "do you like my embedded youtube <object height=\"344\" width=\"425\"><param name=\"movie\" value=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" /><param name=\"allowFullScreen\" value=\"true\" /><embed src=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\"
|
141
|
+
greencloth = "do you like my embedded youtube <object height=\"344\" width=\"425\"><param name=\"movie\" value=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" /><param name=\"allowFullScreen\" value=\"true\" /><embed allowfullscreen=\"true\" src=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" type=\"application/x-shockwave-flash\" height=\"344\" width=\"425\" /></object>?\n"
|
62
142
|
assert_renders_greencloth greencloth, html
|
63
143
|
end
|
64
144
|
end
|
data/undress.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "undress"
|
3
|
-
s.version = "0.1"
|
4
|
-
s.date = "2009-07-
|
3
|
+
s.version = "0.2"
|
4
|
+
s.date = "2009-07-29"
|
5
5
|
|
6
6
|
s.description = "Simply translate HTML to Textile, Markdown, or whatever other markup format you need"
|
7
7
|
s.summary = "Convert HTML into other markup languages"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zevarito-undress
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "0.1"
|
4
|
+
version: "0.2"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- "Nicol\xC3\xA1s Sanguinetti"
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-07-
|
12
|
+
date: 2009-07-29 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -77,6 +77,7 @@ files:
|
|
77
77
|
- test/test_greencloth.rb
|
78
78
|
has_rdoc: true
|
79
79
|
homepage: http://undress.rubyforge.org
|
80
|
+
licenses:
|
80
81
|
post_install_message:
|
81
82
|
rdoc_options: []
|
82
83
|
|
@@ -97,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
97
98
|
requirements: []
|
98
99
|
|
99
100
|
rubyforge_project: undress
|
100
|
-
rubygems_version: 1.
|
101
|
+
rubygems_version: 1.3.5
|
101
102
|
signing_key:
|
102
103
|
specification_version: 2
|
103
104
|
summary: Convert HTML into other markup languages
|