zevarito-undress 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/undress.rb +26 -23
- data/lib/undress/grammar.rb +22 -3
- data/lib/undress/greencloth.rb +3 -3
- data/lib/undress/textile.rb +7 -4
- data/test/test_greencloth.rb +9 -3
- data/test/test_textile.rb +48 -0
- data/undress.gemspec +1 -1
- metadata +1 -1
data/lib/undress.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require "
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/hpricot_ext")
|
2
2
|
require File.expand_path(File.dirname(__FILE__) + "/core_ext/object")
|
3
3
|
require File.expand_path(File.dirname(__FILE__) + "/undress/grammar")
|
4
4
|
|
@@ -12,9 +12,7 @@ end
|
|
12
12
|
|
13
13
|
module Undress
|
14
14
|
|
15
|
-
|
16
|
-
# if the processed node's name does not exist in this array, we drop it
|
17
|
-
ALLOWED_TAGS = []
|
15
|
+
INLINE_ELEMENTS = ['span', 'b', 'strong', 'i', 'em', 'ins', 'del','strike', 'abbr', 'acronym', 'cite', 'code', 'label', 'sub', 'sup']
|
18
16
|
|
19
17
|
# Register a markup language. The name will become the method used to convert
|
20
18
|
# HTML to this markup language: for example registering the name +:textile+
|
@@ -58,7 +56,8 @@ module Undress
|
|
58
56
|
if e.elem? && e.inner_html != "" && e.name !~ (/pre|code/) && e.children.size == 0
|
59
57
|
e.inner_html = e.inner_html.gsub(/\n|\t/,"").gsub(/\s+/," ")
|
60
58
|
elsif e.text? && e.parent.name !~ /pre|code/
|
61
|
-
e.content = e.content.gsub(/\n|\t/,"").gsub(/\s+/," ")
|
59
|
+
e.content = e.content.gsub(/\n|\t/,"").gsub(/\s+/," ")
|
60
|
+
e.content = e.content.gsub(/^\s+$/, "") if e.next_node && ! INLINE_ELEMENTS.include?(e.next_node.name)
|
62
61
|
end
|
63
62
|
end
|
64
63
|
end
|
@@ -67,12 +66,29 @@ module Undress
|
|
67
66
|
# such as those used by WYSIWYG editors; we remove them after converting so as
|
68
67
|
# not to use them in the final conversion.
|
69
68
|
def fixup_span_with_styles(e)
|
70
|
-
|
69
|
+
return if !e.has_attribute?("style")
|
71
70
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
71
|
+
if e.get_style("font-style") == "italic"
|
72
|
+
e.inner_html = "<em>#{e.inner_html}</em>"
|
73
|
+
e.del_style("font-style")
|
74
|
+
end
|
75
|
+
|
76
|
+
if e.get_style("text-decoration") == "underline"
|
77
|
+
e.inner_html = "<ins>#{e.inner_html}</ins>"
|
78
|
+
e.del_style("text-decoration")
|
79
|
+
end
|
80
|
+
|
81
|
+
if e.get_style("text-decoration") == "line-through"
|
82
|
+
e.inner_html = "<del>#{e.inner_html}</del>"
|
83
|
+
e.del_style("text-decoration")
|
84
|
+
end
|
85
|
+
|
86
|
+
if e.get_style("font-weight") == "bold"
|
87
|
+
e.inner_html = "<strong>#{e.inner_html}</strong>"
|
88
|
+
e.del_style("font-weight")
|
89
|
+
end
|
90
|
+
|
91
|
+
e.swap e.inner_html if e.styles.empty? && e.name == "span"
|
76
92
|
end
|
77
93
|
|
78
94
|
# Fix up a badly nested list, such as a <ul> that is a sibling of an <li> instead of being inside the <li>.
|
@@ -90,17 +106,4 @@ module Undress
|
|
90
106
|
end
|
91
107
|
end
|
92
108
|
end
|
93
|
-
|
94
|
-
module ::Hpricot #:nodoc:
|
95
|
-
class Elem #:nodoc:
|
96
|
-
def ancestors
|
97
|
-
node, ancestors = parent, Elements[]
|
98
|
-
while node.respond_to?(:parent) && node.parent
|
99
|
-
ancestors << node
|
100
|
-
node = node.parent
|
101
|
-
end
|
102
|
-
ancestors
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
109
|
end
|
data/lib/undress/grammar.rb
CHANGED
@@ -89,7 +89,7 @@ module Undress
|
|
89
89
|
if node.text?
|
90
90
|
node.to_html
|
91
91
|
elsif node.elem?
|
92
|
-
send node.name.to_sym, node if ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)
|
92
|
+
send node.name.to_sym, node if ! defined?(ALLOWED_TAGS) || ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)
|
93
93
|
else
|
94
94
|
""
|
95
95
|
end
|
@@ -116,8 +116,27 @@ module Undress
|
|
116
116
|
# Helper method that tells you if the given DOM node is immediately
|
117
117
|
# surrounded by whitespace.
|
118
118
|
def surrounded_by_whitespace?(node)
|
119
|
-
(node.previous.text? && node.previous.to_s =~ /\s+$/) ||
|
120
|
-
(node.next.text? && node.next.to_s =~ /^\s+/)
|
119
|
+
(node.previous && node.previous.text? && node.previous.to_s =~ /\s+$/) ||
|
120
|
+
(node.next && node.next.text? && node.next.to_s =~ /^\s+/)
|
121
|
+
end
|
122
|
+
|
123
|
+
# Helper to determine if a node contains a whole word
|
124
|
+
# useful, for example, to convert a single italic letter inside a word
|
125
|
+
def complete_word?(node)
|
126
|
+
return true if ! node.previous_node || ! node.next_node
|
127
|
+
|
128
|
+
p, n = node.previous_node, node.next_node
|
129
|
+
|
130
|
+
if p.respond_to?(:content)
|
131
|
+
return false if p.content !~ /\s$/
|
132
|
+
elsif p.respond_to?(:inner_html)
|
133
|
+
return false if p.inner_html !~ /\s$/
|
134
|
+
elsif n.respond_to?(:content)
|
135
|
+
return false if n.content !~ /^\s/
|
136
|
+
elsif n.respond_to?(:inner_html)
|
137
|
+
return false if n.content !~ /^\s/
|
138
|
+
end
|
139
|
+
true
|
121
140
|
end
|
122
141
|
|
123
142
|
def method_missing(tag, node, *args) #:nodoc:
|
data/lib/undress/greencloth.rb
CHANGED
@@ -5,9 +5,9 @@ module Undress
|
|
5
5
|
|
6
6
|
Undress::ALLOWED_TAGS = [
|
7
7
|
'div', 'a', 'img', 'br', 'i', 'u', 'b', 'pre', 'kbd', 'code', 'cite', 'strong', 'em',
|
8
|
-
'ins', 'sup', 'sub', 'del', 'table', '
|
9
|
-
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'notextile', 'blockquote',
|
10
|
-
'param', 'acronym', 'dd', 'dl', 'dt'
|
8
|
+
'ins', 'sup', 'sub', 'del', 'table', 'tbody', 'thead', 'tr', 'td', 'th', 'ol', 'ul',
|
9
|
+
'li', 'p', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'notextile', 'blockquote',
|
10
|
+
'object', 'embed', 'param', 'acronym', 'dd', 'dl', 'dt'
|
11
11
|
]
|
12
12
|
|
13
13
|
# table of contents
|
data/lib/undress/textile.rb
CHANGED
@@ -2,6 +2,8 @@ require File.expand_path(File.dirname(__FILE__) + "/../undress")
|
|
2
2
|
|
3
3
|
module Undress
|
4
4
|
class Textile < Grammar
|
5
|
+
# entities
|
6
|
+
post_processing(/&nbsp;/, " ")
|
5
7
|
|
6
8
|
# whitespace handling
|
7
9
|
post_processing(/\n\n+/, "\n\n")
|
@@ -28,14 +30,15 @@ module Undress
|
|
28
30
|
alt = e.has_attribute?("alt") ? "(#{e["alt"]})" : ""
|
29
31
|
"!#{e["src"]}#{alt}!"
|
30
32
|
}
|
31
|
-
|
32
|
-
rule_for(:
|
33
|
+
|
34
|
+
rule_for(:strong) {|e| complete_word?(e) ? "*#{content_of(e)}*" : "[*#{content_of(e)}*]"}
|
35
|
+
rule_for(:em) {|e| complete_word?(e) ? "_#{content_of(e)}_" : "[_#{content_of(e)}_]"}
|
33
36
|
rule_for(:code) {|e| "@#{content_of(e)}@" }
|
34
37
|
rule_for(:cite) {|e| "??#{content_of(e)}??" }
|
35
38
|
rule_for(:sup) {|e| surrounded_by_whitespace?(e) ? "^#{content_of(e)}^" : "[^#{content_of(e)}^]" }
|
36
39
|
rule_for(:sub) {|e| surrounded_by_whitespace?(e) ? "~#{content_of(e)}~" : "[~#{content_of(e)}~]" }
|
37
|
-
rule_for(:ins) {|e| "+#{content_of(e)}+" }
|
38
|
-
rule_for(:del) {|e| "-#{content_of(e)}-" }
|
40
|
+
rule_for(:ins) {|e| complete_word?(e) ? "+#{content_of(e)}+" : "[+#{content_of(e)}+]"}
|
41
|
+
rule_for(:del) {|e| complete_word?(e) ? "-#{content_of(e)}-" : "[-#{content_of(e)}-]"}
|
39
42
|
rule_for(:acronym) {|e| e.has_attribute?("title") ? "#{content_of(e)}(#{e["title"]})" : content_of(e) }
|
40
43
|
|
41
44
|
# text formatting and layout
|
data/test/test_greencloth.rb
CHANGED
@@ -21,9 +21,15 @@ class Undress::GreenClothTest < Test::Unit::TestCase
|
|
21
21
|
# this is ok to ensure invalid html -> to greencloth but xhtmlize! must have
|
22
22
|
# tests on test_undress or something too
|
23
23
|
context "parsing not valid xhtml documents" do
|
24
|
+
test "space between 2 spans with styles" do
|
25
|
+
html = "<p><span style='font-weight: bold;'>bold</span> <span style='font-style: italic;'>italic</span></p>"
|
26
|
+
greencloth = "*bold* _italic_\n"
|
27
|
+
assert_renders_greencloth greencloth, html
|
28
|
+
end
|
29
|
+
|
24
30
|
test "a <span> bold, italic, underline, line-through at the same time" do
|
25
|
-
html = "<p>some text <span style='font-weight:bold; font-style:italic; text-decoration:underline;
|
26
|
-
greencloth = "some text
|
31
|
+
html = "<p>some text <span style='font-weight:bold; font-style:italic; text-decoration:underline;'>bold</span> with style</p>"
|
32
|
+
greencloth = "some text *+_bold_+* with style\n"
|
27
33
|
assert_renders_greencloth greencloth, html
|
28
34
|
end
|
29
35
|
|
@@ -37,7 +43,7 @@ class Undress::GreenClothTest < Test::Unit::TestCase
|
|
37
43
|
end
|
38
44
|
|
39
45
|
test "style 'line-through' should be converted to <del> in <span> elements" do
|
40
|
-
html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration
|
46
|
+
html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration: line-through;'>paragraph</span></p>"
|
41
47
|
greencloth = "with -some- in the -paragraph-\n"
|
42
48
|
assert_renders_greencloth greencloth, html
|
43
49
|
html = "<p style='text-decoration: line-through;'>with some in the paragraph</p>"
|
data/test/test_textile.rb
CHANGED
@@ -11,6 +11,54 @@ module Undress
|
|
11
11
|
assert_renders_textile "h2. _this is *very* important_\n", "<h2><em>this is <strong>very</strong> important</em></h2>"
|
12
12
|
end
|
13
13
|
|
14
|
+
context "some troubles" do
|
15
|
+
test "with sup" do
|
16
|
+
html = "<p>e = mc<sup>2</sup></p>"
|
17
|
+
textile = "e = mc[^2^]\n"
|
18
|
+
assert_renders_textile textile, html
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
context "convert enetities" do
|
23
|
+
test "&nbsp;" do
|
24
|
+
textile = "some word\n"
|
25
|
+
html = "<p>some&nbsp;word</p>"
|
26
|
+
assert_renders_textile textile, html
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
context "convert parts of a word" do
|
31
|
+
test "some" do
|
32
|
+
textile = "s[*o*]me\n"
|
33
|
+
html = "<p>s<span style='font-weight:bold;'>o</span>me</p>"
|
34
|
+
assert_renders_textile textile, html
|
35
|
+
end
|
36
|
+
|
37
|
+
test "italics" do
|
38
|
+
textile = "a perfect wo[_r_]ld\n"
|
39
|
+
html = "<p>a perfect wo<em>r</em>ld</p>"
|
40
|
+
assert_renders_textile textile, html
|
41
|
+
end
|
42
|
+
|
43
|
+
test "bolds" do
|
44
|
+
textile = "a perfect wo[*r*]ld\n"
|
45
|
+
html = "<p>a perfect wo<strong>r</strong>ld</p>"
|
46
|
+
assert_renders_textile textile, html
|
47
|
+
end
|
48
|
+
|
49
|
+
test "underlines" do
|
50
|
+
textile = "a perfect wo[+r+]ld\n"
|
51
|
+
html = "<p>a perfect wo<ins>r</ins>ld</p>"
|
52
|
+
assert_renders_textile textile, html
|
53
|
+
end
|
54
|
+
|
55
|
+
test "line through" do
|
56
|
+
textile = "a perfect wo[-r-]ld\n"
|
57
|
+
html = "<p>a perfect wo<del>r</del>ld</p>"
|
58
|
+
assert_renders_textile textile, html
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
14
62
|
context "inline elements" do
|
15
63
|
test "converts <strong> tags" do
|
16
64
|
assert_renders_textile "*foo bar*", "<strong>foo bar</strong>"
|
data/undress.gemspec
CHANGED