zevarito-undress 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/undress.rb +26 -23
- data/lib/undress/grammar.rb +22 -3
- data/lib/undress/greencloth.rb +3 -3
- data/lib/undress/textile.rb +7 -4
- data/test/test_greencloth.rb +9 -3
- data/test/test_textile.rb +48 -0
- data/undress.gemspec +1 -1
- metadata +1 -1
data/lib/undress.rb
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
require "
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/hpricot_ext")
|
|
2
2
|
require File.expand_path(File.dirname(__FILE__) + "/core_ext/object")
|
|
3
3
|
require File.expand_path(File.dirname(__FILE__) + "/undress/grammar")
|
|
4
4
|
|
|
@@ -12,9 +12,7 @@ end
|
|
|
12
12
|
|
|
13
13
|
module Undress
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
# if the processed node name not exist in this array we drop it
|
|
17
|
-
ALLOWED_TAGS = []
|
|
15
|
+
# Tag names checked (via include?) when deciding whether a whitespace-only
# text node that precedes an element may be blanked out. Frozen so the shared
# list cannot be mutated by accident at runtime.
INLINE_ELEMENTS = ['span', 'b', 'strong', 'i', 'em', 'ins', 'del','strike', 'abbr', 'acronym', 'cite', 'code', 'label', 'sub', 'sup'].freeze
|
|
18
16
|
|
|
19
17
|
# Register a markup language. The name will become the method used to convert
|
|
20
18
|
# HTML to this markup language: for example registering the name +:textile+
|
|
@@ -58,7 +56,8 @@ module Undress
|
|
|
58
56
|
if e.elem? && e.inner_html != "" && e.name !~ (/pre|code/) && e.children.size == 0
|
|
59
57
|
e.inner_html = e.inner_html.gsub(/\n|\t/,"").gsub(/\s+/," ")
|
|
60
58
|
elsif e.text? && e.parent.name !~ /pre|code/
|
|
61
|
-
e.content = e.content.gsub(/\n|\t/,"").gsub(/\s+/," ")
|
|
59
|
+
e.content = e.content.gsub(/\n|\t/,"").gsub(/\s+/," ")
|
|
60
|
+
e.content = e.content.gsub(/^\s+$/, "") if e.next_node && ! INLINE_ELEMENTS.include?(e.next_node.name)
|
|
62
61
|
end
|
|
63
62
|
end
|
|
64
63
|
end
|
|
@@ -67,12 +66,29 @@ module Undress
|
|
|
67
66
|
# such as those used on wysiwyg editors, we remove that after convert to not
|
|
68
67
|
# use them on the final conversion.
|
|
69
68
|
def fixup_span_with_styles(e)
|
|
70
|
-
|
|
69
|
+
return if !e.has_attribute?("style")
|
|
71
70
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
71
|
+
if e.get_style("font-style") == "italic"
|
|
72
|
+
e.inner_html = "<em>#{e.inner_html}</em>"
|
|
73
|
+
e.del_style("font-style")
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
if e.get_style("text-decoration") == "underline"
|
|
77
|
+
e.inner_html = "<ins>#{e.inner_html}</ins>"
|
|
78
|
+
e.del_style("text-decoration")
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
if e.get_style("text-decoration") == "line-through"
|
|
82
|
+
e.inner_html = "<del>#{e.inner_html}</del>"
|
|
83
|
+
e.del_style("text-decoration")
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
if e.get_style("font-weight") == "bold"
|
|
87
|
+
e.inner_html = "<strong>#{e.inner_html}</strong>"
|
|
88
|
+
e.del_style("font-weight")
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
e.swap e.inner_html if e.styles.empty? && e.name == "span"
|
|
76
92
|
end
|
|
77
93
|
|
|
78
94
|
# Fixup a badly nested list, such as a <ul> placed as a sibling of an <li> instead of inside it.
|
|
@@ -90,17 +106,4 @@ module Undress
|
|
|
90
106
|
end
|
|
91
107
|
end
|
|
92
108
|
end
|
|
93
|
-
|
|
94
|
-
module ::Hpricot #:nodoc:
|
|
95
|
-
class Elem #:nodoc:
|
|
96
|
-
def ancestors
|
|
97
|
-
node, ancestors = parent, Elements[]
|
|
98
|
-
while node.respond_to?(:parent) && node.parent
|
|
99
|
-
ancestors << node
|
|
100
|
-
node = node.parent
|
|
101
|
-
end
|
|
102
|
-
ancestors
|
|
103
|
-
end
|
|
104
|
-
end
|
|
105
|
-
end
|
|
106
109
|
end
|
data/lib/undress/grammar.rb
CHANGED
|
@@ -89,7 +89,7 @@ module Undress
|
|
|
89
89
|
if node.text?
|
|
90
90
|
node.to_html
|
|
91
91
|
elsif node.elem?
|
|
92
|
-
send node.name.to_sym, node if ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)
|
|
92
|
+
send node.name.to_sym, node if ! defined?(ALLOWED_TAGS) || ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)
|
|
93
93
|
else
|
|
94
94
|
""
|
|
95
95
|
end
|
|
@@ -116,8 +116,27 @@ module Undress
|
|
|
116
116
|
# Helper method that tells you if the given DOM node is immediately
|
|
117
117
|
# surrounded by whitespace.
|
|
118
118
|
def surrounded_by_whitespace?(node)
|
|
119
|
-
(node.previous.text? && node.previous.to_s =~ /\s+$/) ||
|
|
120
|
-
(node.next.text? && node.next.to_s =~ /^\s+/)
|
|
119
|
+
(node.previous && node.previous.text? && node.previous.to_s =~ /\s+$/) ||
|
|
120
|
+
(node.next && node.next.text? && node.next.to_s =~ /^\s+/)
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
# Helper that tells whether the given DOM node stands as a whole word, i.e.
# whether the text right before it ends with whitespace and the text right
# after it starts with whitespace. Useful to decide how to convert, for
# example, a single italic letter inside a word.
#
# node - a DOM node responding to +previous_node+ and +next_node+.
#
# Returns true when either sibling is missing or when both siblings are
# separated from the node by whitespace; false otherwise.
def complete_word?(node)
  before, after = node.previous_node, node.next_node

  # A node without a sibling on one side is treated as a complete word.
  return true if !before || !after

  # Each side is checked on its own: the node is only a whole word when it
  # is separated by whitespace on BOTH sides.
  if before.respond_to?(:content)
    return false if before.content !~ /\s$/
  elsif before.respond_to?(:inner_html)
    return false if before.inner_html !~ /\s$/
  end

  if after.respond_to?(:content)
    return false if after.content !~ /^\s/
  elsif after.respond_to?(:inner_html)
    # Read the same accessor that was just tested for.
    return false if after.inner_html !~ /^\s/
  end

  true
end
|
|
122
141
|
|
|
123
142
|
def method_missing(tag, node, *args) #:nodoc:
|
data/lib/undress/greencloth.rb
CHANGED
|
@@ -5,9 +5,9 @@ module Undress
|
|
|
5
5
|
|
|
6
6
|
Undress::ALLOWED_TAGS = [
|
|
7
7
|
'div', 'a', 'img', 'br', 'i', 'u', 'b', 'pre', 'kbd', 'code', 'cite', 'strong', 'em',
|
|
8
|
-
'ins', 'sup', 'sub', 'del', 'table', '
|
|
9
|
-
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'notextile', 'blockquote',
|
|
10
|
-
'param', 'acronym', 'dd', 'dl', 'dt'
|
|
8
|
+
'ins', 'sup', 'sub', 'del', 'table', 'tbody', 'thead', 'tr', 'td', 'th', 'ol', 'ul',
|
|
9
|
+
'li', 'p', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'notextile', 'blockquote',
|
|
10
|
+
'object', 'embed', 'param', 'acronym', 'dd', 'dl', 'dt'
|
|
11
11
|
]
|
|
12
12
|
|
|
13
13
|
# table of contents
|
data/lib/undress/textile.rb
CHANGED
|
@@ -2,6 +2,8 @@ require File.expand_path(File.dirname(__FILE__) + "/../undress")
|
|
|
2
2
|
|
|
3
3
|
module Undress
|
|
4
4
|
class Textile < Grammar
|
|
5
|
+
# entities: turn non-breaking-space entities into plain spaces.
# NOTE(review): the rendered diff showed a bare space inside the pattern,
# which would make this rule a no-op; the entity appears to have been decoded
# by the diff viewer -- confirm against the released gem.
post_processing(/&nbsp;/, " ")
|
|
5
7
|
|
|
6
8
|
# whitespace handling
|
|
7
9
|
post_processing(/\n\n+/, "\n\n")
|
|
@@ -28,14 +30,15 @@ module Undress
|
|
|
28
30
|
alt = e.has_attribute?("alt") ? "(#{e["alt"]})" : ""
|
|
29
31
|
"!#{e["src"]}#{alt}!"
|
|
30
32
|
}
|
|
31
|
-
|
|
32
|
-
rule_for(:
|
|
33
|
+
|
|
34
|
+
rule_for(:strong) {|e| complete_word?(e) ? "*#{content_of(e)}*" : "[*#{content_of(e)}*]"}
|
|
35
|
+
rule_for(:em) {|e| complete_word?(e) ? "_#{content_of(e)}_" : "[_#{content_of(e)}_]"}
|
|
33
36
|
rule_for(:code) {|e| "@#{content_of(e)}@" }
|
|
34
37
|
rule_for(:cite) {|e| "??#{content_of(e)}??" }
|
|
35
38
|
rule_for(:sup) {|e| surrounded_by_whitespace?(e) ? "^#{content_of(e)}^" : "[^#{content_of(e)}^]" }
|
|
36
39
|
rule_for(:sub) {|e| surrounded_by_whitespace?(e) ? "~#{content_of(e)}~" : "[~#{content_of(e)}~]" }
|
|
37
|
-
rule_for(:ins) {|e| "+#{content_of(e)}+" }
|
|
38
|
-
rule_for(:del) {|e| "-#{content_of(e)}-" }
|
|
40
|
+
rule_for(:ins) {|e| complete_word?(e) ? "+#{content_of(e)}+" : "[+#{content_of(e)}+]"}
|
|
41
|
+
rule_for(:del) {|e| complete_word?(e) ? "-#{content_of(e)}-" : "[-#{content_of(e)}-]"}
|
|
39
42
|
rule_for(:acronym) {|e| e.has_attribute?("title") ? "#{content_of(e)}(#{e["title"]})" : content_of(e) }
|
|
40
43
|
|
|
41
44
|
# text formatting and layout
|
data/test/test_greencloth.rb
CHANGED
|
@@ -21,9 +21,15 @@ class Undress::GreenClothTest < Test::Unit::TestCase
|
|
|
21
21
|
# this is ok to ensure invalid html -> to greencloth but xhtmlize! must have
|
|
22
22
|
# tests on test_undress or something too
|
|
23
23
|
context "parsing not valid xhtml documents" do
|
|
24
|
+
test "space between 2 spans with styles" do
|
|
25
|
+
html = "<p><span style='font-weight: bold;'>bold</span> <span style='font-style: italic;'>italic</span></p>"
|
|
26
|
+
greencloth = "*bold* _italic_\n"
|
|
27
|
+
assert_renders_greencloth greencloth, html
|
|
28
|
+
end
|
|
29
|
+
|
|
24
30
|
test "a <span> bold, italic, underline, line-through at the same time" do
|
|
25
|
-
html = "<p>some text <span style='font-weight:bold; font-style:italic; text-decoration:underline;
|
|
26
|
-
greencloth = "some text
|
|
31
|
+
html = "<p>some text <span style='font-weight:bold; font-style:italic; text-decoration:underline;'>bold</span> with style</p>"
|
|
32
|
+
greencloth = "some text *+_bold_+* with style\n"
|
|
27
33
|
assert_renders_greencloth greencloth, html
|
|
28
34
|
end
|
|
29
35
|
|
|
@@ -37,7 +43,7 @@ class Undress::GreenClothTest < Test::Unit::TestCase
|
|
|
37
43
|
end
|
|
38
44
|
|
|
39
45
|
test "style 'line-through' should be converted to <del> in <span> elements" do
|
|
40
|
-
html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration
|
|
46
|
+
html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration: line-through;'>paragraph</span></p>"
|
|
41
47
|
greencloth = "with -some- in the -paragraph-\n"
|
|
42
48
|
assert_renders_greencloth greencloth, html
|
|
43
49
|
html = "<p style='text-decoration: line-through;'>with some in the paragraph</p>"
|
data/test/test_textile.rb
CHANGED
|
@@ -11,6 +11,54 @@ module Undress
|
|
|
11
11
|
assert_renders_textile "h2. _this is *very* important_\n", "<h2><em>this is <strong>very</strong> important</em></h2>"
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
+
context "some troubles" do
|
|
15
|
+
test "with sup" do
|
|
16
|
+
html = "<p>e = mc<sup>2</sup></p>"
|
|
17
|
+
textile = "e = mc[^2^]\n"
|
|
18
|
+
assert_renders_textile textile, html
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
context "convert enetities" do
|
|
23
|
+
test " " do
|
|
24
|
+
textile = "some word\n"
|
|
25
|
+
html = "<p>some word</p>"
|
|
26
|
+
assert_renders_textile textile, html
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
context "convert parts of a word" do
|
|
31
|
+
test "some" do
|
|
32
|
+
textile = "s[*o*]me\n"
|
|
33
|
+
html = "<p>s<span style='font-weight:bold;'>o</span>me</p>"
|
|
34
|
+
assert_renders_textile textile, html
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
test "italics" do
|
|
38
|
+
textile = "a perfect wo[_r_]ld\n"
|
|
39
|
+
html = "<p>a perfect wo<em>r</em>ld</p>"
|
|
40
|
+
assert_renders_textile textile, html
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
test "bolds" do
|
|
44
|
+
textile = "a perfect wo[*r*]ld\n"
|
|
45
|
+
html = "<p>a perfect wo<strong>r</strong>ld</p>"
|
|
46
|
+
assert_renders_textile textile, html
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
test "underlines" do
|
|
50
|
+
textile = "a perfect wo[+r+]ld\n"
|
|
51
|
+
html = "<p>a perfect wo<ins>r</ins>ld</p>"
|
|
52
|
+
assert_renders_textile textile, html
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
test "line through" do
|
|
56
|
+
textile = "a perfect wo[-r-]ld\n"
|
|
57
|
+
html = "<p>a perfect wo<del>r</del>ld</p>"
|
|
58
|
+
assert_renders_textile textile, html
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
14
62
|
context "inline elements" do
|
|
15
63
|
test "converts <strong> tags" do
|
|
16
64
|
assert_renders_textile "*foo bar*", "<strong>foo bar</strong>"
|
data/undress.gemspec
CHANGED