zevarito-undress 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
data/lib/undress.rb CHANGED
@@ -1,4 +1,4 @@
1
- require "hpricot"
1
+ require File.expand_path(File.dirname(__FILE__) + "/hpricot_ext")
2
2
  require File.expand_path(File.dirname(__FILE__) + "/core_ext/object")
3
3
  require File.expand_path(File.dirname(__FILE__) + "/undress/grammar")
4
4
 
@@ -12,9 +12,7 @@ end
12
12
 
13
13
  module Undress
14
14
 
15
- # if this array is empty we allow all tags
16
- # if the processed node name not exist in this array we drop it
17
- ALLOWED_TAGS = []
15
+ INLINE_ELEMENTS = ['span', 'b', 'strong', 'i', 'em', 'ins', 'del','strike', 'abbr', 'acronym', 'cite', 'code', 'label', 'sub', 'sup']
18
16
 
19
17
  # Register a markup language. The name will become the method used to convert
20
18
  # HTML to this markup language: for example registering the name +:textile+
@@ -58,7 +56,8 @@ module Undress
58
56
  if e.elem? && e.inner_html != "" && e.name !~ (/pre|code/) && e.children.size == 0
59
57
  e.inner_html = e.inner_html.gsub(/\n|\t/,"").gsub(/\s+/," ")
60
58
  elsif e.text? && e.parent.name !~ /pre|code/
61
- e.content = e.content.gsub(/\n|\t/,"").gsub(/\s+/," ").gsub(/^\s$/, "")
59
+ e.content = e.content.gsub(/\n|\t/,"").gsub(/\s+/," ")
60
+ e.content = e.content.gsub(/^\s+$/, "") if e.next_node && ! INLINE_ELEMENTS.include?(e.next_node.name)
62
61
  end
63
62
  end
64
63
  end
@@ -67,12 +66,29 @@ module Undress
67
66
  # such as those used on wysiwyg editors, we remove that after convert to not
68
67
  # use them on the final conversion.
69
68
  def fixup_span_with_styles(e)
70
- return if !e.has_attribute?("style")
69
+ return if !e.has_attribute?("style")
71
70
 
72
- if e["style"] =~ /italic/ then e.inner_html = "<em>#{e.inner_html}</em>" end
73
- if e["style"] =~ /underline/ then e.inner_html = "<ins>#{e.inner_html}</ins>" end
74
- if e["style"] =~ /line-through/ then e.inner_html = "<del>#{e.inner_html}</del>" end
75
- if e["style"] =~ /bold/ then e.inner_html = "<strong>#{e.inner_html}</strong>" end
71
+ if e.get_style("font-style") == "italic"
72
+ e.inner_html = "<em>#{e.inner_html}</em>"
73
+ e.del_style("font-style")
74
+ end
75
+
76
+ if e.get_style("text-decoration") == "underline"
77
+ e.inner_html = "<ins>#{e.inner_html}</ins>"
78
+ e.del_style("text-decoration")
79
+ end
80
+
81
+ if e.get_style("text-decoration") == "line-through"
82
+ e.inner_html = "<del>#{e.inner_html}</del>"
83
+ e.del_style("text-decoration")
84
+ end
85
+
86
+ if e.get_style("font-weight") == "bold"
87
+ e.inner_html = "<strong>#{e.inner_html}</strong>"
88
+ e.del_style("font-weight")
89
+ end
90
+
91
+ e.swap e.inner_html if e.styles.empty? && e.name == "span"
76
92
  end
77
93
 
78
94
  # Fixup a badly nested list such as <ul> sibling to <li> instead inside of <li>.
@@ -90,17 +106,4 @@ module Undress
90
106
  end
91
107
  end
92
108
  end
93
-
94
- module ::Hpricot #:nodoc:
95
- class Elem #:nodoc:
96
- def ancestors
97
- node, ancestors = parent, Elements[]
98
- while node.respond_to?(:parent) && node.parent
99
- ancestors << node
100
- node = node.parent
101
- end
102
- ancestors
103
- end
104
- end
105
- end
106
109
  end
@@ -89,7 +89,7 @@ module Undress
89
89
  if node.text?
90
90
  node.to_html
91
91
  elsif node.elem?
92
- send node.name.to_sym, node if ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)
92
+ send node.name.to_sym, node if ! defined?(ALLOWED_TAGS) || ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)
93
93
  else
94
94
  ""
95
95
  end
@@ -116,8 +116,27 @@ module Undress
116
116
  # Helper method that tells you if the given DOM node is immediately
117
117
  # surrounded by whitespace.
118
118
  def surrounded_by_whitespace?(node)
119
- (node.previous.text? && node.previous.to_s =~ /\s+$/) ||
120
- (node.next.text? && node.next.to_s =~ /^\s+/)
119
+ (node.previous && node.previous.text? && node.previous.to_s =~ /\s+$/) ||
120
+ (node.next && node.next.text? && node.next.to_s =~ /^\s+/)
121
+ end
122
+
123
+ # Helper to determine if a node contains a whole word
124
+ # useful, for example, to convert a single italic letter inside a word
125
+ def complete_word?(node)
126
+ return true if ! node.previous_node || ! node.next_node
127
+
128
+ p, n = node.previous_node, node.next_node
129
+
130
+ if p.respond_to?(:content)
131
+ return false if p.content !~ /\s$/
132
+ elsif p.respond_to?(:inner_html)
133
+ return false if p.inner_html !~ /\s$/
134
+ elsif n.respond_to?(:content)
135
+ return false if n.content !~ /^\s/
136
+ elsif n.respond_to?(:inner_html)
137
+ return false if n.content !~ /^\s/
138
+ end
139
+ true
121
140
  end
122
141
 
123
142
  def method_missing(tag, node, *args) #:nodoc:
@@ -5,9 +5,9 @@ module Undress
5
5
 
6
6
  Undress::ALLOWED_TAGS = [
7
7
  'div', 'a', 'img', 'br', 'i', 'u', 'b', 'pre', 'kbd', 'code', 'cite', 'strong', 'em',
8
- 'ins', 'sup', 'sub', 'del', 'table', 'tr', 'td', 'th', 'ol', 'ul', 'li', 'p', 'span',
9
- 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'notextile', 'blockquote', 'object', 'embed',
10
- 'param', 'acronym', 'dd', 'dl', 'dt'
8
+ 'ins', 'sup', 'sub', 'del', 'table', 'tbody', 'thead', 'tr', 'td', 'th', 'ol', 'ul',
9
+ 'li', 'p', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'notextile', 'blockquote',
10
+ 'object', 'embed', 'param', 'acronym', 'dd', 'dl', 'dt'
11
11
  ]
12
12
 
13
13
  # table of contents
@@ -2,6 +2,8 @@ require File.expand_path(File.dirname(__FILE__) + "/../undress")
2
2
 
3
3
  module Undress
4
4
  class Textile < Grammar
5
+ # entities
6
+ post_processing(/&nbsp;/, " ")
5
7
 
6
8
  # whitespace handling
7
9
  post_processing(/\n\n+/, "\n\n")
@@ -28,14 +30,15 @@ module Undress
28
30
  alt = e.has_attribute?("alt") ? "(#{e["alt"]})" : ""
29
31
  "!#{e["src"]}#{alt}!"
30
32
  }
31
- rule_for(:strong) {|e| "*#{content_of(e)}*" }
32
- rule_for(:em) {|e| "_#{content_of(e)}_" }
33
+
34
+ rule_for(:strong) {|e| complete_word?(e) ? "*#{content_of(e)}*" : "[*#{content_of(e)}*]"}
35
+ rule_for(:em) {|e| complete_word?(e) ? "_#{content_of(e)}_" : "[_#{content_of(e)}_]"}
33
36
  rule_for(:code) {|e| "@#{content_of(e)}@" }
34
37
  rule_for(:cite) {|e| "??#{content_of(e)}??" }
35
38
  rule_for(:sup) {|e| surrounded_by_whitespace?(e) ? "^#{content_of(e)}^" : "[^#{content_of(e)}^]" }
36
39
  rule_for(:sub) {|e| surrounded_by_whitespace?(e) ? "~#{content_of(e)}~" : "[~#{content_of(e)}~]" }
37
- rule_for(:ins) {|e| "+#{content_of(e)}+" }
38
- rule_for(:del) {|e| "-#{content_of(e)}-" }
40
+ rule_for(:ins) {|e| complete_word?(e) ? "+#{content_of(e)}+" : "[+#{content_of(e)}+]"}
41
+ rule_for(:del) {|e| complete_word?(e) ? "-#{content_of(e)}-" : "[-#{content_of(e)}-]"}
39
42
  rule_for(:acronym) {|e| e.has_attribute?("title") ? "#{content_of(e)}(#{e["title"]})" : content_of(e) }
40
43
 
41
44
  # text formatting and layout
@@ -21,9 +21,15 @@ class Undress::GreenClothTest < Test::Unit::TestCase
21
21
  # this is ok to ensure invalid html -> to greencloth but xhtmlize! must have
22
22
  # tests on test_undress or something too
23
23
  context "parsing not valid xhtml documents" do
24
+ test "space between 2 spans with styles" do
25
+ html = "<p><span style='font-weight: bold;'>bold</span> <span style='font-style: italic;'>italic</span></p>"
26
+ greencloth = "*bold* _italic_\n"
27
+ assert_renders_greencloth greencloth, html
28
+ end
29
+
24
30
  test "a <span> bold, italic, underline, line-through at the same time" do
25
- html = "<p>some text <span style='font-weight:bold; font-style:italic; text-decoration:underline; text-decoration:line-through'>bold</span> with style</p>"
26
- greencloth = "some text *-+_bold_+-* with style\n"
31
+ html = "<p>some text <span style='font-weight:bold; font-style:italic; text-decoration:underline;'>bold</span> with style</p>"
32
+ greencloth = "some text *+_bold_+* with style\n"
27
33
  assert_renders_greencloth greencloth, html
28
34
  end
29
35
 
@@ -37,7 +43,7 @@ class Undress::GreenClothTest < Test::Unit::TestCase
37
43
  end
38
44
 
39
45
  test "style 'line-through' should be converted to <del> in <span> elements" do
40
- html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration-: line-through;'>paragraph</span></p>"
46
+ html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration: line-through;'>paragraph</span></p>"
41
47
  greencloth = "with -some- in the -paragraph-\n"
42
48
  assert_renders_greencloth greencloth, html
43
49
  html = "<p style='text-decoration: line-through;'>with some in the paragraph</p>"
data/test/test_textile.rb CHANGED
@@ -11,6 +11,54 @@ module Undress
11
11
  assert_renders_textile "h2. _this is *very* important_\n", "<h2><em>this is <strong>very</strong> important</em></h2>"
12
12
  end
13
13
 
14
+ context "some troubles" do
15
+ test "with sup" do
16
+ html = "<p>e = mc<sup>2</sup></p>"
17
+ textile = "e = mc[^2^]\n"
18
+ assert_renders_textile textile, html
19
+ end
20
+ end
21
+
22
+ context "convert enetities" do
23
+ test "&nbsp;" do
24
+ textile = "some word\n"
25
+ html = "<p>some&nbsp;word</p>"
26
+ assert_renders_textile textile, html
27
+ end
28
+ end
29
+
30
+ context "convert parts of a word" do
31
+ test "some" do
32
+ textile = "s[*o*]me\n"
33
+ html = "<p>s<span style='font-weight:bold;'>o</span>me</p>"
34
+ assert_renders_textile textile, html
35
+ end
36
+
37
+ test "italics" do
38
+ textile = "a perfect wo[_r_]ld\n"
39
+ html = "<p>a perfect wo<em>r</em>ld</p>"
40
+ assert_renders_textile textile, html
41
+ end
42
+
43
+ test "bolds" do
44
+ textile = "a perfect wo[*r*]ld\n"
45
+ html = "<p>a perfect wo<strong>r</strong>ld</p>"
46
+ assert_renders_textile textile, html
47
+ end
48
+
49
+ test "underlines" do
50
+ textile = "a perfect wo[+r+]ld\n"
51
+ html = "<p>a perfect wo<ins>r</ins>ld</p>"
52
+ assert_renders_textile textile, html
53
+ end
54
+
55
+ test "line through" do
56
+ textile = "a perfect wo[-r-]ld\n"
57
+ html = "<p>a perfect wo<del>r</del>ld</p>"
58
+ assert_renders_textile textile, html
59
+ end
60
+ end
61
+
14
62
  context "inline elements" do
15
63
  test "converts <strong> tags" do
16
64
  assert_renders_textile "*foo bar*", "<strong>foo bar</strong>"
data/undress.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "undress"
3
- s.version = "0.2.2"
3
+ s.version = "0.2.3"
4
4
  s.date = "2009-07-29"
5
5
 
6
6
  s.description = "Simply translate HTML to Textile, Markdown, or whatever other markup format you need"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zevarito-undress
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Nicol\xC3\xA1s Sanguinetti"