zevarito-undress 0.2.2 → 0.2.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/lib/undress.rb CHANGED
@@ -1,4 +1,4 @@
1
- require "hpricot"
1
+ require File.expand_path(File.dirname(__FILE__) + "/hpricot_ext")
2
2
  require File.expand_path(File.dirname(__FILE__) + "/core_ext/object")
3
3
  require File.expand_path(File.dirname(__FILE__) + "/undress/grammar")
4
4
 
@@ -12,9 +12,7 @@ end
12
12
 
13
13
  module Undress
14
14
 
15
- # if this array is empty we allow all tags
16
- # if the processed node name not exist in this array we drop it
17
- ALLOWED_TAGS = []
15
+ INLINE_ELEMENTS = ['span', 'b', 'strong', 'i', 'em', 'ins', 'del','strike', 'abbr', 'acronym', 'cite', 'code', 'label', 'sub', 'sup']
18
16
 
19
17
  # Register a markup language. The name will become the method used to convert
20
18
  # HTML to this markup language: for example registering the name +:textile+
@@ -58,7 +56,8 @@ module Undress
58
56
  if e.elem? && e.inner_html != "" && e.name !~ (/pre|code/) && e.children.size == 0
59
57
  e.inner_html = e.inner_html.gsub(/\n|\t/,"").gsub(/\s+/," ")
60
58
  elsif e.text? && e.parent.name !~ /pre|code/
61
- e.content = e.content.gsub(/\n|\t/,"").gsub(/\s+/," ").gsub(/^\s$/, "")
59
+ e.content = e.content.gsub(/\n|\t/,"").gsub(/\s+/," ")
60
+ e.content = e.content.gsub(/^\s+$/, "") if e.next_node && ! INLINE_ELEMENTS.include?(e.next_node.name)
62
61
  end
63
62
  end
64
63
  end
@@ -67,12 +66,29 @@ module Undress
67
66
  # such as those used on wysiwyg editors, we remove that after convert to not
68
67
  # use them on the final conversion.
69
68
  def fixup_span_with_styles(e)
70
- return if !e.has_attribute?("style")
69
+ return if !e.has_attribute?("style")
71
70
 
72
- if e["style"] =~ /italic/ then e.inner_html = "<em>#{e.inner_html}</em>" end
73
- if e["style"] =~ /underline/ then e.inner_html = "<ins>#{e.inner_html}</ins>" end
74
- if e["style"] =~ /line-through/ then e.inner_html = "<del>#{e.inner_html}</del>" end
75
- if e["style"] =~ /bold/ then e.inner_html = "<strong>#{e.inner_html}</strong>" end
71
+ if e.get_style("font-style") == "italic"
72
+ e.inner_html = "<em>#{e.inner_html}</em>"
73
+ e.del_style("font-style")
74
+ end
75
+
76
+ if e.get_style("text-decoration") == "underline"
77
+ e.inner_html = "<ins>#{e.inner_html}</ins>"
78
+ e.del_style("text-decoration")
79
+ end
80
+
81
+ if e.get_style("text-decoration") == "line-through"
82
+ e.inner_html = "<del>#{e.inner_html}</del>"
83
+ e.del_style("text-decoration")
84
+ end
85
+
86
+ if e.get_style("font-weight") == "bold"
87
+ e.inner_html = "<strong>#{e.inner_html}</strong>"
88
+ e.del_style("font-weight")
89
+ end
90
+
91
+ e.swap e.inner_html if e.styles.empty? && e.name == "span"
76
92
  end
77
93
 
78
94
  # Fixup a badly nested list such as <ul> sibling to <li> instead inside of <li>.
@@ -90,17 +106,4 @@ module Undress
90
106
  end
91
107
  end
92
108
  end
93
-
94
- module ::Hpricot #:nodoc:
95
- class Elem #:nodoc:
96
- def ancestors
97
- node, ancestors = parent, Elements[]
98
- while node.respond_to?(:parent) && node.parent
99
- ancestors << node
100
- node = node.parent
101
- end
102
- ancestors
103
- end
104
- end
105
- end
106
109
  end
@@ -89,7 +89,7 @@ module Undress
89
89
  if node.text?
90
90
  node.to_html
91
91
  elsif node.elem?
92
- send node.name.to_sym, node if ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)
92
+ send node.name.to_sym, node if ! defined?(ALLOWED_TAGS) || ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)
93
93
  else
94
94
  ""
95
95
  end
@@ -116,8 +116,27 @@ module Undress
116
116
  # Helper method that tells you if the given DOM node is immediately
117
117
  # surrounded by whitespace.
118
118
  def surrounded_by_whitespace?(node)
119
- (node.previous.text? && node.previous.to_s =~ /\s+$/) ||
120
- (node.next.text? && node.next.to_s =~ /^\s+/)
119
+ (node.previous && node.previous.text? && node.previous.to_s =~ /\s+$/) ||
120
+ (node.next && node.next.text? && node.next.to_s =~ /^\s+/)
121
+ end
122
+
123
+ # Helper to determine whether a node contains a whole word;
124
+ # useful, for example, when converting a single italic letter inside a word
125
+ def complete_word?(node)
126
+ return true if ! node.previous_node || ! node.next_node
127
+
128
+ p, n = node.previous_node, node.next_node
129
+
130
+ if p.respond_to?(:content)
131
+ return false if p.content !~ /\s$/
132
+ elsif p.respond_to?(:inner_html)
133
+ return false if p.inner_html !~ /\s$/
134
+ elsif n.respond_to?(:content)
135
+ return false if n.content !~ /^\s/
136
+ elsif n.respond_to?(:inner_html)
137
+ return false if n.content !~ /^\s/
138
+ end
139
+ true
121
140
  end
122
141
 
123
142
  def method_missing(tag, node, *args) #:nodoc:
@@ -5,9 +5,9 @@ module Undress
5
5
 
6
6
  Undress::ALLOWED_TAGS = [
7
7
  'div', 'a', 'img', 'br', 'i', 'u', 'b', 'pre', 'kbd', 'code', 'cite', 'strong', 'em',
8
- 'ins', 'sup', 'sub', 'del', 'table', 'tr', 'td', 'th', 'ol', 'ul', 'li', 'p', 'span',
9
- 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'notextile', 'blockquote', 'object', 'embed',
10
- 'param', 'acronym', 'dd', 'dl', 'dt'
8
+ 'ins', 'sup', 'sub', 'del', 'table', 'tbody', 'thead', 'tr', 'td', 'th', 'ol', 'ul',
9
+ 'li', 'p', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'notextile', 'blockquote',
10
+ 'object', 'embed', 'param', 'acronym', 'dd', 'dl', 'dt'
11
11
  ]
12
12
 
13
13
  # table of contents
@@ -2,6 +2,8 @@ require File.expand_path(File.dirname(__FILE__) + "/../undress")
2
2
 
3
3
  module Undress
4
4
  class Textile < Grammar
5
+ # entities
6
+ post_processing(/&nbsp;/, " ")
5
7
 
6
8
  # whitespace handling
7
9
  post_processing(/\n\n+/, "\n\n")
@@ -28,14 +30,15 @@ module Undress
28
30
  alt = e.has_attribute?("alt") ? "(#{e["alt"]})" : ""
29
31
  "!#{e["src"]}#{alt}!"
30
32
  }
31
- rule_for(:strong) {|e| "*#{content_of(e)}*" }
32
- rule_for(:em) {|e| "_#{content_of(e)}_" }
33
+
34
+ rule_for(:strong) {|e| complete_word?(e) ? "*#{content_of(e)}*" : "[*#{content_of(e)}*]"}
35
+ rule_for(:em) {|e| complete_word?(e) ? "_#{content_of(e)}_" : "[_#{content_of(e)}_]"}
33
36
  rule_for(:code) {|e| "@#{content_of(e)}@" }
34
37
  rule_for(:cite) {|e| "??#{content_of(e)}??" }
35
38
  rule_for(:sup) {|e| surrounded_by_whitespace?(e) ? "^#{content_of(e)}^" : "[^#{content_of(e)}^]" }
36
39
  rule_for(:sub) {|e| surrounded_by_whitespace?(e) ? "~#{content_of(e)}~" : "[~#{content_of(e)}~]" }
37
- rule_for(:ins) {|e| "+#{content_of(e)}+" }
38
- rule_for(:del) {|e| "-#{content_of(e)}-" }
40
+ rule_for(:ins) {|e| complete_word?(e) ? "+#{content_of(e)}+" : "[+#{content_of(e)}+]"}
41
+ rule_for(:del) {|e| complete_word?(e) ? "-#{content_of(e)}-" : "[-#{content_of(e)}-]"}
39
42
  rule_for(:acronym) {|e| e.has_attribute?("title") ? "#{content_of(e)}(#{e["title"]})" : content_of(e) }
40
43
 
41
44
  # text formatting and layout
@@ -21,9 +21,15 @@ class Undress::GreenClothTest < Test::Unit::TestCase
21
21
  # this is ok to ensure invalid html -> to greencloth but xhtmlize! must have
22
22
  # tests on test_undress or something too
23
23
  context "parsing not valid xhtml documents" do
24
+ test "space between 2 spans with styles" do
25
+ html = "<p><span style='font-weight: bold;'>bold</span> <span style='font-style: italic;'>italic</span></p>"
26
+ greencloth = "*bold* _italic_\n"
27
+ assert_renders_greencloth greencloth, html
28
+ end
29
+
24
30
  test "a <span> bold, italic, underline, line-through at the same time" do
25
- html = "<p>some text <span style='font-weight:bold; font-style:italic; text-decoration:underline; text-decoration:line-through'>bold</span> with style</p>"
26
- greencloth = "some text *-+_bold_+-* with style\n"
31
+ html = "<p>some text <span style='font-weight:bold; font-style:italic; text-decoration:underline;'>bold</span> with style</p>"
32
+ greencloth = "some text *+_bold_+* with style\n"
27
33
  assert_renders_greencloth greencloth, html
28
34
  end
29
35
 
@@ -37,7 +43,7 @@ class Undress::GreenClothTest < Test::Unit::TestCase
37
43
  end
38
44
 
39
45
  test "style 'line-through' should be converted to <del> in <span> elements" do
40
- html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration-: line-through;'>paragraph</span></p>"
46
+ html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration: line-through;'>paragraph</span></p>"
41
47
  greencloth = "with -some- in the -paragraph-\n"
42
48
  assert_renders_greencloth greencloth, html
43
49
  html = "<p style='text-decoration: line-through;'>with some in the paragraph</p>"
data/test/test_textile.rb CHANGED
@@ -11,6 +11,54 @@ module Undress
11
11
  assert_renders_textile "h2. _this is *very* important_\n", "<h2><em>this is <strong>very</strong> important</em></h2>"
12
12
  end
13
13
 
14
+ context "some troubles" do
15
+ test "with sup" do
16
+ html = "<p>e = mc<sup>2</sup></p>"
17
+ textile = "e = mc[^2^]\n"
18
+ assert_renders_textile textile, html
19
+ end
20
+ end
21
+
22
+ context "convert enetities" do
23
+ test "&nbsp;" do
24
+ textile = "some word\n"
25
+ html = "<p>some&nbsp;word</p>"
26
+ assert_renders_textile textile, html
27
+ end
28
+ end
29
+
30
+ context "convert parts of a word" do
31
+ test "some" do
32
+ textile = "s[*o*]me\n"
33
+ html = "<p>s<span style='font-weight:bold;'>o</span>me</p>"
34
+ assert_renders_textile textile, html
35
+ end
36
+
37
+ test "italics" do
38
+ textile = "a perfect wo[_r_]ld\n"
39
+ html = "<p>a perfect wo<em>r</em>ld</p>"
40
+ assert_renders_textile textile, html
41
+ end
42
+
43
+ test "bolds" do
44
+ textile = "a perfect wo[*r*]ld\n"
45
+ html = "<p>a perfect wo<strong>r</strong>ld</p>"
46
+ assert_renders_textile textile, html
47
+ end
48
+
49
+ test "underlines" do
50
+ textile = "a perfect wo[+r+]ld\n"
51
+ html = "<p>a perfect wo<ins>r</ins>ld</p>"
52
+ assert_renders_textile textile, html
53
+ end
54
+
55
+ test "line through" do
56
+ textile = "a perfect wo[-r-]ld\n"
57
+ html = "<p>a perfect wo<del>r</del>ld</p>"
58
+ assert_renders_textile textile, html
59
+ end
60
+ end
61
+
14
62
  context "inline elements" do
15
63
  test "converts <strong> tags" do
16
64
  assert_renders_textile "*foo bar*", "<strong>foo bar</strong>"
data/undress.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "undress"
3
- s.version = "0.2.2"
3
+ s.version = "0.2.3"
4
4
  s.date = "2009-07-29"
5
5
 
6
6
  s.description = "Simply translate HTML to Textile, Markdown, or whatever other markup format you need"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zevarito-undress
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Nicol\xC3\xA1s Sanguinetti"