zevarito-undress 0.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -34,7 +34,7 @@ Will produce
34
34
 
35
35
  == Get it
36
36
 
37
- gem install undress
37
+ gem install zevarito-undress
38
38
 
39
39
  == License
40
40
 
data/lib/undress.rb CHANGED
@@ -27,6 +27,8 @@ module Undress
27
27
  class Document #:nodoc:
28
28
  def initialize(html, options)
29
29
  @doc = Hpricot(html, options)
30
+ xhtmlize!
31
+ cleanup_indentation
30
32
  end
31
33
 
32
34
  def self.add_markup(name, grammar)
@@ -34,6 +36,51 @@ module Undress
34
36
  grammar.process!(@doc)
35
37
  end
36
38
  end
39
+
40
+ private
41
+
42
+ # We try to fix elements that are not written as standard XHTML and, more
43
+ # importantly, that we cannot parse correctly unless they are fixed first.
44
+ def xhtmlize!
45
+ (@doc/"ul|ol").each do |list|
46
+ fixup_list(list) if list.parent != "li" && list.parent.name !~ /ul|ol/
47
+ end
48
+
49
+ (@doc/"span[@style*='italic']").each { |e| e.swap "<em>#{e.inner_html}</em>" }
50
+
51
+ (@doc/"span[@style*='underline']").each { |e| e.swap "<ins>#{e.inner_html}</ins>" }
52
+
53
+ (@doc/"span[@style*='line-through']").each { |e| e.swap "<del>#{e.inner_html}</del>" }
54
+
55
+ (@doc/"span[@style*='bold']").each { |e| e.swap "<strong>#{e.inner_html}</strong>" }
56
+ end
57
+
58
+ # Delete tabs and newlines, and collapse runs of whitespace into a single
59
+ # space, inside every element except <pre> or <code> elements
60
+ def cleanup_indentation
61
+ (@doc/"*").each do |e|
62
+ if e.elem? && e.inner_html != "" && e.name !~ (/pre|code/) && e.children.size == 0
63
+ e.inner_html = e.inner_html.gsub(/\n|\t/,"").gsub(/\s+/," ")
64
+ elsif e.text? && e.parent.name !~ /pre|code/
65
+ e.content = e.content.gsub(/\n|\t/,"").gsub(/\s+/," ").gsub(/^\s$/, "")
66
+ end
67
+ end
68
+ end
69
+
70
+ # Fix up a badly nested list, such as a <ul> that is a sibling of an <li> instead of being inside that <li>.
71
+ def fixup_list(list)
72
+ list.children.each {|e| fixup_list(e) if e.elem? && e.name =~ /ol|ul/}
73
+
74
+ if list.parent.name != "li"
75
+ li_side = list.next_sibling if list.next_sibling && list.next_sibling.name == "li"
76
+ li_side = list.previous_sibling if list.previous_sibling && list.previous_sibling.name == "li"
77
+
78
+ if li_side
79
+ li_side.inner_html = "#{li_side.inner_html}#{list.to_html}"
80
+ list.parent.replace_child(list, "")
81
+ end
82
+ end
83
+ end
37
84
  end
38
85
 
39
86
  module ::Hpricot #:nodoc:
@@ -3,13 +3,6 @@ require File.expand_path(File.dirname(__FILE__) + "/../undress")
3
3
  module Undress
4
4
  class Textile < Grammar
5
5
 
6
- # delete tabs and newlines from inside elements
7
- pre_processing("*") do |e|
8
- if e.elem? && e.parent.doc? && e.inner_html != "" && e.name != "pre"
9
- e.inner_html = e.inner_html.gsub(/\n|\t/,"")
10
- end
11
- end
12
-
13
6
  # whitespace handling
14
7
  post_processing(/\n\n+/, "\n\n")
15
8
  post_processing(/\A\s+/, "")
@@ -2,7 +2,87 @@ require File.expand_path(File.dirname(__FILE__) + "/test_helper")
2
2
 
3
3
  class Undress::GreenClothTest < Test::Unit::TestCase
4
4
  def assert_renders_greencloth(greencloth, html)
5
- assert_equal greencloth, Undress(html, :xhtml_strict => true).to_greencloth
5
+ assert_equal greencloth, Undress(html).to_greencloth
6
+ end
7
+
8
+ context "parsing badly indented documents" do
9
+ test "badly indent doc" do
10
+ html = "<ul>
11
+ <li>foo</li>
12
+ <li>bar</li>
13
+ <li>and x is also.</li>
14
+ </ul>"
15
+ greencloth = "* foo\n* bar\n* and x is also.\n"
16
+ assert_renders_greencloth greencloth, html
17
+ end
18
+ end
19
+
20
+ # TODO:
21
+ # this is ok to ensure invalid html -> to greencloth but xhtmlize! must have
22
+ # tests on test_undress or something too
23
+ context "parsing not valid xhtml documents" do
24
+ test "font-weight=bold styles in <span> elements should be <strong>" do
25
+ html = "<p>some text <span style='font-weight=bold'>bold</span> with style</p>"
26
+ greencloth = "some text *bold* with style\n"
27
+ assert_renders_greencloth greencloth, html
28
+ end
29
+
30
+ test "style 'line-through' should be converted to <del> in <span> elements" do
31
+ html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration-: line-through;'>paragraph</span></p>"
32
+ greencloth = "with -some- in the -paragraph-\n"
33
+ assert_renders_greencloth greencloth, html
34
+ html = "<p style='text-decoration: line-through;'>with some in the paragraph</p>"
35
+ greencloth = "with some in the paragraph\n"
36
+ assert_renders_greencloth greencloth, html
37
+ end
38
+
39
+ test "style 'underline' should be converted to <ins> in <span> elements" do
40
+ html = "<p>with <span style='text-decoration: underline;'>some</span> in the <span style='text-decoration: underline;'>paragraph</span></p>"
41
+ greencloth = "with +some+ in the +paragraph+\n"
42
+ assert_renders_greencloth greencloth, html
43
+ html = "<p style='text-decoration: underline;'>with some in the paragraph</p>"
44
+ greencloth = "with some in the paragraph\n"
45
+ assert_renders_greencloth greencloth, html
46
+ end
47
+
48
+ test "style 'italic' should be converted to <em> in <span> elements" do
49
+ html = "<p>with <span style='font-style: italic;'>some</span> in the <span style='font-style: italic;'>paragraph</span></p>"
50
+ greencloth = "with _some_ in the _paragraph_\n"
51
+ assert_renders_greencloth greencloth, html
52
+ html = "<p style='font-style: italic;'>with some in the paragraph</p>"
53
+ greencloth = "with some in the paragraph\n"
54
+ assert_renders_greencloth greencloth, html
55
+ end
56
+
57
+ test "a nested invalid unordered list" do
58
+ html = "<ul><li>item 1</li><li>item 2</li><ul><li>nested 1</li><li>nested 2</li></ul><li>item 3</li></ul>"
59
+ greencloth = "* item 1\n* item 2\n** nested 1\n** nested 2\n* item 3\n"
60
+ assert_renders_greencloth greencloth, html
61
+ end
62
+
63
+ test "a nested invalid ordered list" do
64
+ html = "<ol><li>item 1</li><li>item 2</li><ol><li>nested 1</li><li>nested 2</li></ol><li>item 3</li></ol>"
65
+ greencloth = "# item 1\n# item 2\n## nested 1\n## nested 2\n# item 3\n"
66
+ assert_renders_greencloth greencloth, html
67
+ end
68
+
69
+ test "a nested invalid mixed list with 3 levels" do
70
+ html = "<ul><li>item 1</li><li>item 2</li><ol><li>nested 1</li><li>nested 2</li><ul><li>nested2 1</li><li>nested2 2</li></ul></ol><li>item 3</li></ul>"
71
+ greencloth = "* item 1\n* item 2\n*# nested 1\n*# nested 2\n*#* nested2 1\n*#* nested2 2\n* item 3\n"
72
+ assert_renders_greencloth greencloth, html
73
+ end
74
+
75
+ test "a nested invalid mixed list" do
76
+ html = "<ul><li>item 1</li><li>item 2</li><ol><li>nested 1</li><li>nested 2</li></ol><li>item 3</li></ul>"
77
+ greencloth = "* item 1\n* item 2\n*# nested 1\n*# nested 2\n* item 3\n"
78
+ assert_renders_greencloth greencloth, html
79
+ end
80
+
81
+ test "2 badly nested list inside" do
82
+ html = "<ul><li>item 1</li><li>item 2</li><ul><li>nested 1</li><ul><li>item 1x</li><li>item 2x</li></ul><li>nested 2</li></ul><li>item 3</li></ul>"
83
+ greencloth = "* item 1\n* item 2\n** nested 1\n*** item 1x\n*** item 2x\n** nested 2\n* item 3\n"
84
+ assert_renders_greencloth greencloth, html
85
+ end
6
86
  end
7
87
 
8
88
  # unallowed tags
@@ -52,13 +132,13 @@ class Undress::GreenClothTest < Test::Unit::TestCase
52
132
  context "embed and object" do
53
133
  test "embed" do
54
134
  html = "<p>do you like my embedded blip.tv <embed src='http://blip.tv/play/Ac3GfI+2HA' allowfullscreen='true' type='application/x-shockwave-flash' allowscriptaccess='always' height='510' width='720' />?</p>"
55
- greencloth = "do you like my embedded blip.tv <embed src=\"http://blip.tv/play/Ac3GfI+2HA\" allowfullscreen=\"true\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" height=\"510\" width=\"720\" />?\n"
135
+ greencloth = "do you like my embedded blip.tv <embed allowfullscreen=\"true\" src=\"http://blip.tv/play/Ac3GfI+2HA\" allowscriptaccess=\"always\" type=\"application/x-shockwave-flash\" height=\"510\" width=\"720\" />?\n"
56
136
  assert_renders_greencloth greencloth, html
57
137
  end
58
138
 
59
139
  test "object" do
60
140
  html = "<p>do you like my embedded youtube <object width='425' height='344'><param name='movie' value='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' /><param name='allowFullScreen' value='true' /><embed src='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' type='application/x-shockwave-flash' width='425' height='344' allowfullscreen='true' /></object>?</p>"
61
- greencloth = "do you like my embedded youtube <object height=\"344\" width=\"425\"><param name=\"movie\" value=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" /><param name=\"allowFullScreen\" value=\"true\" /><embed src=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" allowfullscreen=\"true\" type=\"application/x-shockwave-flash\" height=\"344\" width=\"425\" /></object>?\n"
141
+ greencloth = "do you like my embedded youtube <object height=\"344\" width=\"425\"><param name=\"movie\" value=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" /><param name=\"allowFullScreen\" value=\"true\" /><embed allowfullscreen=\"true\" src=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" type=\"application/x-shockwave-flash\" height=\"344\" width=\"425\" /></object>?\n"
62
142
  assert_renders_greencloth greencloth, html
63
143
  end
64
144
  end
data/undress.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "undress"
3
- s.version = "0.1"
4
- s.date = "2009-07-13"
3
+ s.version = "0.2"
4
+ s.date = "2009-07-29"
5
5
 
6
6
  s.description = "Simply translate HTML to Textile, Markdown, or whatever other markup format you need"
7
7
  s.summary = "Convert HTML into other markup languages"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zevarito-undress
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.1"
4
+ version: "0.2"
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Nicol\xC3\xA1s Sanguinetti"
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-07-13 00:00:00 -07:00
12
+ date: 2009-07-29 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -77,6 +77,7 @@ files:
77
77
  - test/test_greencloth.rb
78
78
  has_rdoc: true
79
79
  homepage: http://undress.rubyforge.org
80
+ licenses:
80
81
  post_install_message:
81
82
  rdoc_options: []
82
83
 
@@ -97,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
97
98
  requirements: []
98
99
 
99
100
  rubyforge_project: undress
100
- rubygems_version: 1.2.0
101
+ rubygems_version: 1.3.5
101
102
  signing_key:
102
103
  specification_version: 2
103
104
  summary: Convert HTML into other markup languages