zevarito-undress 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -34,7 +34,7 @@ Will produce
34
34
 
35
35
  == Get it
36
36
 
37
- gem install undress
37
+ gem install zevarito-undress
38
38
 
39
39
  == License
40
40
 
data/lib/undress.rb CHANGED
@@ -27,6 +27,8 @@ module Undress
27
27
  class Document #:nodoc:
28
28
  def initialize(html, options)
29
29
  @doc = Hpricot(html, options)
30
+ xhtmlize!
31
+ cleanup_indentation
30
32
  end
31
33
 
32
34
  def self.add_markup(name, grammar)
@@ -34,6 +36,51 @@ module Undress
34
36
  grammar.process!(@doc)
35
37
  end
36
38
  end
39
+
40
+ private
41
+
42
+ # We try to fix those elements which aren't written as standard xhtml and, more
43
+ # importantly, which we can't parse correctly without fixing them first.
44
+ def xhtmlize!
45
+ (@doc/"ul|ol").each do |list|
46
+ fixup_list(list) if list.parent != "li" && list.parent.name !~ /ul|ol/
47
+ end
48
+
49
+ (@doc/"span[@style*='italic']").each { |e| e.swap "<em>#{e.inner_html}</em>" }
50
+
51
+ (@doc/"span[@style*='underline']").each { |e| e.swap "<ins>#{e.inner_html}</ins>" }
52
+
53
+ (@doc/"span[@style*='line-through']").each { |e| e.swap "<del>#{e.inner_html}</del>" }
54
+
55
+ (@doc/"span[@style*='bold']").each { |e| e.swap "<strong>#{e.inner_html}</strong>" }
56
+ end
57
+
58
+ # Delete tabs and newlines, and collapse runs of whitespace into a single space,
59
+ # inside all elements except <pre> or <code> elements
60
+ def cleanup_indentation
61
+ (@doc/"*").each do |e|
62
+ if e.elem? && e.inner_html != "" && e.name !~ (/pre|code/) && e.children.size == 0
63
+ e.inner_html = e.inner_html.gsub(/\n|\t/,"").gsub(/\s+/," ")
64
+ elsif e.text? && e.parent.name !~ /pre|code/
65
+ e.content = e.content.gsub(/\n|\t/,"").gsub(/\s+/," ").gsub(/^\s$/, "")
66
+ end
67
+ end
68
+ end
69
+
70
+ # Fix up a badly nested list, such as a <ul> that is a sibling of an <li> instead of being inside the <li>.
71
+ def fixup_list(list)
72
+ list.children.each {|e| fixup_list(e) if e.elem? && e.name =~ /ol|ul/}
73
+
74
+ if list.parent.name != "li"
75
+ li_side = list.next_sibling if list.next_sibling && list.next_sibling.name == "li"
76
+ li_side = list.previous_sibling if list.previous_sibling && list.previous_sibling.name == "li"
77
+
78
+ if li_side
79
+ li_side.inner_html = "#{li_side.inner_html}#{list.to_html}"
80
+ list.parent.replace_child(list, "")
81
+ end
82
+ end
83
+ end
37
84
  end
38
85
 
39
86
  module ::Hpricot #:nodoc:
@@ -3,13 +3,6 @@ require File.expand_path(File.dirname(__FILE__) + "/../undress")
3
3
  module Undress
4
4
  class Textile < Grammar
5
5
 
6
- # delete tabs and newlines from inside elements
7
- pre_processing("*") do |e|
8
- if e.elem? && e.parent.doc? && e.inner_html != "" && e.name != "pre"
9
- e.inner_html = e.inner_html.gsub(/\n|\t/,"")
10
- end
11
- end
12
-
13
6
  # whitespace handling
14
7
  post_processing(/\n\n+/, "\n\n")
15
8
  post_processing(/\A\s+/, "")
@@ -2,7 +2,87 @@ require File.expand_path(File.dirname(__FILE__) + "/test_helper")
2
2
 
3
3
  class Undress::GreenClothTest < Test::Unit::TestCase
4
4
  def assert_renders_greencloth(greencloth, html)
5
- assert_equal greencloth, Undress(html, :xhtml_strict => true).to_greencloth
5
+ assert_equal greencloth, Undress(html).to_greencloth
6
+ end
7
+
8
+ context "parsing badly indented documents" do
9
+ test "badly indent doc" do
10
+ html = "<ul>
11
+ <li>foo</li>
12
+ <li>bar</li>
13
+ <li>and x is also.</li>
14
+ </ul>"
15
+ greencloth = "* foo\n* bar\n* and x is also.\n"
16
+ assert_renders_greencloth greencloth, html
17
+ end
18
+ end
19
+
20
+ # TODO:
21
+ # this is enough to check invalid html -> greencloth, but xhtmlize! should also
22
+ # have its own tests in test_undress or somewhere similar
23
+ context "parsing not valid xhtml documents" do
24
+ test "font-weight=bold styles in <span> elements should be <strong>" do
25
+ html = "<p>some text <span style='font-weight=bold'>bold</span> with style</p>"
26
+ greencloth = "some text *bold* with style\n"
27
+ assert_renders_greencloth greencloth, html
28
+ end
29
+
30
+ test "style 'line-through' should be converted to <del> in <span> elements" do
31
+ html = "<p>with <span style='text-decoration: line-through;'>some</span> in the <span style='text-decoration-: line-through;'>paragraph</span></p>"
32
+ greencloth = "with -some- in the -paragraph-\n"
33
+ assert_renders_greencloth greencloth, html
34
+ html = "<p style='text-decoration: line-through;'>with some in the paragraph</p>"
35
+ greencloth = "with some in the paragraph\n"
36
+ assert_renders_greencloth greencloth, html
37
+ end
38
+
39
+ test "style 'underline' should be converted to <ins> in <span> elements" do
40
+ html = "<p>with <span style='text-decoration: underline;'>some</span> in the <span style='text-decoration: underline;'>paragraph</span></p>"
41
+ greencloth = "with +some+ in the +paragraph+\n"
42
+ assert_renders_greencloth greencloth, html
43
+ html = "<p style='text-decoration: underline;'>with some in the paragraph</p>"
44
+ greencloth = "with some in the paragraph\n"
45
+ assert_renders_greencloth greencloth, html
46
+ end
47
+
48
+ test "style 'italic' should be converted to <em> in <span> elements" do
49
+ html = "<p>with <span style='font-style: italic;'>some</span> in the <span style='font-style: italic;'>paragraph</span></p>"
50
+ greencloth = "with _some_ in the _paragraph_\n"
51
+ assert_renders_greencloth greencloth, html
52
+ html = "<p style='font-style: italic;'>with some in the paragraph</p>"
53
+ greencloth = "with some in the paragraph\n"
54
+ assert_renders_greencloth greencloth, html
55
+ end
56
+
57
+ test "a nested invalid unordered list" do
58
+ html = "<ul><li>item 1</li><li>item 2</li><ul><li>nested 1</li><li>nested 2</li></ul><li>item 3</li></ul>"
59
+ greencloth = "* item 1\n* item 2\n** nested 1\n** nested 2\n* item 3\n"
60
+ assert_renders_greencloth greencloth, html
61
+ end
62
+
63
+ test "a nested invalid ordered list" do
64
+ html = "<ol><li>item 1</li><li>item 2</li><ol><li>nested 1</li><li>nested 2</li></ol><li>item 3</li></ol>"
65
+ greencloth = "# item 1\n# item 2\n## nested 1\n## nested 2\n# item 3\n"
66
+ assert_renders_greencloth greencloth, html
67
+ end
68
+
69
+ test "a nested invalid mixed list with 3 levels" do
70
+ html = "<ul><li>item 1</li><li>item 2</li><ol><li>nested 1</li><li>nested 2</li><ul><li>nested2 1</li><li>nested2 2</li></ul></ol><li>item 3</li></ul>"
71
+ greencloth = "* item 1\n* item 2\n*# nested 1\n*# nested 2\n*#* nested2 1\n*#* nested2 2\n* item 3\n"
72
+ assert_renders_greencloth greencloth, html
73
+ end
74
+
75
+ test "a nested invalid mixed list" do
76
+ html = "<ul><li>item 1</li><li>item 2</li><ol><li>nested 1</li><li>nested 2</li></ol><li>item 3</li></ul>"
77
+ greencloth = "* item 1\n* item 2\n*# nested 1\n*# nested 2\n* item 3\n"
78
+ assert_renders_greencloth greencloth, html
79
+ end
80
+
81
+ test "2 badly nested list inside" do
82
+ html = "<ul><li>item 1</li><li>item 2</li><ul><li>nested 1</li><ul><li>item 1x</li><li>item 2x</li></ul><li>nested 2</li></ul><li>item 3</li></ul>"
83
+ greencloth = "* item 1\n* item 2\n** nested 1\n*** item 1x\n*** item 2x\n** nested 2\n* item 3\n"
84
+ assert_renders_greencloth greencloth, html
85
+ end
6
86
  end
7
87
 
8
88
  # unallowed tags
@@ -52,13 +132,13 @@ class Undress::GreenClothTest < Test::Unit::TestCase
52
132
  context "embed and object" do
53
133
  test "embed" do
54
134
  html = "<p>do you like my embedded blip.tv <embed src='http://blip.tv/play/Ac3GfI+2HA' allowfullscreen='true' type='application/x-shockwave-flash' allowscriptaccess='always' height='510' width='720' />?</p>"
55
- greencloth = "do you like my embedded blip.tv <embed src=\"http://blip.tv/play/Ac3GfI+2HA\" allowfullscreen=\"true\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" height=\"510\" width=\"720\" />?\n"
135
+ greencloth = "do you like my embedded blip.tv <embed allowfullscreen=\"true\" src=\"http://blip.tv/play/Ac3GfI+2HA\" allowscriptaccess=\"always\" type=\"application/x-shockwave-flash\" height=\"510\" width=\"720\" />?\n"
56
136
  assert_renders_greencloth greencloth, html
57
137
  end
58
138
 
59
139
  test "object" do
60
140
  html = "<p>do you like my embedded youtube <object width='425' height='344'><param name='movie' value='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' /><param name='allowFullScreen' value='true' /><embed src='http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1' type='application/x-shockwave-flash' width='425' height='344' allowfullscreen='true' /></object>?</p>"
61
- greencloth = "do you like my embedded youtube <object height=\"344\" width=\"425\"><param name=\"movie\" value=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" /><param name=\"allowFullScreen\" value=\"true\" /><embed src=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" allowfullscreen=\"true\" type=\"application/x-shockwave-flash\" height=\"344\" width=\"425\" /></object>?\n"
141
+ greencloth = "do you like my embedded youtube <object height=\"344\" width=\"425\"><param name=\"movie\" value=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" /><param name=\"allowFullScreen\" value=\"true\" /><embed allowfullscreen=\"true\" src=\"http://www.youtube.com/v/suvDQoXA-TA&hl=en&fs=1\" type=\"application/x-shockwave-flash\" height=\"344\" width=\"425\" /></object>?\n"
62
142
  assert_renders_greencloth greencloth, html
63
143
  end
64
144
  end
data/undress.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "undress"
3
- s.version = "0.1"
4
- s.date = "2009-07-13"
3
+ s.version = "0.2"
4
+ s.date = "2009-07-29"
5
5
 
6
6
  s.description = "Simply translate HTML to Textile, Markdown, or whatever other markup format you need"
7
7
  s.summary = "Convert HTML into other markup languages"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zevarito-undress
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.1"
4
+ version: "0.2"
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Nicol\xC3\xA1s Sanguinetti"
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-07-13 00:00:00 -07:00
12
+ date: 2009-07-29 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -77,6 +77,7 @@ files:
77
77
  - test/test_greencloth.rb
78
78
  has_rdoc: true
79
79
  homepage: http://undress.rubyforge.org
80
+ licenses:
80
81
  post_install_message:
81
82
  rdoc_options: []
82
83
 
@@ -97,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
97
98
  requirements: []
98
99
 
99
100
  rubyforge_project: undress
100
- rubygems_version: 1.2.0
101
+ rubygems_version: 1.3.5
101
102
  signing_key:
102
103
  specification_version: 2
103
104
  summary: Convert HTML into other markup languages