tiegz-kadoku 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +40 -0
- data/kadoku.gemspec +4 -4
- data/lib/kadoku.rb +7 -0
- data/lib/kadoku/markup.rb +76 -0
- data/lib/kadoku/markup_after_filter.rb +24 -0
- metadata +4 -4
data/README
CHANGED
@@ -0,0 +1,40 @@
|
|
1
|
+
kadoku
|
2
|
+
by Tieg Zaharia
|
3
|
+
http://github.com/tiegz/kadoku
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Kadoku is a Ruby library that uses Hpricot to clean up your unreadable
|
8
|
+
HTML (i.e. ERB-generated HTML) [use with caution].
|
9
|
+
|
10
|
+
== REQUIREMENTS:
|
11
|
+
|
12
|
+
* Hpricot
|
13
|
+
|
14
|
+
== INSTALL:
|
15
|
+
|
16
|
+
$ gem sources -a http://gems.github.com/ (you only need to do this once)
|
17
|
+
$ gem install tiegz-kadoku
|
18
|
+
|
19
|
+
== USAGE:
|
20
|
+
|
21
|
+
Require the gem...
|
22
|
+
|
23
|
+
require 'tiegz-kadoku'
|
24
|
+
|
25
|
+
To load the doc,
|
26
|
+
|
27
|
+
a = open('my.site.com/index.html')
|
28
|
+
b = Kadoku::Markup.new(a)
|
29
|
+
|
30
|
+
To get the cleaned-up markup,
|
31
|
+
|
32
|
+
b.to_clean_html
|
33
|
+
|
34
|
+
To get the original html (rebuilt by Hpricot),
|
35
|
+
|
36
|
+
b.to_html
|
37
|
+
|
38
|
+
== RELEVANT LINKS:
|
39
|
+
|
40
|
+
* http://blog.solid1pxred.com/articles/2008/12/18/kadoku-the-markup-apologist
|
data/kadoku.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "kadoku"
|
3
|
-
s.version = "0.1.1"
|
3
|
+
s.version = "0.1.2"
|
4
4
|
s.date = "2008-12-17"
|
5
5
|
s.summary = "Uses Hpricot to clean up your unreadable HTML (ie ERB-generated html) [use with caution]"
|
6
6
|
s.email = "tieg.zaharia+kadoku@gmail.com"
|
@@ -13,9 +13,9 @@ Gem::Specification.new do |s|
|
|
13
13
|
"Rakefile",
|
14
14
|
"README",
|
15
15
|
"TODO",
|
16
|
-
"lib/kadoku
|
17
|
-
"lib/kadoku/
|
18
|
-
"lib/kadoku/
|
16
|
+
"lib/kadoku.rb",
|
17
|
+
"lib/kadoku/markup.rb",
|
18
|
+
"lib/kadoku/markup_after_filter.rb"]
|
19
19
|
s.test_files = ["test/test_helper.rb",
|
20
20
|
"test/kadoku_markup_test.rb"]
|
21
21
|
s.rdoc_options = ["--main", "README"]
|
data/lib/kadoku.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'hpricot'
|
2
|
+
|
3
|
+
module Kadoku
|
4
|
+
class Markup
|
5
|
+
attr_accessor :hpricot
|
6
|
+
|
7
|
+
def initialize(content, options={})
|
8
|
+
@clean_newline = "#{options[:clean_newline] || "\n"}"
|
9
|
+
@clean_indent = "#{options[:clean_indent] || "\t"}"
|
10
|
+
@hpricot = Hpricot(content)
|
11
|
+
@indent_counter = 0
|
12
|
+
end
|
13
|
+
|
14
|
+
# Returns the original document parsed by Hpricot without cleanup
|
15
|
+
def to_html
|
16
|
+
@hpricot.to_html
|
17
|
+
end
|
18
|
+
|
19
|
+
# Returns the original markup parsed by Hpricot with extra cleanup
|
20
|
+
def to_clean_html
|
21
|
+
str = ''
|
22
|
+
|
23
|
+
# A recursive lambda that rejects any empty (includes newlines/spaces/tabs) nodes
|
24
|
+
empty_elements = lambda { |el| el.respond_to?(:children) ? (el.children.reject!(&empty_elements); false) : (el.to_html.strip.empty?) }
|
25
|
+
|
26
|
+
# A recursive lambda that re-maps the nodes with all newlines and extra spaces stripped out
|
27
|
+
stripped_down = lambda do |el|
|
28
|
+
if el.respond_to?(:children)
|
29
|
+
el.children.map(&stripped_down) # iterate over children if possible
|
30
|
+
else
|
31
|
+
#unless el.parent.respond_to?(:name) && el.parent.name == 'script' # preserve JS newlines
|
32
|
+
el.content.gsub!(/\n/, '') if el.is_a?(Hpricot::Text) || el.is_a?(Hpricot::Comment) # remove newlines from text
|
33
|
+
el.content.gsub!(/\s+/, ' ') if el.is_a?(Hpricot::Text) || el.is_a?(Hpricot::Comment) # convert all groups of spaces into one space
|
34
|
+
#end
|
35
|
+
end
|
36
|
+
el
|
37
|
+
end
|
38
|
+
|
39
|
+
# A recursive lambda that iterates over each node and rebuilds the html
|
40
|
+
pretty_print = lambda do |el, i|
|
41
|
+
if el.respond_to?(:children)
|
42
|
+
if el.children.empty?
|
43
|
+
# isn't there a less hackish way to turn a tag into a self-ending tag?
|
44
|
+
_el = Hpricot::Elem.new(Hpricot::STag.parse(el.name, el.attributes, nil, nil))
|
45
|
+
str << indented(_el) + @clean_newline
|
46
|
+
else
|
47
|
+
str << indented(el.stag) + @clean_newline # add start tag
|
48
|
+
unless el.children.empty?
|
49
|
+
@indent_counter += 1
|
50
|
+
el.children.each_with_index(&pretty_print)
|
51
|
+
@indent_counter -= 1
|
52
|
+
str << indented(el.etag) + @clean_newline if el.etag # add end tag
|
53
|
+
end
|
54
|
+
end
|
55
|
+
else
|
56
|
+
case el
|
57
|
+
when Hpricot::DocType # need to do anything special to the doc type?
|
58
|
+
str << indented(el) + @clean_newline
|
59
|
+
else
|
60
|
+
str << indented(el) + @clean_newline
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
@hpricot.children.reject!(&empty_elements).map!(&stripped_down).each_with_index(&pretty_print)
|
66
|
+
@indent_counter = 0 # reset
|
67
|
+
|
68
|
+
str
|
69
|
+
end
|
70
|
+
|
71
|
+
protected
|
72
|
+
def indented(el)
|
73
|
+
@clean_indent * @indent_counter + el.output('')
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Kadoku
|
2
|
+
# An ActionController after_filter.
|
3
|
+
#
|
4
|
+
# Note: Kadoku is still experimental and isn't guaranteed to not break your html.
|
5
|
+
# You might need to restyle some things, so I wouldn't use it untested in Production env.
|
6
|
+
#
|
7
|
+
# Example:
|
8
|
+
#
|
9
|
+
# class Application < ActionController::Base
|
10
|
+
# ... all your filters here
|
11
|
+
# after_filter Kadoku::MarkupAfterFilter
|
12
|
+
# end
|
13
|
+
#
|
14
|
+
# -or-
|
15
|
+
# class Application < ActionController::Base
|
16
|
+
# ... all your filters here
|
17
|
+
# cleanup_html
|
18
|
+
# end
|
19
|
+
class MarkupAfterFilter
|
20
|
+
def self.filter(controller)
|
21
|
+
controller.response.body = Markup.new(controller.response.body)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiegz-kadoku
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tieg Zaharia
|
@@ -29,9 +29,9 @@ files:
|
|
29
29
|
- Rakefile
|
30
30
|
- README
|
31
31
|
- TODO
|
32
|
-
- lib/kadoku
|
33
|
-
- lib/kadoku/
|
34
|
-
- lib/kadoku/
|
32
|
+
- lib/kadoku.rb
|
33
|
+
- lib/kadoku/markup.rb
|
34
|
+
- lib/kadoku/markup_after_filter.rb
|
35
35
|
has_rdoc: true
|
36
36
|
homepage: http://github.com/tiegz/kadoku
|
37
37
|
post_install_message:
|