tiegz-kadoku 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +40 -0
- data/kadoku.gemspec +4 -4
- data/lib/kadoku.rb +7 -0
- data/lib/kadoku/markup.rb +76 -0
- data/lib/kadoku/markup_after_filter.rb +24 -0
- metadata +4 -4
data/README
CHANGED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
kadoku
|
|
2
|
+
by Tieg Zaharia
|
|
3
|
+
http://github.com/tiegz/kadoku
|
|
4
|
+
|
|
5
|
+
== DESCRIPTION:
|
|
6
|
+
|
|
7
|
+
Kadoku is a Ruby library that uses Hpricot to clean up your unreadable
|
|
8
|
+
HTML (ie ERB-generated html) [use with caution].
|
|
9
|
+
|
|
10
|
+
== REQUIREMENTS:
|
|
11
|
+
|
|
12
|
+
* Hpricot
|
|
13
|
+
|
|
14
|
+
== INSTALL:
|
|
15
|
+
|
|
16
|
+
$ gem sources -a http://gems.github.com/ (you only need to do this once)
|
|
17
|
+
$ gem install tiegz-kadoku
|
|
18
|
+
|
|
19
|
+
== USAGE:
|
|
20
|
+
|
|
21
|
+
Require the gem...
|
|
22
|
+
|
|
23
|
+
require 'kadoku'
|
|
24
|
+
|
|
25
|
+
To load the doc,
|
|
26
|
+
|
|
27
|
+
a = open('my.site.com/index.html')
|
|
28
|
+
b = Kadoku::Markup.new(a)
|
|
29
|
+
|
|
30
|
+
To get the cleaned-up markup,
|
|
31
|
+
|
|
32
|
+
b.to_clean_html
|
|
33
|
+
|
|
34
|
+
To get the original html (rebuilt by Hpricot),
|
|
35
|
+
|
|
36
|
+
b.to_html
|
|
37
|
+
|
|
38
|
+
== RELEVANT LINKS:
|
|
39
|
+
|
|
40
|
+
* http://blog.solid1pxred.com/articles/2008/12/18/kadoku-the-markup-apologist
|
data/kadoku.gemspec
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Gem::Specification.new do |s|
|
|
2
2
|
s.name = "kadoku"
|
|
3
|
-
s.version = "0.1.
|
|
3
|
+
s.version = "0.1.2"
|
|
4
4
|
s.date = "2008-12-17"
|
|
5
5
|
s.summary = "Uses Hpricot to clean up your unreadable HTML (ie ERB-generated html) [use with caution]"
|
|
6
6
|
s.email = "tieg.zaharia+kadoku@gmail.com"
|
|
@@ -13,9 +13,9 @@ Gem::Specification.new do |s|
|
|
|
13
13
|
"Rakefile",
|
|
14
14
|
"README",
|
|
15
15
|
"TODO",
|
|
16
|
-
"lib/kadoku
|
|
17
|
-
"lib/kadoku/
|
|
18
|
-
"lib/kadoku/
|
|
16
|
+
"lib/kadoku.rb",
|
|
17
|
+
"lib/kadoku/markup.rb",
|
|
18
|
+
"lib/kadoku/markup_after_filter.rb"]
|
|
19
19
|
s.test_files = ["test/test_helper.rb",
|
|
20
20
|
"test/kadoku_markup_test.rb"]
|
|
21
21
|
s.rdoc_options = ["--main", "README"]
|
data/lib/kadoku.rb
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
require 'hpricot'
|
|
2
|
+
|
|
3
|
+
module Kadoku
|
|
4
|
+
class Markup
|
|
5
|
+
attr_accessor :hpricot
|
|
6
|
+
|
|
7
|
+
def initialize(content, options={})
|
|
8
|
+
@clean_newline = "#{options[:clean_newline] || "\n"}"
|
|
9
|
+
@clean_indent = "#{options[:clean_indent] || "\t"}"
|
|
10
|
+
@hpricot = Hpricot(content)
|
|
11
|
+
@indent_counter = 0
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# Returns the original document parsed by Hpricot without cleanup
|
|
15
|
+
def to_html
|
|
16
|
+
@hpricot.to_html
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# Returns the original markup parsed by Hpricot with extra cleanup
|
|
20
|
+
def to_clean_html
|
|
21
|
+
str = ''
|
|
22
|
+
|
|
23
|
+
# A recursive lambda that rejects any empty (includes newlines/spaces/tabs) nodes
|
|
24
|
+
empty_elements = lambda { |el| el.respond_to?(:children) ? (el.children.reject!(&empty_elements); false) : (el.to_html.strip.empty?) }
|
|
25
|
+
|
|
26
|
+
# A recursive lambda that re-maps the nodes with all newlines and extra spaces stripped out
|
|
27
|
+
stripped_down = lambda do |el|
|
|
28
|
+
if el.respond_to?(:children)
|
|
29
|
+
el.children.map(&stripped_down) # iterate over children if possible
|
|
30
|
+
else
|
|
31
|
+
#unless el.parent.respond_to?(:name) && el.parent.name == 'script' # preserve JS newlines
|
|
32
|
+
el.content.gsub!(/\n/, '') if el.is_a?(Hpricot::Text) || el.is_a?(Hpricot::Comment) # remove newlines from text
|
|
33
|
+
el.content.gsub!(/\s+/, ' ') if el.is_a?(Hpricot::Text) || el.is_a?(Hpricot::Comment) # convert all groups of spaces into one space
|
|
34
|
+
#end
|
|
35
|
+
end
|
|
36
|
+
el
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# A recursive lambda that iterates over each node and rebuilds the html
|
|
40
|
+
pretty_print = lambda do |el, i|
|
|
41
|
+
if el.respond_to?(:children)
|
|
42
|
+
if el.children.empty?
|
|
43
|
+
# isn't there a less hackish way to turn a tag into a self-ending tag?
|
|
44
|
+
_el = Hpricot::Elem.new(Hpricot::STag.parse(el.name, el.attributes, nil, nil))
|
|
45
|
+
str << indented(_el) + @clean_newline
|
|
46
|
+
else
|
|
47
|
+
str << indented(el.stag) + @clean_newline # add start tag
|
|
48
|
+
unless el.children.empty?
|
|
49
|
+
@indent_counter += 1
|
|
50
|
+
el.children.each_with_index(&pretty_print)
|
|
51
|
+
@indent_counter -= 1
|
|
52
|
+
str << indented(el.etag) + @clean_newline if el.etag # add end tag
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
else
|
|
56
|
+
case el
|
|
57
|
+
when Hpricot::DocType # need to do anything special to the doc type?
|
|
58
|
+
str << indented(el) + @clean_newline
|
|
59
|
+
else
|
|
60
|
+
str << indented(el) + @clean_newline
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
@hpricot.children.delete_if(&empty_elements).map!(&stripped_down).each_with_index(&pretty_print) # delete_if always returns the array; reject! returns nil when nothing was removed, which broke the chain
|
|
66
|
+
@indent_counter = 0 # reset
|
|
67
|
+
|
|
68
|
+
str
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
protected
|
|
72
|
+
def indented(el)
|
|
73
|
+
@clean_indent * @indent_counter + el.output('')
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
module Kadoku
|
|
2
|
+
# An ActionController after_filter.
|
|
3
|
+
#
|
|
4
|
+
# Note: Kadoku is still experimental and isn't guaranteed to not break your html.
|
|
5
|
+
# You might need to restyle some things, so I wouldn't use it untested in Production env.
|
|
6
|
+
#
|
|
7
|
+
# Example:
|
|
8
|
+
#
|
|
9
|
+
# class Application < ActionController::Base
|
|
10
|
+
# ... all your filters here
|
|
11
|
+
# after_filter Kadoku::MarkupAfterFilter
|
|
12
|
+
# end
|
|
13
|
+
#
|
|
14
|
+
# -or-
|
|
15
|
+
# class Application < ActionController::Base
|
|
16
|
+
# ... all your filters here
|
|
17
|
+
# cleanup_html
|
|
18
|
+
# end
|
|
19
|
+
class MarkupAfterFilter
|
|
20
|
+
def self.filter(controller)
|
|
21
|
+
controller.response.body = Markup.new(controller.response.body).to_clean_html # assign the cleaned-up HTML string, not the Markup wrapper object
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tiegz-kadoku
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Tieg Zaharia
|
|
@@ -29,9 +29,9 @@ files:
|
|
|
29
29
|
- Rakefile
|
|
30
30
|
- README
|
|
31
31
|
- TODO
|
|
32
|
-
- lib/kadoku
|
|
33
|
-
- lib/kadoku/
|
|
34
|
-
- lib/kadoku/
|
|
32
|
+
- lib/kadoku.rb
|
|
33
|
+
- lib/kadoku/markup.rb
|
|
34
|
+
- lib/kadoku/markup_after_filter.rb
|
|
35
35
|
has_rdoc: true
|
|
36
36
|
homepage: http://github.com/tiegz/kadoku
|
|
37
37
|
post_install_message:
|