tiegz-kadoku 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -0,0 +1,40 @@
1
+ kadoku
2
+ by Tieg Zaharia
3
+ http://github.com/tiegz/kadoku
4
+
5
+ == DESCRIPTION:
6
+
7
+ Kadoku is a Ruby library that uses Hpricot to clean up your unreadable
8
+ HTML (i.e. ERB-generated HTML) [use with caution].
9
+
10
+ == REQUIREMENTS:
11
+
12
+ * Hpricot
13
+
14
+ == INSTALL:
15
+
16
+ $ gem sources -a http://gems.github.com/ (you only need to do this once)
17
+ $ gem install tiegz-kadoku
18
+
19
+ == USAGE:
20
+
21
+ Require the gem...
22
+
23
+ require 'tiegz-kadoku'
24
+
25
+ To load the doc,
26
+
27
+ a = open('my.site.com/index.html')
28
+ b = Kadoku::Markup.new(a)
29
+
30
+ To get the cleaned-up markup,
31
+
32
+ b.to_clean_html
33
+
34
+ To get the original html (rebuilt by Hpricot),
35
+
36
+ b.to_html
37
+
38
+ == RELEVANT LINKS:
39
+
40
+ * http://blog.solid1pxred.com/articles/2008/12/18/kadoku-the-markup-apologist
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "kadoku"
3
- s.version = "0.1.1"
3
+ s.version = "0.1.2"
4
4
  s.date = "2008-12-17"
5
5
  s.summary = "Uses Hpricot to clean up your unreadable HTML (ie ERB-generated html) [use with caution]"
6
6
  s.email = "tieg.zaharia+kadoku@gmail.com"
@@ -13,9 +13,9 @@ Gem::Specification.new do |s|
13
13
  "Rakefile",
14
14
  "README",
15
15
  "TODO",
16
- "lib/kadoku/kadoku.rb",
17
- "lib/kadoku/kadoku/markup.rb",
18
- "lib/kadoku/kadoku/markup_after_filter.rb"]
16
+ "lib/kadoku.rb",
17
+ "lib/kadoku/markup.rb",
18
+ "lib/kadoku/markup_after_filter.rb"]
19
19
  s.test_files = ["test/test_helper.rb",
20
20
  "test/kadoku_markup_test.rb"]
21
21
  s.rdoc_options = ["--main", "README"]
@@ -0,0 +1,7 @@
1
+ require 'hpricot'
2
+ require 'kadoku/markup'
3
+ require 'kadoku/markup_after_filter'
4
+
5
+ # ( かどく or Kadoku).is_for("making readable markup").with("Hpricot")
6
+ module Kadoku
7
+ end
@@ -0,0 +1,76 @@
1
+ require 'hpricot'
2
+
3
+ module Kadoku
4
+ class Markup
5
+ attr_accessor :hpricot
6
+
7
+ def initialize(content, options={})
8
+ @clean_newline = "#{options[:clean_newline] || "\n"}"
9
+ @clean_indent = "#{options[:clean_indent] || "\t"}"
10
+ @hpricot = Hpricot(content)
11
+ @indent_counter = 0
12
+ end
13
+
14
+ # Returns the original document parsed by Hpricot without cleanup
15
+ def to_html
16
+ @hpricot.to_html
17
+ end
18
+
19
+ # Returns the original markup parsed by Hpricot with extra cleanup
20
+ def to_clean_html
21
+ str = ''
22
+
23
+ # A recursive lambda that rejects any empty nodes (nodes containing only newlines/spaces/tabs count as empty)
24
+ empty_elements = lambda { |el| el.respond_to?(:children) ? (el.children.reject!(&empty_elements); false) : (el.to_html.strip.empty?) }
25
+
26
+ # A recursive lambda that re-maps the nodes with all newlines and extra spaces stripped out
27
+ stripped_down = lambda do |el|
28
+ if el.respond_to?(:children)
29
+ el.children.map(&stripped_down) # iterate over children if possible
30
+ else
31
+ #unless el.parent.respond_to?(:name) && el.parent.name == 'script' # preserve JS newlines
32
+ el.content.gsub!(/\n/, '') if el.is_a?(Hpricot::Text) || el.is_a?(Hpricot::Comment) # remove newlines from text
33
+ el.content.gsub!(/\s+/, ' ') if el.is_a?(Hpricot::Text) || el.is_a?(Hpricot::Comment) # convert all groups of spaces into one space
34
+ #end
35
+ end
36
+ el
37
+ end
38
+
39
+ # A recursive lambda that iterates over each node and rebuilds the html
40
+ pretty_print = lambda do |el, i|
41
+ if el.respond_to?(:children)
42
+ if el.children.empty?
43
+ # isn't there a less hackish way to turn a tag into a self-ending tag?
44
+ _el = Hpricot::Elem.new(Hpricot::STag.parse(el.name, el.attributes, nil, nil))
45
+ str << indented(_el) + @clean_newline
46
+ else
47
+ str << indented(el.stag) + @clean_newline # add start tag
48
+ unless el.children.empty?
49
+ @indent_counter += 1
50
+ el.children.each_with_index(&pretty_print)
51
+ @indent_counter -= 1
52
+ str << indented(el.etag) + @clean_newline if el.etag # add end tag
53
+ end
54
+ end
55
+ else
56
+ case el
57
+ when Hpricot::DocType # need to do anything special to the doc type?
58
+ str << indented(el) + @clean_newline
59
+ else
60
+ str << indented(el) + @clean_newline
61
+ end
62
+ end
63
+ end
64
+
65
+ @hpricot.children.reject!(&empty_elements).map!(&stripped_down).each_with_index(&pretty_print)
66
+ @indent_counter = 0 # reset
67
+
68
+ str
69
+ end
70
+
71
+ protected
72
+ def indented(el)
73
+ @clean_indent * @indent_counter + el.output('')
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,24 @@
1
+ module Kadoku
2
+ # An ActionController after_filter.
3
+ #
4
+ # Note: Kadoku is still experimental and isn't guaranteed to not break your html.
5
+ # You might need to restyle some things, so I wouldn't use it untested in a production environment.
6
+ #
7
+ # Example:
8
+ #
9
+ # class Application < ActionController::Base
10
+ # ... all your filters here
11
+ # after_filter Kadoku::MarkupAfterFilter
12
+ # end
13
+ #
14
+ # -or-
15
+ # class Application < ActionController::Base
16
+ # ... all your filters here
17
+ # cleanup_html
18
+ # end
19
+ class MarkupAfterFilter
20
+ def self.filter(controller)
21
+ controller.response.body = Markup.new(controller.response.body)
22
+ end
23
+ end
24
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tiegz-kadoku
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tieg Zaharia
@@ -29,9 +29,9 @@ files:
29
29
  - Rakefile
30
30
  - README
31
31
  - TODO
32
- - lib/kadoku/kadoku.rb
33
- - lib/kadoku/kadoku/markup.rb
34
- - lib/kadoku/kadoku/markup_after_filter.rb
32
+ - lib/kadoku.rb
33
+ - lib/kadoku/markup.rb
34
+ - lib/kadoku/markup_after_filter.rb
35
35
  has_rdoc: true
36
36
  homepage: http://github.com/tiegz/kadoku
37
37
  post_install_message: