tiegz-kadoku 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -0,0 +1,40 @@
1
+ kadoku
2
+ by Tieg Zaharia
3
+ http://github.com/tiegz/kadoku
4
+
5
+ == DESCRIPTION:
6
+
7
+ Kadoku is a Ruby library that uses Hpricot to clean up your unreadable
8
+ HTML (e.g. ERB-generated HTML) [use with caution].
9
+
10
+ == REQUIREMENTS:
11
+
12
+ * Hpricot
13
+
14
+ == INSTALL:
15
+
16
+ $ gem sources -a http://gems.github.com/ (you only need to do this once)
17
+ $ gem install tiegz-kadoku
18
+
19
+ == USAGE:
20
+
21
+ Require the gem...
22
+
23
+ require 'kadoku'
24
+
25
+ To load the doc,
26
+
27
+ a = open('http://my.site.com/index.html') # remote URLs need: require 'open-uri'
28
+ b = Kadoku::Markup.new(a)
29
+
30
+ To get the cleaned-up markup,
31
+
32
+ b.to_clean_html
33
+
34
+ To get the original html (rebuilt by Hpricot),
35
+
36
+ b.to_html
37
+
38
+ == RELEVANT LINKS:
39
+
40
+ * http://blog.solid1pxred.com/articles/2008/12/18/kadoku-the-markup-apologist
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "kadoku"
3
- s.version = "0.1.1"
3
+ s.version = "0.1.2"
4
4
  s.date = "2008-12-17"
5
5
  s.summary = "Uses Hpricot to clean up your unreadable HTML (ie ERB-generated html) [use with caution]"
6
6
  s.email = "tieg.zaharia+kadoku@gmail.com"
@@ -13,9 +13,9 @@ Gem::Specification.new do |s|
13
13
  "Rakefile",
14
14
  "README",
15
15
  "TODO",
16
- "lib/kadoku/kadoku.rb",
17
- "lib/kadoku/kadoku/markup.rb",
18
- "lib/kadoku/kadoku/markup_after_filter.rb"]
16
+ "lib/kadoku.rb",
17
+ "lib/kadoku/markup.rb",
18
+ "lib/kadoku/markup_after_filter.rb"]
19
19
  s.test_files = ["test/test_helper.rb",
20
20
  "test/kadoku_markup_test.rb"]
21
21
  s.rdoc_options = ["--main", "README"]
@@ -0,0 +1,7 @@
1
+ require 'hpricot'
2
+ require 'kadoku/markup'
3
+ require 'kadoku/markup_after_filter'
4
+
5
# Kadoku (かどく): top-level namespace of the library, which makes markup
# readable with the help of Hpricot. The classes living under it are defined
# in the files required by this entry point.
module Kadoku; end
@@ -0,0 +1,76 @@
1
+ require 'hpricot'
2
+
3
module Kadoku
  # Wraps a document parsed by Hpricot and can re-serialize it either as-is
  # (#to_html) or as indented, one-node-per-line markup (#to_clean_html).
  class Markup
    attr_accessor :hpricot

    # content - anything accepted by Hpricot() (passed through unchanged).
    # options - Hash of formatting options:
    #           :clean_newline - line terminator for the clean output
    #                            (default "\n").
    #           :clean_indent  - string repeated once per nesting level
    #                            (default "\t").
    def initialize(content, options = {})
      @clean_newline = (options[:clean_newline] || "\n").to_s
      @clean_indent = (options[:clean_indent] || "\t").to_s
      @hpricot = Hpricot(content)
      @indent_counter = 0
    end

    # Returns the original document parsed by Hpricot without cleanup.
    def to_html
      @hpricot.to_html
    end

    # Returns the markup re-built with one node per line, indented by nesting
    # depth.
    #
    # Note that this mutates the wrapped Hpricot document: blank nodes are
    # removed from it and the content of text/comment nodes is rewritten in
    # place, so a later #to_html reflects those changes.
    def to_clean_html
      str = ''
      @indent_counter = 0

      nodes = @hpricot.children
      # These steps are deliberately separate statements: Array#reject!
      # returns nil when nothing was removed, so chaining a call onto its
      # result raises NoMethodError for documents without blank nodes.
      nodes.reject! { |node| blank_node?(node) }
      nodes.map! { |node| collapse_whitespace(node) }
      nodes.each { |node| append_node(str, node) }

      str
    ensure
      # Always leave the counter reset, even if rendering raised half-way.
      @indent_counter = 0
    end

    protected

    # Serializes +el+ via its #output method, prefixed with the indentation
    # for the current nesting depth.
    def indented(el)
      @clean_indent * @indent_counter + el.output('')
    end

    private

    # True when +node+ is a leaf whose markup is empty or whitespace-only.
    # Nodes that have children are never blank themselves; their blank
    # descendants are pruned in place instead.
    def blank_node?(node)
      if node.respond_to?(:children)
        node.children.reject! { |child| blank_node?(child) }
        false
      else
        node.to_html.strip.empty?
      end
    end

    # Strips newlines and squeezes runs of whitespace inside every text and
    # comment node below (and including) +node+. Returns +node+.
    #
    # NOTE(review): newlines are removed outright before whitespace is
    # squeezed, so "foo\nbar" becomes "foobar"; the same happens inside
    # script/pre content. Kept as-is to preserve existing output.
    def collapse_whitespace(node)
      if node.respond_to?(:children)
        node.children.each { |child| collapse_whitespace(child) }
      elsif node.is_a?(Hpricot::Text) || node.is_a?(Hpricot::Comment)
        node.content.gsub!(/\n/, '')
        node.content.gsub!(/\s+/, ' ')
      end
      node
    end

    # Appends +node+ (and, recursively, its children) to +str+, one node per
    # line at the current indentation depth.
    def append_node(str, node)
      unless node.respond_to?(:children)
        # Leaf nodes (text, comments, doctype, ...) are emitted verbatim.
        str << indented(node) + @clean_newline
        return
      end

      if node.children.empty?
        # Rebuild the element from its start tag alone so that it is emitted
        # as a single line rather than as a separate open/close tag pair.
        childless = Hpricot::Elem.new(Hpricot::STag.parse(node.name, node.attributes, nil, nil))
        str << indented(childless) + @clean_newline
      else
        str << indented(node.stag) + @clean_newline
        @indent_counter += 1
        node.children.each { |child| append_node(str, child) }
        @indent_counter -= 1
        str << indented(node.etag) + @clean_newline if node.etag
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Kadoku
  # An ActionController after_filter that replaces the response body with
  # Kadoku's cleaned-up version of it.
  #
  # Note: Kadoku is still experimental and isn't guaranteed to not break your html.
  # You might need to restyle some things, so I wouldn't use it untested in Production env.
  #
  # Example:
  #
  #   class Application < ActionController::Base
  #     ... all your filters here
  #     after_filter Kadoku::MarkupAfterFilter
  #   end
  #
  # NOTE(review): earlier documentation also showed a `cleanup_html` class
  # macro as an alternative; no such macro is defined in this file, so confirm
  # it exists before relying on it.
  class MarkupAfterFilter
    # Filter entry point: receives the controller and overwrites its response
    # body with the cleaned HTML string.
    def self.filter(controller)
      # Assign the rendered String, not the Markup wrapper object itself.
      controller.response.body = Markup.new(controller.response.body).to_clean_html
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tiegz-kadoku
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tieg Zaharia
@@ -29,9 +29,9 @@ files:
29
29
  - Rakefile
30
30
  - README
31
31
  - TODO
32
- - lib/kadoku/kadoku.rb
33
- - lib/kadoku/kadoku/markup.rb
34
- - lib/kadoku/kadoku/markup_after_filter.rb
32
+ - lib/kadoku.rb
33
+ - lib/kadoku/markup.rb
34
+ - lib/kadoku/markup_after_filter.rb
35
35
  has_rdoc: true
36
36
  homepage: http://github.com/tiegz/kadoku
37
37
  post_install_message: