htmlbeautifier 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ = HTML Beautifier
2
+
3
+ A normaliser/beautifier for HTML that also understands embedded Ruby. Ideal for tidying up Rails templates.
4
+
5
+ == What it does
6
+
7
+ * Normalises hard tabs to spaces
8
+ * Removes trailing spaces
9
+ * Indents after opening HTML elements
10
+ * Outdents before closing elements
11
+ * Collapses multiple whitespace
12
+ * Indents after block-opening embedded Ruby (if, do etc.)
13
+ * Outdents before closing Ruby blocks
14
+ * Outdents elsif and then indents again
15
+ * Indents the left-hand margin of JavaScript and CSS blocks to match the indentation level of the code
16
+
@@ -0,0 +1,93 @@
1
# Build, test, packaging and documentation tasks for the htmlbeautifier gem.

require 'rake'
require 'rake/testtask'
require "rubygems"
require "rake/gempackagetask"
require "rake/rdoctask"

# Resolve the version file against this Rakefile's own directory. A bare
# require of "lib/htmlbeautifier/version" only works while "." is on
# $LOAD_PATH, which is not guaranteed.
require File.expand_path("lib/htmlbeautifier/version", File.dirname(__FILE__))

task :default => :test

# The :test task is defined exactly once. The shipped test files are named
# test/test_*.rb, so that is the pattern used; both "lib" and "test" are put
# on the load path so the tests can require library and helper files.
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib' << 'test'
  t.pattern = 'test/test_*.rb'
  t.verbose = true
end

desc 'Install libraries and command-line utility'
task :install do
  sh 'ruby setup.rb'
end

desc 'Clean up files left over after installation'
task :cleanup do
  rm_f 'InstalledFiles'
  rm_f '.config'
end

# This builds the actual gem. For details of what all these options mean,
# and other ones you can add, check the RubyGems specification reference.
spec = Gem::Specification.new do |s|

  # Change these as appropriate
  s.name     = "htmlbeautifier"
  s.version  = HtmlBeautifier::VERSION::STRING
  s.summary  = "A normaliser/beautifier for HTML that also understands embedded Ruby."
  s.author   = "Paul Battley"
  s.email    = "pbattley@gmail.com"
  s.homepage = "http://github.com/threedaymonk/htmlbeautifier"

  s.has_rdoc         = true
  s.extra_rdoc_files = %w(README.txt)
  s.rdoc_options     = %w(--main README.txt)

  # Add any extra files to include in the gem
  s.files       = %w(Rakefile README.txt) + Dir.glob("{bin,test,lib}/**/*")
  s.executables = FileList["bin/**"].map { |f| File.basename(f) }

  s.require_paths = ["lib"]

  # If you want to depend on other gems, add them here, along with any
  # relevant versions
  # s.add_dependency("some_other_gem", "~> 0.1.0")

  # If your tests use any gems, include them here
  # s.add_development_dependency("mocha")
end

# This task actually builds the gem. We also regenerate a static
# .gemspec file, which is useful if something (i.e. GitHub) will
# be automatically building a gem for this project. If you're not
# using GitHub, edit as appropriate.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec

  # Generate the gemspec file for github. This runs while the task is being
  # defined, i.e. every time the Rakefile is loaded.
  gemspec_path = File.dirname(__FILE__) + "/#{spec.name}.gemspec"
  File.open(gemspec_path, "w") { |f| f << spec.to_ruby }
end

# Generate documentation
Rake::RDocTask.new do |rd|
  rd.main = "README.txt"
  rd.rdoc_files.include("README.txt", "lib/**/*.rb")
  rd.rdoc_dir = "rdoc"
end

desc 'Clear out RDoc and generated packages'
task :clean => [:clobber_rdoc, :clobber_package] do
  # rm_f rather than rm: a missing gemspec must not abort the clean-up.
  rm_f "#{spec.name}.gemspec"
end
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby
# Command-line filter: reads a whole document from standard input, writes the
# beautified result to standard output and finishes with a newline.
require 'htmlbeautifier/beautifier'

source = $stdin.read
HtmlBeautifier::Beautifier.new($stdout).scan(source)
$stdout << "\n"
@@ -0,0 +1,5 @@
1
# Library entry point: `require 'htmlbeautifier'` loads the whole library.

# Put this directory on the load path so the htmlbeautifier/* files below
# (and their own internal requires) can be found.
$:.unshift File.dirname(__FILE__)

# Without these requires the entry point only defined an empty namespace and
# HtmlBeautifier::Beautifier was unavailable until required separately.
require 'htmlbeautifier/version'
require 'htmlbeautifier/beautifier'

# Namespace for the HTML beautifier classes.
module HtmlBeautifier

end
@@ -0,0 +1,115 @@
1
+ require 'htmlbeautifier/parser'
2
+
3
module HtmlBeautifier
  # Re-indents HTML that may contain embedded Ruby (<% ... %>).
  #
  # #scan splits the document into tokens with Parser and dispatches each
  # token to one of the handler methods below (+embed+, +open_element+,
  # +text+ and so on). The handlers append the re-indented document to the
  # +output+ object given to #new, using only its <tt><<</tt> method.
  class Beautifier

    # Matched against the LAST (stripped) line of an embedded Ruby fragment:
    # a leading block keyword, or a trailing +do+ / <tt>{</tt> with optional
    # block parameters, means the following content is indented.
    # The word boundary applies to +do+ only; <tt>{</tt> is not a word
    # character, so requiring a boundary in front of it would reject the
    # common "each { |x|" spelling.
    RUBY_INDENT =
      %r{ ^ ( if | unless | while | begin | elsif ) \b
        | ( \b do | \{ ) ( \s* \| [^\|]+ \| )? $
        }x

    # Matched against the FIRST (stripped) line of an embedded Ruby fragment:
    # the fragment closes a block and is outdented before being emitted.
    # The word boundary applies to the keywords only, so a lone <tt>}</tt>
    # is recognised as well.
    RUBY_OUTDENT =
      %r{ ^ ( ( end | elsif ) \b | \} )
        }x

    # Body of a tag: anything except angle brackets, or a complete embedded
    # Ruby fragment (so attributes may contain <%= ... %>).
    ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx

    # output:: destination for the beautified document; must respond to
    #          <tt><<</tt> (the tests use a String, the executable $stdout).
    def initialize(output)
      @level = 0
      @new_line = true
      self.tab_stops = 2
      @output = output
    end

    # Sets the width, in spaces, of one indentation level.
    def tab_stops=(n)
      @tab = ' ' * n
    end

    # Increases the indentation level by one.
    def indent
      @level += 1
    end

    # Decreases the indentation level by one.
    # Raises RuntimeError ("Outdented too far") when already at level zero;
    # the check happens before the decrement so the level never goes negative.
    def outdent
      raise "Outdented too far" if @level <= 0
      @level -= 1
    end

    # Appends +s+ to the output, preceded by the current indentation when it
    # is the first text on a line. Empty strings write nothing at all, so a
    # blank line never receives trailing indentation spaces.
    def emit(s)
      return if s.empty?
      @output << (@tab * @level) if @new_line
      @output << s
      @new_line = false
    end

    # Ends the current line. Any run of whitespace in the input is reduced to
    # this single line break; arguments are accepted and ignored so the
    # method can be used directly as a parser callback.
    def whitespace(*x)
      @output << "\n"
      @new_line = true
    end

    # Handles an embedded Ruby fragment.
    # opening:: the opening delimiter (<% or <%=)
    # code::    the Ruby source between the delimiters, emitted unchanged
    # closing:: the closing delimiter (%>)
    # Outdents first when the fragment closes a block, and indents afterwards
    # when it opens one.
    def embed(opening, code, closing)
      lines = code.split(/\n/).map { |l| l.strip }
      outdent if lines.first.to_s =~ RUBY_OUTDENT
      emit opening + code + closing
      indent if lines.last.to_s =~ RUBY_INDENT
    end

    # Handles a <script> or <style> element whose content is not HTML.
    # opening:: the complete opening tag
    # code::    the raw content between the tags
    # closing:: the closing tag
    # Leading and trailing blank lines are dropped, trailing spaces are
    # removed from every line, and the left margin of the first remaining
    # line is removed from all lines before they are re-indented one level
    # inside the element. Empty or whitespace-only content produces the two
    # tags back to back.
    def foreign_block(opening, code, closing)
      emit opening

      lines = code.split(/\n/)
      # Guard against nil: the array may become empty while trimming.
      lines.shift while lines.first && lines.first.strip.empty?
      lines.pop while lines.last && lines.last.strip.empty?

      unless lines.empty?
        # A nil margin (first line not indented) yields /^/, which removes
        # nothing.
        margin = /^#{lines.first[/^ +/]}/

        indent
        whitespace
        lines.each do |line|
          emit line.rstrip.sub(margin, '')
          whitespace
        end
        outdent
      end

      emit closing
    end

    # Handles a self-contained tag, comment or declaration: emitted as is.
    def standalone_element(e)
      emit e
    end

    # Handles a closing tag: outdents, then emits it.
    def close_element(e)
      outdent
      emit e
    end

    # Handles an opening tag: emits it, then indents what follows.
    def open_element(e)
      emit e
      indent
    end

    # Handles character data: emitted without surrounding whitespace,
    # followed by a line break when the original text ended in whitespace.
    def text(t)
      emit(t.strip)
      whitespace if t =~ /\s$/
    end

    # Beautifies +html+, writing the result to the output given to #new.
    # Surrounding whitespace is stripped and hard tabs are replaced by one
    # indentation unit before parsing. The patterns are tried in the order
    # listed, so the more specific ones must come first.
    def scan(html)
      html = html.strip.gsub(/\t/, @tab)
      parser = Parser.new do
        map %r{(<%=?)(.*?)(%>)}m, :embed
        map %r{<!--\[.*?\]>}m, :open_element
        map %r{<!\[.*?\]-->}m, :close_element
        map %r{<!--.*?-->}m, :standalone_element
        map %r{<!.*?>}m, :standalone_element
        map %r{(<script#{ELEMENT_CONTENT}>)(.*?)(</script>)}m, :foreign_block
        map %r{(<style#{ELEMENT_CONTENT}>)(.*?)(</style>)}m, :foreign_block
        map %r{<#{ELEMENT_CONTENT}/>}m, :standalone_element
        map %r{</#{ELEMENT_CONTENT}>}m, :close_element
        map %r{<#{ELEMENT_CONTENT}>}m, :open_element
        map %r{\s+}, :whitespace
        map %r{[^<]+}, :text
      end
      parser.scan(html, self)
    end

  end
end
@@ -0,0 +1,53 @@
1
+ require 'strscan'
2
+
3
module HtmlBeautifier
  # A small pattern-driven tokenizer.
  #
  # Patterns are registered with #map, usually inside the block given to
  # #new (which is instance_eval'd). #scan then walks the subject string
  # and, at each position, calls the receiver method mapped to the first
  # pattern that matches there.
  class Parser

    # Maximum number of characters of unparseable input quoted in the
    # "Unmatched sequence" error message.
    UNMATCHED_EXCERPT_LENGTH = 40

    # Registers a block that is called with (matched_text, method_name) for
    # every dispatched token. Intended for debugging.
    def self.debug_block(&blk)
      @debug_block = blk
    end

    # Invokes the registered debug block, if there is one.
    def self.debug(match, method)
      # Also guards against debug_block having been called without a block.
      if defined?(@debug_block) && @debug_block
        @debug_block.call(match, method)
      end
    end

    # The optional block is evaluated in the context of the new parser so
    # that it can call #map directly.
    def initialize(&blk)
      @maps = []
      instance_eval(&blk) if block_given?
    end

    # Registers +pattern+ (a Regexp) to be dispatched to +method+ (a method
    # name on the receiver). Patterns are tried in registration order.
    def map(pattern, method)
      @maps << [pattern, method]
    end

    # Tokenizes +subject+ from start to end, sending one message to
    # +receiver+ per token.
    # Raises RuntimeError when no pattern matches at the current position.
    def scan(subject, receiver)
      scanner = StringScanner.new(subject)
      dispatch(scanner, receiver) until scanner.eos?
    end

    # Consumes one token from +scanner+ and sends it to +receiver+.
    # The receiver method gets the pattern's capture groups as separate
    # arguments (collection stops at the first group that did not
    # participate in the match), or the whole match when the pattern has no
    # groups.
    # Raises RuntimeError ("Unmatched sequence ...") quoting the start of the
    # remaining input when no pattern matches.
    def dispatch(scanner, receiver)
      @maps.each do |pattern, method|
        next unless scanner.scan(pattern)

        params = []
        i = 1
        while scanner[i]
          params << scanner[i]
          i += 1
        end
        params = [scanner[0]] if params.empty?

        self.class.debug(scanner[0], method)
        receiver.__send__(method, *params)
        return
      end
      # The scanner has not advanced, so the rest of the input starts with
      # the text that nothing could match.
      raise "Unmatched sequence #{scanner.rest[0, UNMATCHED_EXCERPT_LENGTH].inspect}"
    end

  end
end
@@ -0,0 +1,9 @@
1
module HtmlBeautifier #:nodoc:
  # Version number of the library, as separate components and as the
  # dotted string built from them.
  module VERSION #:nodoc:
    MAJOR = 0
    MINOR = 0
    TINY  = 2

    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
@@ -0,0 +1,19 @@
1
# Helpers mixed into the beautifier test cases.
module HtmlBeautifierTestUtilities

  # Normalises an inline fixture: drops one leading newline and the trailing
  # newline plus whitespace, then removes the first line's leading spaces
  # from the start of every line that has them.
  def code(str)
    trimmed = str.gsub(/\A\n|\n\s*\Z/, '')
    margin = /^#{trimmed[/\A +/]}/
    trimmed.split(/\n/).map { |line| line.sub(margin, '') }.join("\n")
  end

  # Runs +source+ through a Beautifier that writes into a String and asserts
  # that the result equals +expected+.
  def assert_beautifies(expected, source)
    actual = ''
    HtmlBeautifier::Beautifier.new(actual).scan(source)
    # When debugging a failure, print +expected+ and +actual+ here.
    assert_equal expected, actual
  end

end
@@ -0,0 +1,3 @@
1
+ require 'test/unit'
2
+ require File.dirname(__FILE__) + '/html_beautifier_test_utilities'
3
+ require File.dirname(__FILE__) + '/../lib/htmlbeautifier'
@@ -0,0 +1,82 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class TestHtmlBeautifierIntegration < Test::Unit::TestCase
4
+ include HtmlBeautifierTestUtilities
5
+
6
+ def test_should_correctly_indent_mixed_document
7
+ source = code(%q(
8
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
9
+ <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
10
+ <head>
11
+ <meta http-equiv="content-type" content="text/html; charset=utf-8" />
12
+ <script src="/javascripts/prototype.js" type="text/javascript"></script>
13
+ <link rel="stylesheet" type="text/css" href="/stylesheets/screen.css" media="screen"/>
14
+ <!--[if IE 6]>
15
+ <link rel="stylesheet" href="/stylesheets/screen_ie6.css" type="text/css" />
16
+ <![endif]-->
17
+ <title>Title Goes Here</title>
18
+ <script type="text/javascript" charset="utf-8">
19
+ doSomething();
20
+ </script>
21
+ </head>
22
+ <body>
23
+ <div id="something">
24
+ <h1>
25
+ Heading 1
26
+ </h1>
27
+ </div>
28
+ <div id="somethingElse">
29
+ <p>Lorem Ipsum</p>
30
+ <% if @x %>
31
+ <% @ys.each do |y| %>
32
+ <p>
33
+ <%= h y %>
34
+ </p>
35
+ <% end %>
36
+ <% elsif @z %>
37
+ <hr />
38
+ <% end %>
39
+ </div>
40
+ </body>
41
+ </html>
42
+ ))
43
+ expected = code(%q(
44
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
45
+ <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
46
+ <head>
47
+ <meta http-equiv="content-type" content="text/html; charset=utf-8" />
48
+ <script src="/javascripts/prototype.js" type="text/javascript"></script>
49
+ <link rel="stylesheet" type="text/css" href="/stylesheets/screen.css" media="screen"/>
50
+ <!--[if IE 6]>
51
+ <link rel="stylesheet" href="/stylesheets/screen_ie6.css" type="text/css" />
52
+ <![endif]-->
53
+ <title>Title Goes Here</title>
54
+ <script type="text/javascript" charset="utf-8">
55
+ doSomething();
56
+ </script>
57
+ </head>
58
+ <body>
59
+ <div id="something">
60
+ <h1>
61
+ Heading 1
62
+ </h1>
63
+ </div>
64
+ <div id="somethingElse">
65
+ <p>Lorem Ipsum</p>
66
+ <% if @x %>
67
+ <% @ys.each do |y| %>
68
+ <p>
69
+ <%= h y %>
70
+ </p>
71
+ <% end %>
72
+ <% elsif @z %>
73
+ <hr />
74
+ <% end %>
75
+ </div>
76
+ </body>
77
+ </html>
78
+ ))
79
+ assert_beautifies expected, source
80
+ end
81
+
82
+ end
@@ -0,0 +1,181 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+ require 'htmlbeautifier/beautifier'
3
+
4
+ class HtmlBeautifierRegressionTest < Test::Unit::TestCase
5
+
6
+ include HtmlBeautifierTestUtilities
7
+
8
+ def setup
9
+ # HtmlBeautifier::Parser.debug_block{ |match, method| puts("#{match.inspect} => #{method}") }
10
+ end
11
+
12
+ def test_should_ignore_html_fragments_in_embedded_code
13
+ source = code(%q(
14
+ <div>
15
+ <%= a[:b].gsub("\n","<br />\n") %>
16
+ </div>
17
+ ))
18
+ expected = code(%q(
19
+ <div>
20
+ <%= a[:b].gsub("\n","<br />\n") %>
21
+ </div>
22
+ ))
23
+ assert_beautifies expected, source
24
+ end
25
+
26
+ def test_should_indent_scripts
27
+ source = code(%q(
28
+ <script>
29
+ function(f) {
30
+ g();
31
+ return 42;
32
+ }
33
+ </script>
34
+ ))
35
+ expected = code(%q(
36
+ <script>
37
+ function(f) {
38
+ g();
39
+ return 42;
40
+ }
41
+ </script>
42
+ ))
43
+ assert_beautifies expected, source
44
+ end
45
+
46
+ def test_should_remove_blank_lines_around_scripts
47
+ source = code(%q(
48
+ <script>
49
+
50
+ f();
51
+
52
+ </script>
53
+ ))
54
+ expected = code(%q(
55
+ <script>
56
+ f();
57
+ </script>
58
+ ))
59
+ assert_beautifies expected, source
60
+ end
61
+
62
+ def test_should_remove_trailing_space_from_script_lines
63
+ source = code(%q(
64
+ <script>
65
+ f();
66
+ </script>
67
+ ))
68
+ expected = code(%q(
69
+ <script>
70
+ f();
71
+ </script>
72
+ ))
73
+ assert_beautifies expected, source
74
+ end
75
+
76
+ def test_should_skip_over_empty_scripts
77
+ source = %q(<script src="/foo.js" type="text/javascript" charset="utf-8"></script>)
78
+ expected = source
79
+ assert_beautifies expected, source
80
+ end
81
+
82
+ def test_should_indent_styles
83
+ source = code(%q(
84
+ <style>
85
+ .foo{ margin: 0; }
86
+ .bar{
87
+ padding: 0;
88
+ margin: 0;
89
+ }
90
+ </style>
91
+ ))
92
+ expected = code(%q(
93
+ <style>
94
+ .foo{ margin: 0; }
95
+ .bar{
96
+ padding: 0;
97
+ margin: 0;
98
+ }
99
+ </style>
100
+ ))
101
+ assert_beautifies expected, source
102
+ end
103
+
104
+ def test_should_remove_blank_lines_around_styles
105
+ source = code(%q(
106
+ <style>
107
+
108
+ .foo{ margin: 0; }
109
+
110
+ </style>
111
+ ))
112
+ expected = code(%q(
113
+ <style>
114
+ .foo{ margin: 0; }
115
+ </style>
116
+ ))
117
+ assert_beautifies expected, source
118
+ end
119
+
120
+ def test_should_remove_trailing_space_from_style_lines
121
+ source = code(%q(
122
+ <style>
123
+ .foo{ margin: 0; }
124
+ </style>
125
+ ))
126
+ expected = code(%q(
127
+ <style>
128
+ .foo{ margin: 0; }
129
+ </style>
130
+ ))
131
+ assert_beautifies expected, source
132
+ end
133
+
134
+ def test_should_indent_divs_containing_standalone_elements
135
+ source = code(%q(
136
+ <div>
137
+ <div>
138
+ <img src="foo" alt="" />
139
+ </div>
140
+ <div>
141
+ <img src="foo" alt="" />
142
+ </div>
143
+ </div>
144
+ ))
145
+ expected = source
146
+ assert_beautifies expected, source
147
+ end
148
+
149
+ def test_should_not_break_line_on_embedded_code_within_script_opening_element
150
+ source = '<script src="<%= path %>" type="text/javascript"></script>'
151
+ expected = source
152
+ assert_beautifies expected, source
153
+ end
154
+
155
+ def test_should_not_break_line_on_embedded_code_within_normal_element
156
+ source = '<img src="<%= path %>" alt="foo" />'
157
+ expected = source
158
+ assert_beautifies expected, source
159
+ end
160
+
161
+ def test_should_indent_inside_IE_conditional_comments
162
+ source = code(%q(
163
+ <!--[if IE 6]>
164
+ <link rel="stylesheet" href="/stylesheets/ie6.css" type="text/css" />
165
+ <![endif]-->
166
+ <!--[if IE 5]>
167
+ <link rel="stylesheet" href="/stylesheets/ie5.css" type="text/css" />
168
+ <![endif]-->
169
+ ))
170
+ expected = code(%q(
171
+ <!--[if IE 6]>
172
+ <link rel="stylesheet" href="/stylesheets/ie6.css" type="text/css" />
173
+ <![endif]-->
174
+ <!--[if IE 5]>
175
+ <link rel="stylesheet" href="/stylesheets/ie5.css" type="text/css" />
176
+ <![endif]-->
177
+ ))
178
+ assert_beautifies expected, source
179
+ end
180
+
181
+ end
@@ -0,0 +1,42 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+ require 'htmlbeautifier/parser'
3
+
4
# Unit tests for HtmlBeautifier::Parser dispatching.
class TestParser < Test::Unit::TestCase

  # Test double that records every message sent to it, together with its
  # arguments, in the order received.
  class Receiver
    attr_reader :sequence

    def initialize
      @sequence = []
    end

    def method_missing(method, *params)
      @sequence.push([method, params])
    end
  end

  def setup
    # Uncomment to trace each match and the method it is dispatched to:
    # HtmlBeautifier::Parser.debug_block{ |match, method| puts("#{match.inspect} => #{method}") }
  end

  # Each token goes to the method mapped to the first matching pattern.
  def test_should_dispatch_matching_sequence
    receiver = Receiver.new
    parser = HtmlBeautifier::Parser.new do
      map %r{foo}, :foo
      map %r{bar\s*}, :bar
      map %r{\s+}, :whitespace
    end
    parser.scan('foo bar ', receiver)
    expected = [[:foo, ['foo']], [:whitespace, [' ']], [:bar, ['bar ']]]
    assert_equal expected, receiver.sequence
  end

  # Capture groups arrive as individual arguments instead of the whole match.
  def test_should_send_parenthesized_components_as_separate_parameters
    receiver = Receiver.new
    parser = HtmlBeautifier::Parser.new do
      map %r{(foo)\((.*?)\)}, :foo
    end
    parser.scan('foo(bar)', receiver)
    expected = [[:foo, ['foo', 'bar']]]
    assert_equal expected, receiver.sequence
  end

end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: htmlbeautifier
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Paul Battley
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-11 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: pbattley@gmail.com
18
+ executables:
19
+ - htmlbeautifier
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.txt
24
+ files:
25
+ - Rakefile
26
+ - README.txt
27
+ - bin/htmlbeautifier
28
+ - test/test_html_beautifier_regression.rb
29
+ - test/test_parser.rb
30
+ - test/test_helper.rb
31
+ - test/test_html_beautifier_integration.rb
32
+ - test/html_beautifier_test_utilities.rb
33
+ - lib/htmlbeautifier/parser.rb
34
+ - lib/htmlbeautifier/beautifier.rb
35
+ - lib/htmlbeautifier/version.rb
36
+ - lib/htmlbeautifier.rb
37
+ has_rdoc: true
38
+ homepage: http://github.com/threedaymonk/htmlbeautifier
39
+ licenses: []
40
+
41
+ post_install_message:
42
+ rdoc_options:
43
+ - --main
44
+ - README.txt
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: "0"
52
+ version:
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: "0"
58
+ version:
59
+ requirements: []
60
+
61
+ rubyforge_project:
62
+ rubygems_version: 1.3.5
63
+ signing_key:
64
+ specification_version: 3
65
+ summary: A normaliser/beautifier for HTML that also understands embedded Ruby.
66
+ test_files: []
67
+