htmlbeautifier 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,16 @@
1
+ = HTML Beautifier
2
+
3
+ A normaliser/beautifier for HTML that also understands embedded Ruby. Ideal for tidying up Rails templates.
4
+
5
+ == What it does
6
+
7
+ * Normalises hard tabs to spaces
8
+ * Removes trailing spaces
9
+ * Indents after opening HTML elements
10
+ * Outdents before closing elements
11
+ * Collapses multiple whitespace
12
+ * Indents after block-opening embedded Ruby (if, do, etc.)
13
+ * Outdents before closing Ruby blocks
14
+ * Outdents elsif and then indents again
15
+ * Indents the left-hand margin of JavaScript and CSS blocks to match the indentation level of the code
16
+
@@ -0,0 +1,93 @@
1
# Build script: runs the test suite, installs through setup.rb, and packages
# the gem together with a static gemspec and RDoc output.
require 'rubygems'
require 'rake'
require 'rake/testtask'
# NOTE(review): later Rake releases provide these tasks as
# rubygems/package_task and rdoc/task instead -- confirm against the Rake
# version in use before upgrading.
require 'rake/gempackagetask'
require 'rake/rdoctask'

task :default => :test

# The gem's test files are named test/test_*.rb, so a single task with that
# pattern covers all of them.
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib' << 'test'
  t.pattern = 'test/test_*.rb'
  t.verbose = true
end

desc 'Install libraries and command-line utility'
task :install do
  sh 'ruby setup.rb'
end

desc 'Clean up files left over after installation'
task :cleanup do
  rm_f 'InstalledFiles'
  rm_f '.config'
end

# This builds the actual gem. For details of what all these options
# mean, and other ones you can add, check the documentation here:
#
#   http://rubygems.org/read/chapter/20
#
# The version file is resolved relative to this Rakefile so that loading it
# does not depend on the current directory being on the load path.
require File.expand_path('lib/htmlbeautifier/version', File.dirname(__FILE__))

spec = Gem::Specification.new do |s|

  # Change these as appropriate
  s.name     = "htmlbeautifier"
  s.version  = HtmlBeautifier::VERSION::STRING
  s.summary  = "A normaliser/beautifier for HTML that also understands embedded Ruby."
  s.author   = "Paul Battley"
  s.email    = "pbattley@gmail.com"
  s.homepage = "http://github.com/threedaymonk/htmlbeautifier"

  s.has_rdoc         = true
  s.extra_rdoc_files = %w(README.txt)
  s.rdoc_options     = %w(--main README.txt)

  # Add any extra files to include in the gem
  s.files       = %w(Rakefile README.txt) + Dir.glob("{bin,test,lib}/**/*")
  s.executables = FileList["bin/**"].map { |f| File.basename(f) }

  s.require_paths = ["lib"]

  # If you want to depend on other gems, add them here, along with any
  # relevant versions
  # s.add_dependency("some_other_gem", "~> 0.1.0")

  # If your tests use any gems, include them here
  # s.add_development_dependency("mocha")
end

# This task actually builds the gem. We also regenerate a static
# .gemspec file, which is useful if something (e.g. GitHub) will
# be automatically building a gem for this project. If you're not
# using GitHub, edit as appropriate.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec

  # Generate the gemspec file for github.
  file = File.join(File.dirname(__FILE__), "#{spec.name}.gemspec")
  File.open(file, "w") { |f| f << spec.to_ruby }
end

# Generate documentation
Rake::RDocTask.new do |rd|
  rd.main = "README.txt"
  rd.rdoc_files.include("README.txt", "lib/**/*.rb")
  rd.rdoc_dir = "rdoc"
end

desc 'Clear out RDoc and generated packages'
task :clean => [:clobber_rdoc, :clobber_package] do
  # rm_f rather than rm: cleaning must not fail when the gemspec is absent.
  rm_f "#{spec.name}.gemspec"
end
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby
# Command-line filter: reads markup from standard input and writes the
# beautified result to standard output.
require 'htmlbeautifier/beautifier'

source = $stdin.read
beautifier = HtmlBeautifier::Beautifier.new($stdout)
beautifier.scan(source)
# Terminate the output with a newline.
$stdout << "\n"
@@ -0,0 +1,5 @@
1
# Put this file's directory at the front of the load path.
$LOAD_PATH.unshift(File.dirname(__FILE__))

# Top-level namespace of the library.
module HtmlBeautifier; end
@@ -0,0 +1,115 @@
1
+ require 'htmlbeautifier/parser'
2
+
3
module HtmlBeautifier
  # Re-indents HTML with embedded Ruby. #scan tokenizes the input with a
  # Parser, which calls back into the event methods below (embed,
  # foreign_block, open_element, ...); each event appends to the output object
  # using <<.
  class Beautifier

    # Embedded Ruby whose last line matches this opens a block, so following
    # output is indented. A trailing "{" counts with or without a space before
    # it, optionally followed by block parameters.
    RUBY_INDENT =
      %r{ ^ ( if | unless | while | begin | elsif | else ) \b
        | ( \b do | \{ ) ( \s* \| [^\|]+ \| )? $
        }x
    # Embedded Ruby whose first line matches this closes (or continues) a
    # block, so it is outdented before being emitted. The word boundary only
    # applies to the keywords: a lone "}" has no word character after it.
    RUBY_OUTDENT =
      %r{ ^ ( ( end | elsif | else ) \b | \} )
        }x
    # The inside of a tag: anything except angle brackets, or whole <% %> tags.
    ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx

    # output:: any object accepting << with a String (e.g. a String or an IO).
    def initialize(output)
      @level = 0
      @new_line = true
      self.tab_stops = 2
      @output = output
    end

    # Sets the width, in spaces, of one indentation level.
    def tab_stops=(n)
      @tab = ' ' * n
    end

    # Increases the indentation level by one.
    def indent
      @level += 1
    end

    # Decreases the indentation level by one.
    # Raises RuntimeError if the level would become negative.
    def outdent
      @level -= 1
      raise "Outdented too far" if @level < 0
    end

    # Writes +s+, preceded by the current indentation when at the start of a
    # line.
    def emit(s)
      @output << (@tab * @level) if @new_line
      @output << s
      @new_line = false
    end

    # Ends the current line. Arguments are accepted and ignored so that this
    # can be used directly as a parser callback. The newline is written
    # without indentation so that empty lines carry no trailing spaces.
    def whitespace(*x)
      @output << "\n"
      @new_line = true
    end

    # Handles an embedded Ruby tag. The first line of +code+ decides whether
    # to outdent before emitting, the last line whether to indent afterwards.
    # The tag itself is emitted unchanged.
    def embed(opening, code, closing)
      lines = code.split(/\n/).map { |l| l.strip }
      outdent if lines.first =~ RUBY_OUTDENT
      emit opening + code + closing
      indent if lines.last =~ RUBY_INDENT
    end

    # Handles a <script> or <style> element. Leading and trailing blank lines
    # of +code+ are dropped, the first remaining line's leading spaces are
    # removed from every line, and the content is emitted one level deeper
    # than the tags. Content that is empty or only whitespace produces just
    # the opening and closing tags.
    def foreign_block(opening, code, closing)
      emit opening

      lines = code.split(/\n/)
      # The nil checks stop these loops once every line has been removed.
      lines.shift while lines.first && lines.first.strip.empty?
      lines.pop while lines.last && lines.last.strip.empty?

      unless lines.empty?
        indent
        margin = /^#{lines.first[/^ +/]}/

        whitespace
        lines.each do |line|
          content = line.rstrip.sub(margin, '')
          # Skip emit for blank lines so no indentation is written for them.
          emit content unless content.empty?
          whitespace
        end

        outdent
      end
      emit closing
    end

    # Emits an element that neither opens nor closes a level.
    def standalone_element(e)
      emit e
    end

    # Outdents, then emits a closing element.
    def close_element(e)
      outdent
      emit e
    end

    # Emits an opening element, then indents what follows.
    def open_element(e)
      emit e
      indent
    end

    # Emits text with surrounding whitespace removed, ending the line if the
    # text itself ended in whitespace (\z: end of string, not end of a line).
    def text(t)
      emit(t.strip)
      whitespace if t =~ /\s\z/
    end

    # Beautifies +html+, writing the result to the output given to #new.
    # The input is stripped and hard tabs are replaced by one indentation
    # unit. Patterns are tried in the order listed; the first match wins.
    def scan(html)
      html = html.strip.gsub(/\t/, @tab)
      parser = Parser.new do
        map %r{(<%=?)(.*?)(%>)}m, :embed
        map %r{<!--\[.*?\]>}m, :open_element
        map %r{<!\[.*?\]-->}m, :close_element
        map %r{<!--.*?-->}m, :standalone_element
        map %r{<!.*?>}m, :standalone_element
        map %r{(<script#{ELEMENT_CONTENT}>)(.*?)(</script>)}m, :foreign_block
        map %r{(<style#{ELEMENT_CONTENT}>)(.*?)(</style>)}m, :foreign_block
        map %r{<#{ELEMENT_CONTENT}/>}m, :standalone_element
        map %r{</#{ELEMENT_CONTENT}>}m, :close_element
        map %r{<#{ELEMENT_CONTENT}>}m, :open_element
        map %r{\s+}, :whitespace
        map %r{[^<]+}, :text
      end
      parser.scan(html, self)
    end

  end
end
@@ -0,0 +1,53 @@
1
+ require 'strscan'
2
+
3
module HtmlBeautifier
  # A small regular-expression driven scanner. Patterns are registered with
  # #map (usually inside the block given to #new) and tried in registration
  # order at the current scan position; the first match is dispatched to the
  # receiver as a method call.
  class Parser

    # Installs a class-wide hook that is called with (matched_text, method)
    # before every dispatch. Calling it without a block removes the hook.
    def self.debug_block(&blk)
      @debug_block = blk
    end

    # Invokes the debug hook, if one is installed.
    def self.debug(match, method)
      @debug_block.call(match, method) if defined?(@debug_block) && @debug_block
    end

    # The optional block is evaluated in the context of the new parser, so
    # that it can call #map directly.
    def initialize(&blk)
      @maps = []
      instance_eval(&blk) if block_given?
    end

    # Registers +pattern+ to be dispatched to +method+ on the receiver.
    def map(pattern, method)
      @maps << [pattern, method]
    end

    # Consumes all of +subject+, dispatching each match to +receiver+.
    # Raises RuntimeError if no registered pattern matches at some position.
    def scan(subject, receiver)
      scanner = StringScanner.new(subject)
      dispatch(scanner, receiver) until scanner.eos?
    end

    # Tries each pattern at the scanner's current position and sends the
    # first match to +receiver+. The arguments are the pattern's capture
    # groups, collected up to the first group that is nil, or the whole match
    # when there are none.
    # Raises RuntimeError, quoting the remaining input, when nothing matches.
    def dispatch(scanner, receiver)
      @maps.each do |pattern, method|
        next unless scanner.scan(pattern)

        params = []
        i = 1
        while scanner[i]
          params << scanner[i]
          i += 1
        end
        params = [scanner[0]] if params.empty?

        self.class.debug(scanner[0], method)
        receiver.__send__(method, *params)
        return
      end
      raise "Unmatched sequence #{scanner.rest.inspect}"
    end

  end
end
@@ -0,0 +1,9 @@
1
module HtmlBeautifier #:nodoc:
  # Version of the library: the individual components and their dotted form.
  module VERSION #:nodoc:
    MAJOR, MINOR, TINY = 0, 0, 2

    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
@@ -0,0 +1,19 @@
1
# Helpers shared by the test cases.
module HtmlBeautifierTestUtilities

  # Normalises a multi-line fixture: drops one leading newline and the
  # trailing newline plus any whitespace after it, then removes the first
  # line's leading run of spaces from the start of every line.
  def code(str)
    trimmed = str.gsub(/\A\n|\n\s*\Z/, '')
    first_indent = trimmed[/\A +/]
    margin = /^#{first_indent}/
    trimmed.split(/\n/).map { |line| line.sub(margin, '') }.join("\n")
  end

  # Runs +source+ through a Beautifier writing into a fresh string and
  # asserts that the result equals +expected+.
  def assert_beautifies(expected, source)
    actual = ''
    HtmlBeautifier::Beautifier.new(actual).scan(source)
    # To debug a failure, print both values here:
    # puts expected
    # puts actual
    assert_equal expected, actual
  end

end
@@ -0,0 +1,3 @@
1
+ require 'test/unit'
2
+ require File.dirname(__FILE__) + '/html_beautifier_test_utilities'
3
+ require File.dirname(__FILE__) + '/../lib/htmlbeautifier'
@@ -0,0 +1,82 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class TestHtmlBeautifierIntegration < Test::Unit::TestCase
4
+ include HtmlBeautifierTestUtilities
5
+
6
+ def test_should_correctly_indent_mixed_document
7
+ source = code(%q(
8
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
9
+ <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
10
+ <head>
11
+ <meta http-equiv="content-type" content="text/html; charset=utf-8" />
12
+ <script src="/javascripts/prototype.js" type="text/javascript"></script>
13
+ <link rel="stylesheet" type="text/css" href="/stylesheets/screen.css" media="screen"/>
14
+ <!--[if IE 6]>
15
+ <link rel="stylesheet" href="/stylesheets/screen_ie6.css" type="text/css" />
16
+ <![endif]-->
17
+ <title>Title Goes Here</title>
18
+ <script type="text/javascript" charset="utf-8">
19
+ doSomething();
20
+ </script>
21
+ </head>
22
+ <body>
23
+ <div id="something">
24
+ <h1>
25
+ Heading 1
26
+ </h1>
27
+ </div>
28
+ <div id="somethingElse">
29
+ <p>Lorem Ipsum</p>
30
+ <% if @x %>
31
+ <% @ys.each do |y| %>
32
+ <p>
33
+ <%= h y %>
34
+ </p>
35
+ <% end %>
36
+ <% elsif @z %>
37
+ <hr />
38
+ <% end %>
39
+ </div>
40
+ </body>
41
+ </html>
42
+ ))
43
+ expected = code(%q(
44
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
45
+ <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
46
+ <head>
47
+ <meta http-equiv="content-type" content="text/html; charset=utf-8" />
48
+ <script src="/javascripts/prototype.js" type="text/javascript"></script>
49
+ <link rel="stylesheet" type="text/css" href="/stylesheets/screen.css" media="screen"/>
50
+ <!--[if IE 6]>
51
+ <link rel="stylesheet" href="/stylesheets/screen_ie6.css" type="text/css" />
52
+ <![endif]-->
53
+ <title>Title Goes Here</title>
54
+ <script type="text/javascript" charset="utf-8">
55
+ doSomething();
56
+ </script>
57
+ </head>
58
+ <body>
59
+ <div id="something">
60
+ <h1>
61
+ Heading 1
62
+ </h1>
63
+ </div>
64
+ <div id="somethingElse">
65
+ <p>Lorem Ipsum</p>
66
+ <% if @x %>
67
+ <% @ys.each do |y| %>
68
+ <p>
69
+ <%= h y %>
70
+ </p>
71
+ <% end %>
72
+ <% elsif @z %>
73
+ <hr />
74
+ <% end %>
75
+ </div>
76
+ </body>
77
+ </html>
78
+ ))
79
+ assert_beautifies expected, source
80
+ end
81
+
82
+ end
@@ -0,0 +1,181 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+ require 'htmlbeautifier/beautifier'
3
+
4
+ class HtmlBeautifierRegressionTest < Test::Unit::TestCase
5
+
6
+ include HtmlBeautifierTestUtilities
7
+
8
+ def setup
9
+ # HtmlBeautifier::Parser.debug_block{ |match, method| puts("#{match.inspect} => #{method}") }
10
+ end
11
+
12
+ def test_should_ignore_html_fragments_in_embedded_code
13
+ source = code(%q(
14
+ <div>
15
+ <%= a[:b].gsub("\n","<br />\n") %>
16
+ </div>
17
+ ))
18
+ expected = code(%q(
19
+ <div>
20
+ <%= a[:b].gsub("\n","<br />\n") %>
21
+ </div>
22
+ ))
23
+ assert_beautifies expected, source
24
+ end
25
+
26
+ def test_should_indent_scripts
27
+ source = code(%q(
28
+ <script>
29
+ function(f) {
30
+ g();
31
+ return 42;
32
+ }
33
+ </script>
34
+ ))
35
+ expected = code(%q(
36
+ <script>
37
+ function(f) {
38
+ g();
39
+ return 42;
40
+ }
41
+ </script>
42
+ ))
43
+ assert_beautifies expected, source
44
+ end
45
+
46
+ def test_should_remove_blank_lines_around_scripts
47
+ source = code(%q(
48
+ <script>
49
+
50
+ f();
51
+
52
+ </script>
53
+ ))
54
+ expected = code(%q(
55
+ <script>
56
+ f();
57
+ </script>
58
+ ))
59
+ assert_beautifies expected, source
60
+ end
61
+
62
+ def test_should_remove_trailing_space_from_script_lines
63
+ source = code(%q(
64
+ <script>
65
+ f();
66
+ </script>
67
+ ))
68
+ expected = code(%q(
69
+ <script>
70
+ f();
71
+ </script>
72
+ ))
73
+ assert_beautifies expected, source
74
+ end
75
+
76
+ def test_should_skip_over_empty_scripts
77
+ source = %q(<script src="/foo.js" type="text/javascript" charset="utf-8"></script>)
78
+ expected = source
79
+ assert_beautifies expected, source
80
+ end
81
+
82
+ def test_should_indent_styles
83
+ source = code(%q(
84
+ <style>
85
+ .foo{ margin: 0; }
86
+ .bar{
87
+ padding: 0;
88
+ margin: 0;
89
+ }
90
+ </style>
91
+ ))
92
+ expected = code(%q(
93
+ <style>
94
+ .foo{ margin: 0; }
95
+ .bar{
96
+ padding: 0;
97
+ margin: 0;
98
+ }
99
+ </style>
100
+ ))
101
+ assert_beautifies expected, source
102
+ end
103
+
104
+ def test_should_remove_blank_lines_around_styles
105
+ source = code(%q(
106
+ <style>
107
+
108
+ .foo{ margin: 0; }
109
+
110
+ </style>
111
+ ))
112
+ expected = code(%q(
113
+ <style>
114
+ .foo{ margin: 0; }
115
+ </style>
116
+ ))
117
+ assert_beautifies expected, source
118
+ end
119
+
120
+ def test_should_remove_trailing_space_from_style_lines
121
+ source = code(%q(
122
+ <style>
123
+ .foo{ margin: 0; }
124
+ </style>
125
+ ))
126
+ expected = code(%q(
127
+ <style>
128
+ .foo{ margin: 0; }
129
+ </style>
130
+ ))
131
+ assert_beautifies expected, source
132
+ end
133
+
134
+ def test_should_indent_divs_containing_standalone_elements
135
+ source = code(%q(
136
+ <div>
137
+ <div>
138
+ <img src="foo" alt="" />
139
+ </div>
140
+ <div>
141
+ <img src="foo" alt="" />
142
+ </div>
143
+ </div>
144
+ ))
145
+ expected = source
146
+ assert_beautifies expected, source
147
+ end
148
+
149
+ def test_should_not_break_line_on_embedded_code_within_script_opening_element
150
+ source = '<script src="<%= path %>" type="text/javascript"></script>'
151
+ expected = source
152
+ assert_beautifies expected, source
153
+ end
154
+
155
+ def test_should_not_break_line_on_embedded_code_within_normal_element
156
+ source = '<img src="<%= path %>" alt="foo" />'
157
+ expected = source
158
+ assert_beautifies expected, source
159
+ end
160
+
161
+ def test_should_indent_inside_IE_conditional_comments
162
+ source = code(%q(
163
+ <!--[if IE 6]>
164
+ <link rel="stylesheet" href="/stylesheets/ie6.css" type="text/css" />
165
+ <![endif]-->
166
+ <!--[if IE 5]>
167
+ <link rel="stylesheet" href="/stylesheets/ie5.css" type="text/css" />
168
+ <![endif]-->
169
+ ))
170
+ expected = code(%q(
171
+ <!--[if IE 6]>
172
+ <link rel="stylesheet" href="/stylesheets/ie6.css" type="text/css" />
173
+ <![endif]-->
174
+ <!--[if IE 5]>
175
+ <link rel="stylesheet" href="/stylesheets/ie5.css" type="text/css" />
176
+ <![endif]-->
177
+ ))
178
+ assert_beautifies expected, source
179
+ end
180
+
181
+ end
@@ -0,0 +1,42 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+ require 'htmlbeautifier/parser'
3
+
4
# Unit tests for HtmlBeautifier::Parser.
class TestParser < Test::Unit::TestCase

  # Test double that records every message sent to it with its arguments.
  class Receiver
    attr_reader :sequence

    def initialize
      @sequence = []
    end

    def method_missing(method, *params)
      @sequence << [method, params]
    end
  end

  def setup
    # Uncomment to trace every dispatch while debugging:
    # HtmlBeautifier::Parser.debug_block{ |match, method| puts("#{match.inspect} => #{method}") }
  end

  # Each match is sent to the method mapped to the first pattern that matches.
  def test_should_dispatch_matching_sequence
    receiver = Receiver.new
    parser = HtmlBeautifier::Parser.new do
      map %r{foo}, :foo
      map %r{bar\s*}, :bar
      map %r{\s+}, :whitespace
    end

    parser.scan('foo bar ', receiver)

    expected = [
      [:foo, ['foo']],
      [:whitespace, [' ']],
      [:bar, ['bar ']]
    ]
    assert_equal expected, receiver.sequence
  end

  # Capture groups become the arguments of the dispatched call.
  def test_should_send_parenthesized_components_as_separate_parameters
    receiver = Receiver.new
    parser = HtmlBeautifier::Parser.new do
      map %r{(foo)\((.*?)\)}, :foo
    end

    parser.scan('foo(bar)', receiver)

    expected = [[:foo, ['foo', 'bar']]]
    assert_equal expected, receiver.sequence
  end

end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: htmlbeautifier
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Paul Battley
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-11 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: pbattley@gmail.com
18
+ executables:
19
+ - htmlbeautifier
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.txt
24
+ files:
25
+ - Rakefile
26
+ - README.txt
27
+ - bin/htmlbeautifier
28
+ - test/test_html_beautifier_regression.rb
29
+ - test/test_parser.rb
30
+ - test/test_helper.rb
31
+ - test/test_html_beautifier_integration.rb
32
+ - test/html_beautifier_test_utilities.rb
33
+ - lib/htmlbeautifier/parser.rb
34
+ - lib/htmlbeautifier/beautifier.rb
35
+ - lib/htmlbeautifier/version.rb
36
+ - lib/htmlbeautifier.rb
37
+ has_rdoc: true
38
+ homepage: http://github.com/threedaymonk/htmlbeautifier
39
+ licenses: []
40
+
41
+ post_install_message:
42
+ rdoc_options:
43
+ - --main
44
+ - README.txt
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: "0"
52
+ version:
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: "0"
58
+ version:
59
+ requirements: []
60
+
61
+ rubyforge_project:
62
+ rubygems_version: 1.3.5
63
+ signing_key:
64
+ specification_version: 3
65
+ summary: A normaliser/beautifier for HTML that also understands embedded Ruby.
66
+ test_files: []
67
+