htmlbeautifier 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/bin/htmlbeautifier +1 -1
- data/lib/htmlbeautifier.rb +2 -5
- data/lib/htmlbeautifier/beautifier.rb +10 -102
- data/lib/htmlbeautifier/builder.rb +92 -0
- data/lib/htmlbeautifier/html_parser.rb +24 -0
- data/lib/htmlbeautifier/parser.rb +21 -13
- data/lib/htmlbeautifier/version.rb +1 -1
- data/test/test_helper.rb +18 -2
- data/test/test_html_beautifier_integration.rb +13 -1
- data/test/test_html_beautifier_regression.rb +1 -1
- data/test/test_parser.rb +47 -7
- metadata +27 -35
- data/test/html_beautifier_test_utilities.rb +0 -17
data/Rakefile
CHANGED
data/bin/htmlbeautifier
CHANGED
data/lib/htmlbeautifier.rb
CHANGED
@@ -1,115 +1,23 @@
|
|
1
|
-
require 'htmlbeautifier/parser'
|
1
|
+
require 'htmlbeautifier/html_parser'
|
2
|
+
require 'htmlbeautifier/builder'
|
2
3
|
|
3
4
|
module HtmlBeautifier
|
4
5
|
class Beautifier
|
6
|
+
attr_accessor :tab_stops
|
5
7
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
}x
|
10
|
-
RUBY_OUTDENT =
|
11
|
-
%r{ ^ ( end | elsif | else |\} ) \b
|
12
|
-
}x
|
13
|
-
ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx
|
14
|
-
|
8
|
+
# Create a new Beautifier.
|
9
|
+
# output should be an object that responds to <<
|
10
|
+
# i.e. a String or an IO
|
15
11
|
def initialize(output)
|
16
|
-
@level = 0
|
17
|
-
@new_line = true
|
18
12
|
self.tab_stops = 2
|
19
13
|
@output = output
|
20
14
|
end
|
21
15
|
|
22
|
-
|
23
|
-
|
24
|
-
end
|
25
|
-
|
26
|
-
def indent
|
27
|
-
@level += 1
|
28
|
-
end
|
29
|
-
|
30
|
-
def outdent
|
31
|
-
@level -= 1
|
32
|
-
raise "Outdented too far" if @level < 0
|
33
|
-
end
|
34
|
-
|
35
|
-
def emit(s)
|
36
|
-
if (@new_line)
|
37
|
-
@output << (@tab * @level)
|
38
|
-
end
|
39
|
-
@output << s
|
40
|
-
@new_line = false
|
41
|
-
end
|
42
|
-
|
43
|
-
def whitespace(*x)
|
44
|
-
emit "\n"
|
45
|
-
@new_line = true
|
46
|
-
end
|
47
|
-
|
48
|
-
def embed(opening, code, closing)
|
49
|
-
lines = code.split(/\n/).map{ |l| l.strip }
|
50
|
-
outdent if lines.first =~ RUBY_OUTDENT
|
51
|
-
emit opening + code + closing
|
52
|
-
indent if lines.last =~ RUBY_INDENT
|
53
|
-
end
|
54
|
-
|
55
|
-
def foreign_block(opening, code, closing)
|
56
|
-
emit opening
|
57
|
-
unless code.empty?
|
58
|
-
indent
|
59
|
-
|
60
|
-
lines = code.split(/\n/)
|
61
|
-
lines.shift while lines.first.strip.empty?
|
62
|
-
lines.pop while lines.last.strip.empty?
|
63
|
-
indentation = lines.first[/^ +/]
|
64
|
-
|
65
|
-
whitespace
|
66
|
-
lines.each do |line|
|
67
|
-
emit line.rstrip.sub(/^#{indentation}/, '')
|
68
|
-
whitespace
|
69
|
-
end
|
70
|
-
|
71
|
-
outdent
|
72
|
-
end
|
73
|
-
emit closing
|
74
|
-
end
|
75
|
-
|
76
|
-
def standalone_element(e)
|
77
|
-
emit e
|
78
|
-
end
|
79
|
-
|
80
|
-
def close_element(e)
|
81
|
-
outdent
|
82
|
-
emit e
|
83
|
-
end
|
84
|
-
|
85
|
-
def open_element(e)
|
86
|
-
emit e
|
87
|
-
indent
|
88
|
-
end
|
89
|
-
|
90
|
-
def text(t)
|
91
|
-
emit(t.strip)
|
92
|
-
whitespace if t =~ /\s$/
|
93
|
-
end
|
94
|
-
|
16
|
+
# Process an HTML/HTML+ERB document
|
17
|
+
# html should be a string
|
95
18
|
def scan(html)
|
96
|
-
|
97
|
-
parser
|
98
|
-
map %r{(<%-?=?)(.*?)(-?%>)}m, :embed
|
99
|
-
map %r{<!--\[.*?\]>}m, :open_element
|
100
|
-
map %r{<!\[.*?\]-->}m, :close_element
|
101
|
-
map %r{<!--.*?-->}m, :standalone_element
|
102
|
-
map %r{<!.*?>}m, :standalone_element
|
103
|
-
map %r{(<script#{ELEMENT_CONTENT}>)(.*?)(</script>)}m, :foreign_block
|
104
|
-
map %r{(<style#{ELEMENT_CONTENT}>)(.*?)(</style>)}m, :foreign_block
|
105
|
-
map %r{<#{ELEMENT_CONTENT}/>}m, :standalone_element
|
106
|
-
map %r{</#{ELEMENT_CONTENT}>}m, :close_element
|
107
|
-
map %r{<#{ELEMENT_CONTENT}>}m, :open_element
|
108
|
-
map %r{\s+}, :whitespace
|
109
|
-
map %r{[^<]+}, :text
|
110
|
-
end
|
111
|
-
parser.scan(html, self)
|
19
|
+
@parser = HtmlParser.new
|
20
|
+
@parser.scan html.strip, Builder.new(@output, self.tab_stops)
|
112
21
|
end
|
113
|
-
|
114
22
|
end
|
115
23
|
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'htmlbeautifier/parser'
|
2
|
+
|
3
|
+
module HtmlBeautifier
|
4
|
+
class Builder
|
5
|
+
|
6
|
+
RUBY_INDENT =
|
7
|
+
%r{ ^ ( if | unless | while | begin | elsif | else )\b
|
8
|
+
| \b ( do | \{ ) ( \s* \| [^\|]+ \| )? $
|
9
|
+
}x
|
10
|
+
RUBY_OUTDENT =
|
11
|
+
%r{ ^ ( end | elsif | else |\} ) \b
|
12
|
+
}x
|
13
|
+
ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx
|
14
|
+
|
15
|
+
def initialize(output, tab_stops)
|
16
|
+
@level = 0
|
17
|
+
@new_line = true
|
18
|
+
@tab = ' ' * tab_stops
|
19
|
+
@output = output
|
20
|
+
end
|
21
|
+
|
22
|
+
def indent
|
23
|
+
@level += 1
|
24
|
+
end
|
25
|
+
|
26
|
+
def outdent
|
27
|
+
@level -= 1
|
28
|
+
raise "Outdented too far" if @level < 0
|
29
|
+
end
|
30
|
+
|
31
|
+
def emit(s)
|
32
|
+
if (@new_line)
|
33
|
+
@output << (@tab * @level)
|
34
|
+
end
|
35
|
+
@output << s
|
36
|
+
@new_line = false
|
37
|
+
end
|
38
|
+
|
39
|
+
def whitespace(*x)
|
40
|
+
emit "\n"
|
41
|
+
@new_line = true
|
42
|
+
end
|
43
|
+
|
44
|
+
def embed(opening, code, closing)
|
45
|
+
lines = code.split(/\n/).map{ |l| l.strip }
|
46
|
+
outdent if lines.first =~ RUBY_OUTDENT
|
47
|
+
emit opening + code + closing
|
48
|
+
indent if lines.last =~ RUBY_INDENT
|
49
|
+
end
|
50
|
+
|
51
|
+
def foreign_block(opening, code, closing)
|
52
|
+
emit opening
|
53
|
+
unless code.empty?
|
54
|
+
indent
|
55
|
+
|
56
|
+
lines = code.split(/\n/)
|
57
|
+
lines.shift while lines.first.strip.empty?
|
58
|
+
lines.pop while lines.last.strip.empty?
|
59
|
+
indentation = lines.first[/^ +/]
|
60
|
+
|
61
|
+
whitespace
|
62
|
+
lines.each do |line|
|
63
|
+
emit line.rstrip.sub(/^#{indentation}/, '')
|
64
|
+
whitespace
|
65
|
+
end
|
66
|
+
|
67
|
+
outdent
|
68
|
+
end
|
69
|
+
emit closing
|
70
|
+
end
|
71
|
+
|
72
|
+
def standalone_element(e)
|
73
|
+
emit e
|
74
|
+
end
|
75
|
+
|
76
|
+
def close_element(e)
|
77
|
+
outdent
|
78
|
+
emit e
|
79
|
+
end
|
80
|
+
|
81
|
+
def open_element(e)
|
82
|
+
emit e
|
83
|
+
indent
|
84
|
+
end
|
85
|
+
|
86
|
+
def text(t)
|
87
|
+
emit(t.strip)
|
88
|
+
whitespace if t =~ /\s$/
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'htmlbeautifier/parser'
|
2
|
+
|
3
|
+
module HtmlBeautifier
|
4
|
+
class HtmlParser < Parser
|
5
|
+
ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx
|
6
|
+
|
7
|
+
def initialize
|
8
|
+
super do |p|
|
9
|
+
p.map %r{(<%-?=?)(.*?)(-?%>)}m, :embed
|
10
|
+
p.map %r{<!--\[.*?\]>}m, :open_element
|
11
|
+
p.map %r{<!\[.*?\]-->}m, :close_element
|
12
|
+
p.map %r{<!--.*?-->}m, :standalone_element
|
13
|
+
p.map %r{<!.*?>}m, :standalone_element
|
14
|
+
p.map %r{(<script#{ELEMENT_CONTENT}>)(.*?)(</script>)}m, :foreign_block
|
15
|
+
p.map %r{(<style#{ELEMENT_CONTENT}>)(.*?)(</style>)}m, :foreign_block
|
16
|
+
p.map %r{<#{ELEMENT_CONTENT}/>}m, :standalone_element
|
17
|
+
p.map %r{</#{ELEMENT_CONTENT}>}m, :close_element
|
18
|
+
p.map %r{<#{ELEMENT_CONTENT}>}m, :open_element
|
19
|
+
p.map %r{\s+}, :whitespace
|
20
|
+
p.map %r{[^<]+}, :text
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -15,9 +15,7 @@ module HtmlBeautifier
|
|
15
15
|
|
16
16
|
def initialize(&blk)
|
17
17
|
@maps = []
|
18
|
-
if block_given?
|
19
|
-
self.instance_eval(&blk)
|
20
|
-
end
|
18
|
+
yield self if block_given?
|
21
19
|
end
|
22
20
|
|
23
21
|
def map(pattern, method)
|
@@ -25,29 +23,39 @@ module HtmlBeautifier
|
|
25
23
|
end
|
26
24
|
|
27
25
|
def scan(subject, receiver)
|
28
|
-
scanner = StringScanner.new(subject)
|
29
|
-
until scanner.eos?
|
30
|
-
dispatch(scanner, receiver)
|
26
|
+
@scanner = StringScanner.new(subject)
|
27
|
+
until @scanner.eos?
|
28
|
+
dispatch(receiver)
|
31
29
|
end
|
32
30
|
end
|
33
31
|
|
34
|
-
def dispatch(scanner, receiver)
|
32
|
+
def source_so_far
|
33
|
+
@scanner.string[0...@scanner.pos]
|
34
|
+
end
|
35
|
+
|
36
|
+
def source_line_number
|
37
|
+
[source_so_far.chomp.split(/\n/).count, 1].max
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
def dispatch(receiver)
|
35
42
|
@maps.each do |pattern, method|
|
36
|
-
if scanner.scan(pattern)
|
43
|
+
if @scanner.scan(pattern)
|
37
44
|
params = []
|
38
45
|
i = 1
|
39
|
-
while scanner[i]
|
40
|
-
params << scanner[i]
|
46
|
+
while @scanner[i]
|
47
|
+
params << @scanner[i]
|
41
48
|
i += 1
|
42
49
|
end
|
43
|
-
params = [scanner[0]] if params.empty?
|
44
|
-
self.class.debug(scanner[0], method)
|
50
|
+
params = [@scanner[0]] if params.empty?
|
51
|
+
self.class.debug(@scanner[0], method)
|
45
52
|
receiver.__send__(method, *params)
|
46
53
|
return
|
47
54
|
end
|
48
55
|
end
|
49
56
|
raise "Unmatched sequence #{match.inspect}"
|
57
|
+
rescue => ex
|
58
|
+
raise "#{ex.message} on line #{source_line_number}"
|
50
59
|
end
|
51
|
-
|
52
60
|
end
|
53
61
|
end
|
data/test/test_helper.rb
CHANGED
@@ -1,3 +1,19 @@
|
|
1
1
|
require 'test/unit'
|
2
|
-
|
3
|
-
|
2
|
+
lib = File.expand_path('../../lib', __FILE__)
|
3
|
+
$:.unshift lib unless $:.include?(lib)
|
4
|
+
|
5
|
+
module HtmlBeautifierTestUtilities
|
6
|
+
def code(str)
|
7
|
+
str = str.gsub(/\A\n|\n\s*\Z/, '')
|
8
|
+
indentation = str[/\A +/]
|
9
|
+
lines = str.split(/\n/)
|
10
|
+
lines.map{ |line| line.sub(/^#{indentation}/, '') }.join("\n")
|
11
|
+
end
|
12
|
+
|
13
|
+
def assert_beautifies(expected, source)
|
14
|
+
actual = ''
|
15
|
+
beautifier = HtmlBeautifier::Beautifier.new(actual)
|
16
|
+
beautifier.scan(source)
|
17
|
+
assert expected == actual, "Expected:\n#{expected}\nbut was:\n#{actual}"
|
18
|
+
end
|
19
|
+
end
|
@@ -1,4 +1,5 @@
|
|
1
|
-
require
|
1
|
+
require 'test_helper'
|
2
|
+
require 'htmlbeautifier/beautifier'
|
2
3
|
|
3
4
|
class TestHtmlBeautifierIntegration < Test::Unit::TestCase
|
4
5
|
include HtmlBeautifierTestUtilities
|
@@ -79,4 +80,15 @@ class TestHtmlBeautifierIntegration < Test::Unit::TestCase
|
|
79
80
|
assert_beautifies expected, source
|
80
81
|
end
|
81
82
|
|
83
|
+
def test_should_raise_an_error_with_the_source_line_of_an_illegal_outdent
|
84
|
+
begin
|
85
|
+
HtmlBeautifier::Beautifier.new('').scan("<html>\n</html>\n</html>")
|
86
|
+
rescue Exception => e
|
87
|
+
@exception = e
|
88
|
+
end
|
89
|
+
assert_equal RuntimeError, @exception.class
|
90
|
+
assert_match /outdent/i, @exception.message
|
91
|
+
assert_match /line 3/i, @exception.message
|
92
|
+
end
|
93
|
+
|
82
94
|
end
|
data/test/test_parser.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require 'test_helper'
|
2
2
|
require 'htmlbeautifier/parser'
|
3
3
|
|
4
4
|
class TestParser < Test::Unit::TestCase
|
@@ -21,10 +21,10 @@ class TestParser < Test::Unit::TestCase
|
|
21
21
|
|
22
22
|
def test_should_dispatch_matching_sequence
|
23
23
|
receiver = Receiver.new
|
24
|
-
parser = HtmlBeautifier::Parser.new{
|
25
|
-
map %r{foo}, :foo
|
26
|
-
map %r{bar\s*}, :bar
|
27
|
-
map %r{\s+}, :whitespace
|
24
|
+
parser = HtmlBeautifier::Parser.new { |p|
|
25
|
+
p.map %r{foo}, :foo
|
26
|
+
p.map %r{bar\s*}, :bar
|
27
|
+
p.map %r{\s+}, :whitespace
|
28
28
|
}
|
29
29
|
parser.scan('foo bar ', receiver)
|
30
30
|
assert_equal [[:foo, ['foo']], [:whitespace, [' ']], [:bar, ['bar ']]], receiver.sequence
|
@@ -32,11 +32,51 @@ class TestParser < Test::Unit::TestCase
|
|
32
32
|
|
33
33
|
def test_should_send_parenthesized_components_as_separate_parameters
|
34
34
|
receiver = Receiver.new
|
35
|
-
parser = HtmlBeautifier::Parser.new{
|
36
|
-
map %r{(foo)\((.*?)\)}, :foo
|
35
|
+
parser = HtmlBeautifier::Parser.new { |p|
|
36
|
+
p.map %r{(foo)\((.*?)\)}, :foo
|
37
37
|
}
|
38
38
|
parser.scan('foo(bar)', receiver)
|
39
39
|
assert_equal [[:foo, ['foo', 'bar']]], receiver.sequence
|
40
40
|
end
|
41
41
|
|
42
|
+
class SourceTrackingReceiver < Receiver
|
43
|
+
attr_reader :sources_so_far
|
44
|
+
attr_reader :source_line_numbers
|
45
|
+
|
46
|
+
def initialize(parser)
|
47
|
+
@sources_so_far = []
|
48
|
+
@source_line_numbers = []
|
49
|
+
@parser = parser
|
50
|
+
super()
|
51
|
+
end
|
52
|
+
|
53
|
+
def append_new_source_so_far(*ignored)
|
54
|
+
@sources_so_far << @parser.source_so_far
|
55
|
+
end
|
56
|
+
|
57
|
+
def append_new_source_line_number(*ignored)
|
58
|
+
@source_line_numbers << @parser.source_line_number
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_should_give_source_so_far
|
63
|
+
parser = HtmlBeautifier::Parser.new { |p|
|
64
|
+
p.map %r{(M+)}m, :append_new_source_so_far
|
65
|
+
p.map %r{([\s\n]+)}m, :space_or_newline
|
66
|
+
}
|
67
|
+
receiver = SourceTrackingReceiver.new(parser)
|
68
|
+
parser.scan("M MM MMM", receiver)
|
69
|
+
assert_equal ['M', 'M MM', 'M MM MMM'], receiver.sources_so_far
|
70
|
+
end
|
71
|
+
|
72
|
+
def test_should_give_source_line_number
|
73
|
+
parser = HtmlBeautifier::Parser.new{ |p|
|
74
|
+
p.map %r{(M+)}m, :append_new_source_line_number
|
75
|
+
p.map %r{([\s\n]+)}m, :space_or_newline
|
76
|
+
}
|
77
|
+
receiver = SourceTrackingReceiver.new(parser)
|
78
|
+
parser.scan("M \n\nMM\nMMM", receiver)
|
79
|
+
assert_equal [1, 3, 4], receiver.source_line_numbers
|
80
|
+
end
|
81
|
+
|
42
82
|
end
|
metadata
CHANGED
@@ -1,66 +1,58 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: htmlbeautifier
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.7
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
|
-
authors:
|
7
|
+
authors:
|
7
8
|
- Paul Battley
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
|
12
|
-
date: 2010-07-02 00:00:00 +01:00
|
13
|
-
default_executable:
|
12
|
+
date: 2012-07-10 00:00:00.000000000 Z
|
14
13
|
dependencies: []
|
15
|
-
|
16
14
|
description:
|
17
15
|
email: pbattley@gmail.com
|
18
|
-
executables:
|
16
|
+
executables:
|
19
17
|
- htmlbeautifier
|
20
18
|
extensions: []
|
21
|
-
|
22
19
|
extra_rdoc_files: []
|
23
|
-
|
24
|
-
files:
|
20
|
+
files:
|
25
21
|
- Rakefile
|
26
22
|
- README.md
|
27
23
|
- bin/htmlbeautifier
|
28
|
-
- test/html_beautifier_test_utilities.rb
|
29
|
-
- test/test_helper.rb
|
30
24
|
- test/test_html_beautifier_integration.rb
|
31
|
-
- test/test_html_beautifier_regression.rb
|
25
|
+
- test/test_helper.rb
|
32
26
|
- test/test_parser.rb
|
27
|
+
- test/test_html_beautifier_regression.rb
|
28
|
+
- lib/htmlbeautifier/html_parser.rb
|
29
|
+
- lib/htmlbeautifier/builder.rb
|
33
30
|
- lib/htmlbeautifier/beautifier.rb
|
34
|
-
- lib/htmlbeautifier/parser.rb
|
35
31
|
- lib/htmlbeautifier/version.rb
|
32
|
+
- lib/htmlbeautifier/parser.rb
|
36
33
|
- lib/htmlbeautifier.rb
|
37
|
-
has_rdoc: true
|
38
34
|
homepage: http://github.com/threedaymonk/htmlbeautifier
|
39
35
|
licenses: []
|
40
|
-
|
41
36
|
post_install_message:
|
42
37
|
rdoc_options: []
|
43
|
-
|
44
|
-
require_paths:
|
38
|
+
require_paths:
|
45
39
|
- lib
|
46
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ! '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
58
52
|
requirements: []
|
59
|
-
|
60
53
|
rubyforge_project:
|
61
|
-
rubygems_version: 1.
|
54
|
+
rubygems_version: 1.8.23
|
62
55
|
signing_key:
|
63
56
|
specification_version: 3
|
64
57
|
summary: A normaliser/beautifier for HTML that also understands embedded Ruby.
|
65
58
|
test_files: []
|
66
|
-
|
@@ -1,17 +0,0 @@
|
|
1
|
-
module HtmlBeautifierTestUtilities
|
2
|
-
|
3
|
-
def code(str)
|
4
|
-
str = str.gsub(/\A\n|\n\s*\Z/, '')
|
5
|
-
indentation = str[/\A +/]
|
6
|
-
lines = str.split(/\n/)
|
7
|
-
lines.map{ |line| line.sub(/^#{indentation}/, '') }.join("\n")
|
8
|
-
end
|
9
|
-
|
10
|
-
def assert_beautifies(expected, source)
|
11
|
-
actual = ''
|
12
|
-
beautifier = HtmlBeautifier::Beautifier.new(actual)
|
13
|
-
beautifier.scan(source)
|
14
|
-
assert expected == actual, "Expected:\n#{expected}\nbut was:\n#{actual}"
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|