htmlbeautifier 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/bin/htmlbeautifier +1 -1
- data/lib/htmlbeautifier.rb +2 -5
- data/lib/htmlbeautifier/beautifier.rb +10 -102
- data/lib/htmlbeautifier/builder.rb +92 -0
- data/lib/htmlbeautifier/html_parser.rb +24 -0
- data/lib/htmlbeautifier/parser.rb +21 -13
- data/lib/htmlbeautifier/version.rb +1 -1
- data/test/test_helper.rb +18 -2
- data/test/test_html_beautifier_integration.rb +13 -1
- data/test/test_html_beautifier_regression.rb +1 -1
- data/test/test_parser.rb +47 -7
- metadata +27 -35
- data/test/html_beautifier_test_utilities.rb +0 -17
data/Rakefile
CHANGED
data/bin/htmlbeautifier
CHANGED
data/lib/htmlbeautifier.rb
CHANGED
@@ -1,115 +1,23 @@
|
|
1
|
-
require 'htmlbeautifier/
|
1
|
+
require 'htmlbeautifier/html_parser'
|
2
|
+
require 'htmlbeautifier/builder'
|
2
3
|
|
3
4
|
module HtmlBeautifier
|
4
5
|
class Beautifier
|
6
|
+
attr_accessor :tab_stops
|
5
7
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
}x
|
10
|
-
RUBY_OUTDENT =
|
11
|
-
%r{ ^ ( end | elsif | else |\} ) \b
|
12
|
-
}x
|
13
|
-
ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx
|
14
|
-
|
8
|
+
# Create a new Beautifier.
|
9
|
+
# output should be an object that responds to <<
|
10
|
+
# i.e. a String or an IO
|
15
11
|
def initialize(output)
|
16
|
-
@level = 0
|
17
|
-
@new_line = true
|
18
12
|
self.tab_stops = 2
|
19
13
|
@output = output
|
20
14
|
end
|
21
15
|
|
22
|
-
|
23
|
-
|
24
|
-
end
|
25
|
-
|
26
|
-
def indent
|
27
|
-
@level += 1
|
28
|
-
end
|
29
|
-
|
30
|
-
def outdent
|
31
|
-
@level -= 1
|
32
|
-
raise "Outdented too far" if @level < 0
|
33
|
-
end
|
34
|
-
|
35
|
-
def emit(s)
|
36
|
-
if (@new_line)
|
37
|
-
@output << (@tab * @level)
|
38
|
-
end
|
39
|
-
@output << s
|
40
|
-
@new_line = false
|
41
|
-
end
|
42
|
-
|
43
|
-
def whitespace(*x)
|
44
|
-
emit "\n"
|
45
|
-
@new_line = true
|
46
|
-
end
|
47
|
-
|
48
|
-
def embed(opening, code, closing)
|
49
|
-
lines = code.split(/\n/).map{ |l| l.strip }
|
50
|
-
outdent if lines.first =~ RUBY_OUTDENT
|
51
|
-
emit opening + code + closing
|
52
|
-
indent if lines.last =~ RUBY_INDENT
|
53
|
-
end
|
54
|
-
|
55
|
-
def foreign_block(opening, code, closing)
|
56
|
-
emit opening
|
57
|
-
unless code.empty?
|
58
|
-
indent
|
59
|
-
|
60
|
-
lines = code.split(/\n/)
|
61
|
-
lines.shift while lines.first.strip.empty?
|
62
|
-
lines.pop while lines.last.strip.empty?
|
63
|
-
indentation = lines.first[/^ +/]
|
64
|
-
|
65
|
-
whitespace
|
66
|
-
lines.each do |line|
|
67
|
-
emit line.rstrip.sub(/^#{indentation}/, '')
|
68
|
-
whitespace
|
69
|
-
end
|
70
|
-
|
71
|
-
outdent
|
72
|
-
end
|
73
|
-
emit closing
|
74
|
-
end
|
75
|
-
|
76
|
-
def standalone_element(e)
|
77
|
-
emit e
|
78
|
-
end
|
79
|
-
|
80
|
-
def close_element(e)
|
81
|
-
outdent
|
82
|
-
emit e
|
83
|
-
end
|
84
|
-
|
85
|
-
def open_element(e)
|
86
|
-
emit e
|
87
|
-
indent
|
88
|
-
end
|
89
|
-
|
90
|
-
def text(t)
|
91
|
-
emit(t.strip)
|
92
|
-
whitespace if t =~ /\s$/
|
93
|
-
end
|
94
|
-
|
16
|
+
# Process an HTML/HTML+ERB document
|
17
|
+
# html should be a string
|
95
18
|
def scan(html)
|
96
|
-
|
97
|
-
parser
|
98
|
-
map %r{(<%-?=?)(.*?)(-?%>)}m, :embed
|
99
|
-
map %r{<!--\[.*?\]>}m, :open_element
|
100
|
-
map %r{<!\[.*?\]-->}m, :close_element
|
101
|
-
map %r{<!--.*?-->}m, :standalone_element
|
102
|
-
map %r{<!.*?>}m, :standalone_element
|
103
|
-
map %r{(<script#{ELEMENT_CONTENT}>)(.*?)(</script>)}m, :foreign_block
|
104
|
-
map %r{(<style#{ELEMENT_CONTENT}>)(.*?)(</style>)}m, :foreign_block
|
105
|
-
map %r{<#{ELEMENT_CONTENT}/>}m, :standalone_element
|
106
|
-
map %r{</#{ELEMENT_CONTENT}>}m, :close_element
|
107
|
-
map %r{<#{ELEMENT_CONTENT}>}m, :open_element
|
108
|
-
map %r{\s+}, :whitespace
|
109
|
-
map %r{[^<]+}, :text
|
110
|
-
end
|
111
|
-
parser.scan(html, self)
|
19
|
+
@parser = HtmlParser.new
|
20
|
+
@parser.scan html.strip, Builder.new(@output, self.tab_stops)
|
112
21
|
end
|
113
|
-
|
114
22
|
end
|
115
23
|
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'htmlbeautifier/parser'
|
2
|
+
|
3
|
+
module HtmlBeautifier
  # Receives one method call per token and writes re-indented markup to an
  # output object. The current indentation depth is tracked in @level and
  # applied whenever something is emitted at the start of a line.
  class Builder

    # Tested against the last (stripped) line of an ERB tag's code; a match
    # means the output following the tag is indented one level deeper.
    # NOTE(review): the \b next to the brace alternatives (here and in
    # RUBY_OUTDENT) only matches when a word character is adjacent to the
    # brace -- confirm this is intended.
    RUBY_INDENT =
      %r{ ^ ( if | unless | while | begin | elsif | else )\b
        | \b ( do | \{ ) ( \s* \| [^\|]+ \| )? $
        }x

    # Tested against the first (stripped) line of an ERB tag's code; a match
    # means the tag itself is written one level shallower.
    RUBY_OUTDENT =
      %r{ ^ ( end | elsif | else |\} ) \b
        }x

    # Not referenced inside this class; kept for anything that reads it.
    ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx

    # output    - object that responds to << (the result is appended to it).
    # tab_stops - number of spaces that make up one indentation level.
    def initialize(output, tab_stops)
      @level = 0
      @new_line = true
      @tab = ' ' * tab_stops
      @output = output
    end

    # Increases the indentation depth by one level.
    def indent
      @level += 1
    end

    # Decreases the indentation depth by one level.
    #
    # Raises RuntimeError if the depth would become negative.
    def outdent
      @level -= 1
      raise "Outdented too far" if @level < 0
    end

    # Appends s to the output, preceded by the current indentation when it
    # is the first thing written on a line.
    def emit(s)
      @output << (@tab * @level) if @new_line
      @output << s
      @new_line = false
    end

    # Ends the current line. Any arguments are ignored.
    def whitespace(*x)
      emit "\n"
      @new_line = true
    end

    # Writes an ERB tag unchanged, adjusting the indentation depth around it
    # according to RUBY_OUTDENT (first line) and RUBY_INDENT (last line).
    def embed(opening, code, closing)
      lines = code.split(/\n/).map{ |l| l.strip }
      outdent if lines.first =~ RUBY_OUTDENT
      emit opening + code + closing
      indent if lines.last =~ RUBY_INDENT
    end

    # Writes a block whose content is not HTML (script/style): the opening
    # tag, the content re-indented one level deeper, then the closing tag.
    #
    # Leading and trailing blank lines of the content are dropped and the
    # leading spaces of the first remaining line are removed from every line.
    # Content that is empty or consists only of blank lines produces just the
    # opening and closing tags.
    def foreign_block(opening, code, closing)
      emit opening

      lines = code.split(/\n/)
      # Guard on the element itself: the array may run empty when the
      # content is nothing but whitespace.
      lines.shift while lines.first && lines.first.strip.empty?
      lines.pop while lines.last && lines.last.strip.empty?

      unless lines.empty?
        indentation = lines.first[/^ +/]

        indent
        whitespace
        lines.each do |line|
          emit line.rstrip.sub(/^#{indentation}/, '')
          whitespace
        end
        outdent
      end

      emit closing
    end

    # Writes an element that does not affect the indentation depth.
    def standalone_element(e)
      emit e
    end

    # Writes a closing element one level shallower than the current depth.
    def close_element(e)
      outdent
      emit e
    end

    # Writes an opening element and indents whatever follows it.
    def open_element(e)
      emit e
      indent
    end

    # Writes text with surrounding whitespace stripped; ends the line if the
    # original text ended in whitespace.
    def text(t)
      emit(t.strip)
      whitespace if t =~ /\s$/
    end
  end
end
|
92
|
+
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'htmlbeautifier/parser'
|
2
|
+
|
3
|
+
module HtmlBeautifier
  # A Parser that comes with the token patterns for HTML containing ERB tags
  # already registered.
  class HtmlParser < Parser
    ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx

    # Registers every pattern/handler pair below, in the listed order.
    # NOTE(review): the order is presumably significant (earlier patterns
    # look more specific than later ones) -- keep it when adding rules.
    def initialize
      rules = [
        [%r{(<%-?=?)(.*?)(-?%>)}m,                                :embed],
        [%r{<!--\[.*?\]>}m,                                       :open_element],
        [%r{<!\[.*?\]-->}m,                                       :close_element],
        [%r{<!--.*?-->}m,                                         :standalone_element],
        [%r{<!.*?>}m,                                             :standalone_element],
        [%r{(<script#{ELEMENT_CONTENT}>)(.*?)(</script>)}m,       :foreign_block],
        [%r{(<style#{ELEMENT_CONTENT}>)(.*?)(</style>)}m,         :foreign_block],
        [%r{<#{ELEMENT_CONTENT}/>}m,                              :standalone_element],
        [%r{</#{ELEMENT_CONTENT}>}m,                              :close_element],
        [%r{<#{ELEMENT_CONTENT}>}m,                               :open_element],
        [%r{\s+},                                                 :whitespace],
        [%r{[^<]+},                                               :text]
      ]

      super() do |parser|
        rules.each { |pattern, handler| parser.map(pattern, handler) }
      end
    end
  end
end
|
@@ -15,9 +15,7 @@ module HtmlBeautifier
|
|
15
15
|
|
16
16
|
def initialize(&blk)
|
17
17
|
@maps = []
|
18
|
-
if block_given?
|
19
|
-
self.instance_eval(&blk)
|
20
|
-
end
|
18
|
+
yield self if block_given?
|
21
19
|
end
|
22
20
|
|
23
21
|
def map(pattern, method)
|
@@ -25,29 +23,39 @@ module HtmlBeautifier
|
|
25
23
|
end
|
26
24
|
|
27
25
|
def scan(subject, receiver)
|
28
|
-
scanner = StringScanner.new(subject)
|
29
|
-
until scanner.eos?
|
30
|
-
dispatch(
|
26
|
+
@scanner = StringScanner.new(subject)
|
27
|
+
until @scanner.eos?
|
28
|
+
dispatch(receiver)
|
31
29
|
end
|
32
30
|
end
|
33
31
|
|
34
|
-
def
|
32
|
+
# Returns the part of the subject string that has been scanned so far,
# i.e. everything before the scan pointer.
#
# StringScanner#pos is a byte offset, so the prefix is cut by bytes rather
# than by character index; otherwise multibyte input would yield text from
# beyond the scan pointer.
def source_so_far
  subject = @scanner.string
  offset = @scanner.pos
  # Fall back to plain slicing on Rubies whose String lacks byteslice.
  return subject[0...offset] unless subject.respond_to?(:byteslice)
  subject.byteslice(0, offset)
end
|
35
|
+
|
36
|
+
# Returns the number of lines in the source scanned so far, never less
# than 1. A single trailing line break is not counted as a new line.
def source_line_number
  consumed_lines = source_so_far.chomp.split(/\n/)
  line_total = consumed_lines.count
  line_total < 1 ? 1 : line_total
end
|
39
|
+
|
40
|
+
private
|
41
|
+
# Tries each registered pattern at the current scan position and forwards
# the first one that matches to the receiver.
#
# receiver - object that responds to the mapped method names.
#
# The handler is called with the pattern's capture groups (collected in
# order, stopping at the first group that is nil) or, when there are none,
# with the whole matched text.
#
# Raises RuntimeError when no pattern matches or when a StandardError is
# raised while handling the match; the message ends with the source line
# number and the backtrace of the original error is kept.
def dispatch(receiver)
  @maps.each do |pattern, method|
    if @scanner.scan(pattern)
      params = []
      i = 1
      while @scanner[i]
        params << @scanner[i]
        i += 1
      end
      params = [@scanner[0]] if params.empty?
      self.class.debug(@scanner[0], method)
      receiver.__send__(method, *params)
      return
    end
  end
  # Show only the start of the unmatched input to keep the message short.
  raise "Unmatched sequence #{@scanner.rest[0, 40].inspect}"
rescue => ex
  raise RuntimeError, "#{ex.message} on line #{source_line_number}", ex.backtrace
end
|
51
|
-
|
52
60
|
end
|
53
61
|
end
|
data/test/test_helper.rb
CHANGED
@@ -1,3 +1,19 @@
|
|
1
1
|
require 'test/unit'
|
2
|
-
|
3
|
-
|
2
|
+
lib = File.expand_path('../../lib', __FILE__)
|
3
|
+
$:.unshift lib unless $:.include?(lib)
|
4
|
+
|
5
|
+
# Helpers shared by the beautifier test cases.
module HtmlBeautifierTestUtilities
  # Normalises an indented multi-line literal: drops one leading line break
  # and the trailing line break plus whitespace, then removes the leading
  # spaces of the first line from the start of every line.
  def code(str)
    trimmed = str.gsub(/\A\n|\n\s*\Z/, '')
    margin = /^#{trimmed[/\A +/]}/
    trimmed.split(/\n/).map { |line| line.sub(margin, '') }.join("\n")
  end

  # Runs source through a Beautifier writing into a fresh string and asserts
  # that the result equals expected.
  def assert_beautifies(expected, source)
    actual = ''
    HtmlBeautifier::Beautifier.new(actual).scan(source)
    assert expected == actual, "Expected:\n#{expected}\nbut was:\n#{actual}"
  end
end
|
@@ -1,4 +1,5 @@
|
|
1
|
-
require
|
1
|
+
require 'test_helper'
|
2
|
+
require 'htmlbeautifier/beautifier'
|
2
3
|
|
3
4
|
class TestHtmlBeautifierIntegration < Test::Unit::TestCase
|
4
5
|
include HtmlBeautifierTestUtilities
|
@@ -79,4 +80,15 @@ class TestHtmlBeautifierIntegration < Test::Unit::TestCase
|
|
79
80
|
assert_beautifies expected, source
|
80
81
|
end
|
81
82
|
|
83
|
+
# An extra closing tag must be reported as a RuntimeError whose message
# names the problem (outdent) and the source line it occurred on.
def test_should_raise_an_error_with_the_source_line_of_an_illegal_outdent
  # assert_raise fails the test if nothing is raised and hands back the
  # exception, so there is no need to rescue Exception by hand.
  error = assert_raise(RuntimeError) do
    HtmlBeautifier::Beautifier.new('').scan("<html>\n</html>\n</html>")
  end
  assert_match(/outdent/i, error.message)
  assert_match(/line 3/i, error.message)
end
|
93
|
+
|
82
94
|
end
|
data/test/test_parser.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require 'test_helper'
|
2
2
|
require 'htmlbeautifier/parser'
|
3
3
|
|
4
4
|
class TestParser < Test::Unit::TestCase
|
@@ -21,10 +21,10 @@ class TestParser < Test::Unit::TestCase
|
|
21
21
|
|
22
22
|
def test_should_dispatch_matching_sequence
|
23
23
|
receiver = Receiver.new
|
24
|
-
parser = HtmlBeautifier::Parser.new{
|
25
|
-
map %r{foo}, :foo
|
26
|
-
map %r{bar\s*}, :bar
|
27
|
-
map %r{\s+}, :whitespace
|
24
|
+
parser = HtmlBeautifier::Parser.new { |p|
|
25
|
+
p.map %r{foo}, :foo
|
26
|
+
p.map %r{bar\s*}, :bar
|
27
|
+
p.map %r{\s+}, :whitespace
|
28
28
|
}
|
29
29
|
parser.scan('foo bar ', receiver)
|
30
30
|
assert_equal [[:foo, ['foo']], [:whitespace, [' ']], [:bar, ['bar ']]], receiver.sequence
|
@@ -32,11 +32,51 @@ class TestParser < Test::Unit::TestCase
|
|
32
32
|
|
33
33
|
def test_should_send_parenthesized_components_as_separate_parameters
|
34
34
|
receiver = Receiver.new
|
35
|
-
parser = HtmlBeautifier::Parser.new{
|
36
|
-
map %r{(foo)\((.*?)\)}, :foo
|
35
|
+
parser = HtmlBeautifier::Parser.new { |p|
|
36
|
+
p.map %r{(foo)\((.*?)\)}, :foo
|
37
37
|
}
|
38
38
|
parser.scan('foo(bar)', receiver)
|
39
39
|
assert_equal [[:foo, ['foo', 'bar']]], receiver.sequence
|
40
40
|
end
|
41
41
|
|
42
|
+
# Receiver that records what the parser reports about its position each
# time one of the append_* handlers is invoked.
class SourceTrackingReceiver < Receiver
  attr_reader :sources_so_far, :source_line_numbers

  # parser - the object queried for source_so_far / source_line_number.
  def initialize(parser)
    @sources_so_far = []
    @source_line_numbers = []
    @parser = parser
    super()
  end

  # Records the parser's source_so_far; the handler arguments are unused.
  def append_new_source_so_far(*_args)
    @sources_so_far.push(@parser.source_so_far)
  end

  # Records the parser's source_line_number; the handler arguments are unused.
  def append_new_source_line_number(*_args)
    @source_line_numbers.push(@parser.source_line_number)
  end
end
|
61
|
+
|
62
|
+
# After each run of Ms the parser should report everything scanned up to
# and including that run.
def test_should_give_source_so_far
  parser = HtmlBeautifier::Parser.new do |p|
    p.map(%r{(M+)}m, :append_new_source_so_far)
    p.map(%r{([\s\n]+)}m, :space_or_newline)
  end
  receiver = SourceTrackingReceiver.new(parser)

  parser.scan("M MM MMM", receiver)

  expected = ['M', 'M MM', 'M MM MMM']
  assert_equal expected, receiver.sources_so_far
end
|
71
|
+
|
72
|
+
# After each run of Ms the parser should report the line that run is on.
def test_should_give_source_line_number
  parser = HtmlBeautifier::Parser.new do |p|
    p.map(%r{(M+)}m, :append_new_source_line_number)
    p.map(%r{([\s\n]+)}m, :space_or_newline)
  end
  receiver = SourceTrackingReceiver.new(parser)

  parser.scan("M \n\nMM\nMMM", receiver)

  expected = [1, 3, 4]
  assert_equal expected, receiver.source_line_numbers
end
|
81
|
+
|
42
82
|
end
|
metadata
CHANGED
@@ -1,66 +1,58 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: htmlbeautifier
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.7
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
|
-
authors:
|
7
|
+
authors:
|
7
8
|
- Paul Battley
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
|
12
|
-
date: 2010-07-02 00:00:00 +01:00
|
13
|
-
default_executable:
|
12
|
+
date: 2012-07-10 00:00:00.000000000 Z
|
14
13
|
dependencies: []
|
15
|
-
|
16
14
|
description:
|
17
15
|
email: pbattley@gmail.com
|
18
|
-
executables:
|
16
|
+
executables:
|
19
17
|
- htmlbeautifier
|
20
18
|
extensions: []
|
21
|
-
|
22
19
|
extra_rdoc_files: []
|
23
|
-
|
24
|
-
files:
|
20
|
+
files:
|
25
21
|
- Rakefile
|
26
22
|
- README.md
|
27
23
|
- bin/htmlbeautifier
|
28
|
-
- test/html_beautifier_test_utilities.rb
|
29
|
-
- test/test_helper.rb
|
30
24
|
- test/test_html_beautifier_integration.rb
|
31
|
-
- test/
|
25
|
+
- test/test_helper.rb
|
32
26
|
- test/test_parser.rb
|
27
|
+
- test/test_html_beautifier_regression.rb
|
28
|
+
- lib/htmlbeautifier/html_parser.rb
|
29
|
+
- lib/htmlbeautifier/builder.rb
|
33
30
|
- lib/htmlbeautifier/beautifier.rb
|
34
|
-
- lib/htmlbeautifier/parser.rb
|
35
31
|
- lib/htmlbeautifier/version.rb
|
32
|
+
- lib/htmlbeautifier/parser.rb
|
36
33
|
- lib/htmlbeautifier.rb
|
37
|
-
has_rdoc: true
|
38
34
|
homepage: http://github.com/threedaymonk/htmlbeautifier
|
39
35
|
licenses: []
|
40
|
-
|
41
36
|
post_install_message:
|
42
37
|
rdoc_options: []
|
43
|
-
|
44
|
-
require_paths:
|
38
|
+
require_paths:
|
45
39
|
- lib
|
46
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ! '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
58
52
|
requirements: []
|
59
|
-
|
60
53
|
rubyforge_project:
|
61
|
-
rubygems_version: 1.
|
54
|
+
rubygems_version: 1.8.23
|
62
55
|
signing_key:
|
63
56
|
specification_version: 3
|
64
57
|
summary: A normaliser/beautifier for HTML that also understands embedded Ruby.
|
65
58
|
test_files: []
|
66
|
-
|
@@ -1,17 +0,0 @@
|
|
1
|
-
module HtmlBeautifierTestUtilities
|
2
|
-
|
3
|
-
def code(str)
|
4
|
-
str = str.gsub(/\A\n|\n\s*\Z/, '')
|
5
|
-
indentation = str[/\A +/]
|
6
|
-
lines = str.split(/\n/)
|
7
|
-
lines.map{ |line| line.sub(/^#{indentation}/, '') }.join("\n")
|
8
|
-
end
|
9
|
-
|
10
|
-
def assert_beautifies(expected, source)
|
11
|
-
actual = ''
|
12
|
-
beautifier = HtmlBeautifier::Beautifier.new(actual)
|
13
|
-
beautifier.scan(source)
|
14
|
-
assert expected == actual, "Expected:\n#{expected}\nbut was:\n#{actual}"
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|