htmlbeautifier 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -1,7 +1,7 @@
1
1
  require 'rake/testtask'
2
2
 
3
3
  Rake::TestTask.new(:test) do |t|
4
- t.libs << 'lib'
4
+ t.libs << 'test'
5
5
  t.pattern = 'test/test_*.rb'
6
6
  t.verbose = true
7
7
  end
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
- require 'htmlbeautifier/beautifier'
2
+ require 'htmlbeautifier'
3
3
 
4
4
  def beautify(input, output)
5
5
  HtmlBeautifier::Beautifier.new(output).scan(input)
@@ -1,5 +1,2 @@
1
- $:.unshift File.dirname(__FILE__)
2
-
3
- module HtmlBeautifier
4
-
5
- end
1
+ require 'htmlbeautifier/beautifier'
2
+ require 'htmlbeautifier/version'
@@ -1,115 +1,23 @@
1
- require 'htmlbeautifier/parser'
1
+ require 'htmlbeautifier/html_parser'
2
+ require 'htmlbeautifier/builder'
2
3
 
3
4
  module HtmlBeautifier
4
5
  class Beautifier
6
+ attr_accessor :tab_stops
5
7
 
6
- RUBY_INDENT =
7
- %r{ ^ ( if | unless | while | begin | elsif | else )\b
8
- | \b ( do | \{ ) ( \s* \| [^\|]+ \| )? $
9
- }x
10
- RUBY_OUTDENT =
11
- %r{ ^ ( end | elsif | else |\} ) \b
12
- }x
13
- ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx
14
-
8
+ # Create a new Beautifier.
9
+ # output should be an object that responds to <<
10
+ # i.e. a String or an IO
15
11
  def initialize(output)
16
- @level = 0
17
- @new_line = true
18
12
  self.tab_stops = 2
19
13
  @output = output
20
14
  end
21
15
 
22
- def tab_stops=(n)
23
- @tab = ' ' * n
24
- end
25
-
26
- def indent
27
- @level += 1
28
- end
29
-
30
- def outdent
31
- @level -= 1
32
- raise "Outdented too far" if @level < 0
33
- end
34
-
35
- def emit(s)
36
- if (@new_line)
37
- @output << (@tab * @level)
38
- end
39
- @output << s
40
- @new_line = false
41
- end
42
-
43
- def whitespace(*x)
44
- emit "\n"
45
- @new_line = true
46
- end
47
-
48
- def embed(opening, code, closing)
49
- lines = code.split(/\n/).map{ |l| l.strip }
50
- outdent if lines.first =~ RUBY_OUTDENT
51
- emit opening + code + closing
52
- indent if lines.last =~ RUBY_INDENT
53
- end
54
-
55
- def foreign_block(opening, code, closing)
56
- emit opening
57
- unless code.empty?
58
- indent
59
-
60
- lines = code.split(/\n/)
61
- lines.shift while lines.first.strip.empty?
62
- lines.pop while lines.last.strip.empty?
63
- indentation = lines.first[/^ +/]
64
-
65
- whitespace
66
- lines.each do |line|
67
- emit line.rstrip.sub(/^#{indentation}/, '')
68
- whitespace
69
- end
70
-
71
- outdent
72
- end
73
- emit closing
74
- end
75
-
76
- def standalone_element(e)
77
- emit e
78
- end
79
-
80
- def close_element(e)
81
- outdent
82
- emit e
83
- end
84
-
85
- def open_element(e)
86
- emit e
87
- indent
88
- end
89
-
90
- def text(t)
91
- emit(t.strip)
92
- whitespace if t =~ /\s$/
93
- end
94
-
16
+ # Process an HTML/HTML+ERB document
17
+ # html should be a string
95
18
  def scan(html)
96
- html = html.strip.gsub(/\t/, @tab)
97
- parser = Parser.new do
98
- map %r{(<%-?=?)(.*?)(-?%>)}m, :embed
99
- map %r{<!--\[.*?\]>}m, :open_element
100
- map %r{<!\[.*?\]-->}m, :close_element
101
- map %r{<!--.*?-->}m, :standalone_element
102
- map %r{<!.*?>}m, :standalone_element
103
- map %r{(<script#{ELEMENT_CONTENT}>)(.*?)(</script>)}m, :foreign_block
104
- map %r{(<style#{ELEMENT_CONTENT}>)(.*?)(</style>)}m, :foreign_block
105
- map %r{<#{ELEMENT_CONTENT}/>}m, :standalone_element
106
- map %r{</#{ELEMENT_CONTENT}>}m, :close_element
107
- map %r{<#{ELEMENT_CONTENT}>}m, :open_element
108
- map %r{\s+}, :whitespace
109
- map %r{[^<]+}, :text
110
- end
111
- parser.scan(html, self)
19
+ @parser = HtmlParser.new
20
+ @parser.scan html.strip, Builder.new(@output, self.tab_stops)
112
21
  end
113
-
114
22
  end
115
23
  end
@@ -0,0 +1,92 @@
1
+ require 'htmlbeautifier/parser'
2
+
3
+ module HtmlBeautifier
4
+ class Builder
5
+
6
+ RUBY_INDENT =
7
+ %r{ ^ ( if | unless | while | begin | elsif | else )\b
8
+ | \b ( do | \{ ) ( \s* \| [^\|]+ \| )? $
9
+ }x
10
+ RUBY_OUTDENT =
11
+ %r{ ^ ( end | elsif | else |\} ) \b
12
+ }x
13
+ ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx
14
+
15
+ def initialize(output, tab_stops)
16
+ @level = 0
17
+ @new_line = true
18
+ @tab = ' ' * tab_stops
19
+ @output = output
20
+ end
21
+
22
+ def indent
23
+ @level += 1
24
+ end
25
+
26
+ def outdent
27
+ @level -= 1
28
+ raise "Outdented too far" if @level < 0
29
+ end
30
+
31
+ def emit(s)
32
+ if (@new_line)
33
+ @output << (@tab * @level)
34
+ end
35
+ @output << s
36
+ @new_line = false
37
+ end
38
+
39
+ def whitespace(*x)
40
+ emit "\n"
41
+ @new_line = true
42
+ end
43
+
44
+ def embed(opening, code, closing)
45
+ lines = code.split(/\n/).map{ |l| l.strip }
46
+ outdent if lines.first =~ RUBY_OUTDENT
47
+ emit opening + code + closing
48
+ indent if lines.last =~ RUBY_INDENT
49
+ end
50
+
51
+ def foreign_block(opening, code, closing)
52
+ emit opening
53
+ unless code.empty?
54
+ indent
55
+
56
+ lines = code.split(/\n/)
57
+ lines.shift while lines.first.strip.empty?
58
+ lines.pop while lines.last.strip.empty?
59
+ indentation = lines.first[/^ +/]
60
+
61
+ whitespace
62
+ lines.each do |line|
63
+ emit line.rstrip.sub(/^#{indentation}/, '')
64
+ whitespace
65
+ end
66
+
67
+ outdent
68
+ end
69
+ emit closing
70
+ end
71
+
72
+ def standalone_element(e)
73
+ emit e
74
+ end
75
+
76
+ def close_element(e)
77
+ outdent
78
+ emit e
79
+ end
80
+
81
+ def open_element(e)
82
+ emit e
83
+ indent
84
+ end
85
+
86
+ def text(t)
87
+ emit(t.strip)
88
+ whitespace if t =~ /\s$/
89
+ end
90
+ end
91
+ end
92
+
@@ -0,0 +1,24 @@
1
+ require 'htmlbeautifier/parser'
2
+
3
+ module HtmlBeautifier
4
+ class HtmlParser < Parser
5
+ ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx
6
+
7
+ def initialize
8
+ super do |p|
9
+ p.map %r{(<%-?=?)(.*?)(-?%>)}m, :embed
10
+ p.map %r{<!--\[.*?\]>}m, :open_element
11
+ p.map %r{<!\[.*?\]-->}m, :close_element
12
+ p.map %r{<!--.*?-->}m, :standalone_element
13
+ p.map %r{<!.*?>}m, :standalone_element
14
+ p.map %r{(<script#{ELEMENT_CONTENT}>)(.*?)(</script>)}m, :foreign_block
15
+ p.map %r{(<style#{ELEMENT_CONTENT}>)(.*?)(</style>)}m, :foreign_block
16
+ p.map %r{<#{ELEMENT_CONTENT}/>}m, :standalone_element
17
+ p.map %r{</#{ELEMENT_CONTENT}>}m, :close_element
18
+ p.map %r{<#{ELEMENT_CONTENT}>}m, :open_element
19
+ p.map %r{\s+}, :whitespace
20
+ p.map %r{[^<]+}, :text
21
+ end
22
+ end
23
+ end
24
+ end
@@ -15,9 +15,7 @@ module HtmlBeautifier
15
15
 
16
16
  def initialize(&blk)
17
17
  @maps = []
18
- if block_given?
19
- self.instance_eval(&blk)
20
- end
18
+ yield self if block_given?
21
19
  end
22
20
 
23
21
  def map(pattern, method)
@@ -25,29 +23,39 @@ module HtmlBeautifier
25
23
  end
26
24
 
27
25
  def scan(subject, receiver)
28
- scanner = StringScanner.new(subject)
29
- until scanner.eos?
30
- dispatch(scanner, receiver)
26
+ @scanner = StringScanner.new(subject)
27
+ until @scanner.eos?
28
+ dispatch(receiver)
31
29
  end
32
30
  end
33
31
 
34
- def dispatch(scanner, receiver)
32
+ def source_so_far
33
+ @scanner.string[0...@scanner.pos]
34
+ end
35
+
36
+ def source_line_number
37
+ [source_so_far.chomp.split(/\n/).count, 1].max
38
+ end
39
+
40
+ private
41
+ def dispatch(receiver)
35
42
  @maps.each do |pattern, method|
36
- if scanner.scan(pattern)
43
+ if @scanner.scan(pattern)
37
44
  params = []
38
45
  i = 1
39
- while scanner[i]
40
- params << scanner[i]
46
+ while @scanner[i]
47
+ params << @scanner[i]
41
48
  i += 1
42
49
  end
43
- params = [scanner[0]] if params.empty?
44
- self.class.debug(scanner[0], method)
50
+ params = [@scanner[0]] if params.empty?
51
+ self.class.debug(@scanner[0], method)
45
52
  receiver.__send__(method, *params)
46
53
  return
47
54
  end
48
55
  end
49
56
  raise "Unmatched sequence #{match.inspect}"
57
+ rescue => ex
58
+ raise "#{ex.message} on line #{source_line_number}"
50
59
  end
51
-
52
60
  end
53
61
  end
@@ -2,7 +2,7 @@ module HtmlBeautifier #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 0
5
- TINY = 6
5
+ TINY = 7
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -1,3 +1,19 @@
1
1
  require 'test/unit'
2
- require File.dirname(__FILE__) + '/html_beautifier_test_utilities'
3
- require File.dirname(__FILE__) + '/../lib/htmlbeautifier'
2
+ lib = File.expand_path('../../lib', __FILE__)
3
+ $:.unshift lib unless $:.include?(lib)
4
+
5
+ module HtmlBeautifierTestUtilities
6
+ def code(str)
7
+ str = str.gsub(/\A\n|\n\s*\Z/, '')
8
+ indentation = str[/\A +/]
9
+ lines = str.split(/\n/)
10
+ lines.map{ |line| line.sub(/^#{indentation}/, '') }.join("\n")
11
+ end
12
+
13
+ def assert_beautifies(expected, source)
14
+ actual = ''
15
+ beautifier = HtmlBeautifier::Beautifier.new(actual)
16
+ beautifier.scan(source)
17
+ assert expected == actual, "Expected:\n#{expected}\nbut was:\n#{actual}"
18
+ end
19
+ end
@@ -1,4 +1,5 @@
1
- require File.dirname(__FILE__) + '/test_helper'
1
+ require 'test_helper'
2
+ require 'htmlbeautifier/beautifier'
2
3
 
3
4
  class TestHtmlBeautifierIntegration < Test::Unit::TestCase
4
5
  include HtmlBeautifierTestUtilities
@@ -79,4 +80,15 @@ class TestHtmlBeautifierIntegration < Test::Unit::TestCase
79
80
  assert_beautifies expected, source
80
81
  end
81
82
 
83
+ def test_should_raise_an_error_with_the_source_line_of_an_illegal_outdent
84
+ begin
85
+ HtmlBeautifier::Beautifier.new('').scan("<html>\n</html>\n</html>")
86
+ rescue Exception => e
87
+ @exception = e
88
+ end
89
+ assert_equal RuntimeError, @exception.class
90
+ assert_match /outdent/i, @exception.message
91
+ assert_match /line 3/i, @exception.message
92
+ end
93
+
82
94
  end
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/test_helper'
1
+ require 'test_helper'
2
2
  require 'htmlbeautifier/beautifier'
3
3
 
4
4
  class HtmlBeautifierRegressionTest < Test::Unit::TestCase
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/test_helper'
1
+ require 'test_helper'
2
2
  require 'htmlbeautifier/parser'
3
3
 
4
4
  class TestParser < Test::Unit::TestCase
@@ -21,10 +21,10 @@ class TestParser < Test::Unit::TestCase
21
21
 
22
22
  def test_should_dispatch_matching_sequence
23
23
  receiver = Receiver.new
24
- parser = HtmlBeautifier::Parser.new{
25
- map %r{foo}, :foo
26
- map %r{bar\s*}, :bar
27
- map %r{\s+}, :whitespace
24
+ parser = HtmlBeautifier::Parser.new { |p|
25
+ p.map %r{foo}, :foo
26
+ p.map %r{bar\s*}, :bar
27
+ p.map %r{\s+}, :whitespace
28
28
  }
29
29
  parser.scan('foo bar ', receiver)
30
30
  assert_equal [[:foo, ['foo']], [:whitespace, [' ']], [:bar, ['bar ']]], receiver.sequence
@@ -32,11 +32,51 @@ class TestParser < Test::Unit::TestCase
32
32
 
33
33
  def test_should_send_parenthesized_components_as_separate_parameters
34
34
  receiver = Receiver.new
35
- parser = HtmlBeautifier::Parser.new{
36
- map %r{(foo)\((.*?)\)}, :foo
35
+ parser = HtmlBeautifier::Parser.new { |p|
36
+ p.map %r{(foo)\((.*?)\)}, :foo
37
37
  }
38
38
  parser.scan('foo(bar)', receiver)
39
39
  assert_equal [[:foo, ['foo', 'bar']]], receiver.sequence
40
40
  end
41
41
 
42
+ class SourceTrackingReceiver < Receiver
43
+ attr_reader :sources_so_far
44
+ attr_reader :source_line_numbers
45
+
46
+ def initialize(parser)
47
+ @sources_so_far = []
48
+ @source_line_numbers = []
49
+ @parser = parser
50
+ super()
51
+ end
52
+
53
+ def append_new_source_so_far(*ignored)
54
+ @sources_so_far << @parser.source_so_far
55
+ end
56
+
57
+ def append_new_source_line_number(*ignored)
58
+ @source_line_numbers << @parser.source_line_number
59
+ end
60
+ end
61
+
62
+ def test_should_give_source_so_far
63
+ parser = HtmlBeautifier::Parser.new { |p|
64
+ p.map %r{(M+)}m, :append_new_source_so_far
65
+ p.map %r{([\s\n]+)}m, :space_or_newline
66
+ }
67
+ receiver = SourceTrackingReceiver.new(parser)
68
+ parser.scan("M MM MMM", receiver)
69
+ assert_equal ['M', 'M MM', 'M MM MMM'], receiver.sources_so_far
70
+ end
71
+
72
+ def test_should_give_source_line_number
73
+ parser = HtmlBeautifier::Parser.new{ |p|
74
+ p.map %r{(M+)}m, :append_new_source_line_number
75
+ p.map %r{([\s\n]+)}m, :space_or_newline
76
+ }
77
+ receiver = SourceTrackingReceiver.new(parser)
78
+ parser.scan("M \n\nMM\nMMM", receiver)
79
+ assert_equal [1, 3, 4], receiver.source_line_numbers
80
+ end
81
+
42
82
  end
metadata CHANGED
@@ -1,66 +1,58 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: htmlbeautifier
3
- version: !ruby/object:Gem::Version
4
- version: 0.0.6
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.7
5
+ prerelease:
5
6
  platform: ruby
6
- authors:
7
+ authors:
7
8
  - Paul Battley
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
-
12
- date: 2010-07-02 00:00:00 +01:00
13
- default_executable:
12
+ date: 2012-07-10 00:00:00.000000000 Z
14
13
  dependencies: []
15
-
16
14
  description:
17
15
  email: pbattley@gmail.com
18
- executables:
16
+ executables:
19
17
  - htmlbeautifier
20
18
  extensions: []
21
-
22
19
  extra_rdoc_files: []
23
-
24
- files:
20
+ files:
25
21
  - Rakefile
26
22
  - README.md
27
23
  - bin/htmlbeautifier
28
- - test/html_beautifier_test_utilities.rb
29
- - test/test_helper.rb
30
24
  - test/test_html_beautifier_integration.rb
31
- - test/test_html_beautifier_regression.rb
25
+ - test/test_helper.rb
32
26
  - test/test_parser.rb
27
+ - test/test_html_beautifier_regression.rb
28
+ - lib/htmlbeautifier/html_parser.rb
29
+ - lib/htmlbeautifier/builder.rb
33
30
  - lib/htmlbeautifier/beautifier.rb
34
- - lib/htmlbeautifier/parser.rb
35
31
  - lib/htmlbeautifier/version.rb
32
+ - lib/htmlbeautifier/parser.rb
36
33
  - lib/htmlbeautifier.rb
37
- has_rdoc: true
38
34
  homepage: http://github.com/threedaymonk/htmlbeautifier
39
35
  licenses: []
40
-
41
36
  post_install_message:
42
37
  rdoc_options: []
43
-
44
- require_paths:
38
+ require_paths:
45
39
  - lib
46
- required_ruby_version: !ruby/object:Gem::Requirement
47
- requirements:
48
- - - ">="
49
- - !ruby/object:Gem::Version
50
- version: "0"
51
- version:
52
- required_rubygems_version: !ruby/object:Gem::Requirement
53
- requirements:
54
- - - ">="
55
- - !ruby/object:Gem::Version
56
- version: "0"
57
- version:
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
58
52
  requirements: []
59
-
60
53
  rubyforge_project:
61
- rubygems_version: 1.3.5
54
+ rubygems_version: 1.8.23
62
55
  signing_key:
63
56
  specification_version: 3
64
57
  summary: A normaliser/beautifier for HTML that also understands embedded Ruby.
65
58
  test_files: []
66
-
@@ -1,17 +0,0 @@
1
- module HtmlBeautifierTestUtilities
2
-
3
- def code(str)
4
- str = str.gsub(/\A\n|\n\s*\Z/, '')
5
- indentation = str[/\A +/]
6
- lines = str.split(/\n/)
7
- lines.map{ |line| line.sub(/^#{indentation}/, '') }.join("\n")
8
- end
9
-
10
- def assert_beautifies(expected, source)
11
- actual = ''
12
- beautifier = HtmlBeautifier::Beautifier.new(actual)
13
- beautifier.scan(source)
14
- assert expected == actual, "Expected:\n#{expected}\nbut was:\n#{actual}"
15
- end
16
-
17
- end