htmlbeautifier 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1,7 +1,7 @@
1
1
  require 'rake/testtask'
2
2
 
3
3
  Rake::TestTask.new(:test) do |t|
4
- t.libs << 'lib'
4
+ t.libs << 'test'
5
5
  t.pattern = 'test/test_*.rb'
6
6
  t.verbose = true
7
7
  end
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
- require 'htmlbeautifier/beautifier'
2
+ require 'htmlbeautifier'
3
3
 
4
4
  def beautify(input, output)
5
5
  HtmlBeautifier::Beautifier.new(output).scan(input)
@@ -1,5 +1,2 @@
1
- $:.unshift File.dirname(__FILE__)
2
-
3
- module HtmlBeautifier
4
-
5
- end
1
+ require 'htmlbeautifier/beautifier'
2
+ require 'htmlbeautifier/version'
@@ -1,115 +1,23 @@
1
- require 'htmlbeautifier/parser'
1
+ require 'htmlbeautifier/html_parser'
2
+ require 'htmlbeautifier/builder'
2
3
 
3
4
  module HtmlBeautifier
4
5
  class Beautifier
6
+ attr_accessor :tab_stops
5
7
 
6
- RUBY_INDENT =
7
- %r{ ^ ( if | unless | while | begin | elsif | else )\b
8
- | \b ( do | \{ ) ( \s* \| [^\|]+ \| )? $
9
- }x
10
- RUBY_OUTDENT =
11
- %r{ ^ ( end | elsif | else |\} ) \b
12
- }x
13
- ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx
14
-
8
+ # Create a new Beautifier.
9
+ # output should be an object that responds to <<
10
+ # i.e. a String or an IO
15
11
  def initialize(output)
16
- @level = 0
17
- @new_line = true
18
12
  self.tab_stops = 2
19
13
  @output = output
20
14
  end
21
15
 
22
- def tab_stops=(n)
23
- @tab = ' ' * n
24
- end
25
-
26
- def indent
27
- @level += 1
28
- end
29
-
30
- def outdent
31
- @level -= 1
32
- raise "Outdented too far" if @level < 0
33
- end
34
-
35
- def emit(s)
36
- if (@new_line)
37
- @output << (@tab * @level)
38
- end
39
- @output << s
40
- @new_line = false
41
- end
42
-
43
- def whitespace(*x)
44
- emit "\n"
45
- @new_line = true
46
- end
47
-
48
- def embed(opening, code, closing)
49
- lines = code.split(/\n/).map{ |l| l.strip }
50
- outdent if lines.first =~ RUBY_OUTDENT
51
- emit opening + code + closing
52
- indent if lines.last =~ RUBY_INDENT
53
- end
54
-
55
- def foreign_block(opening, code, closing)
56
- emit opening
57
- unless code.empty?
58
- indent
59
-
60
- lines = code.split(/\n/)
61
- lines.shift while lines.first.strip.empty?
62
- lines.pop while lines.last.strip.empty?
63
- indentation = lines.first[/^ +/]
64
-
65
- whitespace
66
- lines.each do |line|
67
- emit line.rstrip.sub(/^#{indentation}/, '')
68
- whitespace
69
- end
70
-
71
- outdent
72
- end
73
- emit closing
74
- end
75
-
76
- def standalone_element(e)
77
- emit e
78
- end
79
-
80
- def close_element(e)
81
- outdent
82
- emit e
83
- end
84
-
85
- def open_element(e)
86
- emit e
87
- indent
88
- end
89
-
90
- def text(t)
91
- emit(t.strip)
92
- whitespace if t =~ /\s$/
93
- end
94
-
16
+ # Process an HTML/HTML+ERB document
17
+ # html should be a string
95
18
  def scan(html)
96
- html = html.strip.gsub(/\t/, @tab)
97
- parser = Parser.new do
98
- map %r{(<%-?=?)(.*?)(-?%>)}m, :embed
99
- map %r{<!--\[.*?\]>}m, :open_element
100
- map %r{<!\[.*?\]-->}m, :close_element
101
- map %r{<!--.*?-->}m, :standalone_element
102
- map %r{<!.*?>}m, :standalone_element
103
- map %r{(<script#{ELEMENT_CONTENT}>)(.*?)(</script>)}m, :foreign_block
104
- map %r{(<style#{ELEMENT_CONTENT}>)(.*?)(</style>)}m, :foreign_block
105
- map %r{<#{ELEMENT_CONTENT}/>}m, :standalone_element
106
- map %r{</#{ELEMENT_CONTENT}>}m, :close_element
107
- map %r{<#{ELEMENT_CONTENT}>}m, :open_element
108
- map %r{\s+}, :whitespace
109
- map %r{[^<]+}, :text
110
- end
111
- parser.scan(html, self)
19
+ @parser = HtmlParser.new
20
+ @parser.scan html.strip, Builder.new(@output, self.tab_stops)
112
21
  end
113
-
114
22
  end
115
23
  end
@@ -0,0 +1,92 @@
1
+ require 'htmlbeautifier/parser'
2
+
3
+ module HtmlBeautifier
4
+ class Builder
5
+
6
+ RUBY_INDENT =
7
+ %r{ ^ ( if | unless | while | begin | elsif | else )\b
8
+ | \b ( do | \{ ) ( \s* \| [^\|]+ \| )? $
9
+ }x
10
+ RUBY_OUTDENT =
11
+ %r{ ^ ( end | elsif | else |\} ) \b
12
+ }x
13
+ ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx
14
+
15
+ def initialize(output, tab_stops)
16
+ @level = 0
17
+ @new_line = true
18
+ @tab = ' ' * tab_stops
19
+ @output = output
20
+ end
21
+
22
+ def indent
23
+ @level += 1
24
+ end
25
+
26
+ def outdent
27
+ @level -= 1
28
+ raise "Outdented too far" if @level < 0
29
+ end
30
+
31
+ def emit(s)
32
+ if (@new_line)
33
+ @output << (@tab * @level)
34
+ end
35
+ @output << s
36
+ @new_line = false
37
+ end
38
+
39
+ def whitespace(*x)
40
+ emit "\n"
41
+ @new_line = true
42
+ end
43
+
44
+ def embed(opening, code, closing)
45
+ lines = code.split(/\n/).map{ |l| l.strip }
46
+ outdent if lines.first =~ RUBY_OUTDENT
47
+ emit opening + code + closing
48
+ indent if lines.last =~ RUBY_INDENT
49
+ end
50
+
51
+ def foreign_block(opening, code, closing)
52
+ emit opening
53
+ unless code.empty?
54
+ indent
55
+
56
+ lines = code.split(/\n/)
57
+ lines.shift while lines.first.strip.empty?
58
+ lines.pop while lines.last.strip.empty?
59
+ indentation = lines.first[/^ +/]
60
+
61
+ whitespace
62
+ lines.each do |line|
63
+ emit line.rstrip.sub(/^#{indentation}/, '')
64
+ whitespace
65
+ end
66
+
67
+ outdent
68
+ end
69
+ emit closing
70
+ end
71
+
72
+ def standalone_element(e)
73
+ emit e
74
+ end
75
+
76
+ def close_element(e)
77
+ outdent
78
+ emit e
79
+ end
80
+
81
+ def open_element(e)
82
+ emit e
83
+ indent
84
+ end
85
+
86
+ def text(t)
87
+ emit(t.strip)
88
+ whitespace if t =~ /\s$/
89
+ end
90
+ end
91
+ end
92
+
@@ -0,0 +1,24 @@
1
+ require 'htmlbeautifier/parser'
2
+
3
+ module HtmlBeautifier
4
+ class HtmlParser < Parser
5
+ ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx
6
+
7
+ def initialize
8
+ super do |p|
9
+ p.map %r{(<%-?=?)(.*?)(-?%>)}m, :embed
10
+ p.map %r{<!--\[.*?\]>}m, :open_element
11
+ p.map %r{<!\[.*?\]-->}m, :close_element
12
+ p.map %r{<!--.*?-->}m, :standalone_element
13
+ p.map %r{<!.*?>}m, :standalone_element
14
+ p.map %r{(<script#{ELEMENT_CONTENT}>)(.*?)(</script>)}m, :foreign_block
15
+ p.map %r{(<style#{ELEMENT_CONTENT}>)(.*?)(</style>)}m, :foreign_block
16
+ p.map %r{<#{ELEMENT_CONTENT}/>}m, :standalone_element
17
+ p.map %r{</#{ELEMENT_CONTENT}>}m, :close_element
18
+ p.map %r{<#{ELEMENT_CONTENT}>}m, :open_element
19
+ p.map %r{\s+}, :whitespace
20
+ p.map %r{[^<]+}, :text
21
+ end
22
+ end
23
+ end
24
+ end
@@ -15,9 +15,7 @@ module HtmlBeautifier
15
15
 
16
16
  def initialize(&blk)
17
17
  @maps = []
18
- if block_given?
19
- self.instance_eval(&blk)
20
- end
18
+ yield self if block_given?
21
19
  end
22
20
 
23
21
  def map(pattern, method)
@@ -25,29 +23,39 @@ module HtmlBeautifier
25
23
  end
26
24
 
27
25
  def scan(subject, receiver)
28
- scanner = StringScanner.new(subject)
29
- until scanner.eos?
30
- dispatch(scanner, receiver)
26
+ @scanner = StringScanner.new(subject)
27
+ until @scanner.eos?
28
+ dispatch(receiver)
31
29
  end
32
30
  end
33
31
 
34
- def dispatch(scanner, receiver)
32
+ def source_so_far
33
+ @scanner.string[0...@scanner.pos]
34
+ end
35
+
36
+ def source_line_number
37
+ [source_so_far.chomp.split(/\n/).count, 1].max
38
+ end
39
+
40
+ private
41
+ def dispatch(receiver)
35
42
  @maps.each do |pattern, method|
36
- if scanner.scan(pattern)
43
+ if @scanner.scan(pattern)
37
44
  params = []
38
45
  i = 1
39
- while scanner[i]
40
- params << scanner[i]
46
+ while @scanner[i]
47
+ params << @scanner[i]
41
48
  i += 1
42
49
  end
43
- params = [scanner[0]] if params.empty?
44
- self.class.debug(scanner[0], method)
50
+ params = [@scanner[0]] if params.empty?
51
+ self.class.debug(@scanner[0], method)
45
52
  receiver.__send__(method, *params)
46
53
  return
47
54
  end
48
55
  end
49
56
  raise "Unmatched sequence #{match.inspect}"
57
+ rescue => ex
58
+ raise "#{ex.message} on line #{source_line_number}"
50
59
  end
51
-
52
60
  end
53
61
  end
@@ -2,7 +2,7 @@ module HtmlBeautifier #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 0
5
- TINY = 6
5
+ TINY = 7
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -1,3 +1,19 @@
1
1
  require 'test/unit'
2
- require File.dirname(__FILE__) + '/html_beautifier_test_utilities'
3
- require File.dirname(__FILE__) + '/../lib/htmlbeautifier'
2
+ lib = File.expand_path('../../lib', __FILE__)
3
+ $:.unshift lib unless $:.include?(lib)
4
+
5
+ module HtmlBeautifierTestUtilities
6
+ def code(str)
7
+ str = str.gsub(/\A\n|\n\s*\Z/, '')
8
+ indentation = str[/\A +/]
9
+ lines = str.split(/\n/)
10
+ lines.map{ |line| line.sub(/^#{indentation}/, '') }.join("\n")
11
+ end
12
+
13
+ def assert_beautifies(expected, source)
14
+ actual = ''
15
+ beautifier = HtmlBeautifier::Beautifier.new(actual)
16
+ beautifier.scan(source)
17
+ assert expected == actual, "Expected:\n#{expected}\nbut was:\n#{actual}"
18
+ end
19
+ end
@@ -1,4 +1,5 @@
1
- require File.dirname(__FILE__) + '/test_helper'
1
+ require 'test_helper'
2
+ require 'htmlbeautifier/beautifier'
2
3
 
3
4
  class TestHtmlBeautifierIntegration < Test::Unit::TestCase
4
5
  include HtmlBeautifierTestUtilities
@@ -79,4 +80,15 @@ class TestHtmlBeautifierIntegration < Test::Unit::TestCase
79
80
  assert_beautifies expected, source
80
81
  end
81
82
 
83
+ def test_should_raise_an_error_with_the_source_line_of_an_illegal_outdent
84
+ begin
85
+ HtmlBeautifier::Beautifier.new('').scan("<html>\n</html>\n</html>")
86
+ rescue Exception => e
87
+ @exception = e
88
+ end
89
+ assert_equal RuntimeError, @exception.class
90
+ assert_match /outdent/i, @exception.message
91
+ assert_match /line 3/i, @exception.message
92
+ end
93
+
82
94
  end
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/test_helper'
1
+ require 'test_helper'
2
2
  require 'htmlbeautifier/beautifier'
3
3
 
4
4
  class HtmlBeautifierRegressionTest < Test::Unit::TestCase
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/test_helper'
1
+ require 'test_helper'
2
2
  require 'htmlbeautifier/parser'
3
3
 
4
4
  class TestParser < Test::Unit::TestCase
@@ -21,10 +21,10 @@ class TestParser < Test::Unit::TestCase
21
21
 
22
22
  def test_should_dispatch_matching_sequence
23
23
  receiver = Receiver.new
24
- parser = HtmlBeautifier::Parser.new{
25
- map %r{foo}, :foo
26
- map %r{bar\s*}, :bar
27
- map %r{\s+}, :whitespace
24
+ parser = HtmlBeautifier::Parser.new { |p|
25
+ p.map %r{foo}, :foo
26
+ p.map %r{bar\s*}, :bar
27
+ p.map %r{\s+}, :whitespace
28
28
  }
29
29
  parser.scan('foo bar ', receiver)
30
30
  assert_equal [[:foo, ['foo']], [:whitespace, [' ']], [:bar, ['bar ']]], receiver.sequence
@@ -32,11 +32,51 @@ class TestParser < Test::Unit::TestCase
32
32
 
33
33
  def test_should_send_parenthesized_components_as_separate_parameters
34
34
  receiver = Receiver.new
35
- parser = HtmlBeautifier::Parser.new{
36
- map %r{(foo)\((.*?)\)}, :foo
35
+ parser = HtmlBeautifier::Parser.new { |p|
36
+ p.map %r{(foo)\((.*?)\)}, :foo
37
37
  }
38
38
  parser.scan('foo(bar)', receiver)
39
39
  assert_equal [[:foo, ['foo', 'bar']]], receiver.sequence
40
40
  end
41
41
 
42
+ class SourceTrackingReceiver < Receiver
43
+ attr_reader :sources_so_far
44
+ attr_reader :source_line_numbers
45
+
46
+ def initialize(parser)
47
+ @sources_so_far = []
48
+ @source_line_numbers = []
49
+ @parser = parser
50
+ super()
51
+ end
52
+
53
+ def append_new_source_so_far(*ignored)
54
+ @sources_so_far << @parser.source_so_far
55
+ end
56
+
57
+ def append_new_source_line_number(*ignored)
58
+ @source_line_numbers << @parser.source_line_number
59
+ end
60
+ end
61
+
62
+ def test_should_give_source_so_far
63
+ parser = HtmlBeautifier::Parser.new { |p|
64
+ p.map %r{(M+)}m, :append_new_source_so_far
65
+ p.map %r{([\s\n]+)}m, :space_or_newline
66
+ }
67
+ receiver = SourceTrackingReceiver.new(parser)
68
+ parser.scan("M MM MMM", receiver)
69
+ assert_equal ['M', 'M MM', 'M MM MMM'], receiver.sources_so_far
70
+ end
71
+
72
+ def test_should_give_source_line_number
73
+ parser = HtmlBeautifier::Parser.new{ |p|
74
+ p.map %r{(M+)}m, :append_new_source_line_number
75
+ p.map %r{([\s\n]+)}m, :space_or_newline
76
+ }
77
+ receiver = SourceTrackingReceiver.new(parser)
78
+ parser.scan("M \n\nMM\nMMM", receiver)
79
+ assert_equal [1, 3, 4], receiver.source_line_numbers
80
+ end
81
+
42
82
  end
metadata CHANGED
@@ -1,66 +1,58 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: htmlbeautifier
3
- version: !ruby/object:Gem::Version
4
- version: 0.0.6
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.7
5
+ prerelease:
5
6
  platform: ruby
6
- authors:
7
+ authors:
7
8
  - Paul Battley
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
-
12
- date: 2010-07-02 00:00:00 +01:00
13
- default_executable:
12
+ date: 2012-07-10 00:00:00.000000000 Z
14
13
  dependencies: []
15
-
16
14
  description:
17
15
  email: pbattley@gmail.com
18
- executables:
16
+ executables:
19
17
  - htmlbeautifier
20
18
  extensions: []
21
-
22
19
  extra_rdoc_files: []
23
-
24
- files:
20
+ files:
25
21
  - Rakefile
26
22
  - README.md
27
23
  - bin/htmlbeautifier
28
- - test/html_beautifier_test_utilities.rb
29
- - test/test_helper.rb
30
24
  - test/test_html_beautifier_integration.rb
31
- - test/test_html_beautifier_regression.rb
25
+ - test/test_helper.rb
32
26
  - test/test_parser.rb
27
+ - test/test_html_beautifier_regression.rb
28
+ - lib/htmlbeautifier/html_parser.rb
29
+ - lib/htmlbeautifier/builder.rb
33
30
  - lib/htmlbeautifier/beautifier.rb
34
- - lib/htmlbeautifier/parser.rb
35
31
  - lib/htmlbeautifier/version.rb
32
+ - lib/htmlbeautifier/parser.rb
36
33
  - lib/htmlbeautifier.rb
37
- has_rdoc: true
38
34
  homepage: http://github.com/threedaymonk/htmlbeautifier
39
35
  licenses: []
40
-
41
36
  post_install_message:
42
37
  rdoc_options: []
43
-
44
- require_paths:
38
+ require_paths:
45
39
  - lib
46
- required_ruby_version: !ruby/object:Gem::Requirement
47
- requirements:
48
- - - ">="
49
- - !ruby/object:Gem::Version
50
- version: "0"
51
- version:
52
- required_rubygems_version: !ruby/object:Gem::Requirement
53
- requirements:
54
- - - ">="
55
- - !ruby/object:Gem::Version
56
- version: "0"
57
- version:
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ! '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
58
52
  requirements: []
59
-
60
53
  rubyforge_project:
61
- rubygems_version: 1.3.5
54
+ rubygems_version: 1.8.23
62
55
  signing_key:
63
56
  specification_version: 3
64
57
  summary: A normaliser/beautifier for HTML that also understands embedded Ruby.
65
58
  test_files: []
66
-
@@ -1,17 +0,0 @@
1
- module HtmlBeautifierTestUtilities
2
-
3
- def code(str)
4
- str = str.gsub(/\A\n|\n\s*\Z/, '')
5
- indentation = str[/\A +/]
6
- lines = str.split(/\n/)
7
- lines.map{ |line| line.sub(/^#{indentation}/, '') }.join("\n")
8
- end
9
-
10
- def assert_beautifies(expected, source)
11
- actual = ''
12
- beautifier = HtmlBeautifier::Beautifier.new(actual)
13
- beautifier.scan(source)
14
- assert expected == actual, "Expected:\n#{expected}\nbut was:\n#{actual}"
15
- end
16
-
17
- end