RubyGems - htmlbeautifier - Versions diffs - 0.0.6 → 0.0.7 - Mend

htmlbeautifier 0.0.6 → 0.0.7

Files changed (14)

data/Rakefile +1 -1
data/bin/htmlbeautifier +1 -1
data/lib/htmlbeautifier.rb +2 -5
data/lib/htmlbeautifier/beautifier.rb +10 -102
data/lib/htmlbeautifier/builder.rb +92 -0
data/lib/htmlbeautifier/html_parser.rb +24 -0
data/lib/htmlbeautifier/parser.rb +21 -13
data/lib/htmlbeautifier/version.rb +1 -1
data/test/test_helper.rb +18 -2
data/test/test_html_beautifier_integration.rb +13 -1
data/test/test_html_beautifier_regression.rb +1 -1
data/test/test_parser.rb +47 -7
metadata +27 -35
data/test/html_beautifier_test_utilities.rb +0 -17

data/Rakefile CHANGED

@@ -1,7 +1,7 @@
 require 'rake/testtask'
 Rake::TestTask.new(:test) do |t|
-  t.libs << 'lib'
+  t.libs << 'test'
   t.pattern = 'test/test_*.rb'
   t.verbose = true
 end

data/bin/htmlbeautifier CHANGED

@@ -1,5 +1,5 @@
 #!/usr/bin/env ruby
-require 'htmlbeautifier/beautifier'
+require 'htmlbeautifier'
 def beautify(input, output)
   HtmlBeautifier::Beautifier.new(output).scan(input)

data/lib/htmlbeautifier.rb CHANGED

@@ -1,5 +1,2 @@
-$:.unshift File.dirname(__FILE__)
-module HtmlBeautifier
-end
+require 'htmlbeautifier/beautifier'
+require 'htmlbeautifier/version'

data/lib/htmlbeautifier/beautifier.rb CHANGED

@@ -1,115 +1,23 @@
-require 'htmlbeautifier/parser'
+require 'htmlbeautifier/html_parser'
+require 'htmlbeautifier/builder'
 module HtmlBeautifier
   class Beautifier
+    attr_accessor :tab_stops
-    RUBY_INDENT  =
-      %r{ ^ ( if | unless | while | begin | elsif | else )\b
-        | \b ( do | \{ ) ( \s* \| [^\|]+ \| )? $
-        }x
-    RUBY_OUTDENT =
-      %r{ ^ ( end | elsif | else |\} ) \b
-        }x
-    ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx
+    # Create a new Beautifier.
+    # output should be an object that responds to <<
+    # i.e. a String or an IO
     def initialize(output)
-      @level = 0
-      @new_line = true
       self.tab_stops = 2
       @output = output
     end
-    def tab_stops=(n)
-      @tab = ' ' * n
-    end
-    def indent
-      @level += 1
-    end
-    def outdent
-      @level -= 1
-      raise "Outdented too far" if @level < 0
-    end
-    def emit(s)
-      if (@new_line)
-        @output << (@tab * @level)
-      end
-      @output << s
-      @new_line = false
-    end
-    def whitespace(*x)
-      emit "\n"
-      @new_line = true
-    end
-    def embed(opening, code, closing)
-      lines = code.split(/\n/).map{ |l| l.strip }
-      outdent if lines.first =~ RUBY_OUTDENT
-      emit opening + code + closing
-      indent if lines.last =~ RUBY_INDENT
-    end
-    def foreign_block(opening, code, closing)
-      emit opening
-      unless code.empty?
-        indent
-        lines = code.split(/\n/)
-        lines.shift while lines.first.strip.empty?
-        lines.pop while lines.last.strip.empty?
-        indentation = lines.first[/^ +/]
-        whitespace
-        lines.each do |line|
-          emit line.rstrip.sub(/^#{indentation}/, '')
-          whitespace
-        end
-        outdent
-      end
-      emit closing
-    end
-    def standalone_element(e)
-      emit e
-    end
-    def close_element(e)
-      outdent
-      emit e
-    end
-    def open_element(e)
-      emit e
-      indent
-    end
-    def text(t)
-      emit(t.strip)
-      whitespace if t =~ /\s$/
-    end
+    # Process an HTML/HTML+ERB document
+    # html should be a string
     def scan(html)
-      html = html.strip.gsub(/\t/, @tab)
-      parser = Parser.new do
-        map %r{(<%-?=?)(.*?)(-?%>)}m,                           :embed
-        map %r{<!--\[.*?\]>}m,                                  :open_element
-        map %r{<!\[.*?\]-->}m,                                  :close_element
-        map %r{<!--.*?-->}m,                                    :standalone_element
-        map %r{<!.*?>}m,                                        :standalone_element
-        map %r{(<script#{ELEMENT_CONTENT}>)(.*?)(</script>)}m,  :foreign_block
-        map %r{(<style#{ELEMENT_CONTENT}>)(.*?)(</style>)}m,    :foreign_block
-        map %r{<#{ELEMENT_CONTENT}/>}m,                         :standalone_element
-        map %r{</#{ELEMENT_CONTENT}>}m,                         :close_element
-        map %r{<#{ELEMENT_CONTENT}>}m,                          :open_element
-        map %r{\s+},                                            :whitespace
-        map %r{[^<]+},                                          :text
-      end
-      parser.scan(html, self)
+      @parser = HtmlParser.new
+      @parser.scan html.strip, Builder.new(@output, self.tab_stops)
     end
   end
 end

data/lib/htmlbeautifier/builder.rb ADDED

@@ -0,0 +1,92 @@
+require 'htmlbeautifier/parser'
+module HtmlBeautifier
+  class Builder
+    RUBY_INDENT  =
+      %r{ ^ ( if | unless | while | begin | elsif | else )\b
+        | \b ( do | \{ ) ( \s* \| [^\|]+ \| )? $
+        }x
+    RUBY_OUTDENT =
+      %r{ ^ ( end | elsif | else |\} ) \b
+        }x
+    ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx
+    def initialize(output, tab_stops)
+      @level = 0
+      @new_line = true
+      @tab = ' ' * tab_stops
+      @output = output
+    end
+    def indent
+      @level += 1
+    end
+    def outdent
+      @level -= 1
+      raise "Outdented too far" if @level < 0
+    end
+    def emit(s)
+      if (@new_line)
+        @output << (@tab * @level)
+      end
+      @output << s
+      @new_line = false
+    end
+    def whitespace(*x)
+      emit "\n"
+      @new_line = true
+    end
+    def embed(opening, code, closing)
+      lines = code.split(/\n/).map{ |l| l.strip }
+      outdent if lines.first =~ RUBY_OUTDENT
+      emit opening + code + closing
+      indent if lines.last =~ RUBY_INDENT
+    end
+    def foreign_block(opening, code, closing)
+      emit opening
+      unless code.empty?
+        indent
+        lines = code.split(/\n/)
+        lines.shift while lines.first.strip.empty?
+        lines.pop while lines.last.strip.empty?
+        indentation = lines.first[/^ +/]
+        whitespace
+        lines.each do |line|
+          emit line.rstrip.sub(/^#{indentation}/, '')
+          whitespace
+        end
+        outdent
+      end
+      emit closing
+    end
+    def standalone_element(e)
+      emit e
+    end
+    def close_element(e)
+      outdent
+      emit e
+    end
+    def open_element(e)
+      emit e
+      indent
+    end
+    def text(t)
+      emit(t.strip)
+      whitespace if t =~ /\s$/
+    end
+  end
+end

data/lib/htmlbeautifier/html_parser.rb ADDED

@@ -0,0 +1,24 @@
+require 'htmlbeautifier/parser'
+module HtmlBeautifier
+  class HtmlParser < Parser
+    ELEMENT_CONTENT = %r{ (?:[^<>]|<%.*?%>)* }mx
+    def initialize
+      super do |p|
+        p.map %r{(<%-?=?)(.*?)(-?%>)}m,                           :embed
+        p.map %r{<!--\[.*?\]>}m,                                  :open_element
+        p.map %r{<!\[.*?\]-->}m,                                  :close_element
+        p.map %r{<!--.*?-->}m,                                    :standalone_element
+        p.map %r{<!.*?>}m,                                        :standalone_element
+        p.map %r{(<script#{ELEMENT_CONTENT}>)(.*?)(</script>)}m,  :foreign_block
+        p.map %r{(<style#{ELEMENT_CONTENT}>)(.*?)(</style>)}m,    :foreign_block
+        p.map %r{<#{ELEMENT_CONTENT}/>}m,                         :standalone_element
+        p.map %r{</#{ELEMENT_CONTENT}>}m,                         :close_element
+        p.map %r{<#{ELEMENT_CONTENT}>}m,                          :open_element
+        p.map %r{\s+},                                            :whitespace
+        p.map %r{[^<]+},                                          :text
+      end
+    end
+  end
+end

data/lib/htmlbeautifier/parser.rb CHANGED

@@ -15,9 +15,7 @@ module HtmlBeautifier
     def initialize(&blk)
       @maps = []
-      if block_given?
-        self.instance_eval(&blk)
-      end
+      yield self if block_given?
     end
     def map(pattern, method)
@@ -25,29 +23,39 @@ module HtmlBeautifier
     end
     def scan(subject, receiver)
-      scanner = StringScanner.new(subject)
-      until scanner.eos?
-        dispatch(scanner, receiver)
+      @scanner = StringScanner.new(subject)
+      until @scanner.eos?
+        dispatch(receiver)
       end
     end
-    def dispatch(scanner, receiver)
+    def source_so_far
+      @scanner.string[0...@scanner.pos]
+    end
+    def source_line_number
+      [source_so_far.chomp.split(/\n/).count, 1].max
+    end
+  private
+    def dispatch(receiver)
       @maps.each do |pattern, method|
-        if scanner.scan(pattern)
+        if @scanner.scan(pattern)
           params = []
           i = 1
-          while scanner[i]
-            params << scanner[i]
+          while @scanner[i]
+            params << @scanner[i]
             i += 1
           end
-          params = [scanner[0]] if params.empty?
-          self.class.debug(scanner[0], method)
+          params = [@scanner[0]] if params.empty?
+          self.class.debug(@scanner[0], method)
           receiver.__send__(method, *params)
           return
         end
       end
       raise "Unmatched sequence #{match.inspect}"
+    rescue => ex
+      raise "#{ex.message} on line #{source_line_number}"
     end
   end
 end

data/lib/htmlbeautifier/version.rb CHANGED

@@ -2,7 +2,7 @@ module HtmlBeautifier #:nodoc:
   module VERSION #:nodoc:
     MAJOR = 0
     MINOR = 0
-    TINY  = 6
+    TINY  = 7
     STRING = [MAJOR, MINOR, TINY].join('.')
   end

data/test/test_helper.rb CHANGED

@@ -1,3 +1,19 @@
 require 'test/unit'
-require File.dirname(__FILE__) + '/html_beautifier_test_utilities'
-require File.dirname(__FILE__) + '/../lib/htmlbeautifier'
+lib = File.expand_path('../../lib', __FILE__)
+$:.unshift lib unless $:.include?(lib)
+module HtmlBeautifierTestUtilities
+  def code(str)
+    str = str.gsub(/\A\n|\n\s*\Z/, '')
+    indentation = str[/\A +/]
+    lines = str.split(/\n/)
+    lines.map{ |line| line.sub(/^#{indentation}/, '') }.join("\n")
+  end
+  def assert_beautifies(expected, source)
+    actual = ''
+    beautifier = HtmlBeautifier::Beautifier.new(actual)
+    beautifier.scan(source)
+    assert expected == actual, "Expected:\n#{expected}\nbut was:\n#{actual}"
+  end
+end

data/test/test_html_beautifier_integration.rb CHANGED

@@ -1,4 +1,5 @@
-require File.dirname(__FILE__) + '/test_helper'
+require 'test_helper'
+require 'htmlbeautifier/beautifier'
 class TestHtmlBeautifierIntegration < Test::Unit::TestCase
   include HtmlBeautifierTestUtilities
@@ -79,4 +80,15 @@ class TestHtmlBeautifierIntegration < Test::Unit::TestCase
     assert_beautifies expected, source
   end
+  def test_should_raise_an_error_with_the_source_line_of_an_illegal_outdent
+    begin
+      HtmlBeautifier::Beautifier.new('').scan("<html>\n</html>\n</html>")
+    rescue Exception => e
+      @exception = e
+    end
+    assert_equal RuntimeError, @exception.class
+    assert_match /outdent/i, @exception.message
+    assert_match /line 3/i, @exception.message
+  end
 end

data/test/test_html_beautifier_regression.rb CHANGED

@@ -1,4 +1,4 @@
-require File.dirname(__FILE__) + '/test_helper'
+require 'test_helper'
 require 'htmlbeautifier/beautifier'
 class HtmlBeautifierRegressionTest < Test::Unit::TestCase

data/test/test_parser.rb CHANGED

@@ -1,4 +1,4 @@
-require File.dirname(__FILE__) + '/test_helper'
+require 'test_helper'
 require 'htmlbeautifier/parser'
 class TestParser < Test::Unit::TestCase
@@ -21,10 +21,10 @@ class TestParser < Test::Unit::TestCase
   def test_should_dispatch_matching_sequence
     receiver = Receiver.new
-    parser = HtmlBeautifier::Parser.new{
-      map %r{foo}, :foo
-      map %r{bar\s*}, :bar
-      map %r{\s+}, :whitespace
+    parser = HtmlBeautifier::Parser.new { |p|
+      p.map %r{foo}, :foo
+      p.map %r{bar\s*}, :bar
+      p.map %r{\s+}, :whitespace
     }
     parser.scan('foo bar ', receiver)
     assert_equal [[:foo, ['foo']], [:whitespace, [' ']], [:bar, ['bar ']]], receiver.sequence
@@ -32,11 +32,51 @@ class TestParser < Test::Unit::TestCase
   def test_should_send_parenthesized_components_as_separate_parameters
     receiver = Receiver.new
-    parser = HtmlBeautifier::Parser.new{
-      map %r{(foo)\((.*?)\)}, :foo
+    parser = HtmlBeautifier::Parser.new { |p|
+      p.map %r{(foo)\((.*?)\)}, :foo
     }
     parser.scan('foo(bar)', receiver)
     assert_equal [[:foo, ['foo', 'bar']]], receiver.sequence
   end
+  class SourceTrackingReceiver < Receiver
+    attr_reader :sources_so_far
+    attr_reader :source_line_numbers
+    def initialize(parser)
+      @sources_so_far = []
+      @source_line_numbers = []
+      @parser = parser
+      super()
+    end
+    def append_new_source_so_far(*ignored)
+      @sources_so_far << @parser.source_so_far
+    end
+    def append_new_source_line_number(*ignored)
+      @source_line_numbers << @parser.source_line_number
+    end
+  end
+  def test_should_give_source_so_far
+    parser = HtmlBeautifier::Parser.new { |p|
+      p.map %r{(M+)}m, :append_new_source_so_far
+      p.map %r{([\s\n]+)}m, :space_or_newline
+    }
+    receiver = SourceTrackingReceiver.new(parser)
+    parser.scan("M MM MMM", receiver)
+    assert_equal ['M', 'M MM', 'M MM MMM'], receiver.sources_so_far
+  end
+  def test_should_give_source_line_number
+    parser = HtmlBeautifier::Parser.new{ |p|
+      p.map %r{(M+)}m, :append_new_source_line_number
+      p.map %r{([\s\n]+)}m, :space_or_newline
+    }
+    receiver = SourceTrackingReceiver.new(parser)
+    parser.scan("M \n\nMM\nMMM", receiver)
+    assert_equal [1, 3, 4], receiver.source_line_numbers
+  end
 end

metadata CHANGED

@@ -1,66 +1,58 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: htmlbeautifier
-version: !ruby/object:Gem::Version
-  version: 0.0.6
+version: !ruby/object:Gem::Version
+  version: 0.0.7
+  prerelease:
 platform: ruby
-authors:
+authors:
 - Paul Battley
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-07-02 00:00:00 +01:00
-default_executable:
+date: 2012-07-10 00:00:00.000000000 Z
 dependencies: []
 description:
 email: pbattley@gmail.com
-executables:
+executables:
 - htmlbeautifier
 extensions: []
 extra_rdoc_files: []
-files:
+files:
 - Rakefile
 - README.md
 - bin/htmlbeautifier
-- test/html_beautifier_test_utilities.rb
-- test/test_helper.rb
 - test/test_html_beautifier_integration.rb
-- test/test_html_beautifier_regression.rb
+- test/test_helper.rb
 - test/test_parser.rb
+- test/test_html_beautifier_regression.rb
+- lib/htmlbeautifier/html_parser.rb
+- lib/htmlbeautifier/builder.rb
 - lib/htmlbeautifier/beautifier.rb
-- lib/htmlbeautifier/parser.rb
 - lib/htmlbeautifier/version.rb
+- lib/htmlbeautifier/parser.rb
 - lib/htmlbeautifier.rb
-has_rdoc: true
 homepage: http://github.com/threedaymonk/htmlbeautifier
 licenses: []
 post_install_message:
 rdoc_options: []
-require_paths:
+require_paths:
 - lib
-required_ruby_version: !ruby/object:Gem::Requirement
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      version: "0"
-  version:
-required_rubygems_version: !ruby/object:Gem::Requirement
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      version: "0"
-  version:
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.3.5
+rubygems_version: 1.8.23
 signing_key:
 specification_version: 3
 summary: A normaliser/beautifier for HTML that also understands embedded Ruby.
 test_files: []

data/test/html_beautifier_test_utilities.rb DELETED

@@ -1,17 +0,0 @@
-module HtmlBeautifierTestUtilities
-  def code(str)
-    str = str.gsub(/\A\n|\n\s*\Z/, '')
-    indentation = str[/\A +/]
-    lines = str.split(/\n/)
-    lines.map{ |line| line.sub(/^#{indentation}/, '') }.join("\n")
-  end
-  def assert_beautifies(expected, source)
-    actual = ''
-    beautifier = HtmlBeautifier::Beautifier.new(actual)
-    beautifier.scan(source)
-    assert expected == actual, "Expected:\n#{expected}\nbut was:\n#{actual}"
-  end
-end