RubyGems - rfeedparser - Versions diffs - 0.9.92 → 0.9.93 - Mend

rfeedparser 0.9.92 → 0.9.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

data/lib/rfeedparser.rb +106 -105
data/lib/rfeedparser/better_sgmlparser.rb +84 -84
data/lib/rfeedparser/encoding_helpers.rb +4 -3
data/lib/rfeedparser/parser_mixin.rb +121 -118
data/lib/rfeedparser/parsers.rb +31 -30
data/lib/rfeedparser/scrub.rb +1 -1
data/lib/rfeedparser/time_helpers.rb +52 -54
data/tests/rfponly/wellformed/mrss/mrss_media_content.xml +20 -0
data/tests/rfponly/wellformed/mrss/mrss_thumbnail.xml +21 -0
metadata +10 -5

data/lib/rfeedparser.rb CHANGED

@@ -56,38 +56,38 @@ include FeedParserUtilities
 module FeedParser
-  Version = "0.9.92"
+  Version = "0.9.93"
   License = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
+  Redistribution and use in source and binary forms, with or without modification,
+  are permitted provided that the following conditions are met:
-* Redistributions of source code must retain the above copyright notice,
+  * Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.
-* Redistributions in binary form must reproduce the above copyright notice,
+  * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE."""
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE."""
   Author = "Jeff Hodges <http://somethingsimilar.com>"
   Copyright_Holder = "Mark Pilgrim <http://diveintomark.org/>"
   Contributors = [  "Jason Diamond <http://injektilo.org/>",
-		    "John Beimler <http://john.beimler.org/>",
-		    "Fazal Majid <http://www.majid.info/mylos/weblog/>",
-		    "Aaron Swartz <http://aaronsw.com/>",
-		    "Kevin Marks <http://epeus.blogspot.com/>"
+    "John Beimler <http://john.beimler.org/>",
+    "Fazal Majid <http://www.majid.info/mylos/weblog/>",
+    "Aaron Swartz <http://aaronsw.com/>",
+    "Kevin Marks <http://epeus.blogspot.com/>"
   ]
   # HTTP "User-Agent" header to send to servers when downloading feeds.
   # If you are embedding feedparser in a larger application, you should
@@ -123,25 +123,26 @@ POSSIBILITY OF SUCH DAMAGE."""
   SUPPORTED_VERSIONS = {'' => 'unknown',
-		      'rss090' => 'RSS 0.90',
-		      'rss091n' => 'RSS 0.91 (Netscape)',
-		      'rss091u' => 'RSS 0.91 (Userland)',
-		      'rss092' => 'RSS 0.92',
-		      'rss093' => 'RSS 0.93',
-		      'rss094' => 'RSS 0.94',
-		      'rss20' => 'RSS 2.0',
-		      'rss10' => 'RSS 1.0',
-		      'rss' => 'RSS (unknown version)',
-		      'atom01' => 'Atom 0.1',
-		      'atom02' => 'Atom 0.2',
-		      'atom03' => 'Atom 0.3',
-		      'atom10' => 'Atom 1.0',
-		      'atom' => 'Atom (unknown version)',
-		      'cdf' => 'CDF',
-		      'hotrss' => 'Hot RSS'
+    'rss090' => 'RSS 0.90',
+    'rss091n' => 'RSS 0.91 (Netscape)',
+    'rss091u' => 'RSS 0.91 (Userland)',
+    'rss092' => 'RSS 0.92',
+    'rss093' => 'RSS 0.93',
+    'rss094' => 'RSS 0.94',
+    'rss20' => 'RSS 2.0',
+    'rss10' => 'RSS 1.0',
+    'rss' => 'RSS (unknown version)',
+    'atom01' => 'Atom 0.1',
+    'atom02' => 'Atom 0.2',
+    'atom03' => 'Atom 0.3',
+    'atom10' => 'Atom 1.0',
+    'atom' => 'Atom (unknown version)',
+    'cdf' => 'CDF',
+    'hotrss' => 'Hot RSS'
   }
   def parse(furi, options = {})
+    furi.strip!
     # Parse a feed from a URL, file, stream or string
     $compatible = options[:compatible].nil? ? $compatible : options[:compatible]# Use the default compatibility if compatible is nil
     strictklass = options[:strict] || StrictFeedParser
@@ -189,27 +190,27 @@ POSSIBILITY OF SUCH DAMAGE."""
     end
     begin
       if f.meta
-	result['etag'] = options[:etag] || f.meta['etag']
-	result['modified'] = options[:modified] || f.last_modified
-	result['url'] = f.base_uri.to_s
-	result['status'] = f.status[0] || 200
-	result['headers'] = f.meta
-	result['headers']['content-location'] ||= options[:content_location] unless options[:content_location].nil?
-	result['headers']['content-language'] ||= options[:content_language] unless options[:content_language].nil?
-	result['headers']['content-type'] ||= options[:content_type] unless options[:content_type].nil?
+        result['etag'] = options[:etag] || f.meta['etag']
+        result['modified'] = options[:modified] || f.last_modified
+        result['url'] = f.base_uri.to_s
+        result['status'] = f.status[0] || 200
+        result['headers'] = f.meta
+        result['headers']['content-location'] ||= options[:content_location] unless options[:content_location].nil?
+        result['headers']['content-language'] ||= options[:content_language] unless options[:content_language].nil?
+        result['headers']['content-type'] ||= options[:content_type] unless options[:content_type].nil?
       end
     rescue NoMethodError
       result['headers'] = {}
       result['etag'] = result['headers']['etag'] = options[:etag] unless options[:etag].nil?
       result['modified'] = result['headers']['last-modified'] = options[:modified] unless options[:modified].nil?
       unless options[:content_location].nil?
-	result['headers']['content-location'] = options[:content_location]
+        result['headers']['content-location'] = options[:content_location]
       end
       unless options[:content_language].nil?
-	result['headers']['content-language'] = options[:content_language]
+        result['headers']['content-language'] = options[:content_language]
       end
       unless options[:content_type].nil?
-	result['headers']['content-type'] = options[:content_type]
+        result['headers']['content-type'] = options[:content_type]
       end
     end
@@ -221,13 +222,13 @@ POSSIBILITY OF SUCH DAMAGE."""
     # - result['encoding'] is the actual encoding, as per RFC 3023 and a variety of other conflicting specifications
     http_headers = result['headers']
     result['encoding'], http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type =
-      self.getCharacterEncoding(f,data)
+    self.getCharacterEncoding(f,data)
     if not http_headers.empty? and not acceptable_content_type
       if http_headers.has_key?('content-type')
-	bozo_message = "#{http_headers['content-type']} is not an XML media type"
+        bozo_message = "#{http_headers['content-type']} is not an XML media type"
       else
-	bozo_message = 'no Content-type specified'
+        bozo_message = 'no Content-type specified'
       end
       result['bozo'] = true
       result['bozo_exception'] = NonXMLContentType.new(bozo_message) # I get to care about this, cuz Mark says I should.
@@ -260,21 +261,21 @@ POSSIBILITY OF SUCH DAMAGE."""
       next if tried_encodings.include? proposed_encoding
       tried_encodings << proposed_encoding
       begin
-	data = self.toUTF8(data, proposed_encoding)
-	known_encoding = use_strict_parser = true
-	break
+        data = self.toUTF8(data, proposed_encoding)
+        known_encoding = use_strict_parser = true
+        break
       rescue
       end
     end
     # if no luck and we have auto-detection library, try that
     if not known_encoding and $chardet
       begin
-	proposed_encoding = CharDet.detect(data)['encoding']
-	if proposed_encoding and not tried_encodings.include?proposed_encoding
-	  tried_encodings << proposed_encoding
-	  data = self.toUTF8(data, proposed_encoding)
-	  known_encoding = use_strict_parser = true
-	end
+        proposed_encoding = CharDet.detect(data)['encoding']
+        if proposed_encoding and not tried_encodings.include?proposed_encoding
+          tried_encodings << proposed_encoding
+          data = self.toUTF8(data, proposed_encoding)
+          known_encoding = use_strict_parser = true
+        end
       rescue
       end
     end
@@ -284,24 +285,24 @@ POSSIBILITY OF SUCH DAMAGE."""
     # if still no luck and we haven't tried utf-8 yet, try that
     if not known_encoding and not tried_encodings.include?'utf-8'
       begin
-	proposed_encoding = 'utf-8'
-	tried_encodings << proposed_encoding
-	data = self.toUTF8(data, proposed_encoding)
-	known_encoding = use_strict_parser = true
+        proposed_encoding = 'utf-8'
+        tried_encodings << proposed_encoding
+        data = self.toUTF8(data, proposed_encoding)
+        known_encoding = use_strict_parser = true
       rescue
       end
     end
     # if still no luck and we haven't tried windows-1252 yet, try that
     if not known_encoding and not tried_encodings.include?'windows-1252'
       begin
-	proposed_encdoing = 'windows-1252'
-	tried_encodings << proposed_encoding
-	data = self.toUTF8(data, proposed_encoding)
-	known_encoding = use_strict_parser = true
+        proposed_encoding = 'windows-1252'
+        tried_encodings << proposed_encoding
+        data = self.toUTF8(data, proposed_encoding)
+        known_encoding = use_strict_parser = true
       rescue
       end
     end
     # NOTE this isn't in FeedParser.py 4.1
     # if still no luck and we haven't tried iso-8859-2 yet, try that.
     #if not known_encoding and not tried_encodings.include?'iso-8859-2'
@@ -338,15 +339,15 @@ POSSIBILITY OF SUCH DAMAGE."""
       inputdata = XML::SAX::InputSource.new('parsedfeed')
       inputdata.setByteStream(StringIO.new(data))
       begin
-	saxparser.parse(inputdata)
+        saxparser.parse(inputdata)
       rescue Exception => parseerr # resparse
-	if $debug
-	  $stderr << "xml parsing failed\n"
-	  $stderr << parseerr.to_s+"\n" # Hrmph.
-	end
-	result['bozo'] = true
-	result['bozo_exception'] = feedparser.exc || e
-	use_strict_parser = false
+        if $debug
+          $stderr << "xml parsing failed\n"
+          $stderr << parseerr.to_s+"\n" # Hrmph.
+        end
+        result['bozo'] = true
+        result['bozo_exception'] = feedparser.exc || e
+        use_strict_parser = false
       end
     end
     if not use_strict_parser
@@ -378,22 +379,22 @@ class TextSerializer < Serializer
     return if (node.nil? or node.empty?)
     if node.methods.include?'keys'
       node.keys.sort.each do |key|
-      next if ['description','link'].include? key
-      next if node.has_key? k+'_detail'
-      next if node.has_key? k+'_parsed'
-      writer(stream,node[k], prefix+k+'.')
+        next if ['description','link'].include? key
+        next if node.has_key? k+'_detail'
+        next if node.has_key? k+'_parsed'
+        writer(stream,node[k], prefix+k+'.')
       end
     elsif node.class == Array
       node.each_with_index do |thing, index|
-	writer(stream, thing, prefix[0..-2] + '[' + index.to_s + '].')
+        writer(stream, thing, prefix[0..-2] + '[' + index.to_s + '].')
       end
     else
       begin
-	s = u(node.to_s)
-	stream << prefix[0..-2]
-	stream << '='
-	stream << s
-	stream << "\n"
+        s = u(node.to_s)
+        stream << prefix[0..-2]
+        stream << '='
+        stream << s
+        stream << "\n"
       rescue
       end
     end
@@ -422,49 +423,49 @@ if $0 == __FILE__
     opts.banner
     opts.separator ""
     opts.on("-A", "--user-agent [AGENT]",
-	  "User-Agent for HTTP URLs") {|agent|
+    "User-Agent for HTTP URLs") {|agent|
       options.agent = agent
     }
     opts.on("-e", "--referrer [URL]",
-	  "Referrer for HTTP URLs") {|referrer|
+    "Referrer for HTTP URLs") {|referrer|
       options.referrer = referrer
     }
     opts.on("-t", "--etag [TAG]",
-	  "ETag/If-None-Match for HTTP URLs") {|etag|
+    "ETag/If-None-Match for HTTP URLs") {|etag|
       options.etag = etag
     }
     opts.on("-m", "--last-modified [DATE]",
-	  "Last-modified/If-Modified-Since for HTTP URLs (any supported date format)") {|modified|
+    "Last-modified/If-Modified-Since for HTTP URLs (any supported date format)") {|modified|
       options.modified = modified
     }
     opts.on("-f", "--format [FORMAT]", [:text, :pprint],
-	  "output resutls in FORMAT (text, pprint)") {|format|
+    "output resutls in FORMAT (text, pprint)") {|format|
       options.format = format
     }
     opts.on("-v", "--[no-]verbose",
-	  "write debugging information to stderr") {|v|
+    "write debugging information to stderr") {|v|
       options.verbose = v
     }
     opts.on("-c", "--[no-]compatible",
-	  "strip element attributes like feedparser.py 4.1 (default)") {|comp|
+    "strip element attributes like feedparser.py 4.1 (default)") {|comp|
       options.compatible = comp
     }
     opts.on("-l", "--content-location [LOCATION]",
-	  "default Content-Location HTTP header") {|loc|
+    "default Content-Location HTTP header") {|loc|
       options.content_location = loc
     }
     opts.on("-a", "--content-language [LANG]",
-	  "default Content-Language HTTP header") {|lang|
+    "default Content-Language HTTP header") {|lang|
       options.content_language = lang
     }
     opts.on("-t", "--content-type [TYPE]",
-	  "default Content-type HTTP header") {|ctype|
+    "default Content-type HTTP header") {|ctype|
       options.ctype = ctype
     }
   end
@@ -482,14 +483,14 @@ if $0 == __FILE__
   unless args.nil?
     args.each do |url| # opts.parse! removes everything but the urls from the command line
       results = FeedParser.parse(url, :etag => options.etag,
-				 :modified => options.modified,
-				 :agent => options.agent,
-				 :referrer => options.referrer,
-				 :content_location => options.content_location,
-				 :content_language => options.content_language,
-				 :content_type => options.ctype
-				)
-				serializer.new(results).write($stdout)
+      :modified => options.modified,
+      :agent => options.agent,
+      :referrer => options.referrer,
+      :content_location => options.content_location,
+      :content_language => options.content_language,
+      :content_type => options.ctype
+      )
+      serializer.new(results).write($stdout)
     end
   end
 end

data/lib/rfeedparser/better_sgmlparser.rb CHANGED

@@ -14,7 +14,7 @@ class BetterSGMLParser < HTML::SGMLParser
   Shorttagopen = /'<[a-zA-Z][-.a-zA-Z0-9]*/u
   Shorttag = /'<([a-zA-Z][-.a-zA-Z0-9]*)\/([^\/]*)\//u
-  Endtagopen = /<\//u # Matching the Python SGMLParser
+  Endtagopen = /<\//u # Changed the RegExps to match the Python SGMLParser
   Endbracket = /[<>]/u
   Declopen = /<!/u
   Piopenbegin = /^<\?/u
@@ -24,8 +24,8 @@ class BetterSGMLParser < HTML::SGMLParser
   Commentclose = /--\s*>/u
   Tagfind = /[a-zA-Z][-_.:a-zA-Z0-9]*/u
   Attrfind = Regexp.compile('\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'+
-			    '(\'[^\']*\'|"[^"]*"|[\]\[\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?',
-			    64)
+  '(\'[^\']*\'|"[^"]*"|[\]\[\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?',
+  64)
   Endtagfind = /\s*\/\s*>/u
   def initialize(verbose=false)
     super(verbose)
@@ -40,98 +40,98 @@ class BetterSGMLParser < HTML::SGMLParser
     n = rawdata.length
     while i < n
       if @nomoretags
-	# handle_data_range does nothing more than set a "Range" that is never used. wtf?
-	handle_data(rawdata[i...n]) # i...n means "range from i to n not including n"
-	i = n
-	break
+        # handle_data_range does nothing more than set a "Range" that is never used. wtf?
+        handle_data(rawdata[i...n]) # i...n means "range from i to n not including n"
+        i = n
+        break
       end
       j = rawdata.index(Interesting, i)
       j = n unless j
       handle_data(rawdata[i...j]) if i < j
       i = j
       break if (i == n)
-      if rawdata[i..i] == '<' # equivalent to rawdata[i..i] == '<' # Yeah, ugly.
-	if rawdata.index(Starttagopen,i) == i
-	  if @literal
-	    handle_data(rawdata[i..i])
-	    i = i+1
-	    next
-	  end
-	  k = parse_starttag(i)
-	  break unless k
-	  i = k
-	  next
-	end
-	if rawdata.index(Endtagopen,i) == i #Don't use Endtagopen
-	  k = parse_endtag(i)
-	  break unless k
-	  i = k
-	  @literal = false
-	  next
-	end
-	if @literal
-	  if n > (i+1)
-	    handle_data("<")
-	    i = i+1
-	  else
-	    #incomplete
-	    break
-	  end
-	  next
-	end
-	if rawdata.index(Commentopen,i) == i
-	  k = parse_comment(i)
-	  break unless k
-	  i = k
-	  next
-	end
-	if rawdata.index(Piopenbegin,i) == i # Like Piopen but must be at beginning of rawdata
-	  k = parse_pi(i)
-	  break unless k
-	  i += k
-	  next
-	end
-	if rawdata.index(Declopen,i) == i
-	  # This is some sort of declaration; in "HTML as
-	  # deployed," this should only be the document type
-	  # declaration ("<!DOCTYPE html...>").
-	  k = parse_declaration(i)
-	  break unless k
-	  i = k
-	  next
-	end
+      if rawdata[i..i] == '<' # Yeah, ugly, but I prefer it to rawdata[i] == ?<
+        if rawdata.index(Starttagopen,i) == i
+          if @literal
+            handle_data(rawdata[i..i])
+            i = i+1
+            next
+          end
+          k = parse_starttag(i)
+          break unless k
+          i = k
+          next
+        end
+        if rawdata.index(Endtagopen,i) == i #Don't use Endtagopen
+          k = parse_endtag(i)
+          break unless k
+          i = k
+          @literal = false
+          next
+        end
+        if @literal
+          if n > (i+1)
+            handle_data("<")
+            i = i+1
+          else
+            #incomplete
+            break
+          end
+          next
+        end
+        if rawdata.index(Commentopen,i) == i
+          k = parse_comment(i)
+          break unless k
+          i = k
+          next
+        end
+        if rawdata.index(Piopenbegin,i) == i # Like Piopen but must be at beginning of rawdata
+          k = parse_pi(i)
+          break unless k
+          i += k
+          next
+        end
+        if rawdata.index(Declopen,i) == i
+          # This is some sort of declaration; in "HTML as
+          # deployed," this should only be the document type
+          # declaration ("<!DOCTYPE html...>").
+          k = parse_declaration(i)
+          break unless k
+          i = k
+          next
+        end
       elsif rawdata[i..i] == '&'
-	if @literal # FIXME BUGME SGMLParser totally does not check this. Bug it.
-	  handle_data(rawdata[i..i])
-	  i += 1
-	  next
-	end
+        if @literal # FIXME BUGME SGMLParser totally does not check this. Bug it.
+          handle_data(rawdata[i..i])
+          i += 1
+          next
+        end
-      # the Char must come first as its #=~ method is the only one that is UTF-8 safe
-      ni,match = index_match(rawdata, Charref, i)
-      if ni and ni == i # See? Ugly
-	handle_charref(match[1]) # $1 is just the first group we captured (with parentheses)
-	i += match[0].length  # $& is the "all" of the match.. it includes the full match we looked for not just the stuff we put parentheses around to capture.
-	i -= 1 unless rawdata[i-1..i-1] == ";"
-	next
-      end
-      ni,match = index_match(rawdata, Entityref, i)
-      if ni and ni == i
-	handle_entityref(match[1])
-	i += match[0].length
-	i -= 1 unless rawdata[i-1..i-1] == ";"
-	next
-      end
+        # the Char must come first as its #=~ method is the only one that is UTF-8 safe
+        ni,match = index_match(rawdata, Charref, i)
+        if ni and ni == i # See? Ugly
+          handle_charref(match[1]) # $1 is just the first group we captured (with parentheses)
+          i += match[0].length  # $& is the "all" of the match.. it includes the full match we looked for not just the stuff we put parentheses around to capture.
+          i -= 1 unless rawdata[i-1..i-1] == ";"
+          next
+        end
+        ni,match = index_match(rawdata, Entityref, i)
+        if ni and ni == i
+          handle_entityref(match[1])
+          i += match[0].length
+          i -= 1 unless rawdata[i-1..i-1] == ";"
+          next
+        end
       else
-	error('neither < nor & ??')
+        error('neither < nor & ??')
       end
       # We get here only if incomplete matches but
       # nothing else
       ni,match = index_match(rawdata,Incomplete,i)
       unless ni and ni == 0
-	handle_data(rawdata[i...i+1]) # str[i...i+1] == str[i..i]
-	i += 1
-	next
+        handle_data(rawdata[i...i+1]) # str[i...i+1] == str[i..i]
+        i += 1
+        next
       end
       j = ni + match[0].length
       break if j == n # Really incomplete
@@ -206,7 +206,7 @@ class BetterSGMLParser < HTML::SGMLParser
     else
       ni,match = index_match(rawdata,Tagfind,i+1)
       unless match
-	error('unexpected call to parse_starttag')
+        error('unexpected call to parse_starttag')
       end
       k = ni+match[0].length+1
       tag = match[0].downcase
@@ -220,9 +220,9 @@ class BetterSGMLParser < HTML::SGMLParser
       matched_length = match[0].length
       attrname, rest, attrvalue = match[1],match[2],match[3]
       if rest.nil? or rest.empty?
-	attrvalue = '' # was: = attrname # Why the change?
+        attrvalue = '' # was: = attrname # Why the change?
       elsif [?',?'] == [attrvalue[0..0], attrvalue[-1..-1]] or [?",?"] == [attrvalue[0],attrvalue[-1]]
-	attrvalue = attrvalue[1...-1]
+        attrvalue = attrvalue[1...-1]
       end
       attrsd << [attrname.downcase, attrvalue]
       k += matched_length