RubyGems - rfeedparser - Versions diffs - 0.9.92 → 0.9.93 - Mend

rfeedparser 0.9.92 → 0.9.93

Files changed (10)

data/lib/rfeedparser.rb +106 -105
data/lib/rfeedparser/better_sgmlparser.rb +84 -84
data/lib/rfeedparser/encoding_helpers.rb +4 -3
data/lib/rfeedparser/parser_mixin.rb +121 -118
data/lib/rfeedparser/parsers.rb +31 -30
data/lib/rfeedparser/scrub.rb +1 -1
data/lib/rfeedparser/time_helpers.rb +52 -54
data/tests/rfponly/wellformed/mrss/mrss_media_content.xml +20 -0
data/tests/rfponly/wellformed/mrss/mrss_thumbnail.xml +21 -0
metadata +10 -5

data/lib/rfeedparser/encoding_helpers.rb CHANGED

@@ -178,16 +178,17 @@ module FeedParserUtilities
       data = data[4..-1]
     end
     begin
-      newdata = uconvert(data, encoding, 'utf-8')
+      newdata = uconvert(data, encoding, 'utf-8')
     rescue => details
+      raise details
     end
     $stderr << "successfully converted #{encoding} data to utf-8\n" if $debug
     declmatch = /^<\?xml[^>]*?>/
       newdecl = "<?xml version=\'1.0\' encoding=\'utf-8\'?>"
       if declmatch =~ newdata
-	newdata.sub!(declmatch, newdecl)
+        newdata.sub!(declmatch, newdecl)
       else
-	newdata = newdecl + "\n" + newdata
+        newdata = newdecl + "\n" + newdata
       end
     return newdata
   end

data/lib/rfeedparser/parser_mixin.rb CHANGED

@@ -6,65 +6,65 @@ module FeedParserMixin
     $stderr << "initializing FeedParser\n" if $debug
     @namespaces = {'' => '',
-		'http://backend.userland.com/rss' => '',
-		'http://blogs.law.harvard.edu/tech/rss' => '',
-		'http://purl.org/rss/1.0/' => '',
-		'http://my.netscape.com/rdf/simple/0.9/' => '',
-		'http://example.com/newformat#' => '',
-		'http://example.com/necho' => '',
-		'http://purl.org/echo/' => '',
-		'uri/of/echo/namespace#' => '',
-		'http://purl.org/pie/' => '',
-		'http://purl.org/atom/ns#' => '',
-		'http://www.w3.org/2005/Atom' => '',
-		'http://purl.org/rss/1.0/modules/rss091#' => '',
-		'http://webns.net/mvcb/' =>                               'admin',
-		'http://purl.org/rss/1.0/modules/aggregation/' =>         'ag',
-		'http://purl.org/rss/1.0/modules/annotate/' =>            'annotate',
-		'http://media.tangent.org/rss/1.0/' =>                    'audio',
-		'http://backend.userland.com/blogChannelModule' =>        'blogChannel',
-		'http://web.resource.org/cc/' =>                          'cc',
-		'http://backend.userland.com/creativeCommonsRssModule' => 'creativeCommons',
-		'http://purl.org/rss/1.0/modules/company' =>              'co',
-		'http://purl.org/rss/1.0/modules/content/' =>             'content',
-		'http://my.theinfo.org/changed/1.0/rss/' =>               'cp',
-		'http://purl.org/dc/elements/1.1/' =>                     'dc',
-		'http://purl.org/dc/terms/' =>                            'dcterms',
-		'http://purl.org/rss/1.0/modules/email/' =>               'email',
-		'http://purl.org/rss/1.0/modules/event/' =>               'ev',
-		'http://rssnamespace.org/feedburner/ext/1.0' =>           'feedburner',
-		'http://freshmeat.net/rss/fm/' =>                         'fm',
-		'http://xmlns.com/foaf/0.1/' =>                           'foaf',
-		'http://www.w3.org/2003/01/geo/wgs84_pos#' =>             'geo',
-		'http://postneo.com/icbm/' =>                             'icbm',
-		'http://purl.org/rss/1.0/modules/image/' =>               'image',
-		'http://www.itunes.com/DTDs/PodCast-1.0.dtd' =>           'itunes',
-		'http://example.com/DTDs/PodCast-1.0.dtd' =>              'itunes',
-		'http://purl.org/rss/1.0/modules/link/' =>                'l',
-		'http://search.yahoo.com/mrss' =>                         'media',
-		'http://madskills.com/public/xml/rss/module/pingback/' => 'pingback',
-		'http://prismstandard.org/namespaces/1.2/basic/' =>       'prism',
-		'http://www.w3.org/1999/02/22-rdf-syntax-ns#' =>          'rdf',
-		'http://www.w3.org/2000/01/rdf-schema#' =>                'rdfs',
-		'http://purl.org/rss/1.0/modules/reference/' =>           'ref',
-		'http://purl.org/rss/1.0/modules/richequiv/' =>           'reqv',
-		'http://purl.org/rss/1.0/modules/search/' =>              'search',
-		'http://purl.org/rss/1.0/modules/slash/' =>               'slash',
-		'http://schemas.xmlsoap.org/soap/envelope/' =>            'soap',
-		'http://purl.org/rss/1.0/modules/servicestatus/' =>       'ss',
-		'http://hacks.benhammersley.com/rss/streaming/' =>        'str',
-		'http://purl.org/rss/1.0/modules/subscription/' =>        'sub',
-		'http://purl.org/rss/1.0/modules/syndication/' =>         'sy',
-		'http://purl.org/rss/1.0/modules/taxonomy/' =>            'taxo',
-		'http://purl.org/rss/1.0/modules/threading/' =>           'thr',
-		'http://purl.org/rss/1.0/modules/textinput/' =>           'ti',
-		'http://madskills.com/public/xml/rss/module/trackback/' =>'trackback',
-		'http://wellformedweb.org/commentAPI/' =>                 'wfw',
-		'http://purl.org/rss/1.0/modules/wiki/' =>                'wiki',
-		'http://www.w3.org/1999/xhtml' =>                         'xhtml',
-		'http://www.w3.org/XML/1998/namespace' =>                 'xml',
-		'http://www.w3.org/1999/xlink' =>                         'xlink',
-		'http://schemas.pocketsoap.com/rss/myDescModule/' =>      'szf'
+      'http://backend.userland.com/rss' => '',
+      'http://blogs.law.harvard.edu/tech/rss' => '',
+      'http://purl.org/rss/1.0/' => '',
+      'http://my.netscape.com/rdf/simple/0.9/' => '',
+      'http://example.com/newformat#' => '',
+      'http://example.com/necho' => '',
+      'http://purl.org/echo/' => '',
+      'uri/of/echo/namespace#' => '',
+      'http://purl.org/pie/' => '',
+      'http://purl.org/atom/ns#' => '',
+      'http://www.w3.org/2005/Atom' => '',
+      'http://purl.org/rss/1.0/modules/rss091#' => '',
+      'http://webns.net/mvcb/' =>                               'admin',
+      'http://purl.org/rss/1.0/modules/aggregation/' =>         'ag',
+      'http://purl.org/rss/1.0/modules/annotate/' =>            'annotate',
+      'http://media.tangent.org/rss/1.0/' =>                    'audio',
+      'http://backend.userland.com/blogChannelModule' =>        'blogChannel',
+      'http://web.resource.org/cc/' =>                          'cc',
+      'http://backend.userland.com/creativeCommonsRssModule' => 'creativeCommons',
+      'http://purl.org/rss/1.0/modules/company' =>              'co',
+      'http://purl.org/rss/1.0/modules/content/' =>             'content',
+      'http://my.theinfo.org/changed/1.0/rss/' =>               'cp',
+      'http://purl.org/dc/elements/1.1/' =>                     'dc',
+      'http://purl.org/dc/terms/' =>                            'dcterms',
+      'http://purl.org/rss/1.0/modules/email/' =>               'email',
+      'http://purl.org/rss/1.0/modules/event/' =>               'ev',
+      'http://rssnamespace.org/feedburner/ext/1.0' =>           'feedburner',
+      'http://freshmeat.net/rss/fm/' =>                         'fm',
+      'http://xmlns.com/foaf/0.1/' =>                           'foaf',
+      'http://www.w3.org/2003/01/geo/wgs84_pos#' =>             'geo',
+      'http://postneo.com/icbm/' =>                             'icbm',
+      'http://purl.org/rss/1.0/modules/image/' =>               'image',
+      'http://www.itunes.com/DTDs/PodCast-1.0.dtd' =>           'itunes',
+      'http://example.com/DTDs/PodCast-1.0.dtd' =>              'itunes',
+      'http://purl.org/rss/1.0/modules/link/' =>                'l',
+      'http://search.yahoo.com/mrss' =>                         'media',
+      'http://madskills.com/public/xml/rss/module/pingback/' => 'pingback',
+      'http://prismstandard.org/namespaces/1.2/basic/' =>       'prism',
+      'http://www.w3.org/1999/02/22-rdf-syntax-ns#' =>          'rdf',
+      'http://www.w3.org/2000/01/rdf-schema#' =>                'rdfs',
+      'http://purl.org/rss/1.0/modules/reference/' =>           'ref',
+      'http://purl.org/rss/1.0/modules/richequiv/' =>           'reqv',
+      'http://purl.org/rss/1.0/modules/search/' =>              'search',
+      'http://purl.org/rss/1.0/modules/slash/' =>               'slash',
+      'http://schemas.xmlsoap.org/soap/envelope/' =>            'soap',
+      'http://purl.org/rss/1.0/modules/servicestatus/' =>       'ss',
+      'http://hacks.benhammersley.com/rss/streaming/' =>        'str',
+      'http://purl.org/rss/1.0/modules/subscription/' =>        'sub',
+      'http://purl.org/rss/1.0/modules/syndication/' =>         'sy',
+      'http://purl.org/rss/1.0/modules/taxonomy/' =>            'taxo',
+      'http://purl.org/rss/1.0/modules/threading/' =>           'thr',
+      'http://purl.org/rss/1.0/modules/textinput/' =>           'ti',
+      'http://madskills.com/public/xml/rss/module/trackback/' =>'trackback',
+      'http://wellformedweb.org/commentAPI/' =>                 'wfw',
+      'http://purl.org/rss/1.0/modules/wiki/' =>                'wiki',
+      'http://www.w3.org/1999/xhtml' =>                         'xhtml',
+      'http://www.w3.org/XML/1998/namespace' =>                 'xml',
+      'http://www.w3.org/1999/xlink' =>                         'xlink',
+      'http://schemas.pocketsoap.com/rss/myDescModule/' =>      'szf'
     }
     @matchnamespaces = {}
     @namespaces.each do |l|
@@ -123,7 +123,7 @@ module FeedParserMixin
       k = old_k.downcase # Downcase all keys
       attrsD[k] = value
       if ['rel','type'].include?value
-	attrsD[k].downcase!   # Downcase the value if the key is 'rel' or 'type'
+        attrsD[k].downcase!   # Downcase the value if the key is 'rel' or 'type'
       end
     end
@@ -140,7 +140,7 @@ module FeedParserMixin
     end
     if lang and not lang.empty? # Seriously, this cannot be correct
       if ['feed', 'rss', 'rdf:RDF'].include?tag
-	@feeddata['language'] = lang.gsub('_','-')
+        @feeddata['language'] = lang.gsub('_','-')
       end
     end
     @lang = lang
@@ -150,9 +150,9 @@ module FeedParserMixin
     # track namespaces
     attrsd.each do |prefix, uri|
       if /^xmlns:/ =~ prefix # prefix begins with xmlns:
-	trackNamespace(prefix[6..-1], uri)
+        trackNamespace(prefix[6..-1], uri)
       elsif prefix == 'xmlns':
-	trackNamespace(nil, uri)
+        trackNamespace(nil, uri)
       end
     end
@@ -238,13 +238,13 @@ module FeedParserMixin
     if @basestack and not @basestack.empty?
       @basestack.pop
       if @basestack and @basestack[-1] and not (@basestack.empty? or @basestack[-1].empty?)
-	@baseuri = @basestack[-1]
+        @baseuri = @basestack[-1]
       end
     end
     if @langstack and not @langstack.empty?
       @langstack.pop
       if @langstack and not @langstack.empty? # and @langstack[-1] and not @langstack.empty?
-	@lang = @langstack[-1]
+        @lang = @langstack[-1]
       end
     end
   end
@@ -260,9 +260,9 @@ module FeedParserMixin
       text = "&##{ref};"
     else
       if ref[0..0] == 'x'
-	c = (ref[1..-1]).to_i(16)
+        c = (ref[1..-1]).to_i(16)
       else
-	c = ref.to_i
+        c = ref.to_i
       end
       text = [c].pack('U*')
     end
@@ -383,7 +383,7 @@ module FeedParserMixin
     if @contentparams['base64']
       out64 = Base64::decode64(output) # a.k.a. [output].unpack('m')[0]
       if not output.empty? and not out64.empty?
-	output = out64
+        output = out64
       end
     end
@@ -404,13 +404,13 @@ module FeedParserMixin
     # resolve relative URIs within embedded markup
     if @html_types.include?mapContentType(@contentparams['type'] || 'text/html')
       if @can_contain_relative_uris.include?element
-	output = FeedParser.resolveRelativeURIs(output, @baseuri, @encoding)
+        output = FeedParser.resolveRelativeURIs(output, @baseuri, @encoding)
       end
     end
     # sanitize embedded markup
     if @html_types.include?mapContentType(@contentparams['type'] || 'text/html')
       if @can_contain_dangerous_markup.include?element
-	output = FeedParser.sanitizeHTML(output, @encoding)
+        output = FeedParser.sanitizeHTML(output, @encoding)
       end
     end
@@ -425,34 +425,34 @@ module FeedParserMixin
     # store output in appropriate place(s)
     if @inentry and not @insource
       if element == 'content'
-	@entries[-1][element] ||= []
-	contentparams = Marshal.load(Marshal.dump(@contentparams)) # deepcopy
-	contentparams['value'] = output
-	@entries[-1][element] << contentparams
+        @entries[-1][element] ||= []
+        contentparams = Marshal.load(Marshal.dump(@contentparams)) # deepcopy
+        contentparams['value'] = output
+        @entries[-1][element] << contentparams
       elsif element == 'link'
-	@entries[-1][element] = output
-	if output and not output.empty?
-	  @entries[-1]['links'][-1]['href'] = output
-	end
+        @entries[-1][element] = output
+        if output and not output.empty?
+          @entries[-1]['links'][-1]['href'] = output
+        end
       else
-	element = 'summary' if element == 'description'
-	@entries[-1][element] = output
-	if @incontent != 0
-	  contentparams = Marshal.load(Marshal.dump(@contentparams))
-	  contentparams['value'] = output
-	  @entries[-1][element + '_detail'] = contentparams
-	end
+        element = 'summary' if element == 'description'
+        @entries[-1][element] = output
+        if @incontent != 0
+          contentparams = Marshal.load(Marshal.dump(@contentparams))
+          contentparams['value'] = output
+          @entries[-1][element + '_detail'] = contentparams
+        end
       end
     elsif (@infeed or @insource) and not @intextinput and not @inimage
       context = getContext()
       element = 'subtitle' if element == 'description'
       context[element] = output
       if element == 'link'
-	context['links'][-1]['href'] = output
+        context['links'][-1]['href'] = output
       elsif @incontent != 0
-	contentparams = Marshal.load(Marshal.dump(@contentparams))
-	contentparams['value'] = output
-	context[element + '_detail'] = contentparams
+        contentparams = Marshal.load(Marshal.dump(@contentparams))
+        contentparams['value'] = output
+        context[element + '_detail'] = contentparams
       end
     end
     return output
@@ -514,20 +514,20 @@ module FeedParserMixin
   def _start_rss(attrsD)
     versionmap = {'0.91' => 'rss091u',
-		  '0.92' => 'rss092',
-		  '0.93' => 'rss093',
-		  '0.94' => 'rss094'
+      '0.92' => 'rss092',
+      '0.93' => 'rss093',
+      '0.94' => 'rss094'
     }
     if not @version or @version.empty?
       attr_version = attrsD['version'] || ''
       version = versionmap[attr_version]
       if version and not version.empty?
-	@version = version
+        @version = version
       elsif /^2\./ =~ attr_version
-	@version = 'rss20'
+        @version = 'rss20'
       else
-	@version = 'rss'
+        @version = 'rss'
       end
     end
   end
@@ -558,17 +558,17 @@ module FeedParserMixin
   def _start_feed(attrsD)
     @infeed = true
     versionmap = {'0.1' => 'atom01',
-		  '0.2' => 'atom02',
-		  '0.3' => 'atom03'
+      '0.2' => 'atom02',
+      '0.3' => 'atom03'
     }
     if not @version or @version.empty?
       attr_version = attrsD['version']
       version = versionmap[attr_version]
       if @version and not @version.empty?
-	@version = version
+        @version = version
       else
-	@version = 'atom'
+        @version = 'atom'
       end
     end
   end
@@ -776,11 +776,11 @@ module FeedParserMixin
       email = detail['email']
       if name and email and not (name.empty? or name.empty?)
-	context[key] = "#{name} (#{email})"
+        context[key] = "#{name} (#{email})"
       elsif name and not name.empty?
-	context[key] = name
+        context[key] = name
       elsif email and not email.empty?
-	context[key] = email
+        context[key] = email
       end
     else
       author = context[key].dup unless context[key].nil?
@@ -791,11 +791,11 @@ module FeedParserMixin
       author.gsub!("\(\)", '')
       author.strip!
       author.gsub!(/^\(/,'')
-		   author.gsub!(/\)$/,'')
-		   author.strip!
-		   context["#{key}_detail"] ||= FeedParserDict.new
-		   context["#{key}_detail"]['name'] = author
-		   context["#{key}_detail"]['email'] = email
+      author.gsub!(/\)$/,'')
+      author.strip!
+      context["#{key}_detail"] ||= FeedParserDict.new
+      context["#{key}_detail"]['name'] = author
+      context["#{key}_detail"]['email'] = email
     end
   end
@@ -1002,7 +1002,7 @@ module FeedParserMixin
     if attrsD.has_key? 'href'
       expectingText = false
       if (attrsD['rel'] == 'alternate') and @html_types.include?mapContentType(attrsD['type'])
-	context['link'] = attrsD['href']
+        context['link'] = attrsD['href']
       end
     else
       push('link', expectingText)
@@ -1077,9 +1077,9 @@ module FeedParserMixin
       value = popContent('description')
       context = getContext()
       if @intextinput
-	context['textinput']['description'] = value
+        context['textinput']['description'] = value
       elsif @inimage:
-	context['image']['description'] = value
+        context['image']['description'] = value
       end
     end
     @summaryKey = nil
@@ -1100,7 +1100,7 @@ module FeedParserMixin
     if attrsD and not attrsD.empty?
       attrsD = itsAnHrefDamnIt(attrsD)
       if attrsD.has_key?('href')
-	attrsD['href'] = resolveURI(attrsD['href'])
+        attrsD['href'] = resolveURI(attrsD['href'])
       end
     end
     getContext()['generator_detail'] = FeedParserDict.new(attrsD)
@@ -1164,10 +1164,12 @@ module FeedParserMixin
     if href and not href.empty?
       context = getContext()
       if not context['id']
-	context['id'] = href
+        context['id'] = href
       end
     end
   end
+  alias :_start_media_content :_start_enclosure
+  alias :_start_media_thumbnail :_start_enclosure
   def _start_source(attrsD)
     @insource = true
@@ -1208,13 +1210,13 @@ module FeedParserMixin
     if copyToDescription
       _save('description', value)
     end
-    alias :_end_body :_end_content
-    alias :_end_xhtml_body :_end_content
-    alias :_end_content_encoded :_end_content
-    alias :_end_fullitem :_end_content
-    alias :_end_prodlink :_end_content
   end
+  alias :_end_body :_end_content
+  alias :_end_xhtml_body :_end_content
+  alias :_end_content_encoded :_end_content
+  alias :_end_fullitem :_end_content
+  alias :_end_prodlink :_end_content
   def _start_itunes_image(attrsD)
     push('itunes_image', false)
     getContext()['image'] = FeedParserDict.new({'href' => attrsD['href']})
@@ -1230,6 +1232,7 @@ module FeedParserMixin
     value = pop('itunes_explicit', false)
     getContext()['itunes_explicit'] = (value == 'yes') and true or false
   end
 end # End FeedParserMixin

data/lib/rfeedparser/parsers.rb CHANGED

@@ -21,7 +21,7 @@ module FeedParser
     def getAttrs(attrs)
       ret = []
       for i in 0..attrs.getLength
-	ret.push([attrs.getName(i), attrs.getValue(i)])
+        ret.push([attrs.getName(i), attrs.getValue(i)])
       end
       ret
     end
@@ -43,17 +43,17 @@ module FeedParser
     def startElement(name, attrs)
       name =~ /^(([^;]*);)?(.+)$/ # Snag namespaceuri from name
-	namespaceuri = ($2 || '').downcase
+      namespaceuri = ($2 || '').downcase
       name = $3
       if /backend\.userland\.com\/rss/ =~ namespaceuri
-	# match any backend.userland.com namespace
-	namespaceuri = 'http://backend.userland.com/rss'
+        # match any backend.userland.com namespace
+        namespaceuri = 'http://backend.userland.com/rss'
       end
       prefix = @matchnamespaces[namespaceuri]
       # No need to raise UndeclaredNamespace, Expat does that for us with
       "unbound prefix (XMLParserError)"
       if prefix and not prefix.empty?
-	name = prefix + ':' + name
+        name = prefix + ':' + name
       end
       name.downcase!
       unknown_starttag(name, attrs)
@@ -72,10 +72,10 @@ module FeedParser
     def endElement(name)
       name =~ /^(([^;]*);)?(.+)$/ # Snag namespaceuri from name
-	namespaceuri = ($2 || '').downcase
+      namespaceuri = ($2 || '').downcase
       prefix = @matchnamespaces[namespaceuri]
       if prefix and not prefix.empty?
-	localname = prefix + ':' + name
+        localname = prefix + ':' + name
       end
       name.downcase!
       unknown_endtag(name)
@@ -112,13 +112,13 @@ module FeedParser
     attr_accessor :encoding, :bozo, :feeddata, :entries, :namespacesInUse
-    Elements_No_End_Tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
-      'img', 'input', 'isindex', 'link', 'meta', 'param']
+    Elements_No_End_Tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param']
     New_Declname_Re = /[a-zA-Z][-_.a-zA-Z0-9:]*\s*/
-      alias :sgml_feed :feed # feed needs to mapped to feeddata, not the SGMLParser method feed. I think.
+    alias :sgml_feed :feed # feed needs to mapped to feeddata, not the SGMLParser method feed. I think.
     def feed
       @feeddata
     end
     def feed=(data)
       @feeddata = data
     end
@@ -134,22 +134,23 @@ module FeedParser
     end
     def parse(data)
-      data.gsub!(/<!((?!DOCTYPE|--|\[))/i,  '&lt;!\1')
-	data.gsub!(/<([^<\s]+?)\s*\/>/) do |tag|
-	  clean = tag[1..-3].strip
-	  if Elements_No_End_Tag.include?clean
-	    tag
-	  else
-	  '<'+clean+'></'+clean+'>'
-	  end
-	end
+      doctype_regexp = Regexp.new('<!((?!DOCTYPE|--|\[))', Regexp::IGNORECASE) # Getting around a Textmate ident bug
+      data.gsub!(doctype_regexp,  '&lt;!\1')
+      data.gsub!(/<([^<\s]+?)\s*\/>/) do |tag|
+        clean = tag[1..-3].strip
+        if Elements_No_End_Tag.include?clean
+          tag
+        else
+          '<'+clean+'></'+clean+'>'
+        end
+      end
-	data.gsub!(/&#39;/, "'")
-	  data.gsub!(/&#34;/, "'")
-	  if @encoding and not @encoding.empty? # FIXME unicode check type(u'')
-	    data = uconvert(data,'utf-8',@encoding)
-	  end
-	sgml_feed(data) # see the alias above
+      data.gsub!(/&#39;/, "'")
+      data.gsub!(/&#34;/, "'")
+      if @encoding and not @encoding.empty? # FIXME unicode check type(u'')
+        data = uconvert(data,'utf-8',@encoding)
+      end
+      sgml_feed(data) # see the alias above
     end
@@ -165,11 +166,11 @@ module FeedParser
       data.gsub!('&#39;', '&apos;')
       data.gsub!('&#x27;', '&apos;')
       if @contentparams.has_key? 'type' and not ((@contentparams['type'] || 'xml') =~ /xml$/u)
-	data.gsub!('&lt;', '<')
-	data.gsub!('&gt;', '>')
-	data.gsub!('&amp;', '&')
-	data.gsub!('&quot;', '"')
-	data.gsub!('&apos;', "'")
+        data.gsub!('&lt;', '<')
+        data.gsub!('&gt;', '>')
+        data.gsub!('&amp;', '&')
+        data.gsub!('&quot;', '"')
+        data.gsub!('&apos;', "'")
       end
       return data
     end