rfeedparser 0.9.92 → 0.9.93
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rfeedparser.rb +106 -105
- data/lib/rfeedparser/better_sgmlparser.rb +84 -84
- data/lib/rfeedparser/encoding_helpers.rb +4 -3
- data/lib/rfeedparser/parser_mixin.rb +121 -118
- data/lib/rfeedparser/parsers.rb +31 -30
- data/lib/rfeedparser/scrub.rb +1 -1
- data/lib/rfeedparser/time_helpers.rb +52 -54
- data/tests/rfponly/wellformed/mrss/mrss_media_content.xml +20 -0
- data/tests/rfponly/wellformed/mrss/mrss_thumbnail.xml +21 -0
- metadata +10 -5
@@ -178,16 +178,17 @@ module FeedParserUtilities
|
|
178
178
|
data = data[4..-1]
|
179
179
|
end
|
180
180
|
begin
|
181
|
-
newdata = uconvert(data, encoding, 'utf-8')
|
181
|
+
newdata = uconvert(data, encoding, 'utf-8')
|
182
182
|
rescue => details
|
183
|
+
raise details
|
183
184
|
end
|
184
185
|
$stderr << "successfully converted #{encoding} data to utf-8\n" if $debug
|
185
186
|
declmatch = /^<\?xml[^>]*?>/
|
186
187
|
newdecl = "<?xml version=\'1.0\' encoding=\'utf-8\'?>"
|
187
188
|
if declmatch =~ newdata
|
188
|
-
|
189
|
+
newdata.sub!(declmatch, newdecl)
|
189
190
|
else
|
190
|
-
|
191
|
+
newdata = newdecl + "\n" + newdata
|
191
192
|
end
|
192
193
|
return newdata
|
193
194
|
end
|
@@ -6,65 +6,65 @@ module FeedParserMixin
|
|
6
6
|
$stderr << "initializing FeedParser\n" if $debug
|
7
7
|
|
8
8
|
@namespaces = {'' => '',
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
9
|
+
'http://backend.userland.com/rss' => '',
|
10
|
+
'http://blogs.law.harvard.edu/tech/rss' => '',
|
11
|
+
'http://purl.org/rss/1.0/' => '',
|
12
|
+
'http://my.netscape.com/rdf/simple/0.9/' => '',
|
13
|
+
'http://example.com/newformat#' => '',
|
14
|
+
'http://example.com/necho' => '',
|
15
|
+
'http://purl.org/echo/' => '',
|
16
|
+
'uri/of/echo/namespace#' => '',
|
17
|
+
'http://purl.org/pie/' => '',
|
18
|
+
'http://purl.org/atom/ns#' => '',
|
19
|
+
'http://www.w3.org/2005/Atom' => '',
|
20
|
+
'http://purl.org/rss/1.0/modules/rss091#' => '',
|
21
|
+
'http://webns.net/mvcb/' => 'admin',
|
22
|
+
'http://purl.org/rss/1.0/modules/aggregation/' => 'ag',
|
23
|
+
'http://purl.org/rss/1.0/modules/annotate/' => 'annotate',
|
24
|
+
'http://media.tangent.org/rss/1.0/' => 'audio',
|
25
|
+
'http://backend.userland.com/blogChannelModule' => 'blogChannel',
|
26
|
+
'http://web.resource.org/cc/' => 'cc',
|
27
|
+
'http://backend.userland.com/creativeCommonsRssModule' => 'creativeCommons',
|
28
|
+
'http://purl.org/rss/1.0/modules/company' => 'co',
|
29
|
+
'http://purl.org/rss/1.0/modules/content/' => 'content',
|
30
|
+
'http://my.theinfo.org/changed/1.0/rss/' => 'cp',
|
31
|
+
'http://purl.org/dc/elements/1.1/' => 'dc',
|
32
|
+
'http://purl.org/dc/terms/' => 'dcterms',
|
33
|
+
'http://purl.org/rss/1.0/modules/email/' => 'email',
|
34
|
+
'http://purl.org/rss/1.0/modules/event/' => 'ev',
|
35
|
+
'http://rssnamespace.org/feedburner/ext/1.0' => 'feedburner',
|
36
|
+
'http://freshmeat.net/rss/fm/' => 'fm',
|
37
|
+
'http://xmlns.com/foaf/0.1/' => 'foaf',
|
38
|
+
'http://www.w3.org/2003/01/geo/wgs84_pos#' => 'geo',
|
39
|
+
'http://postneo.com/icbm/' => 'icbm',
|
40
|
+
'http://purl.org/rss/1.0/modules/image/' => 'image',
|
41
|
+
'http://www.itunes.com/DTDs/PodCast-1.0.dtd' => 'itunes',
|
42
|
+
'http://example.com/DTDs/PodCast-1.0.dtd' => 'itunes',
|
43
|
+
'http://purl.org/rss/1.0/modules/link/' => 'l',
|
44
|
+
'http://search.yahoo.com/mrss' => 'media',
|
45
|
+
'http://madskills.com/public/xml/rss/module/pingback/' => 'pingback',
|
46
|
+
'http://prismstandard.org/namespaces/1.2/basic/' => 'prism',
|
47
|
+
'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf',
|
48
|
+
'http://www.w3.org/2000/01/rdf-schema#' => 'rdfs',
|
49
|
+
'http://purl.org/rss/1.0/modules/reference/' => 'ref',
|
50
|
+
'http://purl.org/rss/1.0/modules/richequiv/' => 'reqv',
|
51
|
+
'http://purl.org/rss/1.0/modules/search/' => 'search',
|
52
|
+
'http://purl.org/rss/1.0/modules/slash/' => 'slash',
|
53
|
+
'http://schemas.xmlsoap.org/soap/envelope/' => 'soap',
|
54
|
+
'http://purl.org/rss/1.0/modules/servicestatus/' => 'ss',
|
55
|
+
'http://hacks.benhammersley.com/rss/streaming/' => 'str',
|
56
|
+
'http://purl.org/rss/1.0/modules/subscription/' => 'sub',
|
57
|
+
'http://purl.org/rss/1.0/modules/syndication/' => 'sy',
|
58
|
+
'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo',
|
59
|
+
'http://purl.org/rss/1.0/modules/threading/' => 'thr',
|
60
|
+
'http://purl.org/rss/1.0/modules/textinput/' => 'ti',
|
61
|
+
'http://madskills.com/public/xml/rss/module/trackback/' =>'trackback',
|
62
|
+
'http://wellformedweb.org/commentAPI/' => 'wfw',
|
63
|
+
'http://purl.org/rss/1.0/modules/wiki/' => 'wiki',
|
64
|
+
'http://www.w3.org/1999/xhtml' => 'xhtml',
|
65
|
+
'http://www.w3.org/XML/1998/namespace' => 'xml',
|
66
|
+
'http://www.w3.org/1999/xlink' => 'xlink',
|
67
|
+
'http://schemas.pocketsoap.com/rss/myDescModule/' => 'szf'
|
68
68
|
}
|
69
69
|
@matchnamespaces = {}
|
70
70
|
@namespaces.each do |l|
|
@@ -123,7 +123,7 @@ module FeedParserMixin
|
|
123
123
|
k = old_k.downcase # Downcase all keys
|
124
124
|
attrsD[k] = value
|
125
125
|
if ['rel','type'].include?value
|
126
|
-
|
126
|
+
attrsD[k].downcase! # Downcase the value if the key is 'rel' or 'type'
|
127
127
|
end
|
128
128
|
end
|
129
129
|
|
@@ -140,7 +140,7 @@ module FeedParserMixin
|
|
140
140
|
end
|
141
141
|
if lang and not lang.empty? # Seriously, this cannot be correct
|
142
142
|
if ['feed', 'rss', 'rdf:RDF'].include?tag
|
143
|
-
|
143
|
+
@feeddata['language'] = lang.gsub('_','-')
|
144
144
|
end
|
145
145
|
end
|
146
146
|
@lang = lang
|
@@ -150,9 +150,9 @@ module FeedParserMixin
|
|
150
150
|
# track namespaces
|
151
151
|
attrsd.each do |prefix, uri|
|
152
152
|
if /^xmlns:/ =~ prefix # prefix begins with xmlns:
|
153
|
-
|
153
|
+
trackNamespace(prefix[6..-1], uri)
|
154
154
|
elsif prefix == 'xmlns':
|
155
|
-
|
155
|
+
trackNamespace(nil, uri)
|
156
156
|
end
|
157
157
|
end
|
158
158
|
|
@@ -238,13 +238,13 @@ module FeedParserMixin
|
|
238
238
|
if @basestack and not @basestack.empty?
|
239
239
|
@basestack.pop
|
240
240
|
if @basestack and @basestack[-1] and not (@basestack.empty? or @basestack[-1].empty?)
|
241
|
-
|
241
|
+
@baseuri = @basestack[-1]
|
242
242
|
end
|
243
243
|
end
|
244
244
|
if @langstack and not @langstack.empty?
|
245
245
|
@langstack.pop
|
246
246
|
if @langstack and not @langstack.empty? # and @langstack[-1] and not @langstack.empty?
|
247
|
-
|
247
|
+
@lang = @langstack[-1]
|
248
248
|
end
|
249
249
|
end
|
250
250
|
end
|
@@ -260,9 +260,9 @@ module FeedParserMixin
|
|
260
260
|
text = "&##{ref};"
|
261
261
|
else
|
262
262
|
if ref[0..0] == 'x'
|
263
|
-
|
263
|
+
c = (ref[1..-1]).to_i(16)
|
264
264
|
else
|
265
|
-
|
265
|
+
c = ref.to_i
|
266
266
|
end
|
267
267
|
text = [c].pack('U*')
|
268
268
|
end
|
@@ -383,7 +383,7 @@ module FeedParserMixin
|
|
383
383
|
if @contentparams['base64']
|
384
384
|
out64 = Base64::decode64(output) # a.k.a. [output].unpack('m')[0]
|
385
385
|
if not output.empty? and not out64.empty?
|
386
|
-
|
386
|
+
output = out64
|
387
387
|
end
|
388
388
|
end
|
389
389
|
|
@@ -404,13 +404,13 @@ module FeedParserMixin
|
|
404
404
|
# resolve relative URIs within embedded markup
|
405
405
|
if @html_types.include?mapContentType(@contentparams['type'] || 'text/html')
|
406
406
|
if @can_contain_relative_uris.include?element
|
407
|
-
|
407
|
+
output = FeedParser.resolveRelativeURIs(output, @baseuri, @encoding)
|
408
408
|
end
|
409
409
|
end
|
410
410
|
# sanitize embedded markup
|
411
411
|
if @html_types.include?mapContentType(@contentparams['type'] || 'text/html')
|
412
412
|
if @can_contain_dangerous_markup.include?element
|
413
|
-
|
413
|
+
output = FeedParser.sanitizeHTML(output, @encoding)
|
414
414
|
end
|
415
415
|
end
|
416
416
|
|
@@ -425,34 +425,34 @@ module FeedParserMixin
|
|
425
425
|
# store output in appropriate place(s)
|
426
426
|
if @inentry and not @insource
|
427
427
|
if element == 'content'
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
428
|
+
@entries[-1][element] ||= []
|
429
|
+
contentparams = Marshal.load(Marshal.dump(@contentparams)) # deepcopy
|
430
|
+
contentparams['value'] = output
|
431
|
+
@entries[-1][element] << contentparams
|
432
432
|
elsif element == 'link'
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
433
|
+
@entries[-1][element] = output
|
434
|
+
if output and not output.empty?
|
435
|
+
@entries[-1]['links'][-1]['href'] = output
|
436
|
+
end
|
437
437
|
else
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
438
|
+
element = 'summary' if element == 'description'
|
439
|
+
@entries[-1][element] = output
|
440
|
+
if @incontent != 0
|
441
|
+
contentparams = Marshal.load(Marshal.dump(@contentparams))
|
442
|
+
contentparams['value'] = output
|
443
|
+
@entries[-1][element + '_detail'] = contentparams
|
444
|
+
end
|
445
445
|
end
|
446
446
|
elsif (@infeed or @insource) and not @intextinput and not @inimage
|
447
447
|
context = getContext()
|
448
448
|
element = 'subtitle' if element == 'description'
|
449
449
|
context[element] = output
|
450
450
|
if element == 'link'
|
451
|
-
|
451
|
+
context['links'][-1]['href'] = output
|
452
452
|
elsif @incontent != 0
|
453
|
-
|
454
|
-
|
455
|
-
|
453
|
+
contentparams = Marshal.load(Marshal.dump(@contentparams))
|
454
|
+
contentparams['value'] = output
|
455
|
+
context[element + '_detail'] = contentparams
|
456
456
|
end
|
457
457
|
end
|
458
458
|
return output
|
@@ -514,20 +514,20 @@ module FeedParserMixin
|
|
514
514
|
|
515
515
|
# Handler for the opening <rss> tag: records which RSS flavour the feed declares.
#
# attrsD - attribute hash of the <rss> element; only the 'version' key is read.
#
# Does nothing when @version already holds a non-empty value. Otherwise
# versions 0.91-0.94 map to their specific identifiers, any version starting
# with "2." maps to 'rss20', and everything else (including a missing
# attribute) falls back to plain 'rss'.
def _start_rss(attrsD)
  versionmap = {'0.91' => 'rss091u',
                '0.92' => 'rss092',
                '0.93' => 'rss093',
                '0.94' => 'rss094'
  }

  if not @version or @version.empty?
    attr_version = attrsD['version'] || ''
    version = versionmap[attr_version]
    if version and not version.empty?
      @version = version
    elsif /\A2\./ =~ attr_version
      # \A rather than ^: ^ matches after any newline, so a value such as
      # "junk\n2.0" would otherwise be reported as RSS 2.0.
      @version = 'rss20'
    else
      @version = 'rss'
    end
  end
end
|
@@ -558,17 +558,17 @@ module FeedParserMixin
|
|
558
558
|
# Handler for the opening <feed> tag: marks that we are inside a feed and
# records which Atom flavour it declares.
#
# attrsD - attribute hash of the <feed> element; only the 'version' key is read.
#
# Always sets @infeed to true. When @version is still unset or empty, versions
# 0.1/0.2/0.3 map to 'atom01'/'atom02'/'atom03'; any other value (or a missing
# attribute) falls back to plain 'atom'.
def _start_feed(attrsD)
  @infeed = true
  versionmap = {'0.1' => 'atom01',
                '0.2' => 'atom02',
                '0.3' => 'atom03'
  }

  if not @version or @version.empty?
    attr_version = attrsD['version']
    version = versionmap[attr_version]
    # Test the looked-up local, not @version: @version is known to be empty
    # in this branch, so checking it made the mapping above unreachable.
    if version and not version.empty?
      @version = version
    else
      @version = 'atom'
    end
  end
end
|
@@ -776,11 +776,11 @@ module FeedParserMixin
|
|
776
776
|
email = detail['email']
|
777
777
|
|
778
778
|
if name and email and not (name.empty? or name.empty?)
|
779
|
-
|
779
|
+
context[key] = "#{name} (#{email})"
|
780
780
|
elsif name and not name.empty?
|
781
|
-
|
781
|
+
context[key] = name
|
782
782
|
elsif email and not email.empty?
|
783
|
-
|
783
|
+
context[key] = email
|
784
784
|
end
|
785
785
|
else
|
786
786
|
author = context[key].dup unless context[key].nil?
|
@@ -791,11 +791,11 @@ module FeedParserMixin
|
|
791
791
|
author.gsub!("\(\)", '')
|
792
792
|
author.strip!
|
793
793
|
author.gsub!(/^\(/,'')
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
794
|
+
author.gsub!(/\)$/,'')
|
795
|
+
author.strip!
|
796
|
+
context["#{key}_detail"] ||= FeedParserDict.new
|
797
|
+
context["#{key}_detail"]['name'] = author
|
798
|
+
context["#{key}_detail"]['email'] = email
|
799
799
|
end
|
800
800
|
end
|
801
801
|
|
@@ -1002,7 +1002,7 @@ module FeedParserMixin
|
|
1002
1002
|
if attrsD.has_key? 'href'
|
1003
1003
|
expectingText = false
|
1004
1004
|
if (attrsD['rel'] == 'alternate') and @html_types.include?mapContentType(attrsD['type'])
|
1005
|
-
|
1005
|
+
context['link'] = attrsD['href']
|
1006
1006
|
end
|
1007
1007
|
else
|
1008
1008
|
push('link', expectingText)
|
@@ -1077,9 +1077,9 @@ module FeedParserMixin
|
|
1077
1077
|
value = popContent('description')
|
1078
1078
|
context = getContext()
|
1079
1079
|
if @intextinput
|
1080
|
-
|
1080
|
+
context['textinput']['description'] = value
|
1081
1081
|
elsif @inimage:
|
1082
|
-
|
1082
|
+
context['image']['description'] = value
|
1083
1083
|
end
|
1084
1084
|
end
|
1085
1085
|
@summaryKey = nil
|
@@ -1100,7 +1100,7 @@ module FeedParserMixin
|
|
1100
1100
|
if attrsD and not attrsD.empty?
|
1101
1101
|
attrsD = itsAnHrefDamnIt(attrsD)
|
1102
1102
|
if attrsD.has_key?('href')
|
1103
|
-
|
1103
|
+
attrsD['href'] = resolveURI(attrsD['href'])
|
1104
1104
|
end
|
1105
1105
|
end
|
1106
1106
|
getContext()['generator_detail'] = FeedParserDict.new(attrsD)
|
@@ -1164,10 +1164,12 @@ module FeedParserMixin
|
|
1164
1164
|
if href and not href.empty?
|
1165
1165
|
context = getContext()
|
1166
1166
|
if not context['id']
|
1167
|
-
|
1167
|
+
context['id'] = href
|
1168
1168
|
end
|
1169
1169
|
end
|
1170
1170
|
end
|
1171
|
+
alias :_start_media_content :_start_enclosure
|
1172
|
+
alias :_start_media_thumbnail :_start_enclosure
|
1171
1173
|
|
1172
1174
|
def _start_source(attrsD)
|
1173
1175
|
@insource = true
|
@@ -1208,13 +1210,13 @@ module FeedParserMixin
|
|
1208
1210
|
if copyToDescription
|
1209
1211
|
_save('description', value)
|
1210
1212
|
end
|
1211
|
-
alias :_end_body :_end_content
|
1212
|
-
alias :_end_xhtml_body :_end_content
|
1213
|
-
alias :_end_content_encoded :_end_content
|
1214
|
-
alias :_end_fullitem :_end_content
|
1215
|
-
alias :_end_prodlink :_end_content
|
1216
1213
|
end
|
1217
|
-
|
1214
|
+
alias :_end_body :_end_content
|
1215
|
+
alias :_end_xhtml_body :_end_content
|
1216
|
+
alias :_end_content_encoded :_end_content
|
1217
|
+
alias :_end_fullitem :_end_content
|
1218
|
+
alias :_end_prodlink :_end_content
|
1219
|
+
|
1218
1220
|
def _start_itunes_image(attrsD)
|
1219
1221
|
push('itunes_image', false)
|
1220
1222
|
getContext()['image'] = FeedParserDict.new({'href' => attrsD['href']})
|
@@ -1230,6 +1232,7 @@ module FeedParserMixin
|
|
1230
1232
|
value = pop('itunes_explicit', false)
|
1231
1233
|
getContext()['itunes_explicit'] = (value == 'yes') and true or false
|
1232
1234
|
end
|
1235
|
+
|
1233
1236
|
end # End FeedParserMixin
|
1234
1237
|
|
1235
1238
|
|
data/lib/rfeedparser/parsers.rb
CHANGED
@@ -21,7 +21,7 @@ module FeedParser
|
|
21
21
|
# Flattens an attribute collection into an array of [name, value] pairs.
#
# attrs - object responding to getLength, getName(i) and getValue(i).
#
# Returns an Array with one [name, value] pair per attribute, in index order;
# empty when attrs reports a length of 0.
def getAttrs(attrs)
  ret = []
  # Valid indices are 0...getLength; the former inclusive range 0..getLength
  # asked for one attribute past the end.
  attrs.getLength.times do |i|
    ret.push([attrs.getName(i), attrs.getValue(i)])
  end
  ret
end
|
@@ -43,17 +43,17 @@ module FeedParser
|
|
43
43
|
|
44
44
|
def startElement(name, attrs)
|
45
45
|
name =~ /^(([^;]*);)?(.+)$/ # Snag namespaceuri from name
|
46
|
-
|
46
|
+
namespaceuri = ($2 || '').downcase
|
47
47
|
name = $3
|
48
48
|
if /backend\.userland\.com\/rss/ =~ namespaceuri
|
49
|
-
|
50
|
-
|
49
|
+
# match any backend.userland.com namespace
|
50
|
+
namespaceuri = 'http://backend.userland.com/rss'
|
51
51
|
end
|
52
52
|
prefix = @matchnamespaces[namespaceuri]
|
53
53
|
# No need to raise UndeclaredNamespace, Expat does that for us with
|
54
54
|
"unbound prefix (XMLParserError)"
|
55
55
|
if prefix and not prefix.empty?
|
56
|
-
|
56
|
+
name = prefix + ':' + name
|
57
57
|
end
|
58
58
|
name.downcase!
|
59
59
|
unknown_starttag(name, attrs)
|
@@ -72,10 +72,10 @@ module FeedParser
|
|
72
72
|
|
73
73
|
def endElement(name)
|
74
74
|
name =~ /^(([^;]*);)?(.+)$/ # Snag namespaceuri from name
|
75
|
-
|
75
|
+
namespaceuri = ($2 || '').downcase
|
76
76
|
prefix = @matchnamespaces[namespaceuri]
|
77
77
|
if prefix and not prefix.empty?
|
78
|
-
|
78
|
+
localname = prefix + ':' + name
|
79
79
|
end
|
80
80
|
name.downcase!
|
81
81
|
unknown_endtag(name)
|
@@ -112,13 +112,13 @@ module FeedParser
|
|
112
112
|
|
113
113
|
attr_accessor :encoding, :bozo, :feeddata, :entries, :namespacesInUse
|
114
114
|
|
115
|
-
Elements_No_End_Tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
|
116
|
-
'img', 'input', 'isindex', 'link', 'meta', 'param']
|
115
|
+
Elements_No_End_Tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param']
|
117
116
|
New_Declname_Re = /[a-zA-Z][-_.a-zA-Z0-9:]*\s*/
|
118
|
-
|
117
|
+
alias :sgml_feed :feed # feed needs to mapped to feeddata, not the SGMLParser method feed. I think.
|
119
118
|
# Reader: returns @feeddata. The alias just above keeps the previous `feed`
# method reachable as sgml_feed, which is what parse calls.
def feed
|
120
119
|
@feeddata
|
121
120
|
end
|
121
|
+
|
122
122
|
# Writer counterpart of `feed`: stores data in @feeddata.
def feed=(data)
|
123
123
|
@feeddata = data
|
124
124
|
end
|
@@ -134,22 +134,23 @@ module FeedParser
|
|
134
134
|
end
|
135
135
|
|
136
136
|
# Pre-processes markup and hands it to the underlying SGML parser.
#
# data - String of markup; it is modified in place by the gsub! calls below.
#
# Steps, in order:
#   1. escape "<!" unless it starts a DOCTYPE, a comment ("<!--") or a "<!["
#      section;
#   2. expand self-closing tags into an open/close pair, except for the
#      elements listed in Elements_No_End_Tag;
#   3. decode the numeric entities for the apostrophe and the double quote;
#   4. when @encoding is set, pass the data through uconvert;
#   5. feed the result to sgml_feed (the aliased parser-level feed method).
#
# NOTE(review): the entity text in steps 1 and 3 was restored from an
# entity-decoded rendering of this file ("&lt;", "&#39;", "&#34;") - confirm
# against the repository copy.
def parse(data)
  doctype_regexp = Regexp.new('<!((?!DOCTYPE|--|\[))', Regexp::IGNORECASE) # Getting around a Textmate ident bug
  data.gsub!(doctype_regexp, '&lt;!\1')
  data.gsub!(/<([^<\s]+?)\s*\/>/) do |tag|
    # tag looks like "<name/>" or "<name />": drop "<" and "/>" to get the name.
    clean = tag[1..-3].strip
    if Elements_No_End_Tag.include?(clean)
      tag
    else
      '<' + clean + '></' + clean + '>'
    end
  end

  data.gsub!(/&#39;/, "'")
  # &#34; is the double quote; it was previously replaced with an apostrophe.
  data.gsub!(/&#34;/, '"')
  if @encoding and not @encoding.empty? # FIXME unicode check type(u'')
    data = uconvert(data, 'utf-8', @encoding)
  end
  sgml_feed(data) # see the alias above
end
|
154
155
|
|
155
156
|
|
@@ -165,11 +166,11 @@ module FeedParser
|
|
165
166
|
data.gsub!('&#39;', '&apos;')
|
166
167
|
data.gsub!('&#x27;', '&apos;')
|
167
168
|
if @contentparams.has_key? 'type' and not ((@contentparams['type'] || 'xml') =~ /xml$/u)
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
169
|
+
data.gsub!('&lt;', '<')
|
170
|
+
data.gsub!('&gt;', '>')
|
171
|
+
data.gsub!('&amp;', '&')
|
172
|
+
data.gsub!('&quot;', '"')
|
173
|
+
data.gsub!('&apos;', "'")
|
173
174
|
end
|
174
175
|
return data
|
175
176
|
end
|