rfeedparser 0.9.92 → 0.9.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rfeedparser.rb +106 -105
- data/lib/rfeedparser/better_sgmlparser.rb +84 -84
- data/lib/rfeedparser/encoding_helpers.rb +4 -3
- data/lib/rfeedparser/parser_mixin.rb +121 -118
- data/lib/rfeedparser/parsers.rb +31 -30
- data/lib/rfeedparser/scrub.rb +1 -1
- data/lib/rfeedparser/time_helpers.rb +52 -54
- data/tests/rfponly/wellformed/mrss/mrss_media_content.xml +20 -0
- data/tests/rfponly/wellformed/mrss/mrss_thumbnail.xml +21 -0
- metadata +10 -5
@@ -178,16 +178,17 @@ module FeedParserUtilities
|
|
178
178
|
data = data[4..-1]
|
179
179
|
end
|
180
180
|
begin
|
181
|
-
newdata = uconvert(data, encoding, 'utf-8')
|
181
|
+
newdata = uconvert(data, encoding, 'utf-8')
|
182
182
|
rescue => details
|
183
|
+
raise details
|
183
184
|
end
|
184
185
|
$stderr << "successfully converted #{encoding} data to utf-8\n" if $debug
|
185
186
|
declmatch = /^<\?xml[^>]*?>/
|
186
187
|
newdecl = "<?xml version=\'1.0\' encoding=\'utf-8\'?>"
|
187
188
|
if declmatch =~ newdata
|
188
|
-
|
189
|
+
newdata.sub!(declmatch, newdecl)
|
189
190
|
else
|
190
|
-
|
191
|
+
newdata = newdecl + "\n" + newdata
|
191
192
|
end
|
192
193
|
return newdata
|
193
194
|
end
|
@@ -6,65 +6,65 @@ module FeedParserMixin
|
|
6
6
|
$stderr << "initializing FeedParser\n" if $debug
|
7
7
|
|
8
8
|
@namespaces = {'' => '',
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
9
|
+
'http://backend.userland.com/rss' => '',
|
10
|
+
'http://blogs.law.harvard.edu/tech/rss' => '',
|
11
|
+
'http://purl.org/rss/1.0/' => '',
|
12
|
+
'http://my.netscape.com/rdf/simple/0.9/' => '',
|
13
|
+
'http://example.com/newformat#' => '',
|
14
|
+
'http://example.com/necho' => '',
|
15
|
+
'http://purl.org/echo/' => '',
|
16
|
+
'uri/of/echo/namespace#' => '',
|
17
|
+
'http://purl.org/pie/' => '',
|
18
|
+
'http://purl.org/atom/ns#' => '',
|
19
|
+
'http://www.w3.org/2005/Atom' => '',
|
20
|
+
'http://purl.org/rss/1.0/modules/rss091#' => '',
|
21
|
+
'http://webns.net/mvcb/' => 'admin',
|
22
|
+
'http://purl.org/rss/1.0/modules/aggregation/' => 'ag',
|
23
|
+
'http://purl.org/rss/1.0/modules/annotate/' => 'annotate',
|
24
|
+
'http://media.tangent.org/rss/1.0/' => 'audio',
|
25
|
+
'http://backend.userland.com/blogChannelModule' => 'blogChannel',
|
26
|
+
'http://web.resource.org/cc/' => 'cc',
|
27
|
+
'http://backend.userland.com/creativeCommonsRssModule' => 'creativeCommons',
|
28
|
+
'http://purl.org/rss/1.0/modules/company' => 'co',
|
29
|
+
'http://purl.org/rss/1.0/modules/content/' => 'content',
|
30
|
+
'http://my.theinfo.org/changed/1.0/rss/' => 'cp',
|
31
|
+
'http://purl.org/dc/elements/1.1/' => 'dc',
|
32
|
+
'http://purl.org/dc/terms/' => 'dcterms',
|
33
|
+
'http://purl.org/rss/1.0/modules/email/' => 'email',
|
34
|
+
'http://purl.org/rss/1.0/modules/event/' => 'ev',
|
35
|
+
'http://rssnamespace.org/feedburner/ext/1.0' => 'feedburner',
|
36
|
+
'http://freshmeat.net/rss/fm/' => 'fm',
|
37
|
+
'http://xmlns.com/foaf/0.1/' => 'foaf',
|
38
|
+
'http://www.w3.org/2003/01/geo/wgs84_pos#' => 'geo',
|
39
|
+
'http://postneo.com/icbm/' => 'icbm',
|
40
|
+
'http://purl.org/rss/1.0/modules/image/' => 'image',
|
41
|
+
'http://www.itunes.com/DTDs/PodCast-1.0.dtd' => 'itunes',
|
42
|
+
'http://example.com/DTDs/PodCast-1.0.dtd' => 'itunes',
|
43
|
+
'http://purl.org/rss/1.0/modules/link/' => 'l',
|
44
|
+
'http://search.yahoo.com/mrss' => 'media',
|
45
|
+
'http://madskills.com/public/xml/rss/module/pingback/' => 'pingback',
|
46
|
+
'http://prismstandard.org/namespaces/1.2/basic/' => 'prism',
|
47
|
+
'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf',
|
48
|
+
'http://www.w3.org/2000/01/rdf-schema#' => 'rdfs',
|
49
|
+
'http://purl.org/rss/1.0/modules/reference/' => 'ref',
|
50
|
+
'http://purl.org/rss/1.0/modules/richequiv/' => 'reqv',
|
51
|
+
'http://purl.org/rss/1.0/modules/search/' => 'search',
|
52
|
+
'http://purl.org/rss/1.0/modules/slash/' => 'slash',
|
53
|
+
'http://schemas.xmlsoap.org/soap/envelope/' => 'soap',
|
54
|
+
'http://purl.org/rss/1.0/modules/servicestatus/' => 'ss',
|
55
|
+
'http://hacks.benhammersley.com/rss/streaming/' => 'str',
|
56
|
+
'http://purl.org/rss/1.0/modules/subscription/' => 'sub',
|
57
|
+
'http://purl.org/rss/1.0/modules/syndication/' => 'sy',
|
58
|
+
'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo',
|
59
|
+
'http://purl.org/rss/1.0/modules/threading/' => 'thr',
|
60
|
+
'http://purl.org/rss/1.0/modules/textinput/' => 'ti',
|
61
|
+
'http://madskills.com/public/xml/rss/module/trackback/' =>'trackback',
|
62
|
+
'http://wellformedweb.org/commentAPI/' => 'wfw',
|
63
|
+
'http://purl.org/rss/1.0/modules/wiki/' => 'wiki',
|
64
|
+
'http://www.w3.org/1999/xhtml' => 'xhtml',
|
65
|
+
'http://www.w3.org/XML/1998/namespace' => 'xml',
|
66
|
+
'http://www.w3.org/1999/xlink' => 'xlink',
|
67
|
+
'http://schemas.pocketsoap.com/rss/myDescModule/' => 'szf'
|
68
68
|
}
|
69
69
|
@matchnamespaces = {}
|
70
70
|
@namespaces.each do |l|
|
@@ -123,7 +123,7 @@ module FeedParserMixin
|
|
123
123
|
k = old_k.downcase # Downcase all keys
|
124
124
|
attrsD[k] = value
|
125
125
|
if ['rel','type'].include?value
|
126
|
-
|
126
|
+
attrsD[k].downcase! # Downcase the value if the key is 'rel' or 'type'
|
127
127
|
end
|
128
128
|
end
|
129
129
|
|
@@ -140,7 +140,7 @@ module FeedParserMixin
|
|
140
140
|
end
|
141
141
|
if lang and not lang.empty? # Seriously, this cannot be correct
|
142
142
|
if ['feed', 'rss', 'rdf:RDF'].include?tag
|
143
|
-
|
143
|
+
@feeddata['language'] = lang.gsub('_','-')
|
144
144
|
end
|
145
145
|
end
|
146
146
|
@lang = lang
|
@@ -150,9 +150,9 @@ module FeedParserMixin
|
|
150
150
|
# track namespaces
|
151
151
|
attrsd.each do |prefix, uri|
|
152
152
|
if /^xmlns:/ =~ prefix # prefix begins with xmlns:
|
153
|
-
|
153
|
+
trackNamespace(prefix[6..-1], uri)
|
154
154
|
elsif prefix == 'xmlns':
|
155
|
-
|
155
|
+
trackNamespace(nil, uri)
|
156
156
|
end
|
157
157
|
end
|
158
158
|
|
@@ -238,13 +238,13 @@ module FeedParserMixin
|
|
238
238
|
if @basestack and not @basestack.empty?
|
239
239
|
@basestack.pop
|
240
240
|
if @basestack and @basestack[-1] and not (@basestack.empty? or @basestack[-1].empty?)
|
241
|
-
|
241
|
+
@baseuri = @basestack[-1]
|
242
242
|
end
|
243
243
|
end
|
244
244
|
if @langstack and not @langstack.empty?
|
245
245
|
@langstack.pop
|
246
246
|
if @langstack and not @langstack.empty? # and @langstack[-1] and not @langstack.empty?
|
247
|
-
|
247
|
+
@lang = @langstack[-1]
|
248
248
|
end
|
249
249
|
end
|
250
250
|
end
|
@@ -260,9 +260,9 @@ module FeedParserMixin
|
|
260
260
|
text = "&##{ref};"
|
261
261
|
else
|
262
262
|
if ref[0..0] == 'x'
|
263
|
-
|
263
|
+
c = (ref[1..-1]).to_i(16)
|
264
264
|
else
|
265
|
-
|
265
|
+
c = ref.to_i
|
266
266
|
end
|
267
267
|
text = [c].pack('U*')
|
268
268
|
end
|
@@ -383,7 +383,7 @@ module FeedParserMixin
|
|
383
383
|
if @contentparams['base64']
|
384
384
|
out64 = Base64::decode64(output) # a.k.a. [output].unpack('m')[0]
|
385
385
|
if not output.empty? and not out64.empty?
|
386
|
-
|
386
|
+
output = out64
|
387
387
|
end
|
388
388
|
end
|
389
389
|
|
@@ -404,13 +404,13 @@ module FeedParserMixin
|
|
404
404
|
# resolve relative URIs within embedded markup
|
405
405
|
if @html_types.include?mapContentType(@contentparams['type'] || 'text/html')
|
406
406
|
if @can_contain_relative_uris.include?element
|
407
|
-
|
407
|
+
output = FeedParser.resolveRelativeURIs(output, @baseuri, @encoding)
|
408
408
|
end
|
409
409
|
end
|
410
410
|
# sanitize embedded markup
|
411
411
|
if @html_types.include?mapContentType(@contentparams['type'] || 'text/html')
|
412
412
|
if @can_contain_dangerous_markup.include?element
|
413
|
-
|
413
|
+
output = FeedParser.sanitizeHTML(output, @encoding)
|
414
414
|
end
|
415
415
|
end
|
416
416
|
|
@@ -425,34 +425,34 @@ module FeedParserMixin
|
|
425
425
|
# store output in appropriate place(s)
|
426
426
|
if @inentry and not @insource
|
427
427
|
if element == 'content'
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
428
|
+
@entries[-1][element] ||= []
|
429
|
+
contentparams = Marshal.load(Marshal.dump(@contentparams)) # deepcopy
|
430
|
+
contentparams['value'] = output
|
431
|
+
@entries[-1][element] << contentparams
|
432
432
|
elsif element == 'link'
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
433
|
+
@entries[-1][element] = output
|
434
|
+
if output and not output.empty?
|
435
|
+
@entries[-1]['links'][-1]['href'] = output
|
436
|
+
end
|
437
437
|
else
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
438
|
+
element = 'summary' if element == 'description'
|
439
|
+
@entries[-1][element] = output
|
440
|
+
if @incontent != 0
|
441
|
+
contentparams = Marshal.load(Marshal.dump(@contentparams))
|
442
|
+
contentparams['value'] = output
|
443
|
+
@entries[-1][element + '_detail'] = contentparams
|
444
|
+
end
|
445
445
|
end
|
446
446
|
elsif (@infeed or @insource) and not @intextinput and not @inimage
|
447
447
|
context = getContext()
|
448
448
|
element = 'subtitle' if element == 'description'
|
449
449
|
context[element] = output
|
450
450
|
if element == 'link'
|
451
|
-
|
451
|
+
context['links'][-1]['href'] = output
|
452
452
|
elsif @incontent != 0
|
453
|
-
|
454
|
-
|
455
|
-
|
453
|
+
contentparams = Marshal.load(Marshal.dump(@contentparams))
|
454
|
+
contentparams['value'] = output
|
455
|
+
context[element + '_detail'] = contentparams
|
456
456
|
end
|
457
457
|
end
|
458
458
|
return output
|
@@ -514,20 +514,20 @@ module FeedParserMixin
|
|
514
514
|
|
515
515
|
def _start_rss(attrsD)
|
516
516
|
versionmap = {'0.91' => 'rss091u',
|
517
|
-
|
518
|
-
|
519
|
-
|
517
|
+
'0.92' => 'rss092',
|
518
|
+
'0.93' => 'rss093',
|
519
|
+
'0.94' => 'rss094'
|
520
520
|
}
|
521
521
|
|
522
522
|
if not @version or @version.empty?
|
523
523
|
attr_version = attrsD['version'] || ''
|
524
524
|
version = versionmap[attr_version]
|
525
525
|
if version and not version.empty?
|
526
|
-
|
526
|
+
@version = version
|
527
527
|
elsif /^2\./ =~ attr_version
|
528
|
-
|
528
|
+
@version = 'rss20'
|
529
529
|
else
|
530
|
-
|
530
|
+
@version = 'rss'
|
531
531
|
end
|
532
532
|
end
|
533
533
|
end
|
@@ -558,17 +558,17 @@ module FeedParserMixin
|
|
558
558
|
def _start_feed(attrsD)
|
559
559
|
@infeed = true
|
560
560
|
versionmap = {'0.1' => 'atom01',
|
561
|
-
|
562
|
-
|
561
|
+
'0.2' => 'atom02',
|
562
|
+
'0.3' => 'atom03'
|
563
563
|
}
|
564
564
|
|
565
565
|
if not @version or @version.empty?
|
566
566
|
attr_version = attrsD['version']
|
567
567
|
version = versionmap[attr_version]
|
568
568
|
if @version and not @version.empty?
|
569
|
-
|
569
|
+
@version = version
|
570
570
|
else
|
571
|
-
|
571
|
+
@version = 'atom'
|
572
572
|
end
|
573
573
|
end
|
574
574
|
end
|
@@ -776,11 +776,11 @@ module FeedParserMixin
|
|
776
776
|
email = detail['email']
|
777
777
|
|
778
778
|
if name and email and not (name.empty? or name.empty?)
|
779
|
-
|
779
|
+
context[key] = "#{name} (#{email})"
|
780
780
|
elsif name and not name.empty?
|
781
|
-
|
781
|
+
context[key] = name
|
782
782
|
elsif email and not email.empty?
|
783
|
-
|
783
|
+
context[key] = email
|
784
784
|
end
|
785
785
|
else
|
786
786
|
author = context[key].dup unless context[key].nil?
|
@@ -791,11 +791,11 @@ module FeedParserMixin
|
|
791
791
|
author.gsub!("\(\)", '')
|
792
792
|
author.strip!
|
793
793
|
author.gsub!(/^\(/,'')
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
794
|
+
author.gsub!(/\)$/,'')
|
795
|
+
author.strip!
|
796
|
+
context["#{key}_detail"] ||= FeedParserDict.new
|
797
|
+
context["#{key}_detail"]['name'] = author
|
798
|
+
context["#{key}_detail"]['email'] = email
|
799
799
|
end
|
800
800
|
end
|
801
801
|
|
@@ -1002,7 +1002,7 @@ module FeedParserMixin
|
|
1002
1002
|
if attrsD.has_key? 'href'
|
1003
1003
|
expectingText = false
|
1004
1004
|
if (attrsD['rel'] == 'alternate') and @html_types.include?mapContentType(attrsD['type'])
|
1005
|
-
|
1005
|
+
context['link'] = attrsD['href']
|
1006
1006
|
end
|
1007
1007
|
else
|
1008
1008
|
push('link', expectingText)
|
@@ -1077,9 +1077,9 @@ module FeedParserMixin
|
|
1077
1077
|
value = popContent('description')
|
1078
1078
|
context = getContext()
|
1079
1079
|
if @intextinput
|
1080
|
-
|
1080
|
+
context['textinput']['description'] = value
|
1081
1081
|
elsif @inimage:
|
1082
|
-
|
1082
|
+
context['image']['description'] = value
|
1083
1083
|
end
|
1084
1084
|
end
|
1085
1085
|
@summaryKey = nil
|
@@ -1100,7 +1100,7 @@ module FeedParserMixin
|
|
1100
1100
|
if attrsD and not attrsD.empty?
|
1101
1101
|
attrsD = itsAnHrefDamnIt(attrsD)
|
1102
1102
|
if attrsD.has_key?('href')
|
1103
|
-
|
1103
|
+
attrsD['href'] = resolveURI(attrsD['href'])
|
1104
1104
|
end
|
1105
1105
|
end
|
1106
1106
|
getContext()['generator_detail'] = FeedParserDict.new(attrsD)
|
@@ -1164,10 +1164,12 @@ module FeedParserMixin
|
|
1164
1164
|
if href and not href.empty?
|
1165
1165
|
context = getContext()
|
1166
1166
|
if not context['id']
|
1167
|
-
|
1167
|
+
context['id'] = href
|
1168
1168
|
end
|
1169
1169
|
end
|
1170
1170
|
end
|
1171
|
+
alias :_start_media_content :_start_enclosure
|
1172
|
+
alias :_start_media_thumbnail :_start_enclosure
|
1171
1173
|
|
1172
1174
|
def _start_source(attrsD)
|
1173
1175
|
@insource = true
|
@@ -1208,13 +1210,13 @@ module FeedParserMixin
|
|
1208
1210
|
if copyToDescription
|
1209
1211
|
_save('description', value)
|
1210
1212
|
end
|
1211
|
-
alias :_end_body :_end_content
|
1212
|
-
alias :_end_xhtml_body :_end_content
|
1213
|
-
alias :_end_content_encoded :_end_content
|
1214
|
-
alias :_end_fullitem :_end_content
|
1215
|
-
alias :_end_prodlink :_end_content
|
1216
1213
|
end
|
1217
|
-
|
1214
|
+
alias :_end_body :_end_content
|
1215
|
+
alias :_end_xhtml_body :_end_content
|
1216
|
+
alias :_end_content_encoded :_end_content
|
1217
|
+
alias :_end_fullitem :_end_content
|
1218
|
+
alias :_end_prodlink :_end_content
|
1219
|
+
|
1218
1220
|
def _start_itunes_image(attrsD)
|
1219
1221
|
push('itunes_image', false)
|
1220
1222
|
getContext()['image'] = FeedParserDict.new({'href' => attrsD['href']})
|
@@ -1230,6 +1232,7 @@ module FeedParserMixin
|
|
1230
1232
|
value = pop('itunes_explicit', false)
|
1231
1233
|
getContext()['itunes_explicit'] = (value == 'yes') and true or false
|
1232
1234
|
end
|
1235
|
+
|
1233
1236
|
end # End FeedParserMixin
|
1234
1237
|
|
1235
1238
|
|
data/lib/rfeedparser/parsers.rb
CHANGED
@@ -21,7 +21,7 @@ module FeedParser
|
|
21
21
|
def getAttrs(attrs)
|
22
22
|
ret = []
|
23
23
|
for i in 0..attrs.getLength
|
24
|
-
|
24
|
+
ret.push([attrs.getName(i), attrs.getValue(i)])
|
25
25
|
end
|
26
26
|
ret
|
27
27
|
end
|
@@ -43,17 +43,17 @@ module FeedParser
|
|
43
43
|
|
44
44
|
def startElement(name, attrs)
|
45
45
|
name =~ /^(([^;]*);)?(.+)$/ # Snag namespaceuri from name
|
46
|
-
|
46
|
+
namespaceuri = ($2 || '').downcase
|
47
47
|
name = $3
|
48
48
|
if /backend\.userland\.com\/rss/ =~ namespaceuri
|
49
|
-
|
50
|
-
|
49
|
+
# match any backend.userland.com namespace
|
50
|
+
namespaceuri = 'http://backend.userland.com/rss'
|
51
51
|
end
|
52
52
|
prefix = @matchnamespaces[namespaceuri]
|
53
53
|
# No need to raise UndeclaredNamespace, Expat does that for us with
|
54
54
|
"unbound prefix (XMLParserError)"
|
55
55
|
if prefix and not prefix.empty?
|
56
|
-
|
56
|
+
name = prefix + ':' + name
|
57
57
|
end
|
58
58
|
name.downcase!
|
59
59
|
unknown_starttag(name, attrs)
|
@@ -72,10 +72,10 @@ module FeedParser
|
|
72
72
|
|
73
73
|
def endElement(name)
|
74
74
|
name =~ /^(([^;]*);)?(.+)$/ # Snag namespaceuri from name
|
75
|
-
|
75
|
+
namespaceuri = ($2 || '').downcase
|
76
76
|
prefix = @matchnamespaces[namespaceuri]
|
77
77
|
if prefix and not prefix.empty?
|
78
|
-
|
78
|
+
localname = prefix + ':' + name
|
79
79
|
end
|
80
80
|
name.downcase!
|
81
81
|
unknown_endtag(name)
|
@@ -112,13 +112,13 @@ module FeedParser
|
|
112
112
|
|
113
113
|
attr_accessor :encoding, :bozo, :feeddata, :entries, :namespacesInUse
|
114
114
|
|
115
|
-
Elements_No_End_Tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
|
116
|
-
'img', 'input', 'isindex', 'link', 'meta', 'param']
|
115
|
+
Elements_No_End_Tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param']
|
117
116
|
New_Declname_Re = /[a-zA-Z][-_.a-zA-Z0-9:]*\s*/
|
118
|
-
|
117
|
+
alias :sgml_feed :feed # feed needs to mapped to feeddata, not the SGMLParser method feed. I think.
|
119
118
|
def feed
|
120
119
|
@feeddata
|
121
120
|
end
|
121
|
+
|
122
122
|
def feed=(data)
|
123
123
|
@feeddata = data
|
124
124
|
end
|
@@ -134,22 +134,23 @@ module FeedParser
|
|
134
134
|
end
|
135
135
|
|
136
136
|
def parse(data)
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
137
|
+
doctype_regexp = Regexp.new('<!((?!DOCTYPE|--|\[))', Regexp::IGNORECASE) # Getting around a Textmate ident bug
|
138
|
+
data.gsub!(doctype_regexp, '&lt;!\1')
|
139
|
+
data.gsub!(/<([^<\s]+?)\s*\/>/) do |tag|
|
140
|
+
clean = tag[1..-3].strip
|
141
|
+
if Elements_No_End_Tag.include?clean
|
142
|
+
tag
|
143
|
+
else
|
144
|
+
'<'+clean+'></'+clean+'>'
|
145
|
+
end
|
146
|
+
end
|
146
147
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
148
|
+
data.gsub!(/&#39;/, "'")
|
149
|
+
data.gsub!(/&#34;/, "'")
|
150
|
+
if @encoding and not @encoding.empty? # FIXME unicode check type(u'')
|
151
|
+
data = uconvert(data,'utf-8',@encoding)
|
152
|
+
end
|
153
|
+
sgml_feed(data) # see the alias above
|
153
154
|
end
|
154
155
|
|
155
156
|
|
@@ -165,11 +166,11 @@ module FeedParser
|
|
165
166
|
data.gsub!('&#39;', '&apos;')
|
166
167
|
data.gsub!('&#x27;', '&apos;')
|
167
168
|
if @contentparams.has_key? 'type' and not ((@contentparams['type'] || 'xml') =~ /xml$/u)
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
169
|
+
data.gsub!('&lt;', '<')
|
170
|
+
data.gsub!('&gt;', '>')
|
171
|
+
data.gsub!('&amp;', '&')
|
172
|
+
data.gsub!('&quot;', '"')
|
173
|
+
data.gsub!('&apos;', "'")
|
173
174
|
end
|
174
175
|
return data
|
175
176
|
end
|