rfeedparser 0.9.9 → 0.9.85

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,93 +0,0 @@
1
- #!/usr/bin/ruby
2
module FeedParserUtilities
  # A Hash whose entries can be reached under several names.
  #
  # The naming of a certain common attribute (such as, "When was the last
  # time this feed was updated?") can have many different names depending
  # on the type of feed we are handling. This class allows us to satisfy
  # the expectations of both the developer who has prior knowledge of the
  # feed type as well as the developer who wants a consistent application
  # interface.
  #
  # @@keymap is a Hash that records what certain attribute names "really
  # are". Its keys are the alias names; its values are the name (or list of
  # candidate names) the data is actually stored under. The #[] and #[]=
  # methods consult @@keymap to see what attribute the developer "really
  # means" if they've asked for one which happens to be in @@keymap's keys.
  #
  # Unknown methods are treated as attribute access (see #method_missing).
  # No broad respond_to_missing? is defined on purpose: claiming to respond
  # to every name would make respond_to?-based protocols (for example
  # Marshal looking for marshal_dump) call into #method_missing.
  class FeedParserDict < Hash
    @@keymap = {'channel' => 'feed',
                'items' => 'entries',
                'guid' => 'id',
                'date' => 'updated',
                'date_parsed' => 'updated_parsed',
                'description' => ['subtitle', 'summary'],
                'url' => ['href'],
                'modified' => 'updated',
                'modified_parsed' => 'updated_parsed',
                'issued' => 'published',
                'issued_parsed' => 'published_parsed',
                'copyright' => 'rights',
                'copyright_detail' => 'rights_detail',
                'tagline' => 'subtitle',
                'tagline_detail' => 'subtitle_detail'}

    # Hash (through Enumerable) already has an #entries method, so attribute
    # style access to the 'entries' key has to be spelled out explicitly.
    def entries
      return self['entries']
    end

    # We could include the [] rewrite in new using Hash.new's block form,
    # but we'd still have to overwrite []= and such.
    #
    # pairs may be:
    # * an Array of [key, value] pairs (only the first element is inspected
    #   to decide this); every pair is stored through #[]=, so alias keys
    #   are translated;
    # * a Hash (or Hash subclass, including FeedParserDict); its entries are
    #   merged in as-is, without alias translation;
    # * anything else (including nil): the dict starts out empty.
    def initialize(pairs=nil)
      super()
      if pairs.is_a?(Array) and pairs[0].is_a?(Array) and pairs[0].length == 2
        pairs.each do |k, v|
          self[k] = v
        end
      elsif pairs.is_a?(Hash)
        merge!(pairs)
      end
    end

    # Look up key, honouring the aliases in @@keymap.
    #
    # 'category' returns the 'term' of the first tag and 'categories' returns
    # a list of [scheme, term] pairs built from 'tags'; both return nil when
    # no tags are stored instead of failing on nil.
    def [](key)
      if key == 'category'
        tags = self['tags']
        return nil if tags.nil? or tags.empty?
        return tags[0]['term']
      end
      if key == 'categories'
        tags = self['tags']
        return nil if tags.nil?
        return tags.collect{|tag| [tag['scheme'],tag['term']]}
      end
      realkey = @@keymap[key] || key
      if realkey.is_a?(Array)
        # Several candidate names: the first one actually present wins.
        # The block variable must not be called `key`: on Ruby 1.8 that
        # would overwrite the method argument used below.
        realkey.each{ |candidate| return self[candidate] if has_key?(candidate) }
      end
      # Note that the original key is preferred over the realkey we (might
      # have) found in @@keymap
      if has_key?(key)
        return super(key)
      end
      return super(realkey)
    end

    # Store value under the real name of key. When @@keymap lists several
    # candidates for an alias, the first candidate is used.
    def []=(key,value)
      if @@keymap.key?(key)
        key = @@keymap[key]
        if key.is_a?(Array)
          key = key[0]
        end
      end
      super(key,value)
    end

    # Attribute-style access: dict.title reads self['title'] and
    # dict.title = x writes self['title'].
    # Names ending in '!' or '?', or starting with '_', raise NoMethodError.
    def method_missing(msym, *args)
      methodname = msym.to_s
      # str[-1, 1] yields a one-character String on every Ruby version,
      # whereas str[-1] is a Fixnum on Ruby 1.8 and would never equal '='.
      last = methodname[-1, 1]
      if last == '='
        return self[methodname[0..-2]] = args[0]
      elsif last != '!' and last != '?' and methodname[0, 1] != "_" # FIXME implement with private?
        return self[methodname]
      else
        raise NoMethodError, "whoops, we don't know about the attribute or method called `#{methodname}' for #{self}:#{self.class}"
      end
    end
  end
end
@@ -1,93 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- module URI
4
- # NOTE I wish I didn't have to open this module up,but I cannot find a
5
- # better way of accessing all of the instance methods of the URI module. I \
6
- # may just be an idiot.
7
- def self.split(uri)
8
- case uri
9
- when ''
10
- # null uri
11
-
12
- when ABS_URI
13
- scheme, opaque, userinfo, host, port,
14
- registry, path, query, fragment = $~[1..-1]
15
-
16
- # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
17
-
18
- # absoluteURI = scheme ":" ( hier_part | opaque_part )
19
- # hier_part = ( net_path | abs_path ) [ "?" query ]
20
- # opaque_part = uric_no_slash *uric
21
-
22
- # abs_path = "/" path_segments
23
- # net_path = "//" authority [ abs_path ]
24
-
25
- # authority = server | reg_name
26
- # server = [ [ userinfo "@" ] hostport ]
27
-
28
- if !scheme
29
- raise InvalidURIError,
30
- "bad URI(absolute but no scheme): #{uri}"
31
- end
32
- if !opaque && (!path && (!host && !registry))
33
- raise InvalidURIError,
34
- "bad URI(absolute but no path): #{uri}"
35
- end
36
-
37
- when REL_URI
38
- scheme = nil
39
- opaque = nil
40
-
41
- userinfo, host, port, registry,
42
- rel_segment, abs_path, query, fragment = $~[1..-1]
43
- if rel_segment && abs_path
44
- path = rel_segment + abs_path
45
- elsif rel_segment
46
- path = rel_segment
47
- elsif abs_path
48
- path = abs_path
49
- end
50
-
51
- # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
52
-
53
- # relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
54
-
55
- # net_path = "//" authority [ abs_path ]
56
- # abs_path = "/" path_segments
57
- # rel_path = rel_segment [ abs_path ]
58
-
59
- # authority = server | reg_name
60
- # server = [ [ userinfo "@" ] hostport ]
61
-
62
- else
63
- # NOTE this is the only part of the code that differs from the "clean"
64
- # URI module.
65
- return [nil,nil,uri,nil,nil,nil,nil,nil,nil]
66
- end
67
-
68
- path = '' if !path && !opaque # (see RFC2396 Section 5.2)
69
- ret = [
70
- scheme,
71
- userinfo, host, port, # X
72
- registry, # X
73
- path, # Y
74
- opaque, # Y
75
- query,
76
- fragment
77
- ]
78
- return ret
79
- end
80
- end
81
-
82
# Resolve uri against base and return the result as a String.
#
# Surplus slashes directly after the scheme are collapsed first
# ("http:////host/x" becomes "http://host/x").
#
# This never returns nil and never raises for an unparsable uri:
# * if both base and uri are relative, the parsed uri is returned on its own;
# * if the join fails for any other reason, the (slash-fixed) uri is
#   returned unchanged.
def urljoin(base, uri)
  # \A anchors at the start of the string (^ would also match after a
  # newline); '-' is escaped so that "+-." is not read as a character range.
  urifixer = /\A([A-Za-z][A-Za-z0-9+.\-]*:\/\/)(\/*)(.*?)/u
  uri = uri.sub(urifixer, '\1\3')
  begin
    return URI.join(base, uri).to_s
  rescue URI::BadURIError
    # Both parsed fine here, otherwise InvalidURIError would have been
    # raised instead, so re-parsing is safe.
    return URI.parse(uri).to_s if URI.parse(base).relative?
    return uri
  rescue URI::InvalidURIError
    return uri
  end
end
93
-
@@ -1,73 +0,0 @@
1
- #!/usr/bin/ruby
2
module FeedParserUtilities
  #FIXME we need to find a better place for this method

  # Strips DOCTYPE from XML document, returns [rss_version, stripped_data].
  #
  # rss_version is 'rss091n' when the DOCTYPE mentions "netscape" (in any
  # letter case), otherwise nil.
  # stripped_data is the same XML document, minus all <!ENTITY ...>
  # declarations and minus the first DOCTYPE.
  def stripDoctype(data)
    entity_pattern = /<!ENTITY(.*?)>/m # m is for Regexp::MULTILINE
    data = data.gsub(entity_pattern,'')

    doctype_pattern = /<!DOCTYPE(.*?)>/m
    # Text between "<!DOCTYPE" and the first ">", or '' without a DOCTYPE.
    doctype = data[doctype_pattern, 1] || ''

    if /netscape/ =~ doctype.downcase
      version = 'rss091n'
    else
      version = nil
    end
    data = data.sub(doctype_pattern, '')
    return version, data
  end

  # Rewrites relative URIs in the listed element/attribute pairs of
  # htmlSource against baseURI and returns the resulting HTML.
  #
  # An attribute whose value cannot be parsed as a URI is left untouched
  # rather than aborting the whole document. The encoding argument is
  # accepted but not used here.
  def resolveRelativeURIs(htmlSource, baseURI, encoding)
    $stderr << "entering resolveRelativeURIs\n" if $debug # FIXME write a decent logger
    relative_uris = [ ['a','href'],
                      ['applet','codebase'],
                      ['area','href'],
                      ['blockquote','cite'],
                      ['body','background'],
                      ['del','cite'],
                      ['form','action'],
                      ['frame','longdesc'],
                      ['frame','src'],
                      ['iframe','longdesc'],
                      ['iframe','src'],
                      ['head','profile'],
                      ['img','longdesc'],
                      ['img','src'],
                      ['img','usemap'],
                      ['input','src'],
                      ['input','usemap'],
                      ['ins','cite'],
                      ['link','href'],
                      ['object','classid'],
                      ['object','codebase'],
                      ['object','data'],
                      ['object','usemap'],
                      ['q','cite'],
                      ['script','src'],
                    ]
    h = Hpricot(htmlSource)
    relative_uris.each do |ename, eattr|
      h.search(ename).each do |elem|
        euri = elem.attributes[eattr]
        next if euri.nil? or euri.empty?
        begin
          is_relative = URI.parse(URI.encode(euri)).relative?
        rescue URI::InvalidURIError
          # Not something we can resolve; keep the attribute as it is.
          next
        end
        elem.attributes[eattr] = urljoin(baseURI, euri) if is_relative
      end
    end
    return h.to_html
  end
end
72
-
73
-
@@ -1,1235 +0,0 @@
1
- #!/usr/bin/ruby
2
- module FeedParserMixin
3
- attr_accessor :feeddata, :version, :namespacesInUse, :date_handlers
4
-
5
- def startup(baseuri=nil, baselang=nil, encoding='utf-8')
6
- $stderr << "initializing FeedParser\n" if $debug
7
-
8
- @namespaces = {'' => '',
9
- 'http://backend.userland.com/rss' => '',
10
- 'http://blogs.law.harvard.edu/tech/rss' => '',
11
- 'http://purl.org/rss/1.0/' => '',
12
- 'http://my.netscape.com/rdf/simple/0.9/' => '',
13
- 'http://example.com/newformat#' => '',
14
- 'http://example.com/necho' => '',
15
- 'http://purl.org/echo/' => '',
16
- 'uri/of/echo/namespace#' => '',
17
- 'http://purl.org/pie/' => '',
18
- 'http://purl.org/atom/ns#' => '',
19
- 'http://www.w3.org/2005/Atom' => '',
20
- 'http://purl.org/rss/1.0/modules/rss091#' => '',
21
- 'http://webns.net/mvcb/' => 'admin',
22
- 'http://purl.org/rss/1.0/modules/aggregation/' => 'ag',
23
- 'http://purl.org/rss/1.0/modules/annotate/' => 'annotate',
24
- 'http://media.tangent.org/rss/1.0/' => 'audio',
25
- 'http://backend.userland.com/blogChannelModule' => 'blogChannel',
26
- 'http://web.resource.org/cc/' => 'cc',
27
- 'http://backend.userland.com/creativeCommonsRssModule' => 'creativeCommons',
28
- 'http://purl.org/rss/1.0/modules/company' => 'co',
29
- 'http://purl.org/rss/1.0/modules/content/' => 'content',
30
- 'http://my.theinfo.org/changed/1.0/rss/' => 'cp',
31
- 'http://purl.org/dc/elements/1.1/' => 'dc',
32
- 'http://purl.org/dc/terms/' => 'dcterms',
33
- 'http://purl.org/rss/1.0/modules/email/' => 'email',
34
- 'http://purl.org/rss/1.0/modules/event/' => 'ev',
35
- 'http://rssnamespace.org/feedburner/ext/1.0' => 'feedburner',
36
- 'http://freshmeat.net/rss/fm/' => 'fm',
37
- 'http://xmlns.com/foaf/0.1/' => 'foaf',
38
- 'http://www.w3.org/2003/01/geo/wgs84_pos#' => 'geo',
39
- 'http://postneo.com/icbm/' => 'icbm',
40
- 'http://purl.org/rss/1.0/modules/image/' => 'image',
41
- 'http://www.itunes.com/DTDs/PodCast-1.0.dtd' => 'itunes',
42
- 'http://example.com/DTDs/PodCast-1.0.dtd' => 'itunes',
43
- 'http://purl.org/rss/1.0/modules/link/' => 'l',
44
- 'http://search.yahoo.com/mrss' => 'media',
45
- 'http://madskills.com/public/xml/rss/module/pingback/' => 'pingback',
46
- 'http://prismstandard.org/namespaces/1.2/basic/' => 'prism',
47
- 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf',
48
- 'http://www.w3.org/2000/01/rdf-schema#' => 'rdfs',
49
- 'http://purl.org/rss/1.0/modules/reference/' => 'ref',
50
- 'http://purl.org/rss/1.0/modules/richequiv/' => 'reqv',
51
- 'http://purl.org/rss/1.0/modules/search/' => 'search',
52
- 'http://purl.org/rss/1.0/modules/slash/' => 'slash',
53
- 'http://schemas.xmlsoap.org/soap/envelope/' => 'soap',
54
- 'http://purl.org/rss/1.0/modules/servicestatus/' => 'ss',
55
- 'http://hacks.benhammersley.com/rss/streaming/' => 'str',
56
- 'http://purl.org/rss/1.0/modules/subscription/' => 'sub',
57
- 'http://purl.org/rss/1.0/modules/syndication/' => 'sy',
58
- 'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo',
59
- 'http://purl.org/rss/1.0/modules/threading/' => 'thr',
60
- 'http://purl.org/rss/1.0/modules/textinput/' => 'ti',
61
- 'http://madskills.com/public/xml/rss/module/trackback/' =>'trackback',
62
- 'http://wellformedweb.org/commentAPI/' => 'wfw',
63
- 'http://purl.org/rss/1.0/modules/wiki/' => 'wiki',
64
- 'http://www.w3.org/1999/xhtml' => 'xhtml',
65
- 'http://www.w3.org/XML/1998/namespace' => 'xml',
66
- 'http://www.w3.org/1999/xlink' => 'xlink',
67
- 'http://schemas.pocketsoap.com/rss/myDescModule/' => 'szf'
68
- }
69
- @matchnamespaces = {}
70
- @namespaces.each do |l|
71
- @matchnamespaces[l[0].downcase] = l[1]
72
- end
73
- @can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'license', 'icon', 'logo']
74
- @can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
75
- @can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
76
- @html_types = ['text/html', 'application/xhtml+xml']
77
- @feeddata = FeedParserDict.new # feed-level data
78
- @encoding = encoding # character encoding
79
- @entries = [] # list of entry-level data
80
- @version = '' # feed type/version see SUPPORTED_VERSIOSN
81
- @namespacesInUse = {} # hash of namespaces defined by the feed
82
-
83
- # the following are used internall to track state;
84
- # this is really out of control and should be refactored
85
- @infeed = false
86
- @inentry = false
87
- @incontent = 0 # Yes, this needs to be zero until I work out popContent and pushContent
88
- @intextinput = false
89
- @inimage = false
90
- @inauthor = false
91
- @incontributor = false
92
- @inpublisher = false
93
- @insource = false
94
- @sourcedata = FeedParserDict.new
95
- @contentparams = FeedParserDict.new
96
- @summaryKey = nil
97
- @namespacemap = {}
98
- @elementstack = []
99
- @basestack = []
100
- @langstack = []
101
- @baseuri = baseuri || ''
102
- @lang = baselang || nil
103
- if baselang
104
- @feeddata['language'] = baselang.gsub('_','-')
105
- end
106
- @date_handlers = [:_parse_date_rfc822,
107
- :_parse_date_hungarian, :_parse_date_greek,:_parse_date_mssql,
108
- :_parse_date_nate,:_parse_date_onblog,:_parse_date_w3dtf,:_parse_date_iso8601
109
- ]
110
- $stderr << "Leaving startup\n" if $debug # My addition
111
- end
112
-
113
# Called for every start tag.
#
# tag is the (possibly prefixed) element name; attrsd is either a Hash of
# attributes or an Array of [name, value] pairs.
#
# Tracks xml:base / xml:lang and namespace declarations, re-emits the tag
# as text when inside inline XHTML content, and otherwise dispatches to a
# _start_<prefix><name> handler. If that dispatch raises NoMethodError
# (no such handler, or a NoMethodError raised inside the handler), the
# element is pushed with the default handling instead.
def unknown_starttag(tag, attrsd)
  $stderr << "start #{tag} with #{attrsd}\n" if $debug
  # normalize attrs
  attrsD = {}
  attrsd = Hash[*attrsd.flatten] if attrsd.class == Array # Magic! Asterisk!
  # LooseFeedParser needs the above because SGMLParser sends attrs as a
  # list of lists (like [['type','text/html'],['mode','escaped']])

  attrsd.each do |old_k, value|
    k = old_k.downcase # Downcase all keys
    # Downcase the value if the key is 'rel' or 'type'. A copy is stored so
    # the caller's attribute strings are not modified.
    if ['rel', 'type'].include?(k) and value.is_a?(String)
      attrsD[k] = value.downcase
    else
      attrsD[k] = value
    end
  end

  # track xml:base and xml:lang
  baseuri = attrsD['xml:base'] || attrsD['base'] || @baseuri
  @baseuri = urljoin(@baseuri, baseuri)
  lang = attrsD['xml:lang'] || attrsD['lang']
  if lang == ''
    # xml:lang could be explicitly set to '', we need to capture that
    lang = nil
  elsif lang.nil?
    # if no xml:lang is specified, use parent lang
    lang = @lang
  end
  if lang and not lang.empty?
    if ['feed', 'rss', 'rdf:RDF'].include?(tag)
      @feeddata['language'] = lang.gsub('_','-')
    end
  end
  @lang = lang
  @basestack << @baseuri
  @langstack << lang

  # track namespaces
  attrsd.each do |prefix, uri|
    if /^xmlns:/ =~ prefix # prefix begins with xmlns:
      trackNamespace(prefix[6..-1], uri)
    elsif prefix == 'xmlns'
      trackNamespace(nil, uri)
    end
  end

  # track inline content
  if @incontent != 0 and @contentparams.has_key?('type') and not ( /xml$/ =~ (@contentparams['type'] || 'xml') )
    # element declared itself as escaped markup, but isn't really
    @contentparams['type'] = 'application/xhtml+xml'
  end
  if @incontent != 0 and @contentparams['type'] == 'application/xhtml+xml'
    # Note: probably shouldn't simply recreate localname here, but
    # our namespace handling isn't actually 100% correct in cases where
    # the feed redefines the default namespace (which is actually
    # the usual case for inline content, thanks Sam), so here we
    # cheat and just reconstruct the element based on localname
    # because that compensates for the bugs in our namespace handling.
    # This will horribly munge inline content with non-empty qnames,
    # but nobody actually does that, so I'm not fixing it.
    tag = tag.split(':')[-1]
    # Each attribute carries its own leading space, so an element without
    # attributes is rebuilt as "<b>" rather than "<b >".
    attrsS = attrsd.to_a.collect{|l| " #{l[0]}=\"#{l[1]}\""}.join('')
    return handle_data("<#{tag}#{attrsS}>", false)
  end

  # match namespaces
  if /:/ =~ tag
    prefix, suffix = tag.split(':', 2)
  else
    prefix, suffix = '', tag
  end
  prefix = @namespacemap[prefix] || prefix
  if prefix and not prefix.empty?
    prefix = prefix + '_'
  end

  # special hack for better tracking of empty textinput/image elements in illformed feeds
  unprefixed = (prefix.nil? or prefix.empty?)
  if unprefixed and not ['title', 'link', 'description', 'name'].include?(tag)
    @intextinput = false
  end
  if unprefixed and not ['title', 'link', 'description', 'url', 'href', 'width', 'height'].include?(tag)
    @inimage = false
  end

  # call special handler (if defined) or default handler
  begin
    return send('_start_'+prefix+suffix, attrsD)
  rescue NoMethodError
    return push(prefix + suffix, true)
  end
end # End unknown_starttag
206
-
207
# Called for every end tag.
#
# Dispatches to an _end_<prefix><name> handler; if that raises
# NoMethodError the element is popped with the default handling. Afterwards
# the tag is re-emitted as text when inside inline XHTML content, and the
# xml:base / xml:lang scopes opened by the matching start tag are closed.
def unknown_endtag(tag)
  $stderr << "end #{tag}\n" if $debug
  # match namespaces
  if tag.index(':')
    prefix, suffix = tag.split(':',2)
  else
    prefix, suffix = '', tag
  end
  prefix = @namespacemap[prefix] || prefix
  if prefix and not prefix.empty?
    prefix = prefix + '_'
  end

  # call special handler (if defined) or default handler
  begin
    send('_end_' + prefix + suffix) # NOTE no return here! do not add it!
  rescue NoMethodError
    pop(prefix + suffix)
  end

  # track inline content
  # Same test as in unknown_starttag: a declared type that does not end in
  # "xml" is switched to XHTML once real markup shows up inside it.
  if @incontent != 0 and @contentparams.has_key?('type') and not ( /xml$/ =~ (@contentparams['type'] || 'xml') )
    # element declared itself as escaped markup, but it isn't really
    @contentparams['type'] = 'application/xhtml+xml'
  end
  if @incontent != 0 and @contentparams['type'] == 'application/xhtml+xml'
    tag = tag.split(':')[-1]
    handle_data("</#{tag}>", false)
  end

  # track xml:base and xml:lang going out of scope
  if @basestack and not @basestack.empty?
    @basestack.pop
    enclosing_base = @basestack[-1]
    # An empty or missing enclosing base leaves @baseuri as it is.
    if enclosing_base and not enclosing_base.empty?
      @baseuri = enclosing_base
    end
  end
  if @langstack and not @langstack.empty?
    @langstack.pop
    if not @langstack.empty?
      @lang = @langstack[-1]
    end
  end
end
251
-
252
# LooseParserOnly
# Called for each character reference, e.g. for '&#160;', ref will be '160'.
#
# References to the five XML-special characters (", &, ', <, >) are appended
# to the current element's text still in "&#...;" form; every other
# reference is appended as the UTF-8 encoded character it denotes.
# Does nothing when no element is open. ref itself is not modified.
def handle_charref(ref)
  $stderr << "entering handle_charref with #{ref}\n" if $debug
  return if @elementstack.nil? or @elementstack.empty?
  # Work on a lowercase copy: downcase! would alter the caller's string
  # and fails outright on a frozen one.
  ref = ref.downcase
  chars = ['34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e']
  if chars.include?(ref)
    text = "&##{ref};"
  else
    if ref[0..0] == 'x'
      c = (ref[1..-1]).to_i(16)
    else
      c = ref.to_i
    end
    text = [c].pack('U*')
  end
  @elementstack[-1][2] << text
end
271
-
272
- def handle_entityref(ref)
273
- # LooseParserOnly
274
- # called for each entity reference, e.g. for '&copy;', ref will be 'copy'
275
-
276
- return if @elementstack.nil? or @elementstack.empty?
277
- $stderr << "entering handle_entityref with #{ref}\n" if $debug
278
- ents = ['lt', 'gt', 'quot', 'amp', 'apos']
279
- if ents.include?ref
280
- text = "&#{ref};"
281
- else
282
- text = HTMLEntities::decode_entities("&#{ref};")
283
- end
284
- @elementstack[-1][2] << text
285
- end
286
-
287
- def handle_data(text, escape=true)
288
- # called for each block of plain text, i.e. outside of any tag and
289
- # not containing any character or entity references
290
- return if @elementstack.nil? or @elementstack.empty?
291
- if escape and @contentparams['type'] == 'application/xhtml+xml'
292
- text = text.to_xs
293
- end
294
- @elementstack[-1][2] << text
295
- end
296
-
297
- def handle_comment(comment)
298
- # called for each comment, e.g. <!-- insert message here -->
299
- end
300
-
301
- def handle_pi(text)
302
- end
303
-
304
- def handle_decl(text)
305
- end
306
-
307
- def parse_declaration(i)
308
- # for LooseFeedParser
309
- $stderr << "entering parse_declaration\n" if $debug
310
- if @rawdata[i...i+9] == '<![CDATA['
311
- k = @rawdata.index(/\]\]>/u,i+9)
312
- k = @rawdata.length unless k
313
- handle_data(@rawdata[i+9...k].to_xs,false)
314
- return k+3
315
- else
316
- k = @rawdata.index(/>/,i).to_i
317
- return k+1
318
- end
319
- end
320
-
321
# Returns the lowercased content type, with the shorthand names
# 'text', 'html' and 'xhtml' expanded to full MIME types.
# The argument itself is left unmodified (so frozen strings are fine).
def mapContentType(contentType)
  normalized = contentType.downcase
  case normalized
  when 'text'
    normalized = 'text/plain'
  when 'html'
    normalized = 'text/html'
  when 'xhtml'
    normalized = 'application/xhtml+xml'
  end
  return normalized
end
333
-
334
- def trackNamespace(prefix, uri)
335
-
336
- loweruri = uri.downcase.strip
337
- if [prefix, loweruri] == [nil, 'http://my.netscape.com/rdf/simple/0.9/'] and (@version.nil? or @version.empty?)
338
- @version = 'rss090'
339
- elsif loweruri == 'http://purl.org/rss/1.0/' and (@version.nil? or @version.empty?)
340
- @version = 'rss10'
341
- elsif loweruri == 'http://www.w3.org/2005/atom' and (@version.nil? or @version.empty?)
342
- @version = 'atom10'
343
- elsif /backend\.userland\.com\/rss/ =~ loweruri
344
- # match any backend.userland.com namespace
345
- uri = 'http://backend.userland.com/rss'
346
- loweruri = uri
347
- end
348
- if @matchnamespaces.has_key? loweruri
349
- @namespacemap[prefix] = @matchnamespaces[loweruri]
350
- @namespacesInUse[@matchnamespaces[loweruri]] = uri
351
- else
352
- @namespacesInUse[prefix || ''] = uri
353
- end
354
- end
355
-
356
- def resolveURI(uri)
357
- return urljoin(@baseuri || '', uri)
358
- end
359
-
360
- def decodeEntities(element, data)
361
- return data
362
- end
363
-
364
- def push(element, expectingText)
365
- @elementstack << [element, expectingText, []]
366
- end
367
-
368
- def pop(element, stripWhitespace=true)
369
- return if @elementstack.nil? or @elementstack.empty?
370
- return if @elementstack[-1][0] != element
371
- element, expectingText, pieces = @elementstack.pop
372
- if pieces.class == Array
373
- output = pieces.join('')
374
- else
375
- output = pieces
376
- end
377
- if stripWhitespace
378
- output.strip!
379
- end
380
- return output if not expectingText
381
-
382
- # decode base64 content
383
- if @contentparams['base64']
384
- out64 = Base64::decode64(output) # a.k.a. [output].unpack('m')[0]
385
- if not output.empty? and not out64.empty?
386
- output = out64
387
- end
388
- end
389
-
390
- # resolve relative URIs
391
- if @can_be_relative_uri.include?element and output and not output.empty?
392
- output = resolveURI(output)
393
- end
394
-
395
- # decode entities within embedded markup
396
- if not @contentparams['base64']
397
- output = decodeEntities(element, output)
398
- end
399
-
400
- # remove temporary cruft from contentparams
401
- @contentparams.delete('mode')
402
- @contentparams.delete('base64')
403
-
404
- # resolve relative URIs within embedded markup
405
- if @html_types.include?mapContentType(@contentparams['type'] || 'text/html')
406
- if @can_contain_relative_uris.include?element
407
- output = FeedParser.resolveRelativeURIs(output, @baseuri, @encoding)
408
- end
409
- end
410
- # sanitize embedded markup
411
- if @html_types.include?mapContentType(@contentparams['type'] || 'text/html')
412
- if @can_contain_dangerous_markup.include?element
413
- output = FeedParser.sanitizeHTML(output, @encoding)
414
- end
415
- end
416
-
417
- if @encoding and not @encoding.empty? and @encoding != 'utf-8'
418
- output = uconvert(output, @encoding, 'utf-8')
419
- # FIXME I turn everything into utf-8, not unicode, originally because REXML was being used but now beause I haven't tested it out yet.
420
- end
421
-
422
- # categories/tags/keywords/whatever are handled in _end_category
423
- return output if element == 'category'
424
-
425
- # store output in appropriate place(s)
426
- if @inentry and not @insource
427
- if element == 'content'
428
- @entries[-1][element] ||= []
429
- contentparams = Marshal.load(Marshal.dump(@contentparams)) # deepcopy
430
- contentparams['value'] = output
431
- @entries[-1][element] << contentparams
432
- elsif element == 'link'
433
- @entries[-1][element] = output
434
- if output and not output.empty?
435
- @entries[-1]['links'][-1]['href'] = output
436
- end
437
- else
438
- element = 'summary' if element == 'description'
439
- @entries[-1][element] = output
440
- if @incontent != 0
441
- contentparams = Marshal.load(Marshal.dump(@contentparams))
442
- contentparams['value'] = output
443
- @entries[-1][element + '_detail'] = contentparams
444
- end
445
- end
446
- elsif (@infeed or @insource) and not @intextinput and not @inimage
447
- context = getContext()
448
- element = 'subtitle' if element == 'description'
449
- context[element] = output
450
- if element == 'link'
451
- context['links'][-1]['href'] = output
452
- elsif @incontent != 0
453
- contentparams = Marshal.load(Marshal.dump(@contentparams))
454
- contentparams['value'] = output
455
- context[element + '_detail'] = contentparams
456
- end
457
- end
458
- return output
459
- end
460
-
461
- def pushContent(tag, attrsD, defaultContentType, expectingText)
462
- @incontent += 1 # Yes, I hate this.
463
- type = mapContentType(attrsD['type'] || defaultContentType)
464
- @contentparams = FeedParserDict.new({'type' => type,'language' => @lang,'base' => @baseuri})
465
- @contentparams['base64'] = isBase64(attrsD, @contentparams)
466
- push(tag, expectingText)
467
- end
468
-
469
- def popContent(tag)
470
- value = pop(tag)
471
- @incontent -= 1
472
- @contentparams.clear
473
- return value
474
- end
475
-
476
- def mapToStandardPrefix(name)
477
- colonpos = name.index(':')
478
- if colonpos
479
- prefix = name[0..colonpos-1]
480
- suffix = name[colonpos+1..-1]
481
- prefix = @namespacemap[prefix] || prefix
482
- name = prefix + ':' + suffix
483
- end
484
- return name
485
- end
486
-
487
- def getAttribute(attrsD, name)
488
- return attrsD[mapToStandardPrefix(name)]
489
- end
490
-
491
# Decides whether element content should be treated as base64 encoded.
#
# True when the element's 'mode' attribute is 'base64'. Otherwise false for
# content types that start with "text/" or end in "+xml" or "/xml", and
# true for everything else (including a missing type).
def isBase64(attrsD, contentparams)
  return true if (attrsD['mode'] == 'base64')
  # \A and \z anchor on the whole string; ^ and $ would also match around
  # a newline embedded in the type.
  if /(\Atext\/)|(\+xml\z)|(\/xml\z)/ =~ contentparams['type']
    return false
  end
  return true
end
498
-
499
- def itsAnHrefDamnIt(attrsD)
500
- href= attrsD['url'] || attrsD['uri'] || attrsD['href']
501
- if href
502
- attrsD.delete('url')
503
- attrsD.delete('uri')
504
- attrsD['href'] = href
505
- end
506
- return attrsD
507
- end
508
-
509
-
510
- def _save(key, value)
511
- context = getContext()
512
- context[key] ||= value
513
- end
514
-
515
- def _start_rss(attrsD)
516
- versionmap = {'0.91' => 'rss091u',
517
- '0.92' => 'rss092',
518
- '0.93' => 'rss093',
519
- '0.94' => 'rss094'
520
- }
521
-
522
- if not @version or @version.empty?
523
- attr_version = attrsD['version'] || ''
524
- version = versionmap[attr_version]
525
- if version and not version.empty?
526
- @version = version
527
- elsif /^2\./ =~ attr_version
528
- @version = 'rss20'
529
- else
530
- @version = 'rss'
531
- end
532
- end
533
- end
534
-
535
- def _start_dlhottitles(attrsD)
536
- @version = 'hotrss'
537
- end
538
-
539
- def _start_channel(attrsD)
540
- @infeed = true
541
- _cdf_common(attrsD)
542
- end
543
- alias :_start_feedinfo :_start_channel
544
-
545
- def _cdf_common(attrsD)
546
- if attrsD.has_key?'lastmod'
547
- _start_modified({})
548
- @elementstack[-1][-1] = attrsD['lastmod']
549
- _end_modified
550
- end
551
- if attrsD.has_key?'href'
552
- _start_link({})
553
- @elementstack[-1][-1] = attrsD['href']
554
- _end_link
555
- end
556
- end
557
-
558
# Start of an Atom <feed> element.
#
# Marks the parser as being inside the feed and, unless a version has
# already been determined, derives it from the 'version' attribute:
# '0.1', '0.2' and '0.3' map to 'atom01', 'atom02' and 'atom03'; anything
# else (or no attribute) yields the generic 'atom'.
def _start_feed(attrsD)
  @infeed = true
  versionmap = {'0.1' => 'atom01',
                '0.2' => 'atom02',
                '0.3' => 'atom03'
  }

  if not @version or @version.empty?
    attr_version = attrsD['version']
    version = versionmap[attr_version]
    # Test the looked-up value, not @version (known to be empty here).
    if version and not version.empty?
      @version = version
    else
      @version = 'atom'
    end
  end
end
575
-
576
- def _end_channel
577
- @infeed = false
578
- end
579
- alias :_end_feed :_end_channel
580
-
581
- def _start_image(attrsD)
582
- @inimage = true
583
- push('image', false)
584
- context = getContext()
585
- context['image'] ||= FeedParserDict.new
586
- end
587
-
588
- def _end_image
589
- pop('image')
590
- @inimage = false
591
- end
592
-
593
- def _start_textinput(attrsD)
594
- @intextinput = true
595
- push('textinput', false)
596
- context = getContext()
597
- context['textinput'] ||= FeedParserDict.new
598
- end
599
- alias :_start_textInput :_start_textinput
600
-
601
- def _end_textinput
602
- pop('textinput')
603
- @intextinput = false
604
- end
605
- alias :_end_textInput :_end_textinput
606
-
607
- def _start_author(attrsD)
608
- @inauthor = true
609
- push('author', true)
610
- end
611
- alias :_start_managingeditor :_start_author
612
- alias :_start_dc_author :_start_author
613
- alias :_start_dc_creator :_start_author
614
- alias :_start_itunes_author :_start_author
615
-
616
- def _end_author
617
- pop('author')
618
- @inauthor = false
619
- _sync_author_detail()
620
- end
621
- alias :_end_managingeditor :_end_author
622
- alias :_end_dc_author :_end_author
623
- alias :_end_dc_creator :_end_author
624
- alias :_end_itunes_author :_end_author
625
-
626
- def _start_itunes_owner(attrsD)
627
- @inpublisher = true
628
- push('publisher', false)
629
- end
630
-
631
- def _end_itunes_owner
632
- pop('publisher')
633
- @inpublisher = false
634
- _sync_author_detail('publisher')
635
- end
636
-
637
- def _start_contributor(attrsD)
638
- @incontributor = true
639
- context = getContext()
640
- context['contributors'] ||= []
641
- context['contributors'] << FeedParserDict.new
642
- push('contributor', false)
643
- end
644
-
645
- def _end_contributor
646
- pop('contributor')
647
- @incontributor = false
648
- end
649
-
650
- def _start_dc_contributor(attrsD)
651
- @incontributor = true
652
- context = getContext()
653
- context['contributors'] ||= []
654
- context['contributors'] << FeedParserDict.new
655
- push('name', false)
656
- end
657
-
658
- def _end_dc_contributor
659
- _end_name
660
- @incontributor = false
661
- end
662
-
663
- def _start_name(attrsD)
664
- push('name', false)
665
- end
666
- alias :_start_itunes_name :_start_name
667
-
668
- def _end_name
669
- value = pop('name')
670
- if @inpublisher
671
- _save_author('name', value, 'publisher')
672
- elsif @inauthor
673
- _save_author('name', value)
674
- elsif @incontributor
675
- _save_contributor('name', value)
676
- elsif @intextinput
677
- context = getContext()
678
- context['textinput']['name'] = value
679
- end
680
- end
681
- alias :_end_itunes_name :_end_name
682
-
683
- def _start_width(attrsD)
684
- push('width', false)
685
- end
686
-
687
- def _end_width
688
- value = pop('width').to_i
689
- if @inimage
690
- context = getContext
691
- context['image']['width'] = value
692
- end
693
- end
694
-
695
- def _start_height(attrsD)
696
- push('height', false)
697
- end
698
-
699
- def _end_height
700
- value = pop('height').to_i
701
- if @inimage
702
- context = getContext()
703
- context['image']['height'] = value
704
- end
705
- end
706
-
707
- def _start_url(attrsD)
708
- push('href', true)
709
- end
710
- alias :_start_homepage :_start_url
711
- alias :_start_uri :_start_url
712
-
713
- def _end_url
714
- value = pop('href')
715
- if @inauthor
716
- _save_author('href', value)
717
- elsif @incontributor
718
- _save_contributor('href', value)
719
- elsif @inimage
720
- context = getContext()
721
- context['image']['href'] = value
722
- elsif @intextinput
723
- context = getContext()
724
- context['textinput']['link'] = value
725
- end
726
- end
727
- alias :_end_homepage :_end_url
728
- alias :_end_uri :_end_url
729
-
730
# Starts collecting the text of an <email> element (also <itunes:email>);
# attrsD is not read.
def _start_email(attrsD)
  push('email', false)
end
alias _start_itunes_email _start_email
734
-
735
# Finishes an <email> element and files the address under the open construct.
# Precedence: publisher, author, contributor. Otherwise it is discarded.
def _end_email
  address = pop('email')
  return _save_author('email', address, 'publisher') if @inpublisher
  return _save_author('email', address) if @inauthor
  _save_contributor('email', address) if @incontributor
end
alias _end_itunes_email _end_email
746
-
747
# Returns the hash that parsed values should currently be written to:
# @sourcedata while inside a <source>, else the newest entry while inside an
# entry, else the feed-level data.
def getContext
  return @sourcedata if @insource
  return @entries[-1] if @inentry
  @feeddata
end
757
-
758
# Stores one field (key => value) in the "<prefix>_detail" record of the
# current context, creating the record on first use, then re-synchronises the
# author string via _sync_author_detail (called with its default key).
def _save_author(key, value, prefix='author')
  detail_key = prefix + '_detail'
  record = getContext
  record[detail_key] = FeedParserDict.new unless record[detail_key]
  record[detail_key][key] = value
  _sync_author_detail
end
764
-
765
# Stores one field (key => value) on the most recently added contributor of
# the current context. If the context has no contributor list yet, one
# holding a single empty record is created first.
def _save_contributor(key, value)
  record = getContext
  record['contributors'] = [FeedParserDict.new] unless record['contributors']
  record['contributors'].last[key] = value
end
770
-
771
# Keeps the flat "<key>" string and the structured "<key>_detail" record of
# the current context consistent with each other.
#
# key - which pair to synchronise; 'author' by default ('publisher' is the
#       other value used by callers in this file).
#
# * When a non-empty detail record exists, the flat value is rebuilt from it:
#   "name (email)" when both are present, otherwise whichever one is present.
# * Otherwise the flat value is scanned for an e-mail address. If one is
#   found, the remaining text (minus surrounding parentheses) becomes the name
#   and both parts are written to the detail record.
#
# Nothing is changed when there is no flat value or it holds no e-mail address.
def _sync_author_detail(key='author')
  context = getContext()
  detail_key = "#{key}_detail"
  detail = context[detail_key]
  if detail && !detail.empty?
    name = detail['name']
    email = detail['email']
    has_name = name && !name.empty?
    # The original tested name.empty? twice, so an empty e-mail slipped
    # through and produced "name ()".
    has_email = email && !email.empty?

    if has_name && has_email
      context[key] = "#{name} (#{email})"
    elsif has_name
      context[key] = name
    elsif has_email
      context[key] = email
    end
  else
    # Work on a copy: the gsub!/strip! calls below must not touch the stored value.
    author = context[key].dup unless context[key].nil?
    return if !author || author.empty?
    emailmatch = author.match(/(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))/)
    # No address in the text: nothing to split (previously raised on nil[1]).
    return unless emailmatch
    email = emailmatch[1]
    author.gsub!(email, '')
    author.gsub!('()', '')
    author.strip!
    author.gsub!(/^\(/, '')
    author.gsub!(/\)$/, '')
    author.strip!
    context[detail_key] ||= FeedParserDict.new
    context[detail_key]['name'] = author
    context[detail_key]['email'] = email
  end
end
801
-
802
# Opens a subtitle-like element (<subtitle>, <tagline>, <itunes:subtitle>) as
# 'subtitle' content with a default type of text/plain.
def _start_subtitle(attrsD)
  pushContent('subtitle', attrsD, 'text/plain', true)
end
alias _start_tagline _start_subtitle
alias _start_itunes_subtitle _start_subtitle
807
-
808
# Closes a subtitle-like element by popping the 'subtitle' content.
def _end_subtitle
  popContent('subtitle')
end
alias _end_tagline _end_subtitle
alias _end_itunes_subtitle _end_subtitle
813
-
814
# Opens a rights element (<rights>, <dc:rights>, <copyright>) as 'rights'
# content with a default type of text/plain.
def _start_rights(attrsD)
  pushContent('rights', attrsD, 'text/plain', true)
end
alias _start_dc_rights _start_rights
alias _start_copyright _start_rights
819
-
820
# Closes a rights element by popping the 'rights' content.
def _end_rights
  popContent('rights')
end
alias _end_dc_rights _end_rights
alias _end_copyright _end_rights
825
-
826
# Opens a new entry (<item>, <entry>, <product>): appends an empty record to
# @entries, enters entry mode, resets the guid-is-link flag, takes the entry
# id from a non-empty rdf:about attribute and finally delegates to _cdf_common.
def _start_item(attrsD)
  @entries.push(FeedParserDict.new)
  push('item', false)
  @inentry = true
  @guidislink = false
  about = getAttribute(attrsD, 'rdf:about')
  getContext()['id'] = about if about && !about.empty?
  _cdf_common(attrsD)
end
alias _start_entry _start_item
alias _start_product _start_item
840
-
841
# Closes the current entry: pops the 'item' element and leaves entry mode.
def _end_item
  pop('item')
  @inentry = false
end
alias _end_entry _end_item
846
-
847
# Starts collecting a <dc:language> / <language> element as 'language'.
def _start_dc_language(attrsD)
  push('language', true)
end
alias _start_language _start_dc_language
851
-
852
# Closes the language element and remembers its text in @lang.
def _end_dc_language
  language = pop('language')
  @lang = language
end
alias _end_language _end_dc_language
856
-
857
# Starts collecting a <dc:publisher> / <webmaster> element as 'publisher'.
def _start_dc_publisher(attrsD)
  push('publisher', true)
end
alias _start_webmaster _start_dc_publisher
861
-
862
# Closes the publisher element, then splits/merges the publisher string and
# its detail record via _sync_author_detail('publisher').
def _end_dc_publisher
  pop('publisher')
  _sync_author_detail('publisher')
end
alias _end_webmaster _end_dc_publisher
867
-
868
# Starts collecting a publication date (<published>, <dcterms:issued>,
# <issued>) as 'published'.
def _start_published(attrsD)
  push('published', true)
end
alias _start_dcterms_issued _start_published
alias _start_issued _start_published
873
-
874
# Closes the publication date and saves its parse_date result under
# 'published_parsed'.
def _end_published
  _save('published_parsed', parse_date(pop('published')))
end
alias _end_dcterms_issued _end_published
alias _end_issued _end_published
880
-
881
# Starts collecting a modification date (<updated>, <modified>,
# <dcterms:modified>, <pubDate>, <dc:date>) as 'updated'.
def _start_updated(attrsD)
  push('updated', true)
end
alias _start_modified _start_updated
alias _start_dcterms_modified _start_updated
alias _start_pubdate _start_updated
alias _start_dc_date _start_updated
888
-
889
# Closes the modification date and saves its parse_date result under
# 'updated_parsed'.
def _end_updated
  _save('updated_parsed', parse_date(pop('updated')))
end
alias _end_modified _end_updated
alias _end_dcterms_modified _end_updated
alias _end_pubdate _end_updated
alias _end_dc_date _end_updated
897
-
898
# Starts collecting a creation date (<created>, <dcterms:created>) as 'created'.
def _start_created(attrsD)
  push('created', true)
end
alias _start_dcterms_created _start_created
902
-
903
# Closes the creation date and saves its parse_date result under
# 'created_parsed'.
def _end_created
  _save('created_parsed', parse_date(pop('created')))
end
alias _end_dcterms_created _end_created
908
-
909
# Starts collecting an <expirationDate> element under the name 'expired'.
def _start_expirationdate(attrsD)
  push('expired', true)
end
912
# Closes the expiration date and saves its parse_date result under
# 'expired_parsed'.
def _end_expirationdate
  raw_date = pop('expired')
  _save('expired_parsed', parse_date(raw_date))
end
915
-
916
# Handles <cc:license>, whose value lives in the rdf:resource attribute rather
# than in element text.
#
# The element is pushed as 'license', a non-empty rdf:resource is appended to
# the text pieces of the top stack entry, and the element is popped again
# straight away so the value is processed like ordinary element text.
#
# The pop is unconditional. Previously it only happened when rdf:resource was
# present, so an attribute-less <cc:license> stayed on @elementstack forever
# (this file defines no _end_cc_license to remove it).
def _start_cc_license(attrsD)
  push('license', true)
  value = getAttribute(attrsD, 'rdf:resource')
  @elementstack[-1][2] << value if value && !value.empty?
  pop('license')
end
924
-
925
# Starts collecting the text of a <creativeCommons:license> element as 'license'.
def _start_creativecommons_license(attrsD)
  push('license', true)
end
928
-
929
# Closes <creativeCommons:license> by popping the 'license' element.
def _end_creativecommons_license
  pop('license')
end
932
-
933
# Adds a tag {term, scheme, label} to the 'tags' list of the current context.
# The list is created if missing. A tag whose three parts are all nil/empty is
# ignored, and so is one that is already in the list.
def addTag(term, scheme, label)
  context = getContext
  context['tags'] ||= []
  known = context['tags']
  return if [term, scheme, label].all? { |part| part.nil? || part.empty? }
  tag = FeedParserDict.new({'term' => term, 'scheme' => scheme, 'label' => label})
  context['tags'] << tag unless known.include?(tag)
end
945
-
946
# Opens a category-like element (<category>, <dc:subject>, <keywords>):
# registers a tag built from the term / scheme (or domain) / label attributes
# and starts collecting the element text as 'category'.
def _start_category(attrsD)
  $stderr << "entering _start_category with #{attrsD}\n" if $debug

  addTag(attrsD['term'], attrsD['scheme'] || attrsD['domain'], attrsD['label'])
  push('category', true)
end
alias _start_dc_subject _start_category
alias _start_keywords _start_category
957
-
958
# Closes <itunes:keywords>: each whitespace-separated word of the text becomes
# a tag in the iTunes scheme.
def _end_itunes_keywords
  keywords = pop('itunes_keywords').split
  keywords.each { |keyword| addTag(keyword, 'http://www.itunes.com/', nil) }
end
963
-
964
# Opens <itunes:category>: its 'text' attribute becomes a tag in the iTunes
# scheme, then element text collection starts as 'category'.
def _start_itunes_category(attrsD)
  category_text = attrsD['text']
  addTag(category_text, 'http://www.itunes.com/', nil)
  push('category', true)
end
968
-
969
# Closes a category-like element (<category>, <dc:subject>, <keywords>,
# <itunes:category>).
#
# Empty element text is ignored. Otherwise the text fills in the term of the
# most recent tag when that tag has none yet; in every other case it is added
# as a new term-only tag.
#
# Fixes relative to the original: the Ruby 1.8-only `if ...:` form (a syntax
# error on 1.9+) is gone, the value check already covered by the guard clause
# is dropped, and a context without a 'tags' list no longer raises.
def _end_category
  value = pop('category')
  return if value.nil? || value.empty?
  tags = getContext()['tags']
  if tags && !tags.empty? && !tags[-1]['term']
    tags[-1]['term'] = value
  else
    addTag(value, nil, nil)
  end
end
alias :_end_dc_subject :_end_category
alias :_end_keywords :_end_category
alias :_end_itunes_category :_end_category
983
-
984
# Stores the attributes of a <cloud> element as the context's 'cloud' record.
def _start_cloud(attrsD)
  cloud = FeedParserDict.new(attrsD)
  getContext()['cloud'] = cloud
end
987
-
988
# Opens a <link> (or <producturl>) element.
#
# rel and type default to 'alternate' and 'text/html'. The attributes are
# normalised by itsAnHrefDamnIt, an href is resolved via resolveURI, and a
# copy of the attributes is appended to the context's 'links' list. A link
# with rel="enclosure" is also handed to _start_enclosure.
#
# With an href, no element text is collected; an alternate link of an HTML
# type additionally becomes the context's 'link'. Without an href, the text
# is collected as 'link'.
def _start_link(attrsD)
  attrsD['rel'] ||= 'alternate'
  attrsD['type'] ||= 'text/html'
  attrsD = itsAnHrefDamnIt(attrsD)
  attrsD['href'] = resolveURI(attrsD['href']) if attrsD.has_key?('href')
  expecting_text = @infeed || @inentry || @insource
  context = getContext
  context['links'] ||= []
  context['links'] << FeedParserDict.new(attrsD)
  _start_enclosure(attrsD) if attrsD['rel'] == 'enclosure'
  if !attrsD.has_key?('href')
    push('link', expecting_text)
  elsif attrsD['rel'] == 'alternate' && @html_types.include?(mapContentType(attrsD['type']))
    context['link'] = attrsD['href']
  end
end
alias _start_producturl _start_link
1012
-
1013
# Closes a <link> whose value was given as element text and copies it into
# the open textinput and/or image record (both are checked independently).
def _end_link
  href = pop('link')
  context = getContext
  context['textinput']['link'] = href if @intextinput
  context['image']['link'] = href if @inimage
end
alias _end_producturl _end_link
1024
-
1025
# Opens <guid>: records whether the guid doubles as a permalink (true unless
# the 'ispermalink' attribute is present with a value other than 'true') and
# starts collecting the text as 'id'.
def _start_guid(attrsD)
  permalink_flag = attrsD['ispermalink'] || 'true'
  @guidislink = (permalink_flag == 'true')
  push('id', true)
end
1029
-
1030
# Closes <guid>.
#
# 'guidislink' is saved as true only when the guid is a permalink and the
# context has no 'link' yet. When the guid is a permalink its value is also
# passed to _save as 'link'.
#
# Uses a plain `if` — the original `if @guidislink:` is Ruby 1.8-only syntax
# and does not parse on 1.9 or later.
def _end_guid
  value = pop('id')
  _save('guidislink', (@guidislink && !getContext().has_key?('link')))
  if @guidislink
    # The guid acts as the link, but only if 'ispermalink' is absent or 'true'.
    # NOTE(review): an existing link is presumably kept because _save does not
    # overwrite present keys — confirm against _save.
    _save('link', value)
  end
end
1039
-
1040
-
1041
# Opens a title element (<title>, <dc:title>, <media:title>) as text/plain
# 'title' content; text is expected only inside a feed, entry or source.
def _start_title(attrsD)
  expecting_text = @infeed || @inentry || @insource
  pushContent('title', attrsD, 'text/plain', expecting_text)
end
alias _start_dc_title _start_title
alias _start_media_title _start_title
1046
-
1047
# Closes a title element and, when a textinput or (failing that) an image is
# open, copies the title into that record.
def _end_title
  title = popContent('title')
  context = getContext
  section = if @intextinput then 'textinput' elsif @inimage then 'image' end
  context[section]['title'] = title if section
end
alias _end_dc_title _end_title
alias _end_media_title _end_title
1058
-
1059
# Opens <description>. If the context already holds a summary, the element is
# treated as content (and @summaryKey is set to 'content' so the matching end
# handler knows); otherwise it is collected as text/html 'description'.
def _start_description(attrsD)
  unless getContext.has_key?('summary')
    expecting_text = @infeed || @inentry || @insource
    return pushContent('description', attrsD, 'text/html', expecting_text)
  end
  @summaryKey = 'content'
  _start_content(attrsD)
end
1068
-
1069
# Opens <abstract> as text/plain 'description' content; text is expected only
# inside a feed, entry or source.
def _start_abstract(attrsD)
  expecting_text = @infeed || @inentry || @insource
  pushContent('description', attrsD, 'text/plain', expecting_text)
end
1072
-
1073
# Closes <description> / <abstract>.
#
# When the start handler redirected the element to content (@summaryKey is
# 'content') this delegates to _end_content. Otherwise the 'description'
# content is popped and copied into the open textinput or, failing that, the
# open image. @summaryKey is always reset afterwards.
#
# Uses a plain `elsif` — the original `elsif @inimage:` is Ruby 1.8-only
# syntax and does not parse on 1.9 or later.
def _end_description
  if @summaryKey == 'content'
    _end_content()
  else
    value = popContent('description')
    context = getContext()
    if @intextinput
      context['textinput']['description'] = value
    elsif @inimage
      context['image']['description'] = value
    end
  end
  @summaryKey = nil
end
alias :_end_abstract :_end_description
1088
-
1089
# Opens <info> (also <feedburner:browserFriendly>) as text/plain 'info' content.
def _start_info(attrsD)
  pushContent('info', attrsD, 'text/plain', true)
end
alias _start_feedburner_browserfriendly _start_info
1093
-
1094
# Closes <info> by popping the 'info' content.
def _end_info
  popContent('info')
end
alias _end_feedburner_browserfriendly _end_info
1098
-
1099
# Opens <generator>: non-empty attributes are normalised by itsAnHrefDamnIt
# and any href is resolved; a copy becomes the context's 'generator_detail'.
# The element text is then collected as 'generator'.
def _start_generator(attrsD)
  if attrsD && !attrsD.empty?
    attrsD = itsAnHrefDamnIt(attrsD)
    attrsD['href'] = resolveURI(attrsD['href']) if attrsD.has_key?('href')
  end
  detail = FeedParserDict.new(attrsD)
  getContext()['generator_detail'] = detail
  push('generator', true)
end
1109
-
1110
# Closes <generator>: the element text becomes the 'name' of the context's
# generator_detail record, if such a record exists.
def _end_generator
  generator_name = pop('generator')
  context = getContext
  context['generator_detail']['name'] = generator_name if context.has_key?('generator_detail')
end
1117
-
1118
# Handles <admin:generatorAgent>, whose value is in rdf:resource: the element
# is pushed as 'generator', a non-empty resource is appended to the text
# pieces of the top stack entry, the element is popped immediately, and the
# resource is recorded as the href of 'generator_detail'.
def _start_admin_generatoragent(attrsD)
  push('generator', true)
  resource = getAttribute(attrsD, 'rdf:resource')
  @elementstack[-1][2] << resource if resource && !resource.empty?
  pop('generator')
  detail = FeedParserDict.new({'href' => resource})
  getContext()['generator_detail'] = detail
end
1127
-
1128
# Handles <admin:errorReportsTo>, whose value is in rdf:resource: the element
# is pushed as 'errorreportsto', a non-empty resource is appended to the text
# pieces of the top stack entry, and the element is popped immediately.
def _start_admin_errorreportsto(attrsD)
  push('errorreportsto', true)
  resource = getAttribute(attrsD, 'rdf:resource')
  @elementstack[-1][2] << resource if resource && !resource.empty?
  pop('errorreportsto')
end
1136
-
1137
# Opens <summary> / <itunes:summary>. A second summary in the same context is
# treated as content; the first one is collected as text/plain 'summary'.
# @summaryKey records which of the two happened for the end handler.
def _start_summary(attrsD)
  seen_summary = getContext.has_key?('summary')
  @summaryKey = seen_summary ? 'content' : 'summary'
  return _start_content(attrsD) if seen_summary
  pushContent(@summaryKey, attrsD, 'text/plain', true)
end
alias _start_itunes_summary _start_summary
1148
-
1149
# Closes <summary> / <itunes:summary>.
#
# Delegates to _end_content when the start handler redirected the element to
# content; otherwise pops the content named by @summaryKey (falling back to
# 'summary'). @summaryKey is always reset afterwards.
#
# Uses a plain `if` — the original `if ...:` is Ruby 1.8-only syntax and does
# not parse on 1.9 or later.
def _end_summary
  if @summaryKey == 'content'
    _end_content()
  else
    popContent(@summaryKey || 'summary')
  end
  @summaryKey = nil
end
alias :_end_itunes_summary :_end_summary
1158
-
1159
# Records an enclosure: the normalised attributes are appended to the
# context's 'enclosures' list, and a non-empty href becomes the context's id
# when it has none yet.
def _start_enclosure(attrsD)
  attrsD = itsAnHrefDamnIt(attrsD)
  context = getContext
  context['enclosures'] ||= []
  context['enclosures'] << FeedParserDict.new(attrsD)
  href = attrsD['href']
  return unless href && !href.empty?
  context['id'] = href unless context['id']
end
1171
-
1172
# Enters source mode, so getContext hands out @sourcedata until the matching
# end handler runs. attrsD is not read.
def _start_source(attrsD)
  @insource = true
end
1175
-
1176
# Leaves source mode and stores a deep copy (Marshal round-trip) of the
# collected source data as 'source' in the enclosing context, then empties
# @sourcedata.
def _end_source
  @insource = false
  snapshot = Marshal.load(Marshal.dump(@sourcedata))
  getContext()['source'] = snapshot
  @sourcedata.clear
end
1181
-
1182
# Opens <content>: begins text/plain 'content', records a non-empty 'src'
# attribute in @contentparams, and pushes the 'content' element.
#
# Uses a plain `if` — the original `if ...:` is Ruby 1.8-only syntax and does
# not parse on 1.9 or later.
def _start_content(attrsD)
  pushContent('content', attrsD, 'text/plain', true)
  src = attrsD['src']
  if src && !src.empty?
    # NOTE(review): relies on pushContent having set up @contentparams — confirm.
    @contentparams['src'] = src
  end
  push('content', true)
end
1190
-
1191
# Opens <prodlink> as text/html 'content'.
def _start_prodlink(attrsD)
  pushContent('content', attrsD, 'text/html', true)
end
1194
-
1195
# Opens <body> / <xhtml:body> as application/xhtml+xml 'content'.
def _start_body(attrsD)
  pushContent('content', attrsD, 'application/xhtml+xml', true)
end
alias _start_xhtml_body _start_body
1199
-
1200
# Opens <content:encoded> / <fullitem> as text/html 'content'.
def _start_content_encoded(attrsD)
  pushContent('content', attrsD, 'text/html', true)
end
alias _start_fullitem _start_content_encoded
1204
-
1205
# Closes a content element (<content>, <body>, <xhtml:body>,
# <content:encoded>, <fullitem>, <prodlink>).
#
# The popped content is also saved as 'description' when its mapped type is
# text/plain or one of @html_types. Returns nil, as the original did.
#
# The aliases now follow the method definition. Previously they were written
# inside the method body, so _end_body and the others did not exist until
# _end_content itself had been called once.
def _end_content
  # Read the content type before popContent runs.
  # NOTE(review): presumably popContent resets @contentparams — confirm.
  copyToDescription = (['text/plain'] + @html_types).include?(mapContentType(@contentparams['type']))
  value = popContent('content')
  _save('description', value) if copyToDescription
  nil
end
alias :_end_body :_end_content
alias :_end_xhtml_body :_end_content
alias :_end_content_encoded :_end_content
alias :_end_fullitem :_end_content
alias :_end_prodlink :_end_content
1217
-
1218
# Opens <itunes:image> / <itunes:link>: pushes 'itunes_image' and stores the
# href attribute as the context's image record.
def _start_itunes_image(attrsD)
  push('itunes_image', false)
  image = FeedParserDict.new({'href' => attrsD['href']})
  getContext()['image'] = image
end
alias _start_itunes_link _start_itunes_image
1223
-
1224
# Closes <itunes:block>: stores true in the context when the element text is
# exactly 'yes', false otherwise.
def _end_itunes_block
  text = pop('itunes_block', false)
  blocked = (text == 'yes')
  getContext()['itunes_block'] = blocked
end
1228
-
1229
# Closes <itunes:explicit>: stores true in the context when the element text
# is exactly 'yes', false otherwise.
def _end_itunes_explicit
  text = pop('itunes_explicit', false)
  explicit = (text == 'yes')
  getContext()['itunes_explicit'] = explicit
end
1233
- end # End FeedParserMixin
1234
-
1235
-