rfeedparser 0.9.8 → 0.9.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/ruby
2
module FeedParserUtilities
  # A Hash that lets callers use either the feed-type specific name of an
  # attribute or a common alias for it.
  #
  # The naming of a certain common attribute (such as "When was the last
  # time this feed was updated?") differs depending on the type of feed being
  # handled. This class satisfies both the developer who has prior knowledge
  # of the feed type and the developer who wants a consistent interface.
  #
  # @@keymap maps an alias (the key) to the name, or list of candidate names,
  # the value is really stored under. #[] and #[]= consult @@keymap to work
  # out which attribute the caller "really means".
  class FeedParserDict < Hash
    @@keymap = {'channel' => 'feed',
                'items' => 'entries',
                'guid' => 'id',
                'date' => 'updated',
                'date_parsed' => 'updated_parsed',
                'description' => ['subtitle', 'summary'],
                'url' => ['href'],
                'modified' => 'updated',
                'modified_parsed' => 'updated_parsed',
                'issued' => 'published',
                'issued_parsed' => 'published_parsed',
                'copyright' => 'rights',
                'copyright_detail' => 'rights_detail',
                'tagline' => 'subtitle',
                'tagline_detail' => 'subtitle_detail'}

    # Hash (via Enumerable) already defines #entries, which would otherwise
    # win over method_missing; override it to return the stored entries.
    def entries
      return self['entries']
    end

    # Builds the dict from either a list of [key, value] pairs (stored
    # through #[]=, so aliases are translated) or from any Hash, including
    # another FeedParserDict (merged as-is). Anything else yields an empty
    # dict.
    def initialize(pairs=nil)
      super()
      if pairs.is_a?(Array) and pairs[0].is_a?(Array) and pairs[0].length == 2
        pairs.each do |k, v|
          self[k] = v
        end
      elsif pairs.is_a?(Hash)
        # is_a? (not class ==) so Hash subclasses are accepted too
        self.merge!(pairs)
      end
    end

    # Looks +key+ up, translating aliases from @@keymap.
    #
    # 'category' returns the term of the first tag and 'categories' returns
    # [scheme, term] pairs for every tag; both return nil when no tags have
    # been stored. A value stored under the literal +key+ is preferred over
    # the one stored under its mapped name.
    def [](key)
      if key == 'category'
        tags = self['tags']
        return nil if tags.nil? or tags.empty?
        return tags[0]['term']
      end
      if key == 'categories'
        tags = self['tags']
        return nil if tags.nil?
        return tags.collect{|tag| [tag['scheme'],tag['term']]}
      end
      realkey = @@keymap[key] || key
      if realkey.is_a?(Array)
        # distinct block parameter name: must not shadow/clobber +key+
        realkey.each{ |candidate| return super(candidate) if has_key?(candidate) }
      end
      # Note that the original key is preferred over the realkey we (might
      # have) found in @@keymap
      if has_key?(key)
        return super(key)
      end
      return super(realkey)
    end

    # Stores +value+ under the mapped name of +key+ (the first candidate when
    # the alias maps to a list), or under +key+ itself when it is no alias.
    def []=(key,value)
      if @@keymap.key?(key)
        key = @@keymap[key]
        if key.is_a?(Array)
          key = key[0]
        end
      end
      super(key,value)
    end

    # Attribute-style access: +dict.title+ reads self['title'] and
    # +dict.title = x+ writes it. Names ending in '!' or '?' or starting with
    # '_' are not treated as attributes and raise NoMethodError.
    def method_missing(msym, *args)
      methodname = msym.to_s
      # regexps instead of str[-1] comparisons, which differ between Ruby versions
      if methodname =~ /=\z/
        return self[methodname[0..-2]] = args[0]
      elsif methodname !~ /[!?]\z/ and methodname !~ /\A_/ # FIXME implement with private?
        return self[methodname]
      else
        raise NoMethodError, "whoops, we don't know about the attribute or method called `#{methodname}' for #{self}:#{self.class}"
      end
    end
  end
end
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/ruby
2
+
3
module URI
  # NOTE I wish I didn't have to open this module up, but I cannot find a
  # better way of accessing all of the instance methods of the URI module.

  # Splits +uri+ into the nine-element array
  #   [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
  #
  # The only difference from the stock implementation is the final +else+
  # branch: a string matching neither ABS_URI nor REL_URI does not raise but
  # is returned verbatim in the third slot of an otherwise nil array.
  # Raises InvalidURIError for an absolute URI lacking a scheme or any
  # path/host/registry/opaque part.
  def self.split(uri)
    case uri
    when ''
      # null uri: every component stays nil (path becomes '' below)
    when ABS_URI
      scheme, opaque, userinfo, host, port,
        registry, path, query, fragment = Regexp.last_match.captures

      # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
      # absoluteURI   = scheme ":" ( hier_part | opaque_part )
      # hier_part     = ( net_path | abs_path ) [ "?" query ]
      # opaque_part   = uric_no_slash *uric
      # abs_path      = "/"  path_segments
      # net_path      = "//" authority [ abs_path ]
      # authority     = server | reg_name
      # server        = [ [ userinfo "@" ] hostport ]
      unless scheme
        raise InvalidURIError,
          "bad URI(absolute but no scheme): #{uri}"
      end
      unless opaque || path || host || registry
        raise InvalidURIError,
          "bad URI(absolute but no path): #{uri}"
      end
    when REL_URI
      scheme = opaque = nil
      userinfo, host, port, registry,
        rel_segment, abs_path, query, fragment = Regexp.last_match.captures

      # relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
      # net_path    = "//" authority [ abs_path ]
      # abs_path    = "/"  path_segments
      # rel_path    = rel_segment [ abs_path ]
      path = if rel_segment && abs_path
               rel_segment + abs_path
             else
               rel_segment || abs_path
             end
    else
      # NOTE this is the only part of the code that differs from the "clean"
      # URI module.
      return [nil,nil,uri,nil,nil,nil,nil,nil,nil]
    end

    path = '' unless path || opaque # (see RFC2396 Section 5.2)
    [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
  end
end
81
+
82
# Resolves +uri+ against +base+ and returns the result as a String.
#
# Surplus slashes directly after "scheme://" are collapsed first (e.g.
# "http:////host" becomes "http://host"). When both +base+ and +uri+ are
# relative, URI.join raises URI::BadURIError; in that case +uri+ is returned
# on its own. A BadURIError with an absolute +base+ is re-raised instead of
# silently producing nil.
def urljoin(base, uri)
  # '-' is escaped so it is a literal hyphen and not the range '+'..'.'
  # (which would also admit ',' in a scheme name).
  urifixer = /^([A-Za-z][A-Za-z0-9+\-.]*:\/\/)(\/*)(.*?)/u
  uri = uri.sub(urifixer, '\1\3')
  begin
    return URI.join(base, uri).to_s
  rescue URI::BadURIError
    raise unless URI.parse(base).relative?
    return URI.parse(uri).to_s
  end
end
93
+
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/ruby
2
module FeedParserUtilities
  #FIXME we need to find a better place for this method

  # Removes every <!ENTITY ...> declaration and the first <!DOCTYPE ...>
  # declaration from +data+.
  #
  # Returns [rss_version, stripped_data]: rss_version is 'rss091n' when the
  # DOCTYPE mentions "netscape" (case-insensitively) and nil otherwise;
  # stripped_data is the document minus those declarations.
  def stripDoctype(data)
    doctype_pattern = /<!DOCTYPE(.*?)>/m # m is for Regexp::MULTILINE
    without_entities = data.gsub(/<!ENTITY(.*?)>/m, '')
    doctype = without_entities[doctype_pattern, 1] || ''
    version = doctype.downcase.include?('netscape') ? 'rss091n' : nil
    [version, without_entities.sub(doctype_pattern, '')]
  end

  # Parses +htmlSource+ with Hpricot and rewrites every URI-carrying
  # attribute listed below that holds a relative URI into one joined with
  # +baseURI+ (via urljoin). Returns the resulting HTML. +encoding+ is
  # accepted but not used here.
  def resolveRelativeURIs(htmlSource, baseURI, encoding)
    $stderr << "entering resolveRelativeURIs\n" if $debug # FIXME write a decent logger
    # [element name, attribute name] pairs that may carry a URI
    relative_uris = [
      %w[a href], %w[applet codebase], %w[area href], %w[blockquote cite],
      %w[body background], %w[del cite], %w[form action], %w[frame longdesc],
      %w[frame src], %w[iframe longdesc], %w[iframe src], %w[head profile],
      %w[img longdesc], %w[img src], %w[img usemap], %w[input src],
      %w[input usemap], %w[ins cite], %w[link href], %w[object classid],
      %w[object codebase], %w[object data], %w[object usemap], %w[q cite],
      %w[script src]
    ]
    doc = Hpricot(htmlSource)
    relative_uris.each do |ename, eattr|
      doc.search(ename).each do |elem|
        euri = elem.attributes[eattr]
        next unless euri && !euri.empty? && URI.parse(URI.encode(euri)).relative?
        elem.attributes[eattr] = urljoin(baseURI, euri)
      end
    end
    doc.to_html
  end
end
72
+
73
+
@@ -0,0 +1,1235 @@
1
+ #!/usr/bin/ruby
2
+ module FeedParserMixin
3
+ attr_accessor :feeddata, :version, :namespacesInUse, :date_handlers
4
+
5
# Resets all parser state before a feed is parsed.
#
# baseuri  - initial base URI for resolving relative URIs ('' when nil).
# baselang - initial language; when given it is also stored, with '_'
#            replaced by '-', as the feed-level 'language'.
# encoding - character encoding of the document (default 'utf-8').
def startup(baseuri=nil, baselang=nil, encoding='utf-8')
  $stderr << "initializing FeedParser\n" if $debug

  # namespace URI => standard prefix ('' means the core feed vocabulary)
  @namespaces = {
    '' => '',
    'http://backend.userland.com/rss' => '',
    'http://blogs.law.harvard.edu/tech/rss' => '',
    'http://purl.org/rss/1.0/' => '',
    'http://my.netscape.com/rdf/simple/0.9/' => '',
    'http://example.com/newformat#' => '',
    'http://example.com/necho' => '',
    'http://purl.org/echo/' => '',
    'uri/of/echo/namespace#' => '',
    'http://purl.org/pie/' => '',
    'http://purl.org/atom/ns#' => '',
    'http://www.w3.org/2005/Atom' => '',
    'http://purl.org/rss/1.0/modules/rss091#' => '',
    'http://webns.net/mvcb/' => 'admin',
    'http://purl.org/rss/1.0/modules/aggregation/' => 'ag',
    'http://purl.org/rss/1.0/modules/annotate/' => 'annotate',
    'http://media.tangent.org/rss/1.0/' => 'audio',
    'http://backend.userland.com/blogChannelModule' => 'blogChannel',
    'http://web.resource.org/cc/' => 'cc',
    'http://backend.userland.com/creativeCommonsRssModule' => 'creativeCommons',
    'http://purl.org/rss/1.0/modules/company' => 'co',
    'http://purl.org/rss/1.0/modules/content/' => 'content',
    'http://my.theinfo.org/changed/1.0/rss/' => 'cp',
    'http://purl.org/dc/elements/1.1/' => 'dc',
    'http://purl.org/dc/terms/' => 'dcterms',
    'http://purl.org/rss/1.0/modules/email/' => 'email',
    'http://purl.org/rss/1.0/modules/event/' => 'ev',
    'http://rssnamespace.org/feedburner/ext/1.0' => 'feedburner',
    'http://freshmeat.net/rss/fm/' => 'fm',
    'http://xmlns.com/foaf/0.1/' => 'foaf',
    'http://www.w3.org/2003/01/geo/wgs84_pos#' => 'geo',
    'http://postneo.com/icbm/' => 'icbm',
    'http://purl.org/rss/1.0/modules/image/' => 'image',
    'http://www.itunes.com/DTDs/PodCast-1.0.dtd' => 'itunes',
    'http://example.com/DTDs/PodCast-1.0.dtd' => 'itunes',
    'http://purl.org/rss/1.0/modules/link/' => 'l',
    'http://search.yahoo.com/mrss' => 'media',
    'http://madskills.com/public/xml/rss/module/pingback/' => 'pingback',
    'http://prismstandard.org/namespaces/1.2/basic/' => 'prism',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf',
    'http://www.w3.org/2000/01/rdf-schema#' => 'rdfs',
    'http://purl.org/rss/1.0/modules/reference/' => 'ref',
    'http://purl.org/rss/1.0/modules/richequiv/' => 'reqv',
    'http://purl.org/rss/1.0/modules/search/' => 'search',
    'http://purl.org/rss/1.0/modules/slash/' => 'slash',
    'http://schemas.xmlsoap.org/soap/envelope/' => 'soap',
    'http://purl.org/rss/1.0/modules/servicestatus/' => 'ss',
    'http://hacks.benhammersley.com/rss/streaming/' => 'str',
    'http://purl.org/rss/1.0/modules/subscription/' => 'sub',
    'http://purl.org/rss/1.0/modules/syndication/' => 'sy',
    'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo',
    'http://purl.org/rss/1.0/modules/threading/' => 'thr',
    'http://purl.org/rss/1.0/modules/textinput/' => 'ti',
    'http://madskills.com/public/xml/rss/module/trackback/' => 'trackback',
    'http://wellformedweb.org/commentAPI/' => 'wfw',
    'http://purl.org/rss/1.0/modules/wiki/' => 'wiki',
    'http://www.w3.org/1999/xhtml' => 'xhtml',
    'http://www.w3.org/XML/1998/namespace' => 'xml',
    'http://www.w3.org/1999/xlink' => 'xlink',
    'http://schemas.pocketsoap.com/rss/myDescModule/' => 'szf'
  }
  # same table keyed by lower-cased URI, for case-insensitive matching
  @matchnamespaces = {}
  @namespaces.each { |uri, prefix| @matchnamespaces[uri.downcase] = prefix }

  @can_be_relative_uri = %w[link id wfw_comment wfw_commentrss docs url href comments license icon logo]
  @can_contain_relative_uris = %w[content title summary info tagline subtitle copyright rights description]
  @can_contain_dangerous_markup = %w[content title summary info tagline subtitle copyright rights description]
  @html_types = %w[text/html application/xhtml+xml]

  @feeddata = FeedParserDict.new # feed-level data
  @encoding = encoding           # character encoding
  @entries = []                  # list of entry-level data
  @version = ''                  # feed type/version
  @namespacesInUse = {}          # hash of namespaces defined by the feed

  # the following are used internally to track state;
  # this is really out of control and should be refactored
  @infeed = false
  @inentry = false
  @incontent = 0 # a counter, not a flag: pushContent/popContent increment/decrement it
  @intextinput = false
  @inimage = false
  @inauthor = false
  @incontributor = false
  @inpublisher = false
  @insource = false
  @sourcedata = FeedParserDict.new
  @contentparams = FeedParserDict.new
  @summaryKey = nil
  @namespacemap = {}
  @elementstack = []
  @basestack = []
  @langstack = []
  @baseuri = baseuri || ''
  @lang = baselang || nil
  @feeddata['language'] = baselang.gsub('_','-') if baselang

  # tried in this order
  @date_handlers = [
    :_parse_date_rfc822, :_parse_date_hungarian, :_parse_date_greek,
    :_parse_date_mssql, :_parse_date_nate, :_parse_date_onblog,
    :_parse_date_w3dtf, :_parse_date_iso8601
  ]
  $stderr << "Leaving startup\n" if $debug # My addition
end
112
+
113
# Called for every start tag.
#
# tag    - the (possibly prefixed) element name.
# attrsd - attributes, either as a Hash or as a list of [name, value] pairs.
#
# Normalizes the attributes, tracks xml:base / xml:lang and namespace
# declarations, passes tags nested inside inline XHTML content through as
# literal markup, and finally dispatches to the matching _start_* handler,
# falling back to push when the dispatch raises NoMethodError.
def unknown_starttag(tag, attrsd)
  $stderr << "start #{tag} with #{attrsd}\n" if $debug
  # LooseFeedParser needs this because SGMLParser sends attrs as a
  # list of lists (like [['type','text/html'],['mode','escaped']])
  attrsd = Hash[*attrsd.flatten] if attrsd.class == Array

  # normalize attrs: downcase every key, and downcase the value when the
  # key is 'rel' or 'type' (on a copy, so the caller's string is untouched)
  attrsD = {}
  attrsd.each do |old_k, value|
    k = old_k.downcase
    if value and ['rel', 'type'].include?(k)
      attrsD[k] = value.downcase
    else
      attrsD[k] = value
    end
  end

  # track xml:base and xml:lang
  baseuri = attrsD['xml:base'] || attrsD['base'] || @baseuri
  @baseuri = urljoin(@baseuri, baseuri)
  lang = attrsD['xml:lang'] || attrsD['lang']
  if lang == ''
    # xml:lang could be explicitly set to '', we need to capture that
    lang = nil
  elsif lang.nil?
    # if no xml:lang is specified, use parent lang
    lang = @lang
  end
  if lang and not lang.empty?
    if ['feed', 'rss', 'rdf:RDF'].include?(tag)
      @feeddata['language'] = lang.gsub('_','-')
    end
  end
  @lang = lang
  @basestack << @baseuri
  @langstack << lang

  # track namespaces (uses the attribute names as given, not downcased)
  attrsd.each do |prefix, uri|
    if /^xmlns:/ =~ prefix # prefix begins with xmlns:
      trackNamespace(prefix[6..-1], uri)
    elsif prefix == 'xmlns'
      trackNamespace(nil, uri)
    end
  end

  # track inline content
  if @incontent != 0 and @contentparams.has_key?('type') and not ( /xml$/ =~ (@contentparams['type'] || 'xml') )
    # element declared itself as escaped markup, but isn't really
    @contentparams['type'] = 'application/xhtml+xml'
  end
  if @incontent != 0 and @contentparams['type'] == 'application/xhtml+xml'
    # Note: probably shouldn't simply recreate localname here, but
    # our namespace handling isn't actually 100% correct in cases where
    # the feed redefines the default namespace, so here we cheat and just
    # reconstruct the element based on localname. This will munge inline
    # content with non-empty qnames.
    tag = tag.split(':')[-1]
    attrsA = attrsd.to_a.collect{|l| "#{l[0]}=\"#{l[1]}\""}
    attrsS = ' '+attrsA.join(' ')
    return handle_data("<#{tag}#{attrsS}>", false)
  end

  # match namespaces
  if /:/ =~ tag
    prefix, suffix = tag.split(':', 2)
  else
    prefix, suffix = '', tag
  end
  prefix = @namespacemap[prefix] || prefix
  if prefix and not prefix.empty?
    prefix = prefix + '_'
  end

  # special hack for better tracking of empty textinput/image elements in
  # illformed feeds: an un-prefixed tag that cannot be a child of
  # textinput/image means we have left that element
  if (prefix.nil? or prefix.empty?) and not (['title', 'link', 'description','name'].include?(tag))
    @intextinput = false
  end
  if (prefix.nil? or prefix.empty?) and not (['title', 'link', 'description', 'url', 'href', 'width', 'height'].include?(tag))
    @inimage = false
  end

  # call special handler (if defined) or default handler
  begin
    return send('_start_'+prefix+suffix, attrsD)
  rescue NoMethodError
    return push(prefix + suffix, true)
  end
end # End unknown_starttag
206
+
207
# Called for every end tag.
#
# Dispatches to the matching _end_* handler (falling back to pop when the
# dispatch raises NoMethodError), emits a literal closing tag when inside
# inline XHTML content, and restores the xml:base / xml:lang that were in
# scope before the element was opened.
def unknown_endtag(tag)
  $stderr << "end #{tag}\n" if $debug
  # match namespaces
  if tag.index(':')
    prefix, suffix = tag.split(':',2)
  else
    prefix, suffix = '', tag
  end
  prefix = @namespacemap[prefix] || prefix
  if prefix and not prefix.empty?
    prefix = prefix + '_'
  end

  # call special handler (if defined) or default handler
  begin
    send('_end_' + prefix + suffix) # NOTE no return here! do not add it!
  rescue NoMethodError
    pop(prefix + suffix)
  end

  # track inline content
  # Same test as in unknown_starttag: only a declared type that does NOT end
  # in "xml" is re-labelled as XHTML.
  if @incontent != 0 and @contentparams.has_key?('type') and not ( /xml$/ =~ (@contentparams['type'] || 'xml') )
    # element declared itself as escaped markup, but it isn't really
    @contentparams['type'] = 'application/xhtml+xml'
  end
  if @incontent != 0 and @contentparams['type'] == 'application/xhtml+xml'
    tag = tag.split(':')[-1]
    handle_data("</#{tag}>", false)
  end

  # track xml:base and xml:lang going out of scope
  if @basestack and not @basestack.empty?
    @basestack.pop
    if @basestack and @basestack[-1] and not (@basestack.empty? or @basestack[-1].empty?)
      @baseuri = @basestack[-1]
    end
  end
  if @langstack and not @langstack.empty?
    @langstack.pop
    if @langstack and not @langstack.empty?
      @lang = @langstack[-1]
    end
  end
end
251
+
252
# LooseParserOnly
# Called for each character reference, e.g. for '&#160;', ref will be '160'.
#
# References to the markup-significant characters (", &, ', <, >) are kept
# as references; every other reference is converted to the character itself
# (UTF-8). The text is appended to the element on top of the stack; nothing
# happens when no element is open. +ref+ itself is not modified.
def handle_charref(ref)
  $stderr << "entering handle_charref with #{ref}\n" if $debug
  return if @elementstack.nil? or @elementstack.empty?
  ref = ref.downcase # copy: the caller's string may be frozen or reused
  chars = ['34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e']
  if chars.include?(ref)
    text = "&##{ref};"
  else
    if ref[0..0] == 'x'
      c = (ref[1..-1]).to_i(16)
    else
      c = ref.to_i
    end
    text = [c].pack('U*')
  end
  @elementstack[-1][2] << text
end
271
+
272
# LooseParserOnly
# Called for each entity reference, e.g. for '&copy;', ref will be 'copy'.
#
# The five XML entities (lt, gt, quot, amp, apos) are kept as references;
# anything else is decoded through HTMLEntities. The text is appended to
# the element on top of the stack; nothing happens when no element is open.
def handle_entityref(ref)
  return if @elementstack.nil? or @elementstack.empty?
  $stderr << "entering handle_entityref with #{ref}\n" if $debug
  text = if %w[lt gt quot amp apos].include?(ref)
           "&#{ref};"
         else
           HTMLEntities::decode_entities("&#{ref};")
         end
  @elementstack[-1][2] << text
end
286
+
287
# Called for each block of plain text, i.e. outside of any tag and not
# containing any character or entity references.
#
# Appends +text+ to the element on top of the stack, XML-escaping it first
# (String#to_xs) when +escape+ is set and the current content type is
# 'application/xhtml+xml'. Does nothing when no element is open.
def handle_data(text, escape=true)
  return if @elementstack.nil? or @elementstack.empty?
  needs_escaping = escape && @contentparams['type'] == 'application/xhtml+xml'
  text = text.to_xs if needs_escaping
  @elementstack[-1][2] << text
end
296
+
297
# Called for each comment, e.g. <!-- insert message here -->.
# Comments are ignored.
def handle_comment(comment)
  nil
end

# Called for each processing instruction; ignored.
def handle_pi(text)
  nil
end

# Called for each declaration; ignored.
def handle_decl(text)
  nil
end
306
+
307
# for LooseFeedParser
# Handles the declaration starting at index +i+ of @rawdata and returns the
# index at which parsing should resume.
#
# A CDATA section has its body XML-escaped and passed to handle_data (an
# unterminated section runs to the end of the data). Any other declaration
# is skipped up to and including its closing '>'.
def parse_declaration(i)
  $stderr << "entering parse_declaration\n" if $debug
  unless @rawdata[i...i+9] == '<![CDATA['
    return @rawdata.index(/>/,i).to_i + 1
  end
  body_start = i + 9
  k = @rawdata.index(/\]\]>/u, body_start) || @rawdata.length
  handle_data(@rawdata[body_start...k].to_xs, false)
  k + 3
end
320
+
321
# Returns the lower-cased MIME type for +contentType+, expanding the Atom
# shorthands 'text', 'html' and 'xhtml' to their full MIME types.
# The argument is not modified, so frozen strings and literals are safe.
def mapContentType(contentType)
  contentType = contentType.downcase
  case contentType
  when 'text'
    contentType = 'text/plain'
  when 'html'
    contentType = 'text/html'
  when 'xhtml'
    contentType = 'application/xhtml+xml'
  end
  return contentType
end
333
+
334
# Records a namespace declaration (+prefix+ is nil for the default
# namespace).
#
# While no feed version is known yet, certain namespaces determine it
# (rss090, rss10, atom10). Any backend.userland.com/rss URI is normalized
# to 'http://backend.userland.com/rss'. A known URI maps the feed's prefix
# to the standard prefix in @namespacemap; every declaration is recorded in
# @namespacesInUse.
def trackNamespace(prefix, uri)
  loweruri = uri.downcase.strip
  version_unset = (@version.nil? or @version.empty?)
  if version_unset and prefix.nil? and loweruri == 'http://my.netscape.com/rdf/simple/0.9/'
    @version = 'rss090'
  elsif version_unset and loweruri == 'http://purl.org/rss/1.0/'
    @version = 'rss10'
  elsif version_unset and loweruri == 'http://www.w3.org/2005/atom'
    @version = 'atom10'
  elsif /backend\.userland\.com\/rss/ =~ loweruri
    # match any backend.userland.com namespace
    uri = 'http://backend.userland.com/rss'
    loweruri = uri
  end

  if @matchnamespaces.has_key?(loweruri)
    standard_prefix = @matchnamespaces[loweruri]
    @namespacemap[prefix] = standard_prefix
    @namespacesInUse[standard_prefix] = uri
  else
    @namespacesInUse[prefix || ''] = uri
  end
end
355
+
356
# Resolves +uri+ against the current base URI ('' when none is set).
def resolveURI(uri)
  base = @baseuri || ''
  urljoin(base, uri)
end
359
+
360
# Hook for decoding entities in +data+ of +element+; this implementation
# returns +data+ unchanged.
def decodeEntities(element, data)
  data
end
363
+
364
# Opens +element+: puts a new [name, expectingText, pieces] frame with an
# empty pieces list on top of the element stack.
def push(element, expectingText)
  frame = [element, expectingText, []]
  @elementstack << frame
end
367
+
368
# Closes +element+ and returns its accumulated text.
#
# Returns nil when no element is open or the innermost open element is not
# +element+. Otherwise the collected pieces are joined (and stripped unless
# +stripWhitespace+ is false). If the element was not expecting text the raw
# string is returned immediately; otherwise it is base64-decoded, resolved
# against the base URI, entity-decoded, has relative URIs in embedded markup
# resolved, is sanitized and re-encoded as applicable, and finally stored in
# the current entry or feed context.
def pop(element, stripWhitespace=true)
  return if @elementstack.nil? or @elementstack.empty?
  return if @elementstack[-1][0] != element
  element, expectingText, pieces = @elementstack.pop
  output = pieces.class == Array ? pieces.join('') : pieces
  output.strip! if stripWhitespace
  return output unless expectingText

  # decode base64 content (keep the raw text when decoding yields nothing)
  if @contentparams['base64']
    out64 = Base64::decode64(output) # a.k.a. [output].unpack('m')[0]
    output = out64 unless output.empty? or out64.empty?
  end

  # resolve relative URIs
  if @can_be_relative_uri.include?(element) and output and not output.empty?
    output = resolveURI(output)
  end

  # decode entities within embedded markup
  output = decodeEntities(element, output) unless @contentparams['base64']

  # remove temporary cruft from contentparams
  @contentparams.delete('mode')
  @contentparams.delete('base64')

  # resolve relative URIs within embedded markup
  if @html_types.include?(mapContentType(@contentparams['type'] || 'text/html')) and @can_contain_relative_uris.include?(element)
    output = FeedParser.resolveRelativeURIs(output, @baseuri, @encoding)
  end
  # sanitize embedded markup
  if @html_types.include?(mapContentType(@contentparams['type'] || 'text/html')) and @can_contain_dangerous_markup.include?(element)
    output = FeedParser.sanitizeHTML(output, @encoding)
  end

  # FIXME everything is turned into utf-8, not unicode
  if @encoding and not @encoding.empty? and @encoding != 'utf-8'
    output = uconvert(output, @encoding, 'utf-8')
  end

  # categories/tags/keywords/whatever are handled in _end_category
  return output if element == 'category'

  # store output in appropriate place(s)
  if @inentry and not @insource
    entry = @entries[-1]
    if element == 'content'
      entry[element] ||= []
      contentparams = Marshal.load(Marshal.dump(@contentparams)) # deepcopy
      contentparams['value'] = output
      entry[element] << contentparams
    elsif element == 'link'
      entry[element] = output
      entry['links'][-1]['href'] = output if output and not output.empty?
    else
      element = 'summary' if element == 'description'
      entry[element] = output
      if @incontent != 0
        contentparams = Marshal.load(Marshal.dump(@contentparams))
        contentparams['value'] = output
        entry[element + '_detail'] = contentparams
      end
    end
  elsif (@infeed or @insource) and not @intextinput and not @inimage
    context = getContext()
    element = 'subtitle' if element == 'description'
    context[element] = output
    if element == 'link'
      context['links'][-1]['href'] = output
    elsif @incontent != 0
      contentparams = Marshal.load(Marshal.dump(@contentparams))
      contentparams['value'] = output
      context[element + '_detail'] = contentparams
    end
  end
  output
end
460
+
461
# Opens a content-bearing element: bumps the content nesting counter,
# initialises @contentparams (type, language, base, base64) from the
# element's attributes and the current scope, then pushes +tag+.
def pushContent(tag, attrsD, defaultContentType, expectingText)
  @incontent += 1 # nesting depth, see popContent
  declared_type = attrsD['type'] || defaultContentType
  @contentparams = FeedParserDict.new({'type' => mapContentType(declared_type), 'language' => @lang, 'base' => @baseuri})
  @contentparams['base64'] = isBase64(attrsD, @contentparams)
  push(tag, expectingText)
end
468
+
469
# Closes a content-bearing element: pops +tag+, decrements the content
# nesting counter, empties @contentparams and returns the popped value.
def popContent(tag)
  popped = pop(tag)
  @incontent -= 1
  @contentparams.clear
  popped
end
475
+
476
# Rewrites the namespace prefix of a qualified +name+ ("prefix:suffix") to
# the standard prefix recorded in @namespacemap, leaving unknown prefixes
# and unprefixed names untouched. Only the first ':' separates the prefix.
def mapToStandardPrefix(name)
  colonpos = name.index(':')
  if colonpos
    # exclusive range: with the colon at index 0 the prefix is '' (an
    # inclusive 0..colonpos-1 would be 0..-1, i.e. the whole string)
    prefix = name[0...colonpos]
    suffix = name[colonpos+1..-1]
    prefix = @namespacemap[prefix] || prefix
    name = prefix + ':' + suffix
  end
  return name
end
486
+
487
# Looks up attribute +name+ in +attrsD+ after translating its namespace
# prefix with mapToStandardPrefix.
def getAttribute(attrsD, name)
  standard_name = mapToStandardPrefix(name)
  attrsD[standard_name]
end
490
+
491
# Decides whether an element's content is base64 encoded: true when the
# element says mode="base64", false when the content type is textual
# (text/*, */xml or *+xml), and true for every other type.
def isBase64(attrsD, contentparams)
  return true if attrsD['mode'] == 'base64'
  textual = /(^text\/)|(\+xml$)|(\/xml$)/ =~ contentparams['type']
  textual ? false : true
end
498
+
499
# Normalizes link-like attributes in place: the first of 'url', 'uri' or
# 'href' that has a value is stored as 'href', and 'url'/'uri' are removed.
# Returns +attrsD+ (unchanged when none of the three is set).
def itsAnHrefDamnIt(attrsD)
  href = attrsD['url'] || attrsD['uri'] || attrsD['href']
  return attrsD unless href
  %w[url uri].each { |name| attrsD.delete(name) }
  attrsD['href'] = href
  attrsD
end
508
+
509
+
510
# Stores +value+ under +key+ in the current context unless a (truthy)
# value is already present there.
def _save(key, value)
  target = getContext()
  target[key] ||= value
end
514
+
515
# Handler for <rss>: unless a version has already been determined, derives
# it from the 'version' attribute: 0.91-0.94 map to their specific names,
# any 2.x becomes 'rss20', and everything else plain 'rss'.
def _start_rss(attrsD)
  return if @version and not @version.empty?

  attr_version = attrsD['version'] || ''
  known = {'0.91' => 'rss091u',
           '0.92' => 'rss092',
           '0.93' => 'rss093',
           '0.94' => 'rss094'}[attr_version]
  @version = if known and not known.empty?
               known
             elsif /^2\./ =~ attr_version
               'rss20'
             else
               'rss'
             end
end
534
+
535
# Handler for <dlhottitles>: marks the feed as version 'hotrss'
# unconditionally; the attributes are not consulted.
def _start_dlhottitles(attrsD)
  @version = 'hotrss'
end
538
+
539
# Handler for <channel> (and, via the alias, <feedinfo>): enters feed scope
# and processes the CDF-style 'lastmod' / 'href' attributes.
def _start_channel(attrsD)
  @infeed = true
  _cdf_common(attrsD)
end
alias _start_feedinfo _start_channel
544
+
545
# Treats the CDF attributes 'lastmod' and 'href' as if they had been child
# elements: opens the corresponding element (modified / link), sets its
# collected text to the attribute value and closes it again.
def _cdf_common(attrsD)
  if attrsD.key?('lastmod')
    _start_modified({})
    # replace the pieces of the frame just pushed with the attribute value
    @elementstack.last[-1] = attrsD['lastmod']
    _end_modified
  end
  if attrsD.key?('href')
    _start_link({})
    @elementstack.last[-1] = attrsD['href']
    _end_link
  end
end
557
+
558
# Handler for <feed>: enters feed scope and, unless a version has already
# been determined, derives it from the 'version' attribute (0.1-0.3 map to
# atom01-atom03; anything else becomes plain 'atom').
def _start_feed(attrsD)
  @infeed = true
  versionmap = {'0.1' => 'atom01',
                '0.2' => 'atom02',
                '0.3' => 'atom03'
  }

  if not @version or @version.empty?
    attr_version = attrsD['version']
    version = versionmap[attr_version]
    # test the looked-up value, not @version (which is known to be empty
    # here, so the mapped version would never be used)
    if version and not version.empty?
      @version = version
    else
      @version = 'atom'
    end
  end
end
575
+
576
# Handler for </channel> (and, via the alias, </feed>): leaves feed scope.
def _end_channel
  @infeed = false
end
alias _end_feed _end_channel
580
+
581
# Handler for <image>: enters image scope, pushes the element (no text
# expected) and makes sure the current context has an 'image' dict.
def _start_image(attrsD)
  @inimage = true
  push('image', false)
  target = getContext()
  target['image'] ||= FeedParserDict.new
end

# Handler for </image>: pops the element and leaves image scope.
def _end_image
  pop('image')
  @inimage = false
end
592
+
593
# Handler for <textinput> / <textInput>: enters textinput scope, pushes the
# element (no text expected) and makes sure the current context has a
# 'textinput' dict.
def _start_textinput(attrsD)
  @intextinput = true
  push('textinput', false)
  target = getContext()
  target['textinput'] ||= FeedParserDict.new
end
alias _start_textInput _start_textinput

# Handler for </textinput> / </textInput>: pops the element and leaves
# textinput scope.
def _end_textinput
  pop('textinput')
  @intextinput = false
end
alias _end_textInput _end_textinput
606
+
607
# Handler for <author> and its synonyms (managingeditor, dc:author,
# dc:creator, itunes:author): enters author scope and pushes 'author',
# expecting text.
def _start_author(attrsD)
  @inauthor = true
  push('author', true)
end
alias _start_managingeditor _start_author
alias _start_dc_author _start_author
alias _start_dc_creator _start_author
alias _start_itunes_author _start_author

# Closing handler for the same elements: pops 'author', leaves author scope
# and synchronises the author string with its detail dict.
def _end_author
  pop('author')
  @inauthor = false
  _sync_author_detail()
end
alias _end_managingeditor _end_author
alias _end_dc_author _end_author
alias _end_dc_creator _end_author
alias _end_itunes_author _end_author
625
+
626
# Handler for <itunes:owner>: enters publisher scope and pushes 'publisher'
# (no text expected).
def _start_itunes_owner(attrsD)
  @inpublisher = true
  push('publisher', false)
end

# Handler for </itunes:owner>: pops 'publisher', leaves publisher scope and
# synchronises the publisher string with its detail dict.
def _end_itunes_owner
  pop('publisher')
  @inpublisher = false
  _sync_author_detail('publisher')
end
636
+
637
# Handler for <contributor>: enters contributor scope, appends a fresh dict
# to the context's 'contributors' list (creating the list when needed) and
# pushes 'contributor' (no text expected).
def _start_contributor(attrsD)
  @incontributor = true
  target = getContext()
  target['contributors'] ||= []
  target['contributors'] << FeedParserDict.new
  push('contributor', false)
end

# Handler for </contributor>: pops the element and leaves contributor scope.
def _end_contributor
  pop('contributor')
  @incontributor = false
end
649
+
650
# Handler for <dc:contributor>: like <contributor>, but the element's text
# is the contributor's name, so 'name' is pushed instead.
def _start_dc_contributor(attrsD)
  @incontributor = true
  target = getContext()
  target['contributors'] ||= []
  target['contributors'] << FeedParserDict.new
  push('name', false)
end

# Handler for </dc:contributor>: finishes the name (while still in
# contributor scope) and then leaves contributor scope.
def _end_dc_contributor
  _end_name
  @incontributor = false
end
662
+
663
# Handler for <name> / <itunes:name>: pushes 'name' (no text expected).
def _start_name(attrsD)
  push('name', false)
end
alias _start_itunes_name _start_name

# Handler for </name> / </itunes:name>: stores the popped name with
# whichever enclosing construct is active, checked in the order publisher,
# author, contributor, textinput.
def _end_name
  value = pop('name')
  if @inpublisher
    _save_author('name', value, 'publisher')
  elsif @inauthor
    _save_author('name', value)
  elsif @incontributor
    _save_contributor('name', value)
  elsif @intextinput
    getContext()['textinput']['name'] = value
  end
end
alias _end_itunes_name _end_name
682
+
683
# Handler for <width>: pushes 'width' (no text expected).
def _start_width(attrsD)
  push('width', false)
end

# Handler for </width>: converts the popped text to an integer and, when
# inside an image, stores it as the image's width.
def _end_width
  value = pop('width').to_i
  return unless @inimage
  getContext['image']['width'] = value
end
694
+
695
# Handler for <height>: pushes 'height' (no text expected).
def _start_height(attrsD)
  push('height', false)
end

# Handler for </height>: converts the popped text to an integer and, when
# inside an image, stores it as the image's height.
def _end_height
  value = pop('height').to_i
  return unless @inimage
  getContext()['image']['height'] = value
end
706
+
707
# Handler for <url>, <homepage> and <uri>: pushes 'href', expecting text.
def _start_url(attrsD)
  push('href', true)
end
alias _start_homepage _start_url
alias _start_uri _start_url

# Closing handler for the same elements: stores the popped URL with
# whichever enclosing construct is active, checked in the order author,
# contributor, image, textinput (where it is stored as 'link').
def _end_url
  value = pop('href')
  if @inauthor
    _save_author('href', value)
  elsif @incontributor
    _save_contributor('href', value)
  elsif @inimage
    getContext()['image']['href'] = value
  elsif @intextinput
    getContext()['textinput']['link'] = value
  end
end
alias _end_homepage _end_url
alias _end_uri _end_url
729
+
730
# Pushes an 'email' element; attrsD is not consulted.
# Also reachable as _start_itunes_email.
def _start_email(attrsD)
  push('email', false)
end
alias _start_itunes_email _start_email
734
+
735
# Pops the 'email' element and files it according to the first flag that is
# set, in this order: publisher, author, contributor. Otherwise the value is
# discarded. Also reachable as _end_itunes_email.
def _end_email
  address = pop('email')
  if @inpublisher
    _save_author('email', address, 'publisher')
  elsif @inauthor
    _save_author('email', address)
  elsif @incontributor
    _save_contributor('email', address)
  end
end
alias _end_itunes_email _end_email
746
+
747
# Returns the container that handlers should currently write into:
# @sourcedata while @insource is set, otherwise the last element of
# @entries while @inentry is set, otherwise @feeddata.
def getContext
  return @sourcedata if @insource
  return @entries[-1] if @inentry
  @feeddata
end
757
+
758
# Stores value under key inside context["<prefix>_detail"], creating that
# FeedParserDict on first use, then calls _sync_author_detail.
# Note: the sync call passes no argument, so it always runs with that
# method's default key even when prefix is something other than 'author'.
def _save_author(key, value, prefix='author')
  detail_key = prefix + '_detail'
  ctx = getContext
  ctx[detail_key] = FeedParserDict.new unless ctx[detail_key]
  ctx[detail_key][key] = value
  _sync_author_detail()
end
764
+
765
# Stores value under key on the last entry of context['contributors'].
# When the list is missing it is first created holding one empty
# FeedParserDict.
def _save_contributor(key, value)
  ctx = getContext
  ctx['contributors'] = [FeedParserDict.new] unless ctx['contributors']
  ctx['contributors'][-1][key] = value
end
770
+
771
# Keeps context[key] and context["<key>_detail"] in agreement.
#
# * When the detail hash exists and is non-empty, context[key] is rebuilt
#   from it: "name (email)" when both are non-empty, otherwise whichever of
#   the two is non-empty. If neither is, context[key] is left untouched.
# * Otherwise context[key] (a string such as "Joe (joe@example.com)") is
#   split into a name and an e-mail address, which are written to
#   context["<key>_detail"]. Nothing happens when context[key] is missing,
#   empty, or contains no e-mail address.
#
# key - base name of the attribute to sync (default 'author').
def _sync_author_detail(key='author')
  context = getContext()
  detail_key = "#{key}_detail"
  detail = context[detail_key]
  if detail and not detail.empty?
    name = detail['name']
    email = detail['email']
    has_name = (name and not name.empty?)
    # The original tested name.empty? twice and never looked at the email,
    # producing "Name ()" for an empty address.
    has_email = (email and not email.empty?)

    if has_name and has_email
      context[key] = "#{name} (#{email})"
    elsif has_name
      context[key] = name
    elsif has_email
      context[key] = email
    end
  else
    author = context[key].dup unless context[key].nil?
    return if not author or author.empty?
    emailmatch = author.match(/(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))/)
    # Without an address there is nothing to split out; indexing a nil
    # match would raise NoMethodError.
    return unless emailmatch
    email = emailmatch[1]
    # Remove the address, any "()" it leaves behind, and one wrapping
    # parenthesis at either end of what remains.
    author = author.gsub(email, '').gsub('()', '').strip
    author = author.sub(/\A\(/, '').sub(/\)\z/, '').strip
    context[detail_key] ||= FeedParserDict.new
    context[detail_key]['name'] = author
    context[detail_key]['email'] = email
  end
end
801
+
802
# Opens 'subtitle' content via pushContent with a default type of
# 'text/plain'. Shared by the tagline and itunes:subtitle handlers.
def _start_subtitle(attrsD)
  pushContent('subtitle', attrsD, 'text/plain', true)
end
alias _start_tagline _start_subtitle
alias _start_itunes_subtitle _start_subtitle
807
+
808
# Closes 'subtitle' content via popContent.
# Shared by the tagline and itunes:subtitle handlers.
def _end_subtitle
  popContent('subtitle')
end
alias _end_tagline _end_subtitle
alias _end_itunes_subtitle _end_subtitle
813
+
814
# Opens 'rights' content via pushContent with a default type of
# 'text/plain'. Shared by the dc:rights and copyright handlers.
def _start_rights(attrsD)
  pushContent('rights', attrsD, 'text/plain', true)
end
alias _start_dc_rights _start_rights
alias _start_copyright _start_rights
819
+
820
# Closes 'rights' content via popContent.
# Shared by the dc:rights and copyright handlers.
def _end_rights
  popContent('rights')
end
alias _end_dc_rights _end_rights
alias _end_copyright _end_rights
825
+
826
# Begins a new entry: appends an empty FeedParserDict to @entries, pushes
# 'item', sets @inentry and resets @guidislink. A non-empty 'rdf:about'
# attribute becomes the entry's 'id'. Finishes by calling _cdf_common.
# Shared by the entry and product handlers through the aliases below.
def _start_item(attrsD)
  @entries << FeedParserDict.new
  push('item', false)
  @inentry = true
  @guidislink = false
  about = getAttribute(attrsD, 'rdf:about')
  getContext()['id'] = about if about and not about.empty?
  _cdf_common(attrsD)
end
alias _start_entry _start_item
alias _start_product _start_item
840
+
841
# Pops 'item' and clears the in-entry flag. Also reachable as _end_entry.
def _end_item
  pop('item')
  @inentry = false
end
alias _end_entry _end_item
846
+
847
# Pushes a 'language' element. Also reachable as _start_language.
def _start_dc_language(attrsD)
  push('language', true)
end
alias _start_language _start_dc_language
851
+
852
# Pops the 'language' element and remembers the popped value in @lang.
# Also reachable as _end_language.
def _end_dc_language
  @lang = pop('language')
end
alias _end_language _end_dc_language
856
+
857
# Pushes a 'publisher' element. Also reachable as _start_webmaster.
def _start_dc_publisher(attrsD)
  push('publisher', true)
end
alias _start_webmaster _start_dc_publisher
861
+
862
# Pops the 'publisher' element, then syncs the 'publisher' /
# 'publisher_detail' pair. Also reachable as _end_webmaster.
def _end_dc_publisher
  pop('publisher')
  _sync_author_detail('publisher')
end
alias _end_webmaster _end_dc_publisher
867
+
868
# Pushes a 'published' element.
# Shared by the dcterms:issued and issued handlers.
def _start_published(attrsD)
  push('published', true)
end
alias _start_dcterms_issued _start_published
alias _start_issued _start_published
873
+
874
# Pops the 'published' element and saves the result of parse_date on it
# under 'published_parsed'. Shared by the dcterms:issued and issued handlers.
def _end_published
  raw = pop('published')
  _save('published_parsed', parse_date(raw))
end
alias _end_dcterms_issued _end_published
alias _end_issued _end_published
880
+
881
# Pushes an 'updated' element. Shared by the modified, dcterms:modified,
# pubdate and dc:date handlers through the aliases below.
def _start_updated(attrsD)
  push('updated', true)
end
alias _start_modified _start_updated
alias _start_dcterms_modified _start_updated
alias _start_pubdate _start_updated
alias _start_dc_date _start_updated
888
+
889
# Pops the 'updated' element and saves the result of parse_date on it under
# 'updated_parsed'. Shared by the modified, dcterms:modified, pubdate and
# dc:date handlers through the aliases below.
def _end_updated
  raw = pop('updated')
  _save('updated_parsed', parse_date(raw))
end
alias _end_modified _end_updated
alias _end_dcterms_modified _end_updated
alias _end_pubdate _end_updated
alias _end_dc_date _end_updated
897
+
898
# Pushes a 'created' element. Also reachable as _start_dcterms_created.
def _start_created(attrsD)
  push('created', true)
end
alias _start_dcterms_created _start_created
902
+
903
# Pops the 'created' element and saves the result of parse_date on it under
# 'created_parsed'. Also reachable as _end_dcterms_created.
def _end_created
  raw = pop('created')
  _save('created_parsed', parse_date(raw))
end
alias _end_dcterms_created _end_created
908
+
909
# Pushes an element under the name 'expired' (not 'expirationdate').
def _start_expirationdate(attrsD)
  push('expired', true)
end
912
# Pops the 'expired' element and saves the result of parse_date on it under
# 'expired_parsed'.
def _end_expirationdate
  raw = pop('expired')
  _save('expired_parsed', parse_date(raw))
end
915
+
916
# Pushes 'license'. When the 'rdf:resource' attribute is non-empty, its
# value is appended to the third slot of the top @elementstack entry and
# 'license' is popped again immediately; otherwise the element is left open.
def _start_cc_license(attrsD)
  push('license', true)
  resource = getAttribute(attrsD, 'rdf:resource')
  return unless resource and not resource.empty?
  @elementstack[-1][2] << resource
  pop('license')
end
924
+
925
# Pushes a 'license' element; attrsD is not consulted.
def _start_creativecommons_license(attrsD)
  push('license', true)
end
928
+
929
# Pops the 'license' element.
def _end_creativecommons_license
  pop('license')
end
932
+
933
# Adds a {'term', 'scheme', 'label'} tag to context['tags'] unless an equal
# tag is already present. The 'tags' list is created first if missing, even
# when nothing ends up being added. Nothing is added when all three
# arguments are nil or empty.
def addTag(term, scheme, label)
  context = getContext()
  context['tags'] ||= []
  tags = context['tags']
  return if [term, scheme, label].all? { |part| part.nil? or part.empty? }
  tag = FeedParserDict.new({'term' => term, 'scheme' => scheme, 'label' => label})
  context['tags'] << tag unless tags.include?(tag)
end
945
+
946
# Registers a tag built from the element's attributes ('term'; 'scheme'
# with 'domain' as fallback; 'label') and pushes 'category'. Writes a trace
# line to $stderr when $debug is set.
# Shared by the dc:subject and keywords handlers through the aliases below.
def _start_category(attrsD)
  $stderr << "entering _start_category with #{attrsD}\n" if $debug

  addTag(attrsD['term'], attrsD['scheme'] || attrsD['domain'], attrsD['label'])
  push('category', true)
end
alias _start_dc_subject _start_category
alias _start_keywords _start_category
957
+
958
# Pops 'itunes_keywords', splits the popped text on whitespace and adds one
# tag per word under the scheme 'http://www.itunes.com/'.
def _end_itunes_keywords
  keywords = pop('itunes_keywords').split
  keywords.each { |keyword| addTag(keyword, 'http://www.itunes.com/', nil) }
end
963
+
964
# Adds a tag whose term is the element's 'text' attribute, under the scheme
# 'http://www.itunes.com/', then pushes 'category'.
def _start_itunes_category(attrsD)
  term = attrsD['text']
  addTag(term, 'http://www.itunes.com/', nil)
  push('category', true)
end
968
+
969
# Pops the 'category' element. A nil or empty value is ignored. Otherwise
# the value fills in the 'term' of the most recent tag when that tag has no
# term yet; in every other case (no tags at all, or the last tag already has
# a term) it is added as a new tag with no scheme or label.
# Shared by the dc:subject, keywords and itunes:category handlers.
def _end_category
  value = pop('category')
  return if value.nil? or value.empty?
  tags = getContext()['tags']
  # The original ended this condition with a Python-style ':' (a syntax
  # error on Ruby >= 1.9) and assumed 'tags' was always present.
  if tags and not tags.empty? and not tags[-1]['term']
    tags[-1]['term'] = value
  else
    addTag(value, nil, nil)
  end
end
alias :_end_dc_subject :_end_category
alias :_end_keywords :_end_category
alias :_end_itunes_category :_end_category
983
+
984
# Stores the element's attributes, wrapped in a FeedParserDict, as
# context['cloud'].
def _start_cloud(attrsD)
  cloud = FeedParserDict.new(attrsD)
  getContext()['cloud'] = cloud
end
987
+
988
# Records a link element.
#
# 'rel' defaults to 'alternate' and 'type' to 'text/html' (written into the
# attrsD that was passed in). The attributes are run through
# itsAnHrefDamnIt, any 'href' is resolved with resolveURI, and the result is
# appended to context['links']. rel == 'enclosure' is additionally handed to
# _start_enclosure. With an 'href', an alternate link whose mapped type is
# in @html_types also becomes context['link']; without one, 'link' is
# pushed, using whether we are inside a feed, entry or source as the second
# push argument.
# Also reachable as _start_producturl.
def _start_link(attrsD)
  attrsD['rel'] = 'alternate' unless attrsD['rel']
  attrsD['type'] = 'text/html' unless attrsD['type']
  attrsD = itsAnHrefDamnIt(attrsD)
  attrsD['href'] = resolveURI(attrsD['href']) if attrsD.has_key?('href')
  expectingText = @infeed || @inentry || @insource
  context = getContext()
  context['links'] = [] unless context['links']
  context['links'] << FeedParserDict.new(attrsD)
  _start_enclosure(attrsD) if attrsD['rel'] == 'enclosure'
  if attrsD.has_key?('href')
    if attrsD['rel'] == 'alternate' and @html_types.include?(mapContentType(attrsD['type']))
      context['link'] = attrsD['href']
    end
  else
    push('link', expectingText)
  end
end
alias _start_producturl _start_link
1012
+
1013
# Pops the 'link' element and copies the value to context['textinput'] and
# to context['image'], each only while its flag is set (the two checks are
# independent). Also reachable as _end_producturl.
def _end_link
  link = pop('link')
  ctx = getContext
  ctx['textinput']['link'] = link if @intextinput
  ctx['image']['link'] = link if @inimage
end
alias _end_producturl _end_link
1024
+
1025
# Records whether the guid doubles as a link: true when the 'ispermalink'
# attribute is absent or equals 'true'. Then pushes the element as 'id'.
def _start_guid(attrsD)
  permalink = attrsD['ispermalink'] || 'true'
  @guidislink = (permalink == 'true')
  push('id', true)
end
1029
+
1030
# Pops the 'id' element. Saves 'guidislink' as true only when the guid was
# flagged as a permalink and the context has no 'link' yet, and, when the
# guid was flagged as a permalink, also saves its value as 'link'.
def _end_guid
  value = pop('id')
  _save('guidislink', (@guidislink and not getContext().has_key?('link')))
  # guid acts as link, but only if 'ispermalink' is not present or is 'true',
  # and only if the item doesn't already have a link element.
  # NOTE(review): the "doesn't already have a link" part is not enforced
  # here; presumably _save leaves an existing 'link' alone -- confirm.
  # (The original wrote this condition with a trailing ':', which is a
  # syntax error on Ruby >= 1.9.)
  if @guidislink
    _save('link', value)
  end
end
1039
+
1040
+
1041
# Opens 'title' content with a default type of 'text/plain'; the last
# pushContent argument is whether we are inside a feed, entry or source.
# Shared by the dc:title and media:title handlers.
def _start_title(attrsD)
  inside = @infeed || @inentry || @insource
  pushContent('title', attrsD, 'text/plain', inside)
end
alias _start_dc_title _start_title
alias _start_media_title _start_title
1046
+
1047
# Closes 'title' content. While a text input is open the value is also
# stored on context['textinput']; failing that, while an image is open it
# is stored on context['image'].
# Shared by the dc:title and media:title handlers.
def _end_title
  title = popContent('title')
  ctx = getContext
  owner = if @intextinput then 'textinput' elsif @inimage then 'image' end
  ctx[owner]['title'] = title if owner
end
alias _end_dc_title _end_title
alias _end_media_title _end_title
1058
+
1059
# Opens a description. If the context already has a 'summary', the element
# is treated as content instead (@summaryKey is set to 'content' and
# _start_content takes over); otherwise 'description' content is opened
# with a default type of 'text/html'.
def _start_description(attrsD)
  unless getContext().has_key?('summary')
    return pushContent('description', attrsD, 'text/html', @infeed || @inentry || @insource)
  end
  @summaryKey = 'content'
  _start_content(attrsD)
end
1068
+
1069
# Opens 'description' content with a default type of 'text/plain'; the last
# pushContent argument is whether we are inside a feed, entry or source.
def _start_abstract(attrsD)
  inside = @infeed || @inentry || @insource
  pushContent('description', attrsD, 'text/plain', inside)
end
1072
+
1073
# Closes a description. When the element was redirected to content
# (@summaryKey == 'content') this defers to _end_content; otherwise it
# closes 'description' content and, while a text input or (failing that) an
# image is open, stores the value there too. @summaryKey is always reset to
# nil afterwards. Also reachable as _end_abstract.
def _end_description
  if @summaryKey == 'content'
    _end_content()
  else
    value = popContent('description')
    context = getContext()
    if @intextinput
      context['textinput']['description'] = value
    elsif @inimage
      # The original had a Python-style trailing ':' on this branch, which
      # is a syntax error on Ruby >= 1.9.
      context['image']['description'] = value
    end
  end
  @summaryKey = nil
end
alias :_end_abstract :_end_description
1088
+
1089
# Opens 'info' content via pushContent with a default type of 'text/plain'.
# Also reachable as _start_feedburner_browserfriendly.
def _start_info(attrsD)
  pushContent('info', attrsD, 'text/plain', true)
end
alias _start_feedburner_browserfriendly _start_info
1093
+
1094
# Closes 'info' content via popContent.
# Also reachable as _end_feedburner_browserfriendly.
def _end_info
  popContent('info')
end
alias _end_feedburner_browserfriendly _end_info
1098
+
1099
# Stores the generator's attributes as context['generator_detail'] and
# pushes 'generator'. Non-empty attributes are first passed through
# itsAnHrefDamnIt, and an 'href' among them is resolved with resolveURI.
def _start_generator(attrsD)
  if attrsD and !attrsD.empty?
    attrsD = itsAnHrefDamnIt(attrsD)
    attrsD['href'] = resolveURI(attrsD['href']) if attrsD.has_key?('href')
  end
  detail = FeedParserDict.new(attrsD)
  getContext()['generator_detail'] = detail
  push('generator', true)
end
1109
+
1110
# Pops the 'generator' element and, when the context has a
# 'generator_detail', records the popped value there as 'name'.
def _end_generator
  generator_name = pop('generator')
  ctx = getContext
  ctx['generator_detail']['name'] = generator_name if ctx.has_key?('generator_detail')
end
1117
+
1118
# Handles the whole element in one go: pushes 'generator', appends a
# non-empty 'rdf:resource' attribute to the third slot of the top
# @elementstack entry, pops 'generator' again, and sets
# context['generator_detail'] to a dict holding that resource as 'href'
# (the resource may be nil or empty).
def _start_admin_generatoragent(attrsD)
  push('generator', true)
  resource = getAttribute(attrsD, 'rdf:resource')
  @elementstack[-1][2] << resource if resource and not resource.empty?
  pop('generator')
  getContext()['generator_detail'] = FeedParserDict.new({'href' => resource})
end
1127
+
1128
# Handles the whole element in one go: pushes 'errorreportsto', appends a
# non-empty 'rdf:resource' attribute to the third slot of the top
# @elementstack entry, then pops 'errorreportsto' again.
def _start_admin_errorreportsto(attrsD)
  push('errorreportsto', true)
  resource = getAttribute(attrsD, 'rdf:resource')
  @elementstack[-1][2] << resource if resource and not resource.empty?
  pop('errorreportsto')
end
1136
+
1137
# Opens a summary. If the context already has a 'summary', this element is
# treated as content (@summaryKey = 'content', handled by _start_content);
# otherwise @summaryKey becomes 'summary' and content is opened under that
# key with a default type of 'text/plain'.
# Also reachable as _start_itunes_summary.
def _start_summary(attrsD)
  already_summarised = getContext().has_key?('summary')
  @summaryKey = already_summarised ? 'content' : 'summary'
  return _start_content(attrsD) if already_summarised
  pushContent(@summaryKey, attrsD, 'text/plain', true)
end
alias _start_itunes_summary _start_summary
1148
+
1149
# Closes a summary. When the element was redirected to content
# (@summaryKey == 'content') this defers to _end_content; otherwise it
# closes the content opened under @summaryKey, falling back to 'summary'
# when @summaryKey is unset. @summaryKey is always reset to nil afterwards.
# Also reachable as _end_itunes_summary.
def _end_summary
  # The original wrote this condition with a Python-style trailing ':',
  # which is a syntax error on Ruby >= 1.9.
  if @summaryKey == 'content'
    _end_content()
  else
    popContent(@summaryKey || 'summary')
  end
  @summaryKey = nil
end
alias :_end_itunes_summary :_end_summary
1158
+
1159
# Appends the enclosure's attributes (after itsAnHrefDamnIt) to
# context['enclosures'], creating the list when absent. A non-empty 'href'
# also becomes the context's 'id' if no 'id' is set yet.
def _start_enclosure(attrsD)
  attrsD = itsAnHrefDamnIt(attrsD)
  ctx = getContext
  ctx['enclosures'] = [] unless ctx['enclosures']
  ctx['enclosures'] << FeedParserDict.new(attrsD)
  href = attrsD['href']
  return unless href and not href.empty?
  ctx['id'] = href unless ctx['id']
end
1171
+
1172
# Sets the in-source flag, which makes getContext return @sourcedata.
def _start_source(attrsD)
  @insource = true
end
1175
+
1176
# Leaves the source: clears the flag first (so getContext no longer returns
# @sourcedata), stores a Marshal round-trip copy of @sourcedata as
# context['source'], then empties @sourcedata.
def _end_source
  @insource = false
  snapshot = Marshal.load(Marshal.dump(@sourcedata))
  getContext()['source'] = snapshot
  @sourcedata.clear
end
1181
+
1182
# Opens 'content' via pushContent with a default type of 'text/plain',
# copies a non-empty 'src' attribute into @contentparams['src'], and then
# additionally pushes 'content'.
def _start_content(attrsD)
  pushContent('content', attrsD, 'text/plain', true)
  src = attrsD['src']
  # The original wrote this condition as "src.empty?:" with a Python-style
  # trailing ':', which is a syntax error on Ruby >= 1.9.
  if src and not src.empty?
    @contentparams['src'] = src
  end
  push('content', true)
end
1190
+
1191
# Opens 'content' via pushContent with a default type of 'text/html'.
def _start_prodlink(attrsD)
  pushContent('content', attrsD, 'text/html', true)
end
1194
+
1195
# Opens 'content' via pushContent with a default type of
# 'application/xhtml+xml'. Also reachable as _start_xhtml_body.
def _start_body(attrsD)
  pushContent('content', attrsD, 'application/xhtml+xml', true)
end
alias _start_xhtml_body _start_body
1199
+
1200
# Opens 'content' via pushContent with a default type of 'text/html'.
# Also reachable as _start_fullitem.
def _start_content_encoded(attrsD)
  pushContent('content', attrsD, 'text/html', true)
end
alias _start_fullitem _start_content_encoded
1204
+
1205
# Closes 'content'. When the mapped content type (read from @contentparams
# before popping) is 'text/plain' or one of @html_types, the popped value is
# also saved as 'description'. Returns nil, as the original did.
#
# The aliases below used to sit inside the method body, so _end_body,
# _end_xhtml_body, _end_content_encoded, _end_fullitem and _end_prodlink
# did not exist until _end_content itself had been called once. They are
# now defined up front.
def _end_content
  copyToDescription = (['text/plain'] + @html_types).include?(mapContentType(@contentparams['type']))
  value = popContent('content')
  _save('description', value) if copyToDescription
  nil
end
alias :_end_body :_end_content
alias :_end_xhtml_body :_end_content
alias :_end_content_encoded :_end_content
alias :_end_fullitem :_end_content
alias :_end_prodlink :_end_content
1217
+
1218
# Pushes 'itunes_image' and sets context['image'] to a dict holding the
# element's 'href' attribute. Also reachable as _start_itunes_link.
def _start_itunes_image(attrsD)
  push('itunes_image', false)
  image = FeedParserDict.new({'href' => attrsD['href']})
  getContext()['image'] = image
end
alias _start_itunes_link _start_itunes_image
1223
+
1224
# Pops 'itunes_block' and stores true in context['itunes_block'] when the
# popped value is exactly 'yes', false otherwise. Returns that boolean.
#
# The original appended "and true or false" to the assignment. Because
# and/or bind more loosely than "=", that tail was evaluated after the
# assignment and never affected what was stored; it is dropped here.
def _end_itunes_block
  value = pop('itunes_block', false)
  getContext()['itunes_block'] = (value == 'yes')
end
1228
+
1229
# Pops 'itunes_explicit' and stores true in context['itunes_explicit'] when
# the popped value is exactly 'yes', false otherwise. Returns that boolean.
#
# The original appended "and true or false" to the assignment. Because
# and/or bind more loosely than "=", that tail was evaluated after the
# assignment and never affected what was stored; it is dropped here.
def _end_itunes_explicit
  value = pop('itunes_explicit', false)
  getContext()['itunes_explicit'] = (value == 'yes')
end
1233
+ end # End FeedParserMixin
1234
+
1235
+