rfeedparser 0.9.8 → 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/ruby
2
+ module FeedParserUtilities
3
# A Hash whose keys can be addressed by several historical names.
#
# The same attribute (for example "when was this feed last updated?") is
# named differently depending on the feed format. This class satisfies both
# the developer who knows the feed type and the developer who wants one
# consistent interface: #[] and #[]= consult @@keymap to find out which key
# the caller "really means".
class FeedParserDict < Hash
  # Alternate name => storage name. An Array value lists candidate storage
  # names in order of preference; #[]= always writes to the first one.
  @@keymap = {'channel' => 'feed',
              'items' => 'entries',
              'guid' => 'id',
              'date' => 'updated',
              'date_parsed' => 'updated_parsed',
              'description' => ['subtitle', 'summary'],
              'url' => ['href'],
              'modified' => 'updated',
              'modified_parsed' => 'updated_parsed',
              'issued' => 'published',
              'issued_parsed' => 'published_parsed',
              'copyright' => 'rights',
              'copyright_detail' => 'rights_detail',
              'tagline' => 'subtitle',
              'tagline_detail' => 'subtitle_detail'}

  # Hash already responds to #entries, so method_missing would never be
  # reached for it; route it to the 'entries' key explicitly.
  def entries
    return self['entries']
  end

  # pairs may be nil, an Array of [key, value] pairs, or a Hash.
  # Pairs go through #[]= (and therefore through @@keymap); a Hash is merged
  # with merge!, which stores its keys exactly as given.
  def initialize(pairs=nil)
    super()
    if pairs.is_a?(Array) and pairs[0].is_a?(Array) and pairs[0].length == 2
      pairs.each { |k, v| self[k] = v }
    elsif pairs.is_a?(Hash)
      self.merge!(pairs)
    end
  end

  # Looks up key, honouring the alternate names in @@keymap.
  # 'category' and 'categories' are synthesised from the 'tags' entry and
  # yield nil / [] when there are no tags.
  def [](key)
    if key == 'category'
      tags = self['tags']
      return nil if tags.nil? or tags.empty?
      return tags[0]['term']
    end
    if key == 'categories'
      tags = self['tags'] || []
      return tags.collect { |tag| [tag['scheme'], tag['term']] }
    end
    realkey = @@keymap[key] || key
    if realkey.is_a?(Array)
      # The block variable must not be called `key`: on Ruby 1.8 it would
      # overwrite the method argument.
      realkey.each { |candidate| return self[candidate] if has_key?(candidate) }
      return super(key)
    end
    # Note that the original key is preferred over the realkey we (might
    # have) found in @@keymap
    return super(key) if has_key?(key)
    return super(realkey)
  end

  # Stores value under the storage name that @@keymap gives for key (the
  # first candidate when several are listed), or under key itself.
  def []=(key, value)
    if @@keymap.key?(key)
      key = @@keymap[key]
      key = key[0] if key.is_a?(Array)
    end
    super(key, value)
  end

  # Attribute-style access: d.title reads d['title'], d.title = x writes it.
  # Names ending in '!' or '?' or starting with '_' raise NoMethodError.
  #
  # respond_to_missing? is intentionally NOT overridden to match: Marshal
  # asks respond_to?(:marshal_dump) before dumping, and answering true for
  # arbitrary names would break Marshal round-trips of these objects.
  def method_missing(msym, *args)
    methodname = msym.to_s
    # start_with?/end_with? rather than methodname[-1] == '=': on Ruby 1.8
    # String#[] with an Integer returns a character code, never '='.
    if methodname.end_with?('=')
      return self[methodname[0..-2]] = args[0]
    elsif not methodname.end_with?('!', '?') and not methodname.start_with?('_') # FIXME implement with private?
      return self[methodname]
    else
      raise NoMethodError, "whoops, we don't know about the attribute or method called `#{methodname}' for #{self}:#{self.class}"
    end
  end
end
93
+ end
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/ruby
2
+
3
module URI
  # NOTE (original author): this module is reopened only because no better
  # way was found to reach the URI module's internals.

  # Splits +uri+ into the nine-element array
  #   [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
  #
  # Raises InvalidURIError for an absolute URI that has no scheme or no
  # path. Input that matches neither ABS_URI nor REL_URI does NOT raise;
  # it is returned in the third slot of an otherwise-nil array (this is the
  # only intended difference from the stock implementation).
  def self.split(uri)
    case uri
    when ''
      # null uri: every component stays nil

    when ABS_URI
      # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
      #
      # absoluteURI   = scheme ":" ( hier_part | opaque_part )
      # hier_part     = ( net_path | abs_path ) [ "?" query ]
      # opaque_part   = uric_no_slash *uric
      #
      # abs_path      = "/"  path_segments
      # net_path      = "//" authority [ abs_path ]
      #
      # authority     = server | reg_name
      # server        = [ [ userinfo "@" ] hostport ]
      scheme, opaque, userinfo, host, port,
        registry, path, query, fragment = Regexp.last_match.captures

      unless scheme
        raise InvalidURIError,
          "bad URI(absolute but no scheme): #{uri}"
      end
      unless opaque || path || host || registry
        raise InvalidURIError,
          "bad URI(absolute but no path): #{uri}"
      end

    when REL_URI
      # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
      #
      # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
      #
      # net_path      = "//" authority [ abs_path ]
      # abs_path      = "/"  path_segments
      # rel_path      = rel_segment [ abs_path ]
      #
      # authority     = server | reg_name
      # server        = [ [ userinfo "@" ] hostport ]
      scheme = nil
      opaque = nil

      userinfo, host, port, registry,
        rel_segment, abs_path, query, fragment = Regexp.last_match.captures
      path = if rel_segment && abs_path
               rel_segment + abs_path
             else
               rel_segment || abs_path
             end

    else
      # NOTE this is the only part of the code that differs from the "clean"
      # URI module.
      return [nil, nil, uri, nil, nil, nil, nil, nil, nil]
    end

    path = '' unless path || opaque # (see RFC2396 Section 5.2)
    [
      scheme,
      userinfo, host, port, # X
      registry,             # X
      path,                 # Y
      opaque,               # Y
      query,
      fragment
    ]
  end
end
81
+
82
# Resolves +uri+ against +base+ and returns the result as a String.
#
# Before joining, surplus slashes directly after "scheme://" are removed
# (e.g. "http:////host/x" becomes "http://host/x").
#
# When URI.join raises URI::BadURIError and +base+ parses as a relative
# URI, +uri+ alone is parsed and returned. If +base+ is not relative in
# that situation the method returns nil. Other exceptions raised by
# URI.join / URI.parse are not rescued here.
def urljoin(base, uri)
  # '-' is placed last in the character class so it is a literal hyphen.
  # The previous "+-." formed a range from '+' to '.', which also let ','
  # through as a scheme character.
  urifixer = /^([A-Za-z][A-Za-z0-9+.-]*:\/\/)(\/*)(.*?)/u
  uri = uri.sub(urifixer, '\1\3')
  begin
    return URI.join(base, uri).to_s
  rescue URI::BadURIError
    return URI::parse(uri).to_s if URI.parse(base).relative?
  end
end
93
+
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/ruby
2
+ module FeedParserUtilities
3
+ #FIXME we need to find a better place for this method
4
# Strips every <!ENTITY ...> declaration and the first <!DOCTYPE ...>
# declaration from an XML document.
#
# Returns [rss_version, stripped_data]:
#   rss_version   is 'rss091n' when the DOCTYPE text contains "netscape"
#                 (compared in lower case), otherwise nil
#   stripped_data is the document without those declarations
def stripDoctype(data)
  # m is for Regexp::MULTILINE
  without_entities = data.gsub(/<!ENTITY(.*?)>/m, '')

  doctype_re = /<!DOCTYPE(.*?)>/m
  declared = without_entities[doctype_re, 1] || ''

  rss_version = (/netscape/ =~ declared.downcase) ? 'rss091n' : nil
  [rss_version, without_entities.sub(doctype_re, '')]
end
30
+
31
# Rewrites relative URIs found in known URI-bearing attributes of
# +htmlSource+ so that they are joined onto +baseURI+ (via urljoin), and
# returns the resulting markup from Hpricot's to_html.
#
# +encoding+ is accepted but not used by this method.
def resolveRelativeURIs(htmlSource, baseURI, encoding)
  $stderr << "entering resolveRelativeURIs\n" if $debug # FIXME write a decent logger
  # [element name, attribute name] pairs whose value is treated as a URI
  uri_attributes = [
    ['a', 'href'],
    ['applet', 'codebase'],
    ['area', 'href'],
    ['blockquote', 'cite'],
    ['body', 'background'],
    ['del', 'cite'],
    ['form', 'action'],
    ['frame', 'longdesc'],
    ['frame', 'src'],
    ['iframe', 'longdesc'],
    ['iframe', 'src'],
    ['head', 'profile'],
    ['img', 'longdesc'],
    ['img', 'src'],
    ['img', 'usemap'],
    ['input', 'src'],
    ['input', 'usemap'],
    ['ins', 'cite'],
    ['link', 'href'],
    ['object', 'classid'],
    ['object', 'codebase'],
    ['object', 'data'],
    ['object', 'usemap'],
    ['q', 'cite'],
    ['script', 'src'],
  ]
  doc = Hpricot(htmlSource)
  uri_attributes.each do |tag_name, attr_name|
    doc.search(tag_name).each do |node|
      link = node.attributes[attr_name]
      next unless link && !link.empty? && URI.parse(URI.encode(link)).relative?
      node.attributes[attr_name] = urljoin(baseURI, link)
    end
  end
  return doc.to_html
end
71
+ end
72
+
73
+
@@ -0,0 +1,1235 @@
1
+ #!/usr/bin/ruby
2
+ module FeedParserMixin
3
+ attr_accessor :feeddata, :version, :namespacesInUse, :date_handlers
4
+
5
# Resets all parser state. Builds the namespace tables, the element
# classification lists, the result containers and the state flags.
#
# baseuri  - initial base URI ('' when nil)
# baselang - initial language; when given it is also stored, with '_'
#            replaced by '-', as the feed's 'language'
# encoding - character encoding recorded in @encoding
def startup(baseuri=nil, baselang=nil, encoding='utf-8')
  $stderr << "initializing FeedParser\n" if $debug

  # namespace URI => standard prefix ('' for the core feed vocabularies)
  @namespaces = {
    '' => '',
    'http://backend.userland.com/rss' => '',
    'http://blogs.law.harvard.edu/tech/rss' => '',
    'http://purl.org/rss/1.0/' => '',
    'http://my.netscape.com/rdf/simple/0.9/' => '',
    'http://example.com/newformat#' => '',
    'http://example.com/necho' => '',
    'http://purl.org/echo/' => '',
    'uri/of/echo/namespace#' => '',
    'http://purl.org/pie/' => '',
    'http://purl.org/atom/ns#' => '',
    'http://www.w3.org/2005/Atom' => '',
    'http://purl.org/rss/1.0/modules/rss091#' => '',
    'http://webns.net/mvcb/' => 'admin',
    'http://purl.org/rss/1.0/modules/aggregation/' => 'ag',
    'http://purl.org/rss/1.0/modules/annotate/' => 'annotate',
    'http://media.tangent.org/rss/1.0/' => 'audio',
    'http://backend.userland.com/blogChannelModule' => 'blogChannel',
    'http://web.resource.org/cc/' => 'cc',
    'http://backend.userland.com/creativeCommonsRssModule' => 'creativeCommons',
    'http://purl.org/rss/1.0/modules/company' => 'co',
    'http://purl.org/rss/1.0/modules/content/' => 'content',
    'http://my.theinfo.org/changed/1.0/rss/' => 'cp',
    'http://purl.org/dc/elements/1.1/' => 'dc',
    'http://purl.org/dc/terms/' => 'dcterms',
    'http://purl.org/rss/1.0/modules/email/' => 'email',
    'http://purl.org/rss/1.0/modules/event/' => 'ev',
    'http://rssnamespace.org/feedburner/ext/1.0' => 'feedburner',
    'http://freshmeat.net/rss/fm/' => 'fm',
    'http://xmlns.com/foaf/0.1/' => 'foaf',
    'http://www.w3.org/2003/01/geo/wgs84_pos#' => 'geo',
    'http://postneo.com/icbm/' => 'icbm',
    'http://purl.org/rss/1.0/modules/image/' => 'image',
    'http://www.itunes.com/DTDs/PodCast-1.0.dtd' => 'itunes',
    'http://example.com/DTDs/PodCast-1.0.dtd' => 'itunes',
    'http://purl.org/rss/1.0/modules/link/' => 'l',
    'http://search.yahoo.com/mrss' => 'media',
    'http://madskills.com/public/xml/rss/module/pingback/' => 'pingback',
    'http://prismstandard.org/namespaces/1.2/basic/' => 'prism',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf',
    'http://www.w3.org/2000/01/rdf-schema#' => 'rdfs',
    'http://purl.org/rss/1.0/modules/reference/' => 'ref',
    'http://purl.org/rss/1.0/modules/richequiv/' => 'reqv',
    'http://purl.org/rss/1.0/modules/search/' => 'search',
    'http://purl.org/rss/1.0/modules/slash/' => 'slash',
    'http://schemas.xmlsoap.org/soap/envelope/' => 'soap',
    'http://purl.org/rss/1.0/modules/servicestatus/' => 'ss',
    'http://hacks.benhammersley.com/rss/streaming/' => 'str',
    'http://purl.org/rss/1.0/modules/subscription/' => 'sub',
    'http://purl.org/rss/1.0/modules/syndication/' => 'sy',
    'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo',
    'http://purl.org/rss/1.0/modules/threading/' => 'thr',
    'http://purl.org/rss/1.0/modules/textinput/' => 'ti',
    'http://madskills.com/public/xml/rss/module/trackback/' => 'trackback',
    'http://wellformedweb.org/commentAPI/' => 'wfw',
    'http://purl.org/rss/1.0/modules/wiki/' => 'wiki',
    'http://www.w3.org/1999/xhtml' => 'xhtml',
    'http://www.w3.org/XML/1998/namespace' => 'xml',
    'http://www.w3.org/1999/xlink' => 'xlink',
    'http://schemas.pocketsoap.com/rss/myDescModule/' => 'szf'
  }
  # same table keyed by the lower-cased URI
  @matchnamespaces = {}
  @namespaces.each_pair do |ns_uri, std_prefix|
    @matchnamespaces[ns_uri.downcase] = std_prefix
  end

  @can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'license', 'icon', 'logo']
  @can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
  @can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
  @html_types = ['text/html', 'application/xhtml+xml']

  @feeddata = FeedParserDict.new # feed-level data
  @encoding = encoding           # character encoding
  @entries = []                  # list of entry-level data
  @version = ''                  # feed type/version
  @namespacesInUse = {}          # hash of namespaces defined by the feed

  # the following are used internally to track state;
  # this is really out of control and should be refactored
  @infeed = false
  @inentry = false
  @incontent = 0 # a counter, not a flag: pushContent/popContent increment and decrement it
  @intextinput = false
  @inimage = false
  @inauthor = false
  @incontributor = false
  @inpublisher = false
  @insource = false
  @sourcedata = FeedParserDict.new
  @contentparams = FeedParserDict.new
  @summaryKey = nil
  @namespacemap = {}
  @elementstack = []
  @basestack = []
  @langstack = []
  @baseuri = baseuri || ''
  @lang = baselang || nil
  @feeddata['language'] = baselang.gsub('_','-') if baselang

  # names of the date-parsing methods, kept in this order
  @date_handlers = [
    :_parse_date_rfc822,
    :_parse_date_hungarian, :_parse_date_greek, :_parse_date_mssql,
    :_parse_date_nate, :_parse_date_onblog, :_parse_date_w3dtf, :_parse_date_iso8601
  ]
  $stderr << "Leaving startup\n" if $debug # My addition
end
112
+
113
# Called for every start tag.
#
# tag    - qualified element name as it appears in the document
# attrsd - attributes as a Hash, or as an Array of [name, value] pairs
#          (LooseFeedParser/SGMLParser sends a list of lists such as
#          [['type','text/html'],['mode','escaped']])
#
# Tracks xml:base / xml:lang and namespace declarations, re-emits the tag
# as text while inside inline XHTML content, and otherwise dispatches to a
# _start_<prefix><name> handler, falling back to push when no such handler
# exists.
def unknown_starttag(tag, attrsd)
  $stderr << "start #{tag} with #{attrsd}\n" if $debug
  attrsd = Hash[*attrsd.flatten] if attrsd.class == Array # Magic! Asterisk!

  # normalize attrs: lower-case every attribute name, and lower-case the
  # value of 'rel' and 'type'. The test is on the KEY (it used to be made
  # on the value, so nothing was ever lower-cased) and downcase is
  # non-destructive so the caller's strings are left alone.
  attrsD = {}
  attrsd.each do |old_k, value|
    k = old_k.downcase
    value = value.downcase if value and ['rel', 'type'].include?(k)
    attrsD[k] = value
  end

  # track xml:base and xml:lang
  baseuri = attrsD['xml:base'] || attrsD['base'] || @baseuri
  @baseuri = urljoin(@baseuri, baseuri)
  lang = attrsD['xml:lang'] || attrsD['lang']
  if lang == ''
    # xml:lang could be explicitly set to '', we need to capture that
    lang = nil
  elsif lang.nil?
    # if no xml:lang is specified, use parent lang
    lang = @lang
  end
  if lang and not lang.empty?
    if ['feed', 'rss', 'rdf:RDF'].include?(tag)
      @feeddata['language'] = lang.gsub('_', '-')
    end
  end
  @lang = lang
  @basestack << @baseuri
  @langstack << lang

  # track namespaces
  attrsd.each do |attr_name, uri|
    if /^xmlns:/ =~ attr_name # attribute name begins with xmlns:
      trackNamespace(attr_name[6..-1], uri)
    elsif attr_name == 'xmlns' # (a trailing ':' here was Ruby 1.8-only syntax)
      trackNamespace(nil, uri)
    end
  end

  # track inline content
  if @incontent != 0 and @contentparams.has_key?('type') and not ( /xml$/ =~ (@contentparams['type'] || 'xml') )
    # element declared itself as escaped markup, but isn't really
    @contentparams['type'] = 'application/xhtml+xml'
  end
  if @incontent != 0 and @contentparams['type'] == 'application/xhtml+xml'
    # Note: probably shouldn't simply recreate localname here, but
    # our namespace handling isn't actually 100% correct in cases where
    # the feed redefines the default namespace (which is actually
    # the usual case for inline content, thanks Sam), so here we
    # cheat and just reconstruct the element based on localname
    # because that compensates for the bugs in our namespace handling.
    # This will horribly munge inline content with non-empty qnames,
    # but nobody actually does that, so I'm not fixing it.
    tag = tag.split(':')[-1]
    # each attribute carries its own leading space, so an element without
    # attributes is rendered as "<tag>" rather than "<tag >"
    attrsS = attrsd.to_a.collect { |l| " #{l[0]}=\"#{l[1]}\"" }.join
    return handle_data("<#{tag}#{attrsS}>", false)
  end

  # match namespaces
  if /:/ =~ tag
    prefix, suffix = tag.split(':', 2)
  else
    prefix, suffix = '', tag
  end
  prefix = @namespacemap[prefix] || prefix
  if prefix and not prefix.empty?
    prefix = prefix + '_'
  end

  # special hack for better tracking of empty textinput/image elements in illformed feeds
  # (the textinput condition used to read "not prefix and not prefix.empty?",
  # which can never be true, so @intextinput was never reset here)
  if (prefix.nil? or prefix.empty?) and not (['title', 'link', 'description', 'name'].include?(tag))
    @intextinput = false
  end
  if (prefix.nil? or prefix.empty?) and not (['title', 'link', 'description', 'url', 'href', 'width', 'height'].include?(tag))
    @inimage = false
  end

  # call special handler (if defined) or default handler
  begin
    return send('_start_' + prefix + suffix, attrsD)
  rescue NoMethodError
    return push(prefix + suffix, true)
  end
end # End unknown_starttag
206
+
207
# Called for every end tag.
#
# Dispatches to an _end_<prefix><name> handler (or to pop when none
# exists), re-emits the closing tag as text while inside inline XHTML
# content, and restores the xml:base / xml:lang values of the enclosing
# element.
def unknown_endtag(tag)
  $stderr << "end #{tag}\n" if $debug
  # match namespaces
  if tag.index(':')
    prefix, suffix = tag.split(':', 2)
  else
    prefix, suffix = '', tag
  end
  prefix = @namespacemap[prefix] || prefix
  if prefix and not prefix.empty?
    prefix = prefix + '_'
  end

  # call special handler (if defined) or default handler
  begin
    send('_end_' + prefix + suffix) # NOTE no return here! do not add it!
  rescue NoMethodError
    pop(prefix + suffix)
  end

  # track inline content
  # The "not" mirrors the check in unknown_starttag: only content whose
  # declared type does NOT end in "xml" is re-labelled. It was missing
  # here, which inverted the test.
  if @incontent != 0 and @contentparams.has_key?('type') and not (/xml$/ =~ (@contentparams['type'] || 'xml'))
    # element declared itself as escaped markup, but it isn't really
    @contentparams['type'] = 'application/xhtml+xml'
  end
  if @incontent != 0 and @contentparams['type'] == 'application/xhtml+xml'
    tag = tag.split(':')[-1]
    handle_data("</#{tag}>", false)
  end

  # track xml:base and xml:lang going out of scope
  if @basestack and not @basestack.empty?
    @basestack.pop
    if @basestack and @basestack[-1] and not (@basestack.empty? or @basestack[-1].empty?)
      @baseuri = @basestack[-1]
    end
  end
  if @langstack and not @langstack.empty?
    @langstack.pop
    if @langstack and not @langstack.empty?
      @lang = @langstack[-1]
    end
  end
end
251
+
252
# LooseParserOnly
# Called for each character reference, e.g. for '&#160;', ref will be '160'.
#
# References to the five markup-significant characters (", &, ', <, >) are
# appended to the current element's text as the reference itself; any
# other reference is appended as the character it names. Does nothing when
# the element stack is empty.
def handle_charref(ref)
  $stderr << "entering handle_charref with #{ref}\n" if $debug
  return if @elementstack.nil? or @elementstack.empty?
  # non-destructive: downcase! would modify the caller's string and raise
  # on a frozen one
  ref = ref.downcase
  chars = ['34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e']
  if chars.include?(ref)
    text = "&##{ref};"
  else
    if ref[0..0] == 'x'
      c = (ref[1..-1]).to_i(16)
    else
      c = ref.to_i
    end
    text = [c].pack('U*')
  end
  @elementstack[-1][2] << text
end
271
+
272
# LooseParserOnly
# Called for each entity reference, e.g. for '&copy;', ref will be 'copy'.
#
# lt, gt, quot, amp and apos are appended to the current element's text as
# the reference itself; anything else is passed through
# HTMLEntities::decode_entities first. Does nothing when the element stack
# is empty.
def handle_entityref(ref)
  return if @elementstack.nil? || @elementstack.empty?
  $stderr << "entering handle_entityref with #{ref}\n" if $debug
  reference = "&#{ref};"
  keep_escaped = ['lt', 'gt', 'quot', 'amp', 'apos'].include?(ref)
  text = keep_escaped ? reference : HTMLEntities::decode_entities(reference)
  @elementstack[-1][2] << text
end
286
+
287
# Called for each block of plain text, i.e. outside of any tag and not
# containing any character or entity references.
#
# Appends text to the pieces of the element on top of the stack. When
# escape is true and the current content type is application/xhtml+xml the
# text is first converted with to_xs. Does nothing when the element stack
# is empty.
def handle_data(text, escape=true)
  return if @elementstack.nil? || @elementstack.empty?
  needs_escaping = escape && @contentparams['type'] == 'application/xhtml+xml'
  text = text.to_xs if needs_escaping
  @elementstack[-1][2] << text
end
296
+
297
# Called for each comment, e.g. <!-- insert message here -->.
# Comments are ignored.
def handle_comment(comment); end
300
+
301
# Intentionally empty: the argument is ignored and nil is returned.
def handle_pi(text); end
303
+
304
# Intentionally empty: the argument is ignored and nil is returned.
def handle_decl(text); end
306
+
307
# for LooseFeedParser
#
# Handles the declaration starting at index i of @rawdata and returns an
# index computed from where it ends.
#
# A CDATA section has its content (up to ']]>', or to the end of the data
# when unterminated) converted with to_xs and handed to handle_data
# unescaped; the result is the terminator position plus 3. For any other
# declaration the result is the position of the next '>' plus 1, with a
# missing '>' counted as position 0.
def parse_declaration(i)
  $stderr << "entering parse_declaration\n" if $debug
  unless @rawdata[i...i+9] == '<![CDATA['
    closing = @rawdata.index(/>/, i).to_i
    return closing + 1
  end

  body_start = i + 9
  closing = @rawdata.index(/\]\]>/u, body_start) || @rawdata.length
  handle_data(@rawdata[body_start...closing].to_xs, false)
  return closing + 3
end
320
+
321
# Returns the lower-cased content type, with the short forms 'text',
# 'html' and 'xhtml' expanded to their full MIME types.
#
# The argument itself is left untouched: callers pass stored values such
# as @contentparams['type'] and string literals, which the previous
# in-place downcase! would silently rewrite (or fail on when frozen).
def mapContentType(contentType)
  contentType = contentType.downcase
  case contentType
  when 'text'
    contentType = 'text/plain'
  when 'html'
    contentType = 'text/html'
  when 'xhtml'
    contentType = 'application/xhtml+xml'
  end
  return contentType
end
333
+
334
# Records a namespace declaration.
#
# prefix - declared prefix, or nil for the default namespace
# uri    - namespace URI
#
# Certain namespace URIs fix @version while it is still unset. A known
# URI (compared lower-cased and stripped) maps the declared prefix to the
# standard one in @namespacemap and is recorded in @namespacesInUse under
# the standard prefix; an unknown URI is recorded under the declared
# prefix ('' for the default namespace).
def trackNamespace(prefix, uri)
  loweruri = uri.downcase.strip
  version_unset = (@version.nil? || @version.empty?)

  if prefix == nil && loweruri == 'http://my.netscape.com/rdf/simple/0.9/' && version_unset
    @version = 'rss090'
  elsif loweruri == 'http://purl.org/rss/1.0/' && version_unset
    @version = 'rss10'
  elsif loweruri == 'http://www.w3.org/2005/atom' && version_unset
    @version = 'atom10'
  elsif /backend\.userland\.com\/rss/ =~ loweruri
    # match any backend.userland.com namespace
    uri = 'http://backend.userland.com/rss'
    loweruri = uri
  end

  unless @matchnamespaces.has_key?(loweruri)
    @namespacesInUse[prefix || ''] = uri
    return uri
  end
  standard = @matchnamespaces[loweruri]
  @namespacemap[prefix] = standard
  @namespacesInUse[standard] = uri
end
355
+
356
# Joins uri onto the current base URI (@baseuri, or '' when unset) with
# urljoin and returns the result.
def resolveURI(uri)
  base = @baseuri || ''
  urljoin(base, uri)
end
359
+
360
# Returns data unchanged; element is ignored.
def decodeEntities(element, data)
  data
end
363
+
364
# Opens a new frame on the element stack: [element name, whether its text
# is wanted, list of text pieces collected so far].
def push(element, expectingText)
  frame = [element, expectingText, []]
  @elementstack << frame
end
367
+
368
# Closes the frame for element and returns its collected text.
#
# Returns nil without touching anything when the stack is empty or its
# top frame belongs to a different element. Otherwise the pieces are
# joined, optionally stripped (stripWhitespace), and, when the frame was
# pushed expecting text, post-processed in this order: base64 decoding,
# relative URI resolution, entity decoding, relative URI resolution and
# sanitizing inside HTML-typed markup, conversion to utf-8, and finally
# storage in the entry or feed context.
def pop(element, stripWhitespace=true)
  return if @elementstack.nil? || @elementstack.empty?
  return unless @elementstack[-1][0] == element

  element, expectingText, pieces = @elementstack.pop
  # pieces is normally a list, but may have been replaced by a plain value
  output = pieces.class == Array ? pieces.join('') : pieces
  output.strip! if stripWhitespace
  return output unless expectingText

  # decode base64 content
  if @contentparams['base64']
    out64 = Base64::decode64(output) # a.k.a. [output].unpack('m')[0]
    output = out64 unless output.empty? || out64.empty?
  end

  # resolve relative URIs
  if @can_be_relative_uri.include?(element) && output && !output.empty?
    output = resolveURI(output)
  end

  # decode entities within embedded markup
  output = decodeEntities(element, output) unless @contentparams['base64']

  # remove temporary cruft from contentparams
  @contentparams.delete('mode')
  @contentparams.delete('base64')

  # resolve relative URIs within embedded markup
  if @html_types.include?(mapContentType(@contentparams['type'] || 'text/html'))
    if @can_contain_relative_uris.include?(element)
      output = FeedParser.resolveRelativeURIs(output, @baseuri, @encoding)
    end
  end
  # sanitize embedded markup
  if @html_types.include?(mapContentType(@contentparams['type'] || 'text/html'))
    if @can_contain_dangerous_markup.include?(element)
      output = FeedParser.sanitizeHTML(output, @encoding)
    end
  end

  if @encoding && !@encoding.empty? && @encoding != 'utf-8'
    output = uconvert(output, @encoding, 'utf-8')
    # FIXME everything is turned into utf-8, not unicode; originally because
    # REXML was being used, now because the alternative is untested.
  end

  # categories/tags/keywords/whatever are handled in _end_category
  return output if element == 'category'

  # store output in appropriate place(s)
  if @inentry && !@insource
    case element
    when 'content'
      @entries[-1][element] ||= []
      params = Marshal.load(Marshal.dump(@contentparams)) # deepcopy
      params['value'] = output
      @entries[-1][element] << params
    when 'link'
      @entries[-1][element] = output
      @entries[-1]['links'][-1]['href'] = output if output && !output.empty?
    else
      element = 'summary' if element == 'description'
      @entries[-1][element] = output
      if @incontent != 0
        params = Marshal.load(Marshal.dump(@contentparams))
        params['value'] = output
        @entries[-1][element + '_detail'] = params
      end
    end
  elsif (@infeed || @insource) && !@intextinput && !@inimage
    context = getContext()
    element = 'subtitle' if element == 'description'
    context[element] = output
    if element == 'link'
      context['links'][-1]['href'] = output
    elsif @incontent != 0
      params = Marshal.load(Marshal.dump(@contentparams))
      params['value'] = output
      context[element + '_detail'] = params
    end
  end
  return output
end
460
+
461
# Begins a content-bearing element: bumps the @incontent counter, replaces
# @contentparams with the element's type (attribute 'type' or
# defaultContentType, mapped through mapContentType), language and base
# URI, records whether the content is base64, and pushes the frame.
def pushContent(tag, attrsD, defaultContentType, expectingText)
  @incontent += 1 # Yes, I hate this.
  declared_type = attrsD['type'] || defaultContentType
  @contentparams = FeedParserDict.new({
    'type' => mapContentType(declared_type),
    'language' => @lang,
    'base' => @baseuri
  })
  @contentparams['base64'] = isBase64(attrsD, @contentparams)
  push(tag, expectingText)
end
468
+
469
# Ends a content-bearing element: pops its frame, decrements the
# @incontent counter, empties @contentparams and returns the popped value.
def popContent(tag)
  popped = pop(tag)
  @incontent -= 1
  @contentparams.clear
  popped
end
475
+
476
# Rewrites the prefix of a qualified name ("prefix:suffix", split at the
# first colon) to the standard prefix recorded in @namespacemap. Names
# without a colon, and prefixes without a mapping, are returned unchanged.
def mapToStandardPrefix(name)
  colonpos = name.index(':')
  if colonpos
    # Exclusive range: with a leading colon (colonpos == 0) the old
    # name[0..colonpos-1] became name[0..-1], i.e. the whole string.
    prefix = name[0...colonpos]
    suffix = name[colonpos+1..-1]
    prefix = @namespacemap[prefix] || prefix
    name = prefix + ':' + suffix
  end
  return name
end
486
+
487
# Looks up an attribute after translating the prefix of name with
# mapToStandardPrefix.
def getAttribute(attrsD, name)
  standard_name = mapToStandardPrefix(name)
  attrsD[standard_name]
end
490
+
491
# Decides whether element content is treated as base64.
#
# true when the 'mode' attribute is 'base64'; otherwise false for a type
# that starts with "text/" or ends with "+xml" or "/xml", and true for
# anything else (including a missing type).
def isBase64(attrsD, contentparams)
  return true if attrsD['mode'] == 'base64'
  textual = /(^text\/)|(\+xml$)|(\/xml$)/ =~ contentparams['type']
  textual.nil?
end
498
+
499
# Normalises the link attribute name to 'href'.
#
# Takes the first present of 'url', 'uri', 'href'; when one is found the
# 'url' and 'uri' keys are removed and the value is stored under 'href'.
# Modifies attrsD in place and returns it.
def itsAnHrefDamnIt(attrsD)
  target = attrsD['url'] || attrsD['uri'] || attrsD['href']
  return attrsD unless target

  ['url', 'uri'].each { |legacy_key| attrsD.delete(legacy_key) }
  attrsD['href'] = target
  attrsD
end
508
+
509
+
510
# Stores value under key in the current context unless the context
# already holds a truthy value there.
def _save(key, value)
  target = getContext()
  target[key] ||= value
end
514
+
515
# Start of <rss>: derives @version from the 'version' attribute unless a
# version has already been determined. Known 0.9x values map to specific
# identifiers, anything starting with "2." becomes 'rss20', and everything
# else becomes 'rss'.
def _start_rss(attrsD)
  return if @version && !@version.empty?

  versionmap = {
    '0.91' => 'rss091u',
    '0.92' => 'rss092',
    '0.93' => 'rss093',
    '0.94' => 'rss094'
  }
  declared = attrsD['version'] || ''
  known = versionmap[declared]

  @version = if known && !known.empty?
               known
             elsif /^2\./ =~ declared
               'rss20'
             else
               'rss'
             end
end
534
+
535
# Start of <dlhottitles>: unconditionally sets the version to 'hotrss'.
# The attributes are not used.
def _start_dlhottitles(attrsD)
  @version = 'hotrss'
end
538
+
539
# Start of <channel> (also used for <feedinfo>): marks the parser as being
# inside the feed and hands the attributes to _cdf_common.
def _start_channel(attrsD)
  @infeed = true
  _cdf_common(attrsD)
end
alias _start_feedinfo _start_channel
544
+
545
# Handles values supplied as attributes instead of child elements.
#
# For a 'lastmod' attribute the modified handlers are run, and for an
# 'href' attribute the link handlers, with the attribute value placed in
# the last slot of the freshly opened frame between start and end.
def _cdf_common(attrsD)
  if attrsD.has_key?('lastmod')
    _start_modified({})
    # replace the frame's last slot with the attribute value
    @elementstack[-1][-1] = attrsD['lastmod']
    _end_modified
  end
  if attrsD.has_key?('href')
    _start_link({})
    @elementstack[-1][-1] = attrsD['href']
    _end_link
  end
end
557
+
558
# Start of <feed>: marks the parser as being inside the feed and, unless a
# version has already been determined, derives @version from the
# 'version' attribute ('atom01', 'atom02', 'atom03', or plain 'atom' for
# anything else).
def _start_feed(attrsD)
  @infeed = true
  versionmap = {'0.1' => 'atom01',
                '0.2' => 'atom02',
                '0.3' => 'atom03'
  }

  if not @version or @version.empty?
    attr_version = attrsD['version']
    version = versionmap[attr_version]
    # Test the looked-up local, not @version: @version is known to be
    # empty in this branch, so testing it made every feed plain 'atom'.
    if version and not version.empty?
      @version = version
    else
      @version = 'atom'
    end
  end
end
575
+
576
# End of <channel> (also used for </feed>): the parser is no longer inside
# the feed.
def _end_channel
  @infeed = false
end
alias _end_feed _end_channel
580
+
581
# Start of <image>: sets the image flag, pushes an 'image' frame that does
# not expect text, and makes sure the current context has an 'image'
# dictionary.
def _start_image(attrsD)
  @inimage = true
  push('image', false)
  target = getContext()
  target['image'] ||= FeedParserDict.new
end
587
+
588
# End of <image>: pops the 'image' frame and clears the image flag.
def _end_image
  pop('image')
  @inimage = false
end
592
+
593
# Start of <textinput> / <textInput>: sets the textinput flag, pushes a
# 'textinput' frame that does not expect text, and makes sure the current
# context has a 'textinput' dictionary.
def _start_textinput(attrsD)
  @intextinput = true
  push('textinput', false)
  target = getContext()
  target['textinput'] ||= FeedParserDict.new
end
alias _start_textInput _start_textinput
600
+
601
# End of <textinput> / <textInput>: pops the 'textinput' frame and clears
# the textinput flag.
def _end_textinput
  pop('textinput')
  @intextinput = false
end
alias _end_textInput _end_textinput
606
+
607
# Start of an author element (also managingeditor, dc:author, dc:creator
# and itunes:author): sets the author flag and pushes an 'author' frame
# that expects text.
def _start_author(attrsD)
  @inauthor = true
  push('author', true)
end
alias _start_managingeditor _start_author
alias _start_dc_author _start_author
alias _start_dc_creator _start_author
alias _start_itunes_author _start_author
615
+
616
# End of an author element (and its aliases): pops the 'author' frame,
# clears the author flag and calls _sync_author_detail.
def _end_author
  pop('author')
  @inauthor = false
  _sync_author_detail()
end
alias _end_managingeditor _end_author
alias _end_dc_author _end_author
alias _end_dc_creator _end_author
alias _end_itunes_author _end_author
625
+
626
# Start of <itunes:owner>: sets the publisher flag and pushes a
# 'publisher' frame that does not expect text.
def _start_itunes_owner(attrsD)
  @inpublisher = true
  push('publisher', false)
end
630
+
631
# End of <itunes:owner>: pops the 'publisher' frame, clears the publisher
# flag and calls _sync_author_detail for the 'publisher' key.
def _end_itunes_owner
  pop('publisher')
  @inpublisher = false
  _sync_author_detail('publisher')
end
636
+
637
# Start of <contributor>: sets the contributor flag, appends a fresh
# dictionary to the context's 'contributors' list (creating the list when
# needed) and pushes a 'contributor' frame that does not expect text.
def _start_contributor(attrsD)
  @incontributor = true
  target = getContext()
  target['contributors'] ||= []
  target['contributors'] << FeedParserDict.new
  push('contributor', false)
end
644
+
645
# End of <contributor>: pops the 'contributor' frame and clears the
# contributor flag.
def _end_contributor
  pop('contributor')
  @incontributor = false
end
649
+
650
# Start of <dc:contributor>: like _start_contributor, except that the
# pushed frame is named 'name', so _end_name picks up the collected value.
def _start_dc_contributor(attrsD)
  @incontributor = true
  target = getContext()
  target['contributors'] ||= []
  target['contributors'] << FeedParserDict.new
  push('name', false)
end
657
+
658
# End of <dc:contributor>: runs _end_name (which pops the 'name' frame),
# then clears the contributor flag.
def _end_dc_contributor
  _end_name
  @incontributor = false
end
662
+
663
# Start of <name> (also itunes:name): pushes a 'name' frame that does not
# expect text.
def _start_name(attrsD)
  push('name', false)
end
alias _start_itunes_name _start_name
667
+
668
# End of <name> (also itunes:name): pops the 'name' frame and files the
# value according to the element it appeared in. The first flag set among
# publisher, author, contributor and textinput decides; with none set the
# value is discarded.
def _end_name
  value = pop('name')
  case
  when @inpublisher
    _save_author('name', value, 'publisher')
  when @inauthor
    _save_author('name', value)
  when @incontributor
    _save_contributor('name', value)
  when @intextinput
    getContext()['textinput']['name'] = value
  end
end
alias _end_itunes_name _end_name
682
+
683
# Start of <width>: pushes a 'width' frame that does not expect text.
def _start_width(attrsD)
  push('width', false)
end
686
+
687
# End of <width>: pops the 'width' frame, converts the value with to_i and,
# when inside an image, stores it as the image's 'width'.
def _end_width
  width = pop('width').to_i
  getContext['image']['width'] = width if @inimage
end
694
+
695
# Start of <height>: pushes a 'height' frame that does not expect text.
def _start_height(attrsD)
  push('height', false)
end
698
+
699
# End of <height>: pops the 'height' frame, converts the value with to_i
# and, when inside an image, stores it as the image's 'height'.
def _end_height
  height = pop('height').to_i
  getContext()['image']['height'] = height if @inimage
end
706
+
707
# Start of <url> (also homepage and uri): pushes an 'href' frame that
# expects text.
def _start_url(attrsD)
  push('href', true)
end
alias _start_homepage _start_url
alias _start_uri _start_url
712
+
713
# End of <url> (also homepage and uri): pops the 'href' frame and files
# the value according to the element it appeared in. The first flag set
# among author, contributor, image and textinput decides; inside a
# textinput the value is stored under 'link'.
def _end_url
  value = pop('href')
  case
  when @inauthor
    _save_author('href', value)
  when @incontributor
    _save_contributor('href', value)
  when @inimage
    getContext()['image']['href'] = value
  when @intextinput
    getContext()['textinput']['link'] = value
  end
end
alias _end_homepage _end_url
alias _end_uri _end_url
729
+
730
# Start-tag handler for email (also registered for itunes:email):
# pushes 'email' with a false second argument.
def _start_email(attrsD)
  push 'email', false
end
alias _start_itunes_email _start_email
734
+
735
# End-tag handler for email (also registered for itunes:email).
# Pops 'email' and saves it for the publisher, the author or the current
# contributor, whichever state flag is found set first (in that order).
def _end_email
  email = pop('email')
  return _save_author('email', email, 'publisher') if @inpublisher
  return _save_author('email', email) if @inauthor

  _save_contributor('email', email) if @incontributor
end
alias _end_itunes_email _end_email
746
+
747
# Returns the hash that handlers should currently write into:
# @sourcedata while @insource is set, otherwise the last element of @entries
# while @inentry is set, otherwise @feeddata.
def getContext
  return @sourcedata if @insource
  return @entries[-1] if @inentry

  @feeddata
end
757
+
758
# Stores +value+ under +key+ in the "<prefix>_detail" dict of the current
# context, creating that dict when it is missing, then calls
# _sync_author_detail with its default key (the prefix is not forwarded).
def _save_author(key, value, prefix='author')
  detail_key = prefix + '_detail'
  ctx = getContext()
  ctx[detail_key] = FeedParserDict.new unless ctx[detail_key]
  ctx[detail_key][key] = value
  _sync_author_detail()
end
764
+
765
# Stores +value+ under +key+ on the most recent entry of the current
# context's 'contributors' list. When the list is missing it is first
# created with a single empty FeedParserDict.
def _save_contributor(key, value)
  ctx = getContext()
  ctx['contributors'] = [FeedParserDict.new] unless ctx['contributors']
  ctx['contributors'][-1][key] = value
end
770
+
771
# Keeps context[key] (a display string such as "Name (email)") and
# context["#{key}_detail"] (a dict with 'name' / 'email') consistent.
#
# * When the detail dict exists and is non-empty, context[key] is rebuilt
#   from it: "name (email)" if both are present and non-empty, otherwise
#   whichever one is present. Nothing is written if both are blank.
# * Otherwise the display string is parsed: the first e-mail address found
#   is split off, surrounding "()" are removed, and the remainder becomes
#   the name. If the string is blank or holds no e-mail address the method
#   returns without touching the context.
#
# key:: prefix of the fields to synchronise ('author' by default).
def _sync_author_detail(key='author')
  context = getContext()
  detail = context["#{key}_detail"]
  if detail && !detail.empty?
    name = detail['name']
    email = detail['email']
    has_name = !name.nil? && !name.empty?
    # The e-mail must be checked on its own; testing the name twice produced
    # strings like "Name ()" when the e-mail was empty.
    has_email = !email.nil? && !email.empty?

    if has_name && has_email
      context[key] = "#{name} (#{email})"
    elsif has_name
      context[key] = name
    elsif has_email
      context[key] = email
    end
  else
    author = context[key].dup unless context[key].nil?
    return if !author || author.empty?
    emailmatch = author.match(/(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))/)
    # No address in the string: there is nothing to split, so leave the
    # context alone instead of failing on a nil match.
    return unless emailmatch
    email = emailmatch[1]
    author.gsub!(email, '')
    author.gsub!("\(\)", '')
    author.strip!
    author.gsub!(/^\(/,'')
    author.gsub!(/\)$/,'')
    author.strip!
    context["#{key}_detail"] ||= FeedParserDict.new
    context["#{key}_detail"]['name'] = author
    context["#{key}_detail"]['email'] = email
  end
end
801
+
802
# Start-tag handler for subtitle (also registered for tagline and
# itunes:subtitle): opens a 'subtitle' content element whose default type
# argument is 'text/plain'.
def _start_subtitle(attrsD)
  pushContent 'subtitle', attrsD, 'text/plain', true
end
alias _start_tagline _start_subtitle
alias _start_itunes_subtitle _start_subtitle
807
+
808
# End-tag handler for subtitle (also registered for tagline and
# itunes:subtitle): closes the 'subtitle' content element.
def _end_subtitle
  popContent 'subtitle'
end
alias _end_tagline _end_subtitle
alias _end_itunes_subtitle _end_subtitle
813
+
814
# Start-tag handler for rights (also registered for dc:rights and
# copyright): opens a 'rights' content element whose default type argument
# is 'text/plain'.
def _start_rights(attrsD)
  pushContent 'rights', attrsD, 'text/plain', true
end
alias _start_dc_rights _start_rights
alias _start_copyright _start_rights
819
+
820
# End-tag handler for rights (also registered for dc:rights and copyright):
# closes the 'rights' content element.
def _end_rights
  popContent 'rights'
end
alias _end_dc_rights _end_rights
alias _end_copyright _end_rights
825
+
826
# Start-tag handler for item (also registered for entry and product).
# Appends a new FeedParserDict to @entries, pushes 'item', sets @inentry and
# resets @guidislink. A non-empty rdf:about attribute becomes the entry's
# 'id'. Finally hands the attributes to _cdf_common.
def _start_item(attrsD)
  @entries << FeedParserDict.new
  push 'item', false
  @inentry = true
  @guidislink = false
  about = getAttribute(attrsD, 'rdf:about')
  getContext()['id'] = about if about && !about.empty?
  _cdf_common(attrsD)
end
alias _start_entry _start_item
alias _start_product _start_item
840
+
841
# End-tag handler for item (also registered for entry): pops 'item' and
# clears the @inentry flag.
def _end_item
  pop 'item'
  @inentry = false
end
alias _end_entry _end_item
846
+
847
# Start-tag handler for dc:language (also registered for language):
# pushes 'language' with a true second argument.
def _start_dc_language(attrsD)
  push 'language', true
end
alias _start_language _start_dc_language
851
+
852
# End-tag handler for dc:language (also registered for language): pops
# 'language' and remembers the popped value in @lang.
def _end_dc_language
  language = pop('language')
  @lang = language
end
alias _end_language _end_dc_language
856
+
857
# Start-tag handler for dc:publisher (also registered for webmaster):
# pushes 'publisher' with a true second argument.
def _start_dc_publisher(attrsD)
  push 'publisher', true
end
alias _start_webmaster _start_dc_publisher
861
+
862
# End-tag handler for dc:publisher (also registered for webmaster): pops
# 'publisher', then synchronises the 'publisher' / 'publisher_detail' fields.
def _end_dc_publisher
  pop 'publisher'
  _sync_author_detail 'publisher'
end
alias _end_webmaster _end_dc_publisher
867
+
868
# Start-tag handler for published (also registered for dcterms:issued and
# issued): pushes 'published' with a true second argument.
def _start_published(attrsD)
  push 'published', true
end
alias _start_dcterms_issued _start_published
alias _start_issued _start_published
873
+
874
# End-tag handler for published (also registered for dcterms:issued and
# issued): pops 'published' and saves parse_date of the popped value as
# 'published_parsed'.
def _end_published
  _save 'published_parsed', parse_date(pop('published'))
end
alias _end_dcterms_issued _end_published
alias _end_issued _end_published
880
+
881
# Start-tag handler for updated (also registered for modified,
# dcterms:modified, pubdate and dc:date): pushes 'updated' with a true
# second argument.
def _start_updated(attrsD)
  push 'updated', true
end
alias _start_modified _start_updated
alias _start_dcterms_modified _start_updated
alias _start_pubdate _start_updated
alias _start_dc_date _start_updated
888
+
889
# End-tag handler for updated (also registered for modified,
# dcterms:modified, pubdate and dc:date): pops 'updated' and saves
# parse_date of the popped value as 'updated_parsed'.
def _end_updated
  _save 'updated_parsed', parse_date(pop('updated'))
end
alias _end_modified _end_updated
alias _end_dcterms_modified _end_updated
alias _end_pubdate _end_updated
alias _end_dc_date _end_updated
897
+
898
# Start-tag handler for created (also registered for dcterms:created):
# pushes 'created' with a true second argument.
def _start_created(attrsD)
  push 'created', true
end
alias _start_dcterms_created _start_created
902
+
903
# End-tag handler for created (also registered for dcterms:created): pops
# 'created' and saves parse_date of the popped value as 'created_parsed'.
def _end_created
  _save 'created_parsed', parse_date(pop('created'))
end
alias _end_dcterms_created _end_created
908
+
909
# Start-tag handler for expirationdate: pushes 'expired' with a true second
# argument.
def _start_expirationdate(attrsD)
  push 'expired', true
end
912
# End-tag handler for expirationdate: pops 'expired' and saves parse_date of
# the popped value as 'expired_parsed'.
def _end_expirationdate
  expired = pop('expired')
  _save 'expired_parsed', parse_date(expired)
end
915
+
916
# Start-tag handler for cc:license. The licence is carried by the
# rdf:resource attribute rather than by element text, so the element is
# pushed, the attribute value (if any) is appended to the third slot of the
# top element-stack entry, and the element is popped again right away.
#
# The pop is unconditional: no end handler for cc:license is visible in
# this section, so skipping it when rdf:resource is absent would leave
# 'license' stranded on the element stack. This mirrors
# _start_admin_generatoragent.
def _start_cc_license(attrsD)
  push('license', true)
  value = getAttribute(attrsD, 'rdf:resource')
  if value && !value.empty?
    @elementstack[-1][2] << value
  end
  pop('license')
end
924
+
925
# Start-tag handler for creativeCommons:license: pushes 'license' with a
# true second argument.
def _start_creativecommons_license(attrsD)
  push 'license', true
end
928
+
929
# End-tag handler for creativeCommons:license: pops 'license'.
def _end_creativecommons_license
  pop 'license'
end
932
+
933
# Adds a tag made of +term+, +scheme+ and +label+ to the current context's
# 'tags' list. The list is created when missing, even if no tag ends up
# being added. Nothing is added when all three parts are nil or empty, or
# when an equal tag is already in the list.
def addTag(term, scheme, label)
  context = getContext()
  context['tags'] ||= []
  tags = context['tags']
  return if [term, scheme, label].all? { |part| part.nil? || part.empty? }

  tag = FeedParserDict.new({'term' => term, 'scheme' => scheme, 'label' => label})
  tags << tag unless tags.include?(tag)
end
945
+
946
# Start-tag handler for category (also registered for dc:subject and
# keywords). Registers a tag built from the 'term', 'scheme' (falling back
# to 'domain') and 'label' attributes, then pushes 'category'. Writes a
# trace line to $stderr when $debug is set.
def _start_category(attrsD)
  $stderr << "entering _start_category with #{attrsD}\n" if $debug

  addTag(attrsD['term'], attrsD['scheme'] || attrsD['domain'], attrsD['label'])
  push 'category', true
end
alias _start_dc_subject _start_category
alias _start_keywords _start_category
957
+
958
# End-tag handler for itunes:keywords: pops 'itunes_keywords', splits the
# popped text on whitespace and adds every piece as a tag under the
# 'http://www.itunes.com/' scheme.
def _end_itunes_keywords
  keywords = pop('itunes_keywords').split
  keywords.each { |keyword| addTag(keyword, 'http://www.itunes.com/', nil) }
end
963
+
964
# Start-tag handler for itunes:category: adds the 'text' attribute as a tag
# under the 'http://www.itunes.com/' scheme, then pushes 'category'.
def _start_itunes_category(attrsD)
  addTag attrsD['text'], 'http://www.itunes.com/', nil
  push 'category', true
end
968
+
969
# End-tag handler for category (also registered for dc:subject, keywords
# and itunes:category).
#
# Pops 'category'; a nil or empty value is ignored. If the most recent tag
# of the current context has no 'term' yet, the popped text becomes that
# term. Otherwise (including when the context has no tag list at all) the
# text is added as a new tag of its own.
#
# The condition uses a plain newline after `if`: the `if cond:` form is
# only accepted by Ruby 1.8 and is a syntax error from 1.9 on.
def _end_category
  value = pop('category')
  return if value.nil? || value.empty?

  tags = getContext()['tags']
  if tags && !tags.empty? && !tags[-1]['term']
    tags[-1]['term'] = value
  else
    addTag(value, nil, nil)
  end
end
alias :_end_dc_subject :_end_category
alias :_end_keywords :_end_category
alias :_end_itunes_category :_end_category
983
+
984
# Start-tag handler for cloud: stores the element's attributes, wrapped in a
# FeedParserDict, under 'cloud' in the current context.
def _start_cloud(attrsD)
  cloud = FeedParserDict.new(attrsD)
  getContext()['cloud'] = cloud
end
987
+
988
# Start-tag handler for link (also registered for producturl).
#
# Defaults 'rel' to 'alternate' and 'type' to 'text/html', normalises the
# attributes through itsAnHrefDamnIt and resolves 'href' when present. The
# attributes are appended to the context's 'links' list, and a link with
# rel="enclosure" is additionally passed to _start_enclosure.
#
# With an 'href' attribute nothing is pushed; for an 'alternate' link of an
# HTML type the href also becomes the context's 'link'. Without 'href' the
# element is pushed as 'link', so its value comes from the element text.
def _start_link(attrsD)
  attrsD['rel'] ||= 'alternate'
  attrsD['type'] ||= 'text/html'
  attrsD = itsAnHrefDamnIt(attrsD)
  attrsD['href'] = resolveURI(attrsD['href']) if attrsD.has_key?('href')

  context = getContext()
  context['links'] ||= []
  context['links'] << FeedParserDict.new(attrsD)
  _start_enclosure(attrsD) if attrsD['rel'] == 'enclosure'

  if attrsD.has_key?('href')
    html_alternate = (attrsD['rel'] == 'alternate') && @html_types.include?(mapContentType(attrsD['type']))
    context['link'] = attrsD['href'] if html_alternate
  else
    push('link', @infeed || @inentry || @insource)
  end
end
alias _start_producturl _start_link
1012
+
1013
# End-tag handler for link (also registered for producturl). Pops 'link' and
# copies the popped value into the context's 'textinput' and/or 'image' dict
# when the matching state flag is set (the two checks are independent).
def _end_link
  link = pop('link')
  context = getContext()
  context['textinput']['link'] = link if @intextinput
  context['image']['link'] = link if @inimage
end
alias _end_producturl _end_link
1024
+
1025
# Start-tag handler for guid. Sets @guidislink to true when the
# 'ispermalink' attribute is absent or equal to 'true', then pushes 'id'.
def _start_guid(attrsD)
  permalink = attrsD['ispermalink'] || 'true'
  @guidislink = (permalink == 'true')
  push 'id', true
end
1029
+
1030
# End-tag handler for guid.
#
# Pops 'id', saves 'guidislink' (true only when the guid was flagged as a
# permalink and the context has no 'link' yet) and, when the guid is a
# permalink, offers it to _save as the 'link'.
#
# The condition uses a plain newline after `if`: the `if cond:` form is
# only accepted by Ruby 1.8 and is a syntax error from 1.9 on.
def _end_guid
  value = pop('id')
  _save('guidislink', (@guidislink && !getContext().has_key?('link')))
  if @guidislink
    # guid acts as link, but only if 'ispermalink' is not present or is 'true',
    # and only if the item doesn't already have a link element
    # NOTE(review): the "doesn't already have a link" part is not enforced
    # here; presumably _save leaves existing keys alone, confirm there.
    _save('link', value)
  end
end
1039
+
1040
+
1041
# Start-tag handler for title (also registered for dc:title and
# media:title): opens a 'title' content element whose default type argument
# is 'text/plain'; the last argument is truthy only inside a feed, entry or
# source.
def _start_title(attrsD)
  in_known_parent = @infeed || @inentry || @insource
  pushContent('title', attrsD, 'text/plain', in_known_parent)
end
alias _start_dc_title _start_title
alias _start_media_title _start_title
1046
+
1047
# End-tag handler for title (also registered for dc:title and media:title).
# Closes the 'title' content element and, while inside a textinput or (if
# not) an image, also records the title on that sub-dict of the context.
def _end_title
  title = popContent('title')
  context = getContext()
  owner = if @intextinput then 'textinput' elsif @inimage then 'image' end
  context[owner]['title'] = title if owner
end
alias _end_dc_title _end_title
alias _end_media_title _end_title
1058
+
1059
# Start-tag handler for description. If the context already has a 'summary',
# the element is handled as content instead (@summaryKey is set to 'content'
# so _end_description knows). Otherwise a 'description' content element is
# opened whose default type argument is 'text/html'.
def _start_description(attrsD)
  unless getContext().has_key?('summary')
    return pushContent('description', attrsD, 'text/html', @infeed || @inentry || @insource)
  end

  @summaryKey = 'content'
  _start_content(attrsD)
end
1068
+
1069
# Start-tag handler for abstract: opens a 'description' content element
# whose default type argument is 'text/plain'; the last argument is truthy
# only inside a feed, entry or source.
def _start_abstract(attrsD)
  in_known_parent = @infeed || @inentry || @insource
  pushContent('description', attrsD, 'text/plain', in_known_parent)
end
1072
+
1073
# End-tag handler for description (also registered for abstract).
#
# When the matching start handler redirected the element to content
# (@summaryKey == 'content') this delegates to _end_content. Otherwise it
# closes the 'description' content element and, inside a textinput or (if
# not) an image, also records the text on that sub-dict of the context.
# @summaryKey is reset to nil in every case.
#
# The branch uses a plain newline after `elsif`: the `elsif cond:` form is
# only accepted by Ruby 1.8 and is a syntax error from 1.9 on.
def _end_description
  if @summaryKey == 'content'
    _end_content()
  else
    value = popContent('description')
    context = getContext()
    if @intextinput
      context['textinput']['description'] = value
    elsif @inimage
      context['image']['description'] = value
    end
  end
  @summaryKey = nil
end
alias :_end_abstract :_end_description
1088
+
1089
# Start-tag handler for info (also registered for
# feedburner:browserFriendly): opens an 'info' content element whose default
# type argument is 'text/plain'.
def _start_info(attrsD)
  pushContent 'info', attrsD, 'text/plain', true
end
alias _start_feedburner_browserfriendly _start_info
1093
+
1094
# End-tag handler for info (also registered for
# feedburner:browserFriendly): closes the 'info' content element.
def _end_info
  popContent 'info'
end
alias _end_feedburner_browserfriendly _end_info
1098
+
1099
# Start-tag handler for generator. Non-empty attributes are normalised
# through itsAnHrefDamnIt and their 'href' (if any) is resolved. The
# attributes are then stored as the context's 'generator_detail' and
# 'generator' is pushed.
def _start_generator(attrsD)
  if attrsD && !attrsD.empty?
    attrsD = itsAnHrefDamnIt(attrsD)
    attrsD['href'] = resolveURI(attrsD['href']) if attrsD.has_key?('href')
  end
  getContext()['generator_detail'] = FeedParserDict.new(attrsD)
  push 'generator', true
end
1109
+
1110
# End-tag handler for generator: pops 'generator' and, when the context has
# a 'generator_detail', records the popped value as its 'name'.
def _end_generator
  generator_name = pop('generator')
  context = getContext()
  return unless context.has_key?('generator_detail')

  context['generator_detail']['name'] = generator_name
end
1117
+
1118
# Start-tag handler for admin:generatorAgent. The value is carried by the
# rdf:resource attribute, so the element is pushed, the attribute value (if
# non-empty) is appended to the third slot of the top element-stack entry,
# and the element is popped again right away. The attribute value (possibly
# nil) is also stored as the 'href' of a new 'generator_detail'.
def _start_admin_generatoragent(attrsD)
  push 'generator', true
  resource = getAttribute(attrsD, 'rdf:resource')
  @elementstack[-1][2] << resource if resource && !resource.empty?
  pop 'generator'
  getContext()['generator_detail'] = FeedParserDict.new({'href' => resource})
end
1127
+
1128
# Start-tag handler for admin:errorReportsTo. The value is carried by the
# rdf:resource attribute, so the element is pushed, the attribute value (if
# non-empty) is appended to the third slot of the top element-stack entry,
# and the element is popped again right away.
def _start_admin_errorreportsto(attrsD)
  push 'errorreportsto', true
  resource = getAttribute(attrsD, 'rdf:resource')
  @elementstack[-1][2] << resource if resource && !resource.empty?
  pop 'errorreportsto'
end
1136
+
1137
# Start-tag handler for summary (also registered for itunes:summary).
# If the context already has a 'summary', this one is handled as content;
# otherwise a 'summary' content element is opened whose default type
# argument is 'text/plain'. @summaryKey records which path was taken so the
# end handler can match it.
def _start_summary(attrsD)
  already_summarised = getContext().has_key?('summary')
  @summaryKey = already_summarised ? 'content' : 'summary'
  if already_summarised
    _start_content(attrsD)
  else
    pushContent(@summaryKey, attrsD, 'text/plain', true)
  end
end
alias _start_itunes_summary _start_summary
1148
+
1149
# End-tag handler for summary (also registered for itunes:summary).
#
# Delegates to _end_content when the start handler redirected the element
# to content (@summaryKey == 'content'). Otherwise closes the content
# element named by @summaryKey, defaulting to 'summary'. @summaryKey is
# reset to nil in every case.
#
# The condition uses a plain newline after `if`: the `if cond:` form is
# only accepted by Ruby 1.8 and is a syntax error from 1.9 on.
def _end_summary
  if @summaryKey == 'content'
    _end_content()
  else
    popContent(@summaryKey || 'summary')
  end
  @summaryKey = nil
end
alias :_end_itunes_summary :_end_summary
1158
+
1159
# Start-tag handler for enclosure (also called by _start_link for
# rel="enclosure"). Normalises the attributes through itsAnHrefDamnIt and
# appends them to the context's 'enclosures' list, creating it if needed.
# A non-empty 'href' also becomes the context's 'id' when none is set yet.
def _start_enclosure(attrsD)
  attrsD = itsAnHrefDamnIt(attrsD)
  getContext()['enclosures'] ||= []
  getContext()['enclosures'] << FeedParserDict.new(attrsD)
  href = attrsD['href']
  return unless href && !href.empty?

  context = getContext()
  context['id'] = href unless context['id']
end
1171
+
1172
# Start-tag handler for source: sets the @insource flag, which makes
# getContext return @sourcedata. The attributes are not used.
def _start_source(attrsD)
  @insource = true
end
1175
+
1176
# End-tag handler for source: clears @insource, stores a deep copy (made
# with a Marshal round-trip) of @sourcedata as the 'source' of whatever
# context is current once the flag is cleared, then empties @sourcedata.
def _end_source
  @insource = false
  snapshot = Marshal.load(Marshal.dump(@sourcedata))
  getContext()['source'] = snapshot
  @sourcedata.clear
end
1181
+
1182
# Start-tag handler for content.
#
# Opens a 'content' content element whose default type argument is
# 'text/plain', copies a non-empty 'src' attribute into @contentparams and
# then pushes 'content'.
#
# The condition uses a plain newline after `if`: the `if cond:` form is
# only accepted by Ruby 1.8 and is a syntax error from 1.9 on.
def _start_content(attrsD)
  pushContent('content', attrsD, 'text/plain', true)
  src = attrsD['src']
  if src && !src.empty?
    @contentparams['src'] = src
  end
  push('content', true)
end
1190
+
1191
# Start-tag handler for prodlink: opens a 'content' content element whose
# default type argument is 'text/html'.
def _start_prodlink(attrsD)
  pushContent 'content', attrsD, 'text/html', true
end
1194
+
1195
# Start-tag handler for body (also registered for xhtml:body): opens a
# 'content' content element whose default type argument is
# 'application/xhtml+xml'.
def _start_body(attrsD)
  pushContent 'content', attrsD, 'application/xhtml+xml', true
end
alias _start_xhtml_body _start_body
1199
+
1200
# Start-tag handler for content:encoded (also registered for fullitem):
# opens a 'content' content element whose default type argument is
# 'text/html'.
def _start_content_encoded(attrsD)
  pushContent 'content', attrsD, 'text/html', true
end
alias _start_fullitem _start_content_encoded
1204
+
1205
# End-tag handler for content (also registered for body, xhtml:body,
# content:encoded, fullitem and prodlink).
#
# Closes the 'content' content element. When the element's mapped content
# type is 'text/plain' or one of @html_types, the value is also saved as
# 'description'. The type is read before popContent is called.
# NOTE(review): presumably popContent resets @contentparams, which is why
# the order matters; confirm against popContent.
#
# The aliases are declared after the method, at definition time. Inside the
# method body they would only be created the first time _end_content ran,
# leaving the other end handlers undefined until then.
def _end_content
  copyToDescription = (['text/plain'] + @html_types).include? mapContentType(@contentparams['type'])
  value = popContent('content')
  if copyToDescription
    _save('description', value)
  end
end
alias :_end_body :_end_content
alias :_end_xhtml_body :_end_content
alias :_end_content_encoded :_end_content
alias :_end_fullitem :_end_content
alias :_end_prodlink :_end_content
1217
+
1218
# Start-tag handler for itunes:image (also registered for itunes:link):
# pushes 'itunes_image' and stores the 'href' attribute as the context's
# 'image'.
def _start_itunes_image(attrsD)
  push 'itunes_image', false
  image = FeedParserDict.new({'href' => attrsD['href']})
  getContext()['image'] = image
end
alias _start_itunes_link _start_itunes_image
1223
+
1224
# End-tag handler for itunes:block: pops 'itunes_block' and stores in the
# context whether the popped text equals 'yes' (true or false).
def _end_itunes_block
  answer = pop('itunes_block', false)
  getContext()['itunes_block'] = (answer == 'yes')
end
1228
+
1229
# End-tag handler for itunes:explicit: pops 'itunes_explicit' and stores in
# the context whether the popped text equals 'yes' (true or false).
def _end_itunes_explicit
  answer = pop('itunes_explicit', false)
  getContext()['itunes_explicit'] = (answer == 'yes')
end
1233
+ end # End FeedParserMixin
1234
+
1235
+