forkparser 2026.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- feedparser/__init__.py +66 -0
- feedparser/api.py +376 -0
- feedparser/datetimes/__init__.py +73 -0
- feedparser/datetimes/asctime.py +80 -0
- feedparser/datetimes/greek.py +90 -0
- feedparser/datetimes/hungarian.py +66 -0
- feedparser/datetimes/iso8601.py +160 -0
- feedparser/datetimes/korean.py +94 -0
- feedparser/datetimes/perforce.py +63 -0
- feedparser/datetimes/rfc822.py +179 -0
- feedparser/datetimes/w3dtf.py +128 -0
- feedparser/encodings.py +649 -0
- feedparser/exceptions.py +55 -0
- feedparser/html.py +350 -0
- feedparser/http.py +74 -0
- feedparser/mixin.py +838 -0
- feedparser/namespaces/__init__.py +0 -0
- feedparser/namespaces/_base.py +547 -0
- feedparser/namespaces/admin.py +53 -0
- feedparser/namespaces/cc.py +70 -0
- feedparser/namespaces/dc.py +138 -0
- feedparser/namespaces/georss.py +682 -0
- feedparser/namespaces/itunes.py +113 -0
- feedparser/namespaces/mediarss.py +142 -0
- feedparser/namespaces/psc.py +74 -0
- feedparser/parsers/__init__.py +0 -0
- feedparser/parsers/json.py +135 -0
- feedparser/parsers/loose.py +75 -0
- feedparser/parsers/strict.py +141 -0
- feedparser/py.typed +0 -0
- feedparser/sanitizer.py +978 -0
- feedparser/sgml.py +98 -0
- feedparser/urls.py +233 -0
- feedparser/util.py +157 -0
- forkparser-2026.1.0.dist-info/METADATA +75 -0
- forkparser-2026.1.0.dist-info/RECORD +38 -0
- forkparser-2026.1.0.dist-info/WHEEL +4 -0
- forkparser-2026.1.0.dist-info/licenses/LICENSE +65 -0
|
File without changes
|
|
@@ -0,0 +1,547 @@
|
|
|
1
|
+
# Support for the Atom, RSS, RDF, and CDF feed formats
|
|
2
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
3
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
4
|
+
# All rights reserved.
|
|
5
|
+
#
|
|
6
|
+
# This file is a part of feedparser.
|
|
7
|
+
#
|
|
8
|
+
# Redistribution and use in source and binary forms, with or without modification,
|
|
9
|
+
# are permitted provided that the following conditions are met:
|
|
10
|
+
#
|
|
11
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
12
|
+
# this list of conditions and the following disclaimer.
|
|
13
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
14
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
15
|
+
# and/or other materials provided with the distribution.
|
|
16
|
+
#
|
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
18
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
19
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
21
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
22
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
23
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
26
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
28
|
+
|
|
29
|
+
import copy
|
|
30
|
+
|
|
31
|
+
from ..datetimes import _parse_date
|
|
32
|
+
from ..urls import make_safe_absolute_uri
|
|
33
|
+
from ..util import FeedParserDict
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Namespace:
    """Support for the Atom, RSS, RDF, and CDF feed formats.

    The feed formats all share common elements, some of which have conflicting
    interpretations. For simplicity, all of the base feed format support is
    collected here.

    The ``_start_*`` / ``_end_*`` methods are element handlers. They rely on
    state and helpers (``push``, ``pop``, ``push_content``, ``pop_content``,
    ``_get_context``, ``_save``, ...) that are provided outside this class.
    """

    supported_namespaces = {
        "": "",
        "http://backend.userland.com/rss": "",
        "http://blogs.law.harvard.edu/tech/rss": "",
        "http://purl.org/rss/1.0/": "",
        "http://my.netscape.com/rdf/simple/0.9/": "",
        "http://example.com/newformat#": "",
        "http://example.com/necho": "",
        "http://purl.org/echo/": "",
        "uri/of/echo/namespace#": "",
        "http://purl.org/pie/": "",
        "http://purl.org/atom/ns#": "",
        "http://www.w3.org/2005/Atom": "",
        "http://purl.org/rss/1.0/modules/rss091#": "",
    }

    def _start_rss(self, attrs_d):
        """Set ``self.version`` to an RSS flavor based on the ``version`` attribute."""
        versionmap = {
            "0.91": "rss091u",
            "0.92": "rss092",
            "0.93": "rss093",
            "0.94": "rss094",
        }

        # If we're here then this is an RSS feed.
        # If we don't have a version or have a version that starts with something
        # other than RSS then there's been a mistake. Correct it.
        if not self.version or not self.version.startswith("rss"):
            attr_version = attrs_d.get("version", "")
            version = versionmap.get(attr_version)
            if version:
                self.version = version
            elif attr_version.startswith("2."):
                self.version = "rss20"
            else:
                self.version = "rss"

    def _start_channel(self, attrs_d):
        """Enter feed-level context and apply the shared CDF attributes."""
        self.infeed = 1
        self._cdf_common(attrs_d)

    def _cdf_common(self, attrs_d):
        """Replay ``lastmod`` and ``href`` attributes as modified/link elements."""
        if "lastmod" in attrs_d:
            self._start_modified({})
            # Inject the attribute value as the text of the synthetic element.
            self.elementstack[-1][-1] = attrs_d["lastmod"]
            self._end_modified()
        if "href" in attrs_d:
            self._start_link({})
            self.elementstack[-1][-1] = attrs_d["href"]
            self._end_link()

    def _start_feed(self, attrs_d):
        """Enter feed-level context and set an Atom version if none is set yet."""
        self.infeed = 1
        versionmap = {"0.1": "atom01", "0.2": "atom02", "0.3": "atom03"}
        if not self.version:
            attr_version = attrs_d.get("version")
            version = versionmap.get(attr_version)
            if version:
                self.version = version
            else:
                self.version = "atom"

    def _end_channel(self):
        """Leave feed-level context."""
        self.infeed = 0

    _end_feed = _end_channel

    def _start_image(self, attrs_d):
        """Enter image context, creating the ``image`` dict outside of entries."""
        context = self._get_context()
        if not self.inentry:
            context.setdefault("image", FeedParserDict())
        self.inimage = 1
        self.title_depth = -1
        self.push("image", 0)

    def _end_image(self):
        """Leave image context."""
        self.pop("image")
        self.inimage = 0

    def _start_textinput(self, attrs_d):
        """Enter textinput context, creating the ``textinput`` dict if needed."""
        context = self._get_context()
        context.setdefault("textinput", FeedParserDict())
        self.intextinput = 1
        self.title_depth = -1
        self.push("textinput", 0)

    _start_textInput = _start_textinput

    def _end_textinput(self):
        """Leave textinput context."""
        self.pop("textinput")
        self.intextinput = 0

    _end_textInput = _end_textinput

    def _start_author(self, attrs_d):
        """Enter author context and append a fresh entry to ``authors``."""
        self.inauthor = 1
        self.push("author", 1)
        # Append a new FeedParserDict when expecting an author
        context = self._get_context()
        context.setdefault("authors", [])
        context["authors"].append(FeedParserDict())

    _start_managingeditor = _start_author

    def _end_author(self):
        """Leave author context and synchronize the author detail."""
        self.pop("author")
        self.inauthor = 0
        self._sync_author_detail()

    _end_managingeditor = _end_author

    def _start_contributor(self, attrs_d):
        """Enter contributor context and append a fresh entry to ``contributors``."""
        self.incontributor = 1
        context = self._get_context()
        context.setdefault("contributors", [])
        context["contributors"].append(FeedParserDict())
        self.push("contributor", 0)

    def _end_contributor(self):
        """Leave contributor context."""
        self.pop("contributor")
        self.incontributor = 0

    def _start_name(self, attrs_d):
        """Begin collecting a ``name`` element."""
        self.push("name", 0)

    def _end_name(self):
        """Store the collected name on whichever person/textinput is open."""
        value = self.pop("name")
        if self.inpublisher:
            self._save_author("name", value, "publisher")
        elif self.inauthor:
            self._save_author("name", value)
        elif self.incontributor:
            self._save_contributor("name", value)
        elif self.intextinput:
            context = self._get_context()
            context["name"] = value

    def _start_width(self, attrs_d):
        """Begin collecting a ``width`` element."""
        self.push("width", 0)

    def _end_width(self):
        """Store the image width as an int, using 0 when it cannot be converted."""
        value = self.pop("width")
        try:
            value = int(value)
        except (TypeError, ValueError):
            # ValueError: non-numeric text. TypeError: pop() gave a non-string
            # such as None; pop() is defined elsewhere, so guard against it.
            value = 0
        if self.inimage:
            context = self._get_context()
            context["width"] = value

    def _start_height(self, attrs_d):
        """Begin collecting a ``height`` element."""
        self.push("height", 0)

    def _end_height(self):
        """Store the image height as an int, using 0 when it cannot be converted."""
        value = self.pop("height")
        try:
            value = int(value)
        except (TypeError, ValueError):
            # Same reasoning as in _end_width: tolerate text that is not a
            # number as well as a missing (None) value.
            value = 0
        if self.inimage:
            context = self._get_context()
            context["height"] = value

    def _start_url(self, attrs_d):
        """Begin collecting a URL element under the ``href`` key."""
        self.push("href", 1)

    _start_homepage = _start_url
    _start_uri = _start_url

    def _end_url(self):
        """Store the collected URL on the open author or contributor."""
        value = self.pop("href")
        if self.inauthor:
            self._save_author("href", value)
        elif self.incontributor:
            self._save_contributor("href", value)

    _end_homepage = _end_url
    _end_uri = _end_url

    def _start_email(self, attrs_d):
        """Begin collecting an ``email`` element."""
        self.push("email", 0)

    def _end_email(self):
        """Store the collected email on the open publisher/author/contributor."""
        value = self.pop("email")
        if self.inpublisher:
            self._save_author("email", value, "publisher")
        elif self.inauthor:
            self._save_author("email", value)
        elif self.incontributor:
            self._save_contributor("email", value)

    def _start_subtitle(self, attrs_d):
        """Begin collecting ``subtitle`` content (default type text/plain)."""
        self.push_content("subtitle", attrs_d, "text/plain", 1)

    _start_tagline = _start_subtitle

    def _end_subtitle(self):
        """Finish the ``subtitle`` content."""
        self.pop_content("subtitle")

    _end_tagline = _end_subtitle

    def _start_rights(self, attrs_d):
        """Begin collecting ``rights`` content (default type text/plain)."""
        self.push_content("rights", attrs_d, "text/plain", 1)

    _start_copyright = _start_rights

    def _end_rights(self):
        """Finish the ``rights`` content."""
        self.pop_content("rights")

    _end_copyright = _end_rights

    def _start_item(self, attrs_d):
        """Open a new entry and seed it from ``rdf:about`` and CDF attributes."""
        self.entries.append(FeedParserDict())
        self.push("item", 0)
        self.inentry = 1
        self.guidislink = 0
        self.title_depth = -1
        # Named entry_id (not ``id``) so the builtin is not shadowed.
        entry_id = self._get_attribute(attrs_d, "rdf:about")
        if entry_id:
            context = self._get_context()
            context["id"] = entry_id
        self._cdf_common(attrs_d)

    _start_entry = _start_item

    def _end_item(self):
        """Close the current entry and reset the per-entry content flag."""
        self.pop("item")
        self.inentry = 0
        self.hasContent = 0

    _end_entry = _end_item

    def _start_language(self, attrs_d):
        """Begin collecting a ``language`` element."""
        self.push("language", 1)

    def _end_language(self):
        """Record the collected language in ``self.lang``."""
        self.lang = self.pop("language")

    def _start_webmaster(self, attrs_d):
        """Begin collecting a webmaster element under the ``publisher`` key."""
        self.push("publisher", 1)

    def _end_webmaster(self):
        """Finish the publisher element and synchronize its detail."""
        self.pop("publisher")
        self._sync_author_detail("publisher")

    def _start_published(self, attrs_d):
        """Begin collecting a ``published`` date."""
        self.push("published", 1)

    _start_issued = _start_published
    _start_pubdate = _start_published

    def _end_published(self):
        """Save the parsed form of the collected ``published`` date."""
        value = self.pop("published")
        self._save("published_parsed", _parse_date(value), overwrite=True)

    _end_issued = _end_published
    _end_pubdate = _end_published

    def _start_updated(self, attrs_d):
        """Begin collecting an ``updated`` date."""
        self.push("updated", 1)

    _start_modified = _start_updated
    _start_lastbuilddate = _start_updated

    def _end_updated(self):
        """Save the parsed form of the collected ``updated`` date."""
        value = self.pop("updated")
        parsed_value = _parse_date(value)
        self._save("updated_parsed", parsed_value, overwrite=True)

    _end_modified = _end_updated
    _end_lastbuilddate = _end_updated

    def _start_created(self, attrs_d):
        """Begin collecting a ``created`` date."""
        self.push("created", 1)

    def _end_created(self):
        """Save the parsed form of the collected ``created`` date."""
        value = self.pop("created")
        self._save("created_parsed", _parse_date(value), overwrite=True)

    def _start_expirationdate(self, attrs_d):
        """Begin collecting an expiration date under the ``expired`` key."""
        self.push("expired", 1)

    def _end_expirationdate(self):
        """Save the parsed form of the collected ``expired`` date."""
        self._save("expired_parsed", _parse_date(self.pop("expired")), overwrite=True)

    def _start_category(self, attrs_d):
        """Register a tag from the element's attributes and collect its text."""
        term = attrs_d.get("term")
        scheme = attrs_d.get("scheme", attrs_d.get("domain"))
        label = attrs_d.get("label")
        self._add_tag(term, scheme, label)
        self.push("category", 1)

    _start_keywords = _start_category

    def _end_category(self):
        """Use the element text as the term of the last tag, or add a new tag."""
        value = self.pop("category")
        if not value:
            return
        context = self._get_context()
        tags = context["tags"]
        # ``value`` is known to be truthy here, so only the tag list matters.
        if tags and not tags[-1]["term"]:
            tags[-1]["term"] = value
        else:
            self._add_tag(value, None, None)

    _end_keywords = _end_category

    def _start_cloud(self, attrs_d):
        """Store the element's attributes as the ``cloud`` value."""
        self._get_context()["cloud"] = FeedParserDict(attrs_d)

    def _start_link(self, attrs_d):
        """Record a link, defaulting ``rel``/``type`` and resolving ``href``.

        When no ``href`` is available the element text is collected instead.
        Note that ``attrs_d`` is updated in place with the defaults.
        """
        attrs_d.setdefault("rel", "alternate")
        if attrs_d["rel"] == "self":
            attrs_d.setdefault("type", "application/atom+xml")
        else:
            attrs_d.setdefault("type", "text/html")
        context = self._get_context()
        attrs_d = self._enforce_href(attrs_d)
        if "href" in attrs_d:
            attrs_d["href"] = self.resolve_uri(attrs_d["href"])
        if (
            attrs_d.get("rel") == "alternate"
            and self.map_content_type(attrs_d.get("type")) in self.html_types
        ):
            self.isentrylink = 1
        expecting_text = self.infeed or self.inentry or self.insource
        context.setdefault("links", [])
        if not (self.inentry and self.inimage):
            context["links"].append(FeedParserDict(attrs_d))
        if "href" in attrs_d:
            if self.isentrylink:
                context["link"] = attrs_d["href"]
        else:
            self.push("link", expecting_text)

    def _end_link(self):
        """Finish the link element and clear the entry-link flag."""
        self.pop("link")
        self.isentrylink = 0

    def _start_guid(self, attrs_d):
        """Begin collecting an id; note whether it may double as the link."""
        self.guidislink = attrs_d.get("ispermalink", "true") == "true"
        self.push("id", 1)

    _start_id = _start_guid

    def _end_guid(self):
        """Save the ``guidislink`` flag and, when set, the id as ``link``."""
        value = self.pop("id")
        self._save("guidislink", self.guidislink and "link" not in self._get_context())
        if self.guidislink:
            # guid acts as link, but only if 'ispermalink' is not present or is 'true',
            # and only if the item doesn't already have a link element
            self._save("link", value)

    _end_id = _end_guid

    def _start_title(self, attrs_d):
        """Begin collecting ``title`` content, deferring to SVG handling if active."""
        if self.svgOK:
            return self.unknown_starttag("title", list(attrs_d.items()))
        self.push_content(
            "title", attrs_d, "text/plain", self.infeed or self.inentry or self.insource
        )

    def _end_title(self):
        """Finish ``title`` content and remember the depth of a non-empty title."""
        if self.svgOK:
            return
        value = self.pop_content("title")
        if not value:
            return
        self.title_depth = self.depth

    def _start_description(self, attrs_d):
        """Begin a description, treating it as content if a summary already exists."""
        context = self._get_context()
        if "summary" in context and not self.hasContent:
            self._summaryKey = "content"
            self._start_content(attrs_d)
        else:
            self.push_content(
                "description",
                attrs_d,
                "text/html",
                self.infeed or self.inentry or self.insource,
            )

    def _start_abstract(self, attrs_d):
        """Begin collecting an abstract as a text/plain ``description``."""
        self.push_content(
            "description",
            attrs_d,
            "text/plain",
            self.infeed or self.inentry or self.insource,
        )

    def _end_description(self):
        """Finish the description (or the content it was redirected to)."""
        if self._summaryKey == "content":
            self._end_content()
        else:
            self.pop_content("description")
        self._summaryKey = None

    _end_abstract = _end_description

    def _start_info(self, attrs_d):
        """Begin collecting ``info`` content (default type text/plain)."""
        self.push_content("info", attrs_d, "text/plain", 1)

    _start_feedburner_browserfriendly = _start_info

    def _end_info(self):
        """Finish the ``info`` content."""
        self.pop_content("info")

    _end_feedburner_browserfriendly = _end_info

    def _start_generator(self, attrs_d):
        """Store generator attributes as ``generator_detail`` and collect its text."""
        if attrs_d:
            attrs_d = self._enforce_href(attrs_d)
            if "href" in attrs_d:
                attrs_d["href"] = self.resolve_uri(attrs_d["href"])
        self._get_context()["generator_detail"] = FeedParserDict(attrs_d)
        self.push("generator", 1)

    def _end_generator(self):
        """Copy the generator text into ``generator_detail['name']`` if present."""
        value = self.pop("generator")
        context = self._get_context()
        if "generator_detail" in context:
            context["generator_detail"]["name"] = value

    def _start_summary(self, attrs_d):
        """Begin a summary, treating it as content if a summary already exists."""
        context = self._get_context()
        if "summary" in context and not self.hasContent:
            self._summaryKey = "content"
            self._start_content(attrs_d)
        else:
            self._summaryKey = "summary"
            self.push_content(self._summaryKey, attrs_d, "text/plain", 1)

    def _end_summary(self):
        """Finish the summary (or the content it was redirected to)."""
        if self._summaryKey == "content":
            self._end_content()
        else:
            self.pop_content(self._summaryKey or "summary")
        self._summaryKey = None

    def _start_enclosure(self, attrs_d):
        """Append the enclosure to ``links`` with ``rel`` forced to ``enclosure``."""
        attrs_d = self._enforce_href(attrs_d)
        context = self._get_context()
        attrs_d["rel"] = "enclosure"
        context.setdefault("links", []).append(FeedParserDict(attrs_d))

    def _start_source(self, attrs_d):
        """Enter source context, capturing an RSS 2.0 ``url`` attribute as href."""
        if "url" in attrs_d:
            # This means that we're processing a source element from an RSS 2.0 feed
            self.sourcedata["href"] = attrs_d["url"]
        self.push("source", 1)
        self.insource = 1
        self.title_depth = -1

    def _end_source(self):
        """Leave source context and store a copy of the gathered source data."""
        self.insource = 0
        value = self.pop("source")
        if value:
            self.sourcedata["title"] = value
        # Deep-copy before clearing so the stored value keeps its data.
        self._get_context()["source"] = copy.deepcopy(self.sourcedata)
        self.sourcedata.clear()

    def _start_content(self, attrs_d):
        """Begin collecting ``content``, remembering a ``src`` attribute if given."""
        self.hasContent = 1
        self.push_content("content", attrs_d, "text/plain", 1)
        src = attrs_d.get("src")
        if src:
            self.contentparams["src"] = src
        self.push("content", 1)

    def _start_body(self, attrs_d):
        """Begin collecting a body as application/xhtml+xml ``content``."""
        self.push_content("content", attrs_d, "application/xhtml+xml", 1)

    _start_xhtml_body = _start_body

    def _start_content_encoded(self, attrs_d):
        """Begin collecting encoded content as text/html ``content``."""
        self.hasContent = 1
        self.push_content("content", attrs_d, "text/html", 1)

    _start_fullitem = _start_content_encoded

    def _end_content(self):
        """Finish ``content`` and mirror plain-text/HTML content into ``summary``."""
        # Decide before popping: the content type is read from contentparams.
        copyToSummary = self.map_content_type(self.contentparams.get("type")) in (
            {"text/plain"} | self.html_types
        )
        value = self.pop_content("content")
        if copyToSummary:
            self._save("summary", value)

    _end_body = _end_content
    _end_xhtml_body = _end_content
    _end_content_encoded = _end_content
    _end_fullitem = _end_content

    def _start_newlocation(self, attrs_d):
        """Begin collecting a ``newlocation`` element."""
        self.push("newlocation", 1)

    def _end_newlocation(self):
        """Store the new location as a safe absolute URI on the feed only."""
        url = self.pop("newlocation")
        context = self._get_context()
        # don't set newlocation if the context isn't right
        if context is not self.feeddata:
            return
        # NOTE(review): pop() is defined elsewhere; if it yields None there is
        # nothing to resolve, so skip instead of failing on ``None.strip()``.
        if url is None:
            return
        context["newlocation"] = make_safe_absolute_uri(self.baseuri, url.strip())
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Support for the administrative elements extension
|
|
2
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
3
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
4
|
+
# All rights reserved.
|
|
5
|
+
#
|
|
6
|
+
# This file is a part of feedparser.
|
|
7
|
+
#
|
|
8
|
+
# Redistribution and use in source and binary forms, with or without
|
|
9
|
+
# modification, are permitted provided that the following conditions are met:
|
|
10
|
+
#
|
|
11
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
12
|
+
# this list of conditions and the following disclaimer.
|
|
13
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
14
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
15
|
+
# and/or other materials provided with the distribution.
|
|
16
|
+
#
|
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
18
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
19
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
21
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
22
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
23
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
26
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
28
|
+
|
|
29
|
+
from ..util import FeedParserDict
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Namespace:
    # RDF Site Summary 1.0 Modules: Administrative
    # http://web.resource.org/rss/1.0/modules/admin/
    #
    # Both handlers read the ``rdf:resource`` attribute and feed it through
    # the regular push/pop cycle as if it were the element's text.

    supported_namespaces = {"http://webns.net/mvcb/": "admin"}

    def _start_admin_generatoragent(self, attrs_d):
        # Treat the rdf:resource attribute as the text of a "generator" element.
        self.push("generator", 1)
        resource = self._get_attribute(attrs_d, "rdf:resource")
        if resource:
            pieces = self.elementstack[-1][2]
            pieces.append(resource)
        self.pop("generator")
        # The detail is stored even when no resource was supplied.
        detail = FeedParserDict({"href": resource})
        self._get_context()["generator_detail"] = detail

    def _start_admin_errorreportsto(self, attrs_d):
        # Treat the rdf:resource attribute as the text of "errorreportsto".
        self.push("errorreportsto", 1)
        resource = self._get_attribute(attrs_d, "rdf:resource")
        if resource:
            pieces = self.elementstack[-1][2]
            pieces.append(resource)
        self.pop("errorreportsto")
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Support for the Creative Commons licensing extensions
|
|
2
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
3
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
4
|
+
# All rights reserved.
|
|
5
|
+
#
|
|
6
|
+
# This file is a part of feedparser.
|
|
7
|
+
#
|
|
8
|
+
# Redistribution and use in source and binary forms, with or without
|
|
9
|
+
# modification, are permitted provided that the following conditions are met:
|
|
10
|
+
#
|
|
11
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
12
|
+
# this list of conditions and the following disclaimer.
|
|
13
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
14
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
15
|
+
# and/or other materials provided with the distribution.
|
|
16
|
+
#
|
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
18
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
19
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
21
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
22
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
23
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
26
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
28
|
+
|
|
29
|
+
from ..util import FeedParserDict
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Namespace:
    """Support for the Creative Commons licensing extensions.

    Licenses are recorded as entries with ``rel="license"`` in the ``links``
    list of the current context.
    """

    supported_namespaces = {
        # RDF-based namespace
        "http://creativecommons.org/ns#license": "cc",
        # Old RDF-based namespace
        "http://web.resource.org/cc/": "cc",
        # RSS-based namespace
        "http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html": (
            "creativecommons"
        ),
        # Old RSS-based namespace
        "http://backend.userland.com/creativeCommonsRssModule": "creativecommons",
    }

    def _start_cc_license(self, attrs_d):
        """Append a license link built from the ``rdf:resource`` attribute."""
        context = self._get_context()
        value = self._get_attribute(attrs_d, "rdf:resource")
        attrs_d = FeedParserDict()
        attrs_d["rel"] = "license"
        if value:
            attrs_d["href"] = value
        context.setdefault("links", []).append(attrs_d)

    def _start_creativecommons_license(self, attrs_d):
        """Begin collecting the text of a ``license`` element."""
        self.push("license", 1)

    _start_creativeCommons_license = _start_creativecommons_license

    def _end_creativecommons_license(self):
        """Append a license link built from the element text.

        Any plain ``license`` key in the context is removed afterwards.
        """
        value = self.pop("license")
        context = self._get_context()
        attrs_d = FeedParserDict()
        attrs_d["rel"] = "license"
        if value:
            attrs_d["href"] = value
        context.setdefault("links", []).append(attrs_d)
        # NOTE(review): pop() is defined elsewhere and may not have stored a
        # "license" key in this context; remove it only if present instead of
        # raising KeyError.
        context.pop("license", None)

    _end_creativeCommons_license = _end_creativecommons_license
|