forkparser 2026.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- feedparser/__init__.py +66 -0
- feedparser/api.py +376 -0
- feedparser/datetimes/__init__.py +73 -0
- feedparser/datetimes/asctime.py +80 -0
- feedparser/datetimes/greek.py +90 -0
- feedparser/datetimes/hungarian.py +66 -0
- feedparser/datetimes/iso8601.py +160 -0
- feedparser/datetimes/korean.py +94 -0
- feedparser/datetimes/perforce.py +63 -0
- feedparser/datetimes/rfc822.py +179 -0
- feedparser/datetimes/w3dtf.py +128 -0
- feedparser/encodings.py +649 -0
- feedparser/exceptions.py +55 -0
- feedparser/html.py +350 -0
- feedparser/http.py +74 -0
- feedparser/mixin.py +838 -0
- feedparser/namespaces/__init__.py +0 -0
- feedparser/namespaces/_base.py +547 -0
- feedparser/namespaces/admin.py +53 -0
- feedparser/namespaces/cc.py +70 -0
- feedparser/namespaces/dc.py +138 -0
- feedparser/namespaces/georss.py +682 -0
- feedparser/namespaces/itunes.py +113 -0
- feedparser/namespaces/mediarss.py +142 -0
- feedparser/namespaces/psc.py +74 -0
- feedparser/parsers/__init__.py +0 -0
- feedparser/parsers/json.py +135 -0
- feedparser/parsers/loose.py +75 -0
- feedparser/parsers/strict.py +141 -0
- feedparser/py.typed +0 -0
- feedparser/sanitizer.py +978 -0
- feedparser/sgml.py +98 -0
- feedparser/urls.py +233 -0
- feedparser/util.py +157 -0
- forkparser-2026.1.0.dist-info/METADATA +75 -0
- forkparser-2026.1.0.dist-info/RECORD +38 -0
- forkparser-2026.1.0.dist-info/WHEEL +4 -0
- forkparser-2026.1.0.dist-info/licenses/LICENSE +65 -0
feedparser/sgml.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
2
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is a part of feedparser.
|
|
6
|
+
#
|
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
|
8
|
+
# modification, are permitted provided that the following conditions are met:
|
|
9
|
+
#
|
|
10
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
11
|
+
# this list of conditions and the following disclaimer.
|
|
12
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
# and/or other materials provided with the distribution.
|
|
15
|
+
#
|
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
17
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
19
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
20
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
21
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
22
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
23
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
24
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
25
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
26
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
27
|
+
|
|
28
|
+
import re
|
|
29
|
+
|
|
30
|
+
import sgmllib # type: ignore[import]
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
"sgmllib",
|
|
34
|
+
"charref",
|
|
35
|
+
"tagfind",
|
|
36
|
+
"attrfind",
|
|
37
|
+
"entityref",
|
|
38
|
+
"incomplete",
|
|
39
|
+
"interesting",
|
|
40
|
+
"shorttag",
|
|
41
|
+
"shorttagopen",
|
|
42
|
+
"starttagopen",
|
|
43
|
+
"endbracket",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
# sgmllib ships module-level regular expressions that are too limited for
# the XML feedparser has to handle. Instead of patching sgmllib's globals,
# replacements with identical names live in this module; the compiled code
# objects of several sgmllib.SGMLParser methods are copied into
# _BaseHTMLProcessor so they resolve these names in feedparser's scope
# rather than in sgmllib's.

# Numeric character reference, decimal or hexadecimal; group 1 holds the
# digits (including the leading x/X for the hexadecimal form).
charref = re.compile(r"&#(\d+|[xX][0-9a-fA-F]+);")

# Tag name: a letter followed by letters, digits, "-", "_", "." or ":".
tagfind = re.compile(r"[a-zA-Z][-_.:a-zA-Z0-9]*")

# Attribute: group 1 is the name, group 2 the optional "= value" part and
# group 3 the value itself (single-quoted, double-quoted or unquoted).
attrfind = re.compile(
    r"""\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)[$]?"""
    r"""(\s*=\s*('[^']*'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$()_#=~'"@]*))?"""
)
|
|
58
|
+
|
|
59
|
+
# The remaining sgmllib patterns are used as-is, but they still have to be
# bound to names in this module: without these aliases the lookups fail
# with NameError.
entityref = sgmllib.entityref
incomplete = sgmllib.incomplete
interesting = sgmllib.interesting
shorttag = sgmllib.shorttag
shorttagopen = sgmllib.shorttagopen
starttagopen = sgmllib.starttagopen
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class _EndBracketRegEx:
|
|
69
|
+
def __init__(self):
|
|
70
|
+
# Overriding the built-in sgmllib.endbracket regex allows the
|
|
71
|
+
# parser to find angle brackets embedded in element attributes.
|
|
72
|
+
self.endbracket = re.compile(
|
|
73
|
+
r"("
|
|
74
|
+
r"""[^'"<>]"""
|
|
75
|
+
r"""|"[^"]*"(?=>|/|\s|\w+=)"""
|
|
76
|
+
r"""|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])"""
|
|
77
|
+
r"""|.*?(?=[<>]"""
|
|
78
|
+
r")"
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
def search(self, target, index=0):
|
|
82
|
+
match = self.endbracket.match(target, index)
|
|
83
|
+
if match is not None:
|
|
84
|
+
# Returning a new object in the calling thread's context
|
|
85
|
+
# resolves a thread-safety issue.
|
|
86
|
+
return EndBracketMatch(match)
|
|
87
|
+
return None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class EndBracketMatch:
|
|
91
|
+
def __init__(self, match):
|
|
92
|
+
self.match = match
|
|
93
|
+
|
|
94
|
+
def start(self, n):
|
|
95
|
+
return self.match.end(n)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
endbracket = _EndBracketRegEx()
|
feedparser/urls.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
2
|
+
# Copyright 2025 Tom Most <twm@freecog.net>
|
|
3
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
4
|
+
# All rights reserved.
|
|
5
|
+
#
|
|
6
|
+
# This file is a part of feedparser.
|
|
7
|
+
#
|
|
8
|
+
# Redistribution and use in source and binary forms, with or without
|
|
9
|
+
# modification, are permitted provided that the following conditions are met:
|
|
10
|
+
#
|
|
11
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
12
|
+
# this list of conditions and the following disclaimer.
|
|
13
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
14
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
15
|
+
# and/or other materials provided with the distribution.
|
|
16
|
+
#
|
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
18
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
19
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
21
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
22
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
23
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
26
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
28
|
+
|
|
29
|
+
import re
|
|
30
|
+
import urllib.parse
|
|
31
|
+
|
|
32
|
+
from .html import BaseHTMLProcessor
|
|
33
|
+
|
|
34
|
+
# If you want feedparser to allow all URL schemes, set this to ()
|
|
35
|
+
# List culled from Python's urlparse documentation at:
|
|
36
|
+
# https://docs.python.org/3/library/urllib.parse.html
|
|
37
|
+
# as well as from "URI scheme" at Wikipedia:
|
|
38
|
+
# https://secure.wikimedia.org/wikipedia/en/wiki/URI_scheme
|
|
39
|
+
# Many more will likely need to be added!
|
|
40
|
+
# Whitelist of URI schemes that may survive sanitisation. Each scheme is
# listed exactly once; membership tests are the only use made of it in
# this module. An empty tuple disables scheme filtering.
ACCEPTABLE_URI_SCHEMES = (
    "file",
    "ftp",
    "gopher",
    "h323",
    "hdl",
    "http",
    "https",
    "imap",
    "magnet",
    "mailto",
    "mms",
    "news",
    "nntp",
    "prospero",
    "rsync",
    "rtsp",
    "rtspu",
    "sftp",
    "shttp",
    "sip",
    "sips",
    "snews",
    "svn",
    "svn+ssh",
    "telnet",
    "wais",
    # Additional common-but-unofficial schemes
    "aim",
    "callto",
    "cvs",
    "facetime",
    "feed",
    "git",
    "gtalk",
    "irc",
    "ircs",
    "irc6",
    "itms",
    "msnim",
    "skype",
    "ssh",
    "smb",
    "ymsg",
)
|
|
87
|
+
|
|
88
|
+
_urifixer = re.compile("^([A-Za-z][A-Za-z0-9+-.]*://)(/*)(.*?)")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _urljoin(base, uri):
|
|
92
|
+
uri = _urifixer.sub(r"\1\3", uri)
|
|
93
|
+
try:
|
|
94
|
+
uri = urllib.parse.urljoin(base, uri)
|
|
95
|
+
except ValueError:
|
|
96
|
+
uri = ""
|
|
97
|
+
return uri
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def make_safe_absolute_uri(base, rel=None):
    """
    Resolve *rel* against *base* and reject URIs with a disallowed scheme.

    :param base: The base URI. When it is empty, *rel* (or ``""``) is
        returned as-is, without any scheme check.
    :param rel: The possibly-relative URI. When it is empty, *base* itself
        is returned if it has no scheme or an acceptable one.
    :return: The resolved URI, or ``""`` when its scheme is not listed in
        ``ACCEPTABLE_URI_SCHEMES`` or *base* cannot be parsed.
    """
    # An empty whitelist switches scheme filtering off entirely.
    if not ACCEPTABLE_URI_SCHEMES:
        return _urljoin(base, rel or "")
    if not base:
        return rel or ""
    if not rel:
        try:
            scheme = urllib.parse.urlparse(base)[0]
        except ValueError:
            return ""
        if not scheme or scheme in ACCEPTABLE_URI_SCHEMES:
            return base
        return ""
    uri = _urljoin(base, rel)
    # Schemes are case-insensitive, and urljoin() can hand back *rel*
    # unchanged (for example "HTTPS://..." against an "http://" base), so
    # normalise the case before consulting the lowercase whitelist.
    scheme = uri.strip().split(":", 1)[0].lower()
    if scheme not in ACCEPTABLE_URI_SCHEMES:
        return ""
    return uri
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# Matches image candidate strings within a srcset attribute value as
|
|
121
|
+
# described in https://html.spec.whatwg.org/multipage/images.html#srcset-attributes
|
|
122
|
+
_srcset_candidate = re.compile(
    r"""
    # ASCII whitespace: https://infra.spec.whatwg.org/#ascii-whitespace
    [\t\n\f\r ]*
    (
        # URL that doesn't start or end with a comma
        (?!,)
        [^\t\n\f\r ]+
        (?<!,)
    )
    (
        # Width descriptor like "1234w"
        # https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#non-negative-integers
        [\t\n\f\r ]+
        \d+w
    |
        # Pixel density descriptor like "2.0x"
        # https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#valid-floating-point-number
        [\t\n\f\r ]+
        \d+(?:\.\d+)?(?:[eE][-+]?\d+)?x
    |
    )
    [\t\n\f\r ]*
    (?:,|\Z)
    """,
    re.VERBOSE | re.ASCII,
)


def srcset_candidates(value: str) -> list[tuple[str, str]]:
    """
    Break a ``srcset`` attribute value into ``(url, descriptor)`` pairs:

    >>> srcset_candidates("/foo.jpg, /foo.2x.jpg 2x")
    [('/foo.jpg', ''), ('/foo.2x.jpg', '2x')]

    The descriptor is ``""`` when a candidate has none. URLs are not
    validated, and duplicate or conflicting descriptors are not detected.

    Scanning stops at the first position where no candidate matches; the
    candidates collected up to that point are returned, so a value whose
    first candidate cannot be parsed yields an empty list.
    """
    found = []
    cursor = 0
    while True:
        hit = _srcset_candidate.match(value, cursor)
        if hit is None:
            break
        url, descriptor = hit.group(1, 2)
        # Group 2 still carries the whitespace that separated it from the URL.
        found.append((url, descriptor.strip("\t\n\f\r ")))
        cursor = hit.end()
    return found
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class RelativeURIResolver(BaseHTMLProcessor):
    """HTML processor that rewrites URI-bearing attributes against a base URI."""

    # (tag, attribute) pairs whose value is a single URI to resolve.
    relative_uris = {
        ("a", "href"),
        ("applet", "codebase"),
        ("area", "href"),
        ("audio", "src"),
        ("blockquote", "cite"),
        ("body", "background"),
        ("del", "cite"),
        ("form", "action"),
        ("frame", "longdesc"),
        ("frame", "src"),
        ("iframe", "longdesc"),
        ("iframe", "src"),
        ("head", "profile"),
        ("img", "longdesc"),
        ("img", "src"),
        ("img", "usemap"),
        ("input", "src"),
        ("input", "usemap"),
        ("ins", "cite"),
        ("link", "href"),
        ("object", "classid"),
        ("object", "codebase"),
        ("object", "data"),
        ("object", "usemap"),
        ("q", "cite"),
        ("script", "src"),
        ("source", "src"),
        ("video", "poster"),
        ("video", "src"),
    }

    def __init__(self, baseuri, encoding, _type):
        BaseHTMLProcessor.__init__(self, encoding, _type)
        # Base against which every URI attribute is resolved.
        self.baseuri = baseuri

    def resolve_uri(self, uri):
        """Return *uri* (stripped of surrounding whitespace) made absolute and scheme-checked."""
        return make_safe_absolute_uri(self.baseuri, uri.strip())

    def resolve_srcset(self, srcset):
        """
        Resolve every URL inside a ``srcset`` value.

        Each candidate is rebuilt as ``"<uri> <descriptor>"`` (or just the
        URI when it has no descriptor) and the results are joined with
        ``", "``.
        """
        rewritten = []
        for raw_uri, descriptor in srcset_candidates(srcset):
            absolute = self.resolve_uri(raw_uri)
            rewritten.append(f"{absolute} {descriptor}" if descriptor else absolute)
        return ", ".join(rewritten)

    def unknown_starttag(self, tag, attrs):
        """Resolve URI attributes of *tag*, then defer to the base class handler."""
        attrs = self.normalize_attrs(attrs)
        for index, (name, value) in enumerate(attrs):
            if (tag, name) in self.relative_uris:
                replacement = self.resolve_uri(value)
            elif tag in {"img", "source"} and name == "srcset":
                replacement = self.resolve_srcset(value)
            else:
                continue
            # Replace the pair in place; the list length never changes, so
            # updating it while enumerating is safe.
            attrs[index] = (name, replacement)
        super().unknown_starttag(tag, attrs)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def resolve_relative_uris(html_source, base_uri, encoding, type_):
    """
    Run *html_source* through a :class:`RelativeURIResolver` built with
    *base_uri*, *encoding* and *type_*, and return the processor's output.
    """
    resolver = RelativeURIResolver(base_uri, encoding, type_)
    resolver.feed(html_source)
    return resolver.output()
|
feedparser/util.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
2
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is a part of feedparser.
|
|
6
|
+
#
|
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
|
8
|
+
# modification, are permitted provided that the following conditions are met:
|
|
9
|
+
#
|
|
10
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
11
|
+
# this list of conditions and the following disclaimer.
|
|
12
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
# and/or other materials provided with the distribution.
|
|
15
|
+
#
|
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
17
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
19
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
20
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
21
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
22
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
23
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
24
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
25
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
26
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
27
|
+
|
|
28
|
+
import warnings
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class FeedParserDict(dict):
    """
    ``dict`` subclass with legacy key aliases, a handful of computed keys
    and attribute-style read access.
    """

    # Legacy key name -> current key name. A list value gives the keys to
    # try, in order, when reading; when writing, the first entry is used.
    keymap = {
        "channel": "feed",
        "items": "entries",
        "guid": "id",
        "date": "updated",
        "date_parsed": "updated_parsed",
        "description": ["summary", "subtitle"],
        "description_detail": ["summary_detail", "subtitle_detail"],
        "url": ["href"],
        "modified": "updated",
        "modified_parsed": "updated_parsed",
        "issued": "published",
        "issued_parsed": "published_parsed",
        "copyright": "rights",
        "copyright_detail": "rights_detail",
        "tagline": "subtitle",
        "tagline_detail": "subtitle_detail",
    }

    def __getitem__(self, key, _stacklevel=2):
        """
        Look up *key*, honouring computed keys and legacy aliases.

        ``category``, ``enclosures`` and ``license`` are derived from the
        stored ``tags`` / ``links`` values. ``updated`` and
        ``updated_parsed`` fall back to their ``published`` counterparts
        with a :class:`DeprecationWarning`. Any other key is translated
        through :attr:`keymap`.

        :param _stacklevel: ``stacklevel`` passed to :func:`warnings.warn`
            for the deprecation warning.
        :raises KeyError: If no value can be found for *key*.
        """
        if key == "category":
            try:
                return dict.__getitem__(self, "tags")[0]["term"]
            except IndexError:
                raise KeyError("object doesn't have key 'category'")

        if key == "enclosures":
            enclosures = []
            for link in dict.__getitem__(self, "links"):
                if link["rel"] == "enclosure":
                    # Copy every field of the link except "rel".
                    fields = [
                        (name, value) for (name, value) in link.items() if name != "rel"
                    ]
                    enclosures.append(FeedParserDict(fields))
            return enclosures

        if key == "license":
            for link in dict.__getitem__(self, "links"):
                if link["rel"] == "license" and "href" in link:
                    return link["href"]
            # No license link: behave like a plain lookup of "license".
            return dict.__getitem__(self, key)

        if key == "updated":
            # Temporarily help developers out by keeping the old
            # broken behavior that was reported in issue 310.
            # This fix was proposed in issue 328.
            if dict.__contains__(self, "updated") or not dict.__contains__(
                self, "published"
            ):
                return dict.__getitem__(self, "updated")
            warnings.warn(
                "To avoid breaking existing software while "
                "fixing issue 310, a temporary mapping has been created "
                "from `updated` to `published` if `updated` doesn't "
                "exist. This fallback will be removed in a future version "
                "of feedparser.",
                DeprecationWarning,
                stacklevel=_stacklevel,
            )
            return dict.__getitem__(self, "published")

        if key == "updated_parsed":
            if dict.__contains__(self, "updated_parsed") or not dict.__contains__(
                self, "published_parsed"
            ):
                return dict.__getitem__(self, "updated_parsed")
            warnings.warn(
                "To avoid breaking existing software while "
                "fixing issue 310, a temporary mapping has been created "
                "from `updated_parsed` to `published_parsed` if "
                "`updated_parsed` doesn't exist. This fallback will be "
                "removed in a future version of feedparser.",
                DeprecationWarning,
                stacklevel=_stacklevel,
            )
            return dict.__getitem__(self, "published_parsed")

        realkey = self.keymap.get(key, key)
        if isinstance(realkey, list):
            # Several candidate keys: the first one that is stored wins.
            for candidate in realkey:
                if dict.__contains__(self, candidate):
                    return dict.__getitem__(self, candidate)
        elif dict.__contains__(self, realkey):
            return dict.__getitem__(self, realkey)
        return dict.__getitem__(self, key)

    def __contains__(self, key):
        """Return whether *key* can be read, using the same aliasing as ``__getitem__``."""
        if key in ("updated", "updated_parsed"):
            # Temporarily help developers out by keeping the old
            # broken behavior that was reported in issue 310.
            # This fix was proposed in issue 328.
            return dict.__contains__(self, key)
        try:
            self.__getitem__(key, _stacklevel=3)
        except KeyError:
            return False
        else:
            return True

    has_key = __contains__

    def get(self, key, default=None):
        """Return the value for *key* via ``__getitem__``, or *default* on ``KeyError``."""
        try:
            return self.__getitem__(key, _stacklevel=3)
        except KeyError:
            return default

    def __setitem__(self, key, value):
        """Store *value* under the current name for *key* (first entry of a list alias)."""
        target = self.keymap.get(key, key)
        if isinstance(target, list):
            target = target[0]
        return dict.__setitem__(self, target, value)

    def __getattr__(self, key):
        """Expose keys as attributes, raising ``AttributeError`` for unknown ones."""
        # __getattribute__() runs first; this is reached only when the
        # attribute was not found the normal way.
        try:
            return self.__getitem__(key, _stacklevel=3)
        except KeyError:
            raise AttributeError("object has no attribute '%s'" % key)

    def __hash__(self):
        # This is incorrect behavior -- dictionaries shouldn't be hashable.
        # Note to self: remove this behavior in the future.
        return id(self)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: forkparser
|
|
3
|
+
Version: 2026.1.0
|
|
4
|
+
Summary: Parse Atom/RSS/JSON feeds in Python
|
|
5
|
+
License-Expression: BSD-2-Clause
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Keywords: atom,feed,json,rdf,rss
|
|
8
|
+
Author: Kurt McKee
|
|
9
|
+
Author-email: contactme@kurtmckee.org
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Topic :: Text Processing :: Markup
|
|
14
|
+
Requires-Dist: requests (>=2.20.0)
|
|
15
|
+
Requires-Dist: sgmllib3k (==1.0.0)
|
|
16
|
+
Project-URL: Changelog, https://feedparser.readthedocs.io/en/latest/changelog/
|
|
17
|
+
Project-URL: Documentation, https://feedparser.readthedocs.io/en/latest/
|
|
18
|
+
Project-URL: Source, https://github.com/kurtmckee/feedparser
|
|
19
|
+
Description-Content-Type: text/x-rst
|
|
20
|
+
|
|
21
|
+
..
|
|
22
|
+
This file is part of feedparser.
|
|
23
|
+
Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
24
|
+
Copyright 2002-2008 Mark Pilgrim
|
|
25
|
+
Released under the BSD 2-clause license.
|
|
26
|
+
|
|
27
|
+
feedparser
|
|
28
|
+
##########
|
|
29
|
+
|
|
30
|
+
Parse Atom, RSS, and JSON feeds in Python.
|
|
31
|
+
|
|
32
|
+
----
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
Installation
|
|
36
|
+
============
|
|
37
|
+
|
|
38
|
+
feedparser can be installed by running pip:
|
|
39
|
+
|
|
40
|
+
.. code-block:: console
|
|
41
|
+
|
|
42
|
+
$ pip install forkparser
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
Documentation
|
|
46
|
+
=============
|
|
47
|
+
|
|
48
|
+
The feedparser documentation is available on the web at:
|
|
49
|
+
|
|
50
|
+
https://feedparser.readthedocs.io/en/latest/
|
|
51
|
+
|
|
52
|
+
It can also be built and browsed locally using `tox`_:
|
|
53
|
+
|
|
54
|
+
.. code-block:: console
|
|
55
|
+
|
|
56
|
+
$ tox run -e docs
|
|
57
|
+
|
|
58
|
+
This will produce HTML documentation in the ``build/docs/`` directory.
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
Testing
|
|
62
|
+
=======
|
|
63
|
+
|
|
64
|
+
Feedparser has an extensive test suite, powered by `tox`_:
|
|
65
|
+
|
|
66
|
+
.. code-block:: console
|
|
67
|
+
|
|
68
|
+
$ tox run-parallel
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
.. Links
|
|
72
|
+
.. =====
|
|
73
|
+
..
|
|
74
|
+
.. _tox: https://tox.wiki/
|
|
75
|
+
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
feedparser/__init__.py,sha256=p_fcl9O3VqOGEQmlyuFwVo3-SEJ24ZqBFfXU21cSUM4,2404
|
|
2
|
+
feedparser/api.py,sha256=kXbsQcmwnuHyVcoXkMoKJQWFwiHv5e4X5BYn8j3oHYg,14432
|
|
3
|
+
feedparser/datetimes/__init__.py,sha256=sW2wF4_CXuVKPaMLtvQUE9EdWOCdcPMGouUbViAxnG0,2896
|
|
4
|
+
feedparser/datetimes/asctime.py,sha256=xEBrVkRKhOUBpDgdDOoJWH_hZA7byxV7lKfikXm-pIE,2512
|
|
5
|
+
feedparser/datetimes/greek.py,sha256=ML7DA3r0F4mKXzsJ9OfYINEVUTzPwiaHtBNVtQ4e0fw,3872
|
|
6
|
+
feedparser/datetimes/hungarian.py,sha256=CptCnTmuk7zziZhPHxr0WQbxcmAQVDhn_WZ8U01VsEs,2656
|
|
7
|
+
feedparser/datetimes/iso8601.py,sha256=8aVyhdEhW0SBhaul6af2CJQDT87OawSh3LFS11s7EOQ,5529
|
|
8
|
+
feedparser/datetimes/korean.py,sha256=CuFJF44lpsIVzqITwfcRART4I7mRK8eBBd8fITG7PLM,3369
|
|
9
|
+
feedparser/datetimes/perforce.py,sha256=tV5MclwkTVulyCxIWypBsulhoiKsPF0l-RYJrmSP0-Q,2313
|
|
10
|
+
feedparser/datetimes/rfc822.py,sha256=WInrY77sVlqvbreuBuR_uiWRajF0Jh0Aje9aAZexqnA,5707
|
|
11
|
+
feedparser/datetimes/w3dtf.py,sha256=0ZvJMQm0PHdhnVi_AkdM_7vUqPQBqnFrY8xY6S_u8TQ,4562
|
|
12
|
+
feedparser/encodings.py,sha256=NUXPbVO0p9wqiwpYWJMPxJbSZlyijxW0ad5a-XmC46E,22659
|
|
13
|
+
feedparser/exceptions.py,sha256=Hwp1K-yd1-WVxTjwLfA8PlPL3qOFYJ2Qcme2GU_LXLc,1897
|
|
14
|
+
feedparser/html.py,sha256=2qqO2SBtn6MwjILSXV9BXt3l97z49RWeUuJrf5wMC6A,10949
|
|
15
|
+
feedparser/http.py,sha256=oGkCrU-hZWxPeDVSB4jNqMIlUlqcv7A3NR4eg0KFeOY,2746
|
|
16
|
+
feedparser/mixin.py,sha256=coRar44a8BijoVH71pW9S96vnk2CibMw8WTIZxk5tsg,32207
|
|
17
|
+
feedparser/namespaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
+
feedparser/namespaces/_base.py,sha256=YfkNX_c2Q-Sq3Ggy54hishQd0mNX6evK76JGnzb8H9o,17355
|
|
19
|
+
feedparser/namespaces/admin.py,sha256=VOFFo14mbxKldN0bFEZDTHBmAFAtvpPbLjp-IpviTCY,2309
|
|
20
|
+
feedparser/namespaces/cc.py,sha256=2Hg0011gkBI0Cm9e8xcZz-i8yuWN_HmtRx21gTqV3rE,2881
|
|
21
|
+
feedparser/namespaces/dc.py,sha256=96KTj5RdLk94YH5jt92KZcwFgHE8AzZK6_rkahAo0J4,4530
|
|
22
|
+
feedparser/namespaces/georss.py,sha256=ejRFRjY3NM0m09QCQJshT9XQ0kFEwmKEuSLUqEA1Pzo,12961
|
|
23
|
+
feedparser/namespaces/itunes.py,sha256=68QLBuCYWdw55pEby8jqmlberbWL39eyu1euliM-Nhg,4153
|
|
24
|
+
feedparser/namespaces/mediarss.py,sha256=Z8WCpdbPaex0-eWl3y623PuR5ADqSytNH8jPkPvykPY,5358
|
|
25
|
+
feedparser/namespaces/psc.py,sha256=oJ_X83j_6sIHkJdyGiiQPBI2QQa6PsCGccGiZh7ZCKw,2824
|
|
26
|
+
feedparser/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
+
feedparser/parsers/json.py,sha256=ZG2zJKtjAcZBALyh033dMrGWxzN0B3PPLEIIVf_OX8s,4759
|
|
28
|
+
feedparser/parsers/loose.py,sha256=toNl3r4NGrMmxbO9KI8hYuhWMzx03IJSGCCs2E-aO4k,3391
|
|
29
|
+
feedparser/parsers/strict.py,sha256=cS3xSTvEBeXHxLaVmbbgBVlfwquXR5c9wCdUqja7kA8,5862
|
|
30
|
+
feedparser/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
+
feedparser/sanitizer.py,sha256=baus3XuOSbaZTBh_o9of82q95sykkn4qHjKO7sjV0M8,24521
|
|
32
|
+
feedparser/sgml.py,sha256=hNmbdhpUCMOfNNYJB6x7DY2Pe8_vNCt18H5gR_EoUSY,3518
|
|
33
|
+
feedparser/urls.py,sha256=G0NIkO6Nc_yZ4ZUrBxuNKyu6iTkZQykU2YvxMvnrbuU,6789
|
|
34
|
+
feedparser/util.py,sha256=Auo4Y-wxVOVjM4GoGyO-wh3ajpTp7sItuR7EtHk8HSc,6416
|
|
35
|
+
forkparser-2026.1.0.dist-info/METADATA,sha256=XVxfiAkcRp8qV6Bk6Z0UvQIBj9yjqBra6NBLGM3mXDM,1614
|
|
36
|
+
forkparser-2026.1.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
37
|
+
forkparser-2026.1.0.dist-info/licenses/LICENSE,sha256=dybQb1wSoz6JDHspMbaEAGejF-MmwQaN0wWiG0aTycM,3168
|
|
38
|
+
forkparser-2026.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
feedparser and its unit tests are released under the following license:
|
|
2
|
+
|
|
3
|
+
----- begin license block -----
|
|
4
|
+
|
|
5
|
+
Copyright (C) 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
6
|
+
Copyright (C) 2002-2008 Mark Pilgrim
|
|
7
|
+
All rights reserved.
|
|
8
|
+
|
|
9
|
+
Redistribution and use in source and binary forms, with or without modification,
|
|
10
|
+
are permitted provided that the following conditions are met:
|
|
11
|
+
|
|
12
|
+
* Redistributions of source code must retain the above copyright notice,
|
|
13
|
+
this list of conditions and the following disclaimer.
|
|
14
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
|
15
|
+
this list of conditions and the following disclaimer in the documentation
|
|
16
|
+
and/or other materials provided with the distribution.
|
|
17
|
+
|
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
21
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
22
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
23
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
24
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
25
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
26
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
27
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
28
|
+
POSSIBILITY OF SUCH DAMAGE.
|
|
29
|
+
|
|
30
|
+
----- end license block -----
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
The feedparser documentation (everything in the docs/ directory) is
|
|
37
|
+
released under the following license:
|
|
38
|
+
|
|
39
|
+
----- begin license block -----
|
|
40
|
+
|
|
41
|
+
Copyright (C) 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
42
|
+
Copyright (C) 2004-2008 Mark Pilgrim. All rights reserved.
|
|
43
|
+
|
|
44
|
+
Redistribution and use in source (Sphinx ReST) and "compiled" forms (HTML, PDF,
|
|
45
|
+
PostScript, RTF and so forth) with or without modification, are permitted
|
|
46
|
+
provided that the following conditions are met:
|
|
47
|
+
|
|
48
|
+
* Redistributions of source code (Sphinx ReST) must retain the above copyright
|
|
49
|
+
notice, this list of conditions and the following disclaimer.
|
|
50
|
+
* Redistributions in compiled form (converted to HTML, PDF, PostScript, RTF and
|
|
51
|
+
other formats) must reproduce the above copyright notice, this list of
|
|
52
|
+
conditions and the following disclaimer in the documentation and/or other
|
|
53
|
+
materials provided with the distribution.
|
|
54
|
+
|
|
55
|
+
THIS DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
56
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
57
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
58
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
59
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
60
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
61
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
62
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
63
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
64
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS DOCUMENTATION, EVEN IF ADVISED OF THE
|
|
65
|
+
POSSIBILITY OF SUCH DAMAGE.
|