forkparser 2026.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- feedparser/__init__.py +66 -0
- feedparser/api.py +376 -0
- feedparser/datetimes/__init__.py +73 -0
- feedparser/datetimes/asctime.py +80 -0
- feedparser/datetimes/greek.py +90 -0
- feedparser/datetimes/hungarian.py +66 -0
- feedparser/datetimes/iso8601.py +160 -0
- feedparser/datetimes/korean.py +94 -0
- feedparser/datetimes/perforce.py +63 -0
- feedparser/datetimes/rfc822.py +179 -0
- feedparser/datetimes/w3dtf.py +128 -0
- feedparser/encodings.py +649 -0
- feedparser/exceptions.py +55 -0
- feedparser/html.py +350 -0
- feedparser/http.py +74 -0
- feedparser/mixin.py +838 -0
- feedparser/namespaces/__init__.py +0 -0
- feedparser/namespaces/_base.py +547 -0
- feedparser/namespaces/admin.py +53 -0
- feedparser/namespaces/cc.py +70 -0
- feedparser/namespaces/dc.py +138 -0
- feedparser/namespaces/georss.py +682 -0
- feedparser/namespaces/itunes.py +113 -0
- feedparser/namespaces/mediarss.py +142 -0
- feedparser/namespaces/psc.py +74 -0
- feedparser/parsers/__init__.py +0 -0
- feedparser/parsers/json.py +135 -0
- feedparser/parsers/loose.py +75 -0
- feedparser/parsers/strict.py +141 -0
- feedparser/py.typed +0 -0
- feedparser/sanitizer.py +978 -0
- feedparser/sgml.py +98 -0
- feedparser/urls.py +233 -0
- feedparser/util.py +157 -0
- forkparser-2026.1.0.dist-info/METADATA +75 -0
- forkparser-2026.1.0.dist-info/RECORD +38 -0
- forkparser-2026.1.0.dist-info/WHEEL +4 -0
- forkparser-2026.1.0.dist-info/licenses/LICENSE +65 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# Support for the iTunes format
|
|
2
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
3
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
4
|
+
# All rights reserved.
|
|
5
|
+
#
|
|
6
|
+
# This file is a part of feedparser.
|
|
7
|
+
#
|
|
8
|
+
# Redistribution and use in source and binary forms, with or without
|
|
9
|
+
# modification, are permitted provided that the following conditions are met:
|
|
10
|
+
#
|
|
11
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
12
|
+
# this list of conditions and the following disclaimer.
|
|
13
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
14
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
15
|
+
# and/or other materials provided with the distribution.
|
|
16
|
+
#
|
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
18
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
19
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
21
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
22
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
23
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
26
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
28
|
+
|
|
29
|
+
from ..util import FeedParserDict
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Namespace:
    """Element handlers for the iTunes podcast namespace.

    This class is a mixin: the handlers below call methods and set attributes
    (``push``, ``pop``, ``_get_context``, ``_add_tag``, ``_sync_author_detail``
    and the ``_start_*``/``_end_*`` handlers of the core elements) that are
    not defined here and must be provided by the class it is combined with.
    """

    supported_namespaces = {
        # Canonical namespace
        "http://www.itunes.com/DTDs/PodCast-1.0.dtd": "itunes",
        # Extra namespace
        "http://example.com/DTDs/PodCast-1.0.dtd": "itunes",
    }

    # The following handlers simply forward to the handler of the
    # equivalent un-prefixed element.

    def _start_itunes_author(self, attrs_d):
        self._start_author(attrs_d)

    def _end_itunes_author(self):
        self._end_author()

    def _end_itunes_category(self):
        self._end_category()

    def _start_itunes_name(self, attrs_d):
        self._start_name(attrs_d)

    def _end_itunes_name(self):
        self._end_name()

    def _start_itunes_email(self, attrs_d):
        self._start_email(attrs_d)

    def _end_itunes_email(self):
        self._end_email()

    def _start_itunes_subtitle(self, attrs_d):
        self._start_subtitle(attrs_d)

    def _end_itunes_subtitle(self):
        self._end_subtitle()

    def _start_itunes_summary(self, attrs_d):
        self._start_summary(attrs_d)

    def _end_itunes_summary(self):
        self._end_summary()

    def _start_itunes_owner(self, attrs_d):
        """Open an ``itunes:owner`` element; it is recorded as the publisher."""
        self.inpublisher = 1
        self.push("publisher", 0)

    def _end_itunes_owner(self):
        """Close ``itunes:owner`` and sync the publisher's author detail."""
        self.pop("publisher")
        self.inpublisher = 0
        self._sync_author_detail("publisher")

    def _end_itunes_keywords(self):
        """Add one tag per non-blank entry of the comma-separated keywords."""
        keywords = self.pop("itunes_keywords")
        for keyword in (piece.strip() for piece in keywords.split(",")):
            if keyword:
                self._add_tag(keyword, "http://www.itunes.com/", None)

    def _start_itunes_category(self, attrs_d):
        """Add the category's ``text`` attribute as a tag, then open it."""
        self._add_tag(attrs_d.get("text"), "http://www.itunes.com/", None)
        self.push("category", 1)

    def _start_itunes_image(self, attrs_d):
        """Store the image location, preferring ``href`` over ``url``."""
        self.push("itunes_image", 0)
        location = attrs_d.get("href") or attrs_d.get("url")
        if location:
            self._get_context()["image"] = FeedParserDict({"href": location})

    # itunes:link is handled exactly like itunes:image.
    _start_itunes_link = _start_itunes_image

    def _end_itunes_block(self):
        """Store 1 when the element text is ``yes`` or ``Yes``, otherwise 0."""
        value = self.pop("itunes_block", 0)
        blocked = 1 if value in ("yes", "Yes") else 0
        self._get_context()["itunes_block"] = blocked

    def _end_itunes_explicit(self):
        """Store the explicit flag as True, False or None."""
        value = self.pop("itunes_explicit", 0)
        # 'yes' -> True, 'clean' -> False, anything else -> None.
        # False and None are both falsy, so applications that only need to
        # know whether the content is explicit can ignore the difference.
        if value == "yes":
            explicit = True
        elif value == "clean":
            explicit = False
        else:
            explicit = None
        self._get_context()["itunes_explicit"] = explicit
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# Support for the Media RSS format
|
|
2
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
3
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
4
|
+
# All rights reserved.
|
|
5
|
+
#
|
|
6
|
+
# This file is a part of feedparser.
|
|
7
|
+
#
|
|
8
|
+
# Redistribution and use in source and binary forms, with or without
|
|
9
|
+
# modification, are permitted provided that the following conditions are met:
|
|
10
|
+
#
|
|
11
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
12
|
+
# this list of conditions and the following disclaimer.
|
|
13
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
14
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
15
|
+
# and/or other materials provided with the distribution.
|
|
16
|
+
#
|
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
18
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
19
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
21
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
22
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
23
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
26
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
28
|
+
|
|
29
|
+
from ..util import FeedParserDict
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Namespace:
    """Element handlers for the Media RSS namespace.

    This class is a mixin: the handlers below call methods and use attributes
    (``push``, ``pop``, ``_get_context``, ``_add_tag``, ``title_depth`` and
    the ``_start_*``/``_end_*`` handlers of the core elements) that are not
    defined here and must be provided by the class it is combined with.
    """

    supported_namespaces = {
        # Canonical namespace
        "http://search.yahoo.com/mrss/": "media",
        # Old namespace (no trailing slash)
        "http://search.yahoo.com/mrss": "media",
    }

    def _start_media_category(self, attrs_d):
        """Handle ``media:category`` as a category with a default scheme."""
        attrs_d.setdefault("scheme", "http://search.yahoo.com/mrss/category_schema")
        self._start_category(attrs_d)

    def _end_media_category(self):
        self._end_category()

    def _end_media_keywords(self):
        """Add one tag per non-blank entry of the comma-separated keywords."""
        keywords = self.pop("media_keywords")
        for keyword in (piece.strip() for piece in keywords.split(",")):
            if keyword:
                self._add_tag(keyword, None, None)

    def _start_media_title(self, attrs_d):
        self._start_title(attrs_d)

    def _end_media_title(self):
        """Close the title while leaving ``title_depth`` as it was."""
        saved_depth = self.title_depth
        self._end_title()
        self.title_depth = saved_depth

    def _start_media_group(self, attrs_d):
        # don't do anything, but don't break the enclosed tags either
        pass

    def _start_media_rating(self, attrs_d):
        """Record the attributes of the first ``media:rating`` seen."""
        self._get_context().setdefault("media_rating", attrs_d)
        self.push("rating", 1)

    def _end_media_rating(self):
        """Attach non-blank element text as the rating's ``content``."""
        text = self.pop("rating")
        if text is None or not text.strip():
            return
        self._get_context()["media_rating"]["content"] = text

    def _start_media_credit(self, attrs_d):
        """Append the attributes of each ``media:credit`` to a list."""
        self._get_context().setdefault("media_credit", []).append(attrs_d)
        self.push("credit", 1)

    def _end_media_credit(self):
        """Attach non-blank element text to the most recent credit."""
        text = self.pop("credit")
        if text is None or not text.strip():
            return
        self._get_context()["media_credit"][-1]["content"] = text

    def _start_media_description(self, attrs_d):
        self._start_description(attrs_d)

    def _end_media_description(self):
        self._end_description()

    def _start_media_restriction(self, attrs_d):
        """Record the attributes of the first ``media:restriction`` seen."""
        self._get_context().setdefault("media_restriction", attrs_d)
        self.push("restriction", 1)

    def _end_media_restriction(self):
        """Store the space-separated element text as lower-cased entries."""
        text = self.pop("restriction")
        if text is None or not text.strip():
            return
        # Splitting on a single space means repeated spaces yield "" entries.
        entries = [piece.strip().lower() for piece in text.split(" ")]
        self._get_context()["media_restriction"]["content"] = entries

    def _start_media_license(self, attrs_d):
        """Record the attributes of the first ``media:license`` seen."""
        self._get_context().setdefault("media_license", attrs_d)
        self.push("license", 1)

    def _end_media_license(self):
        """Attach non-blank element text as the license's ``content``."""
        text = self.pop("license")
        if text is None or not text.strip():
            return
        self._get_context()["media_license"]["content"] = text

    def _start_media_content(self, attrs_d):
        """Append the attributes of each ``media:content`` to a list."""
        self._get_context().setdefault("media_content", []).append(attrs_d)

    def _start_media_thumbnail(self, attrs_d):
        """Append the attributes of each ``media:thumbnail`` to a list."""
        thumbnails = self._get_context().setdefault("media_thumbnail", [])
        self.push("url", 1)  # new
        thumbnails.append(attrs_d)

    def _end_media_thumbnail(self):
        """Use the element text as the URL when no ``url`` attribute was given."""
        text = self.pop("url")
        context = self._get_context()
        if text is None or not text.strip():
            return
        latest = context["media_thumbnail"][-1]
        if "url" not in latest:
            latest["url"] = text

    def _start_media_player(self, attrs_d):
        """Store the attributes of ``media:player`` in the context."""
        self.push("media_player", 0)
        self._get_context()["media_player"] = FeedParserDict(attrs_d)

    def _end_media_player(self):
        """Attach the element text as the player's ``content``."""
        text = self.pop("media_player")
        self._get_context()["media_player"]["content"] = text
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Support for the Podlove Simple Chapters format
|
|
2
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
3
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
4
|
+
# All rights reserved.
|
|
5
|
+
#
|
|
6
|
+
# This file is a part of feedparser.
|
|
7
|
+
#
|
|
8
|
+
# Redistribution and use in source and binary forms, with or without
|
|
9
|
+
# modification, are permitted provided that the following conditions are met:
|
|
10
|
+
#
|
|
11
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
12
|
+
# this list of conditions and the following disclaimer.
|
|
13
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
14
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
15
|
+
# and/or other materials provided with the distribution.
|
|
16
|
+
#
|
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
18
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
19
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
21
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
22
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
23
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
26
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
28
|
+
|
|
29
|
+
import datetime
|
|
30
|
+
import re
|
|
31
|
+
|
|
32
|
+
from .. import util
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class Namespace:
    """Element handlers for the Podlove Simple Chapters namespace.

    This class is a mixin: ``_get_context`` and ``_get_attribute`` are not
    defined here and must be provided by the class it is combined with.
    """

    supported_namespaces = {
        "http://podlove.org/simple-chapters": "psc",
    }

    def __init__(self):
        # Chapters are only captured while this flag is True.
        self.psc_chapters_flag = False
        super().__init__()

    def _start_psc_chapters(self, attrs_d):
        """Start collecting chapters unless the context already has some."""
        context = self._get_context()
        if "psc_chapters" in context:
            # A chapter list was already recorded for this context; the flag
            # is left untouched so chapters of this element are not captured.
            return
        self.psc_chapters_flag = True
        attrs_d["chapters"] = []
        context["psc_chapters"] = util.FeedParserDict(attrs_d)

    def _end_psc_chapters(self):
        """Stop collecting chapters."""
        self.psc_chapters_flag = False

    def _start_psc_chapter(self, attrs_d):
        """Append one chapter, with its parsed start time, to the list."""
        if not self.psc_chapters_flag:
            return
        start = self._get_attribute(attrs_d, "start")
        attrs_d["start_parsed"] = _parse_psc_chapter_start(start)

        chapter_list = self._get_context()["psc_chapters"]
        chapter_list["chapters"].append(util.FeedParserDict(attrs_d))
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Chapter start times look like "MM:SS", "HH:MM:SS" or either form followed
# by ".mmm" (exactly three millisecond digits); every field is two digits.
format_ = re.compile(r"^((\d{2}):)?(\d{2}):(\d{2})(\.(\d{3}))?$")


def _parse_psc_chapter_start(start):
    """Convert a chapter ``start`` value into a ``datetime.timedelta``.

    :param start: The raw attribute value, e.g. ``"01:02:03.004"``.
        Non-string values are accepted and treated as unparseable;
        presumably the caller passes ``None`` when the attribute is
        missing -- verify against ``_get_attribute``.
    :returns: The offset as a ``timedelta``, or ``None`` when *start* is not
        a string or does not match ``format_``.
    """

    # Guard against None (or any other non-string): handing it to the regex
    # would raise TypeError instead of yielding the "unparseable" result.
    if not isinstance(start, str):
        return None

    match = format_.match(start)
    if match is None:
        return None

    # Groups 1 and 5 are the optional wrappers around hours and milliseconds.
    _, hours, minutes, seconds, _, millis = match.groups()
    return datetime.timedelta(
        hours=int(hours or 0),
        minutes=int(minutes),
        seconds=int(seconds),
        milliseconds=int(millis or 0),
    )
|
|
File without changes
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# The JSON feed parser
|
|
2
|
+
# Copyright 2017 Beat Bolli
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is a part of feedparser.
|
|
6
|
+
#
|
|
7
|
+
# Redistribution and use in source and binary forms, with or without modification,
|
|
8
|
+
# are permitted provided that the following conditions are met:
|
|
9
|
+
#
|
|
10
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
11
|
+
# this list of conditions and the following disclaimer.
|
|
12
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
# and/or other materials provided with the distribution.
|
|
15
|
+
#
|
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
17
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
19
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
20
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
21
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
22
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
23
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
24
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
25
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
26
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
27
|
+
|
|
28
|
+
import json
|
|
29
|
+
|
|
30
|
+
from ..datetimes import _parse_date
|
|
31
|
+
from ..sanitizer import sanitize_html
|
|
32
|
+
from ..util import FeedParserDict
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class JSONParser:
    """Parse a JSON Feed document into feedparser-style dictionaries.

    After :meth:`feed` has run, ``version`` holds the short version label,
    ``feeddata`` the feed-level values and ``entries`` one dictionary per
    item of the document.
    """

    # Maps the document's "version" URL to the version label that is stored.
    VERSIONS = {
        "https://jsonfeed.org/version/1": "json1",
        "https://jsonfeed.org/version/1.1": "json11",
    }
    # (JSON Feed key, destination key) pairs copied verbatim for the feed...
    FEED_FIELDS = (
        ("title", "title"),
        ("icon", "image"),
        ("home_page_url", "link"),
        ("description", "description"),
    )
    # ...and for each item.
    ITEM_FIELDS = (
        ("title", "title"),
        ("id", "guid"),
        ("url", "link"),
        ("summary", "summary"),
        ("external_url", "source"),
    )

    def __init__(self, baseuri=None, baselang=None, encoding=None):
        self.baseuri = baseuri or ""
        self.lang = baselang or None
        self.encoding = encoding or "utf-8"  # character encoding

        self.version = None
        self.feeddata = FeedParserDict()
        self.namespacesInUse = []
        self.entries = []

    def feed(self, file):
        """Read a JSON Feed from *file* and populate this parser.

        :param file: A file-like object accepted by ``json.load``.
        :raises ValueError: If the document's ``version`` is not one of
            ``VERSIONS``. Errors raised by ``json.load`` itself and a
            ``KeyError`` for a missing ``items`` key propagate unchanged.
        """

        data = json.load(file)

        v = data.get("version", "")
        try:
            self.version = self.VERSIONS[v]
        except KeyError:
            # The KeyError carries no extra information; hide it.
            raise ValueError("Unrecognized JSONFeed version '%s'" % v) from None

        for src, dst in self.FEED_FIELDS:
            if src in data:
                self.feeddata[dst] = data[src]
        if "author" in data:
            self.parse_author(data["author"], self.feeddata)
        # TODO: hubs; expired has no RSS equivalent

        self.entries = [self.parse_entry(e) for e in data["items"]]

    def parse_entry(self, e):
        """Convert one JSON Feed item into an entry dictionary.

        :param e: The item mapping taken from the document's ``items`` list.
        :returns: A ``FeedParserDict`` holding the converted values.
        """

        entry = FeedParserDict()
        for src, dst in self.ITEM_FIELDS:
            if src in e:
                entry[dst] = e[src]

        # Plain text takes precedence over HTML when both are present.
        if "content_text" in e:
            entry["content"] = c = FeedParserDict()
            c["value"] = e["content_text"]
            c["type"] = "text"
        elif "content_html" in e:
            entry["content"] = c = FeedParserDict()
            c["value"] = sanitize_html(
                e["content_html"], self.encoding, "application/json"
            )
            c["type"] = "html"

        if "date_published" in e:
            entry["published"] = e["date_published"]
            entry["published_parsed"] = _parse_date(e["date_published"])
        # JSON Feed names this key "date_modified". Testing for any other key
        # (previously "date_updated") meant the value was never picked up, or
        # raised KeyError when only the tested key was present.
        if "date_modified" in e:
            entry["updated"] = e["date_modified"]
            entry["updated_parsed"] = _parse_date(e["date_modified"])

        if "tags" in e:
            entry["category"] = e["tags"]

        if "author" in e:
            self.parse_author(e["author"], entry)

        if "attachments" in e:
            entry["enclosures"] = [self.parse_attachment(a) for a in e["attachments"]]

        return entry

    @staticmethod
    def parse_author(parent, dest):
        """Copy author information from *parent* into *dest*.

        ``dest["author_detail"]`` is always created. A ``mailto:`` URL is
        stored as the author's ``email`` (without the scheme); any other URL
        is stored as ``href``.
        """

        dest["author_detail"] = detail = FeedParserDict()
        if "name" in parent:
            dest["author"] = detail["name"] = parent["name"]
        if "url" in parent:
            if parent["url"].startswith("mailto:"):
                detail["email"] = parent["url"][7:]
            else:
                detail["href"] = parent["url"]

    @staticmethod
    def parse_attachment(attachment):
        """Convert one attachment mapping into an enclosure dictionary.

        :raises KeyError: If *attachment* lacks ``url`` or ``mime_type``.
        """

        enc = FeedParserDict()
        enc["href"] = attachment["url"]
        enc["type"] = attachment["mime_type"]
        if "size_in_bytes" in attachment:
            enc["length"] = attachment["size_in_bytes"]
        return enc
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# The loose feed parser that interfaces with an SGML parsing library
|
|
2
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
3
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
4
|
+
# All rights reserved.
|
|
5
|
+
#
|
|
6
|
+
# This file is a part of feedparser.
|
|
7
|
+
#
|
|
8
|
+
# Redistribution and use in source and binary forms, with or without modification,
|
|
9
|
+
# are permitted provided that the following conditions are met:
|
|
10
|
+
#
|
|
11
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
12
|
+
# this list of conditions and the following disclaimer.
|
|
13
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
14
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
15
|
+
# and/or other materials provided with the distribution.
|
|
16
|
+
#
|
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
18
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
19
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
21
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
22
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
23
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
26
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class LooseXMLParser:
    """Helpers for parsing feeds with a forgiving, SGML-based parser.

    ``contentparams`` is read by :meth:`decode_entities` but only initialised
    to ``None`` here; presumably the class this one is combined with assigns
    a mapping before parsing starts -- verify against the caller.
    """

    contentparams = None

    def __init__(self, baseuri=None, baselang=None, encoding=None, entities=None):
        self.baseuri = baseuri or ""
        self.lang = baselang or None
        self.encoding = encoding or "utf-8"  # character encoding
        self.entities = entities or {}
        super().__init__()

    @staticmethod
    def _normalize_attributes(kv):
        """Normalize one ``(name, value)`` attribute pair.

        The name is lower-cased, the value too when the attribute is ``rel``
        or ``type``, and ``&amp;`` in the value is decoded to ``&``.
        """

        k = kv[0].lower()
        v = kv[1].lower() if k in ("rel", "type") else kv[1]
        # the sgml parser doesn't handle entities in attributes, nor
        # does it pass the attribute values through as unicode, while
        # strict xml parsers do -- account for this difference
        v = v.replace("&amp;", "&")
        return k, v

    def decode_entities(self, element, data):
        """Rewrite character references of markup characters in *data*.

        Numeric references to ``<``, ``>``, ``&``, ``"`` and ``'`` are first
        converted to the corresponding named entities. Unless the current
        content type ends in ``xml``, the named entities and the numeric
        reference to ``/`` are then decoded to the literal characters.

        :param element: Unused by this implementation.
        :param data: The text to process.
        :returns: The processed text.
        """

        # Step 1: numeric references -> named entities.
        data = data.replace("&#60;", "&lt;")
        data = data.replace("&#x3c;", "&lt;")
        data = data.replace("&#x3C;", "&lt;")
        data = data.replace("&#62;", "&gt;")
        data = data.replace("&#x3e;", "&gt;")
        data = data.replace("&#x3E;", "&gt;")
        data = data.replace("&#38;", "&amp;")
        data = data.replace("&#x26;", "&amp;")
        data = data.replace("&#34;", "&quot;")
        data = data.replace("&#x22;", "&quot;")
        data = data.replace("&#39;", "&apos;")
        data = data.replace("&#x27;", "&apos;")
        # Step 2: for non-XML content, decode to literal characters. The
        # order matters: "&amp;" is handled after "&lt;"/"&gt;" so that a
        # doubly-escaped "&amp;lt;" ends up as "&lt;", not "<".
        if not self.contentparams.get("type", "xml").endswith("xml"):
            data = data.replace("&lt;", "<")
            data = data.replace("&gt;", ">")
            data = data.replace("&amp;", "&")
            data = data.replace("&quot;", '"')
            data = data.replace("&apos;", "'")
            data = data.replace("&#x2f;", "/")
            data = data.replace("&#x2F;", "/")
        return data

    @staticmethod
    def strattrs(attrs):
        """Serialize ``(name, value)`` pairs as `` name="value"`` fragments.

        Double quotes inside a value are escaped as ``&quot;``.
        """

        return "".join(' {}="{}"'.format(n, v.replace('"', "&quot;")) for n, v in attrs)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# The strict feed parser that interfaces with an XML parsing library
|
|
2
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
3
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
4
|
+
# All rights reserved.
|
|
5
|
+
#
|
|
6
|
+
# This file is a part of feedparser.
|
|
7
|
+
#
|
|
8
|
+
# Redistribution and use in source and binary forms, with or without modification,
|
|
9
|
+
# are permitted provided that the following conditions are met:
|
|
10
|
+
#
|
|
11
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
12
|
+
# this list of conditions and the following disclaimer.
|
|
13
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
14
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
15
|
+
# and/or other materials provided with the distribution.
|
|
16
|
+
#
|
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
18
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
19
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
21
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
22
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
23
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
25
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
26
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
27
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
28
|
+
|
|
29
|
+
from ..exceptions import UndeclaredNamespace
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class StrictXMLParser:
    """SAX-style handler that feeds namespace-aware events to the parser.

    This class is a mixin: ``track_namespace``, ``_matchnamespaces``,
    ``namespaces_in_use``, ``unknown_starttag``, ``unknown_endtag`` and
    ``handle_data`` are not defined here and must be provided by the class
    it is combined with.
    """

    def __init__(self, baseuri, baselang, encoding):
        self.bozo = 0
        self.exc = None
        # Namespace declarations waiting to be attached to the next element.
        self.decls = {}
        self.baseuri = baseuri or ""
        self.lang = baselang
        self.encoding = encoding
        super().__init__()

    @staticmethod
    def _normalize_attributes(kv):
        """Lower-case the name; lower-case the value of ``rel``/``type``."""
        key = kv[0].lower()
        value = kv[1]
        if key in ("rel", "type"):
            # An empty lower-cased value falls back to the raw value.
            value = value.lower() or value
        return key, value

    def startPrefixMapping(self, prefix, uri):
        """Track a namespace declaration; empty URIs are ignored."""
        if not uri:
            return
        # Jython uses '' instead of None; standardize on None
        prefix = prefix or None
        self.track_namespace(prefix, uri)
        if prefix and uri == "http://www.w3.org/1999/xlink":
            self.decls["xmlns:" + prefix] = uri

    def startElementNS(self, name, qname, attrs):
        """Translate a namespace-aware start tag into ``unknown_starttag``.

        :raises UndeclaredNamespace: If the qname carries a prefix that maps
            to no known namespace and is not among ``namespaces_in_use``.
        """
        namespace, localname = name
        lowernamespace = str(namespace or "").lower()
        if "backend.userland.com/rss" in lowernamespace:
            # match any backend.userland.com namespace
            namespace = "http://backend.userland.com/rss"
            lowernamespace = namespace

        givenprefix = None
        if qname and qname.find(":") > 0:
            givenprefix = qname.split(":")[0]

        prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
        prefix_unresolved = prefix is None or (prefix == "" and lowernamespace == "")
        if (
            givenprefix
            and prefix_unresolved
            and givenprefix not in self.namespaces_in_use
        ):
            raise UndeclaredNamespace(
                "'%s' is not associated with a namespace" % givenprefix
            )
        localname = str(localname).lower()

        # SAX drivers have not always reported qnames reliably, so the
        # namespace is matched against the known list first and the qnames
        # the driver reports (if any) are applied afterwards.
        attrsD, self.decls = self.decls, {}
        inline_roots = (
            ("math", "http://www.w3.org/1998/Math/MathML"),
            ("svg", "http://www.w3.org/2000/svg"),
        )
        if (localname, namespace) in inline_roots:
            attrsD["xmlns"] = namespace

        if prefix:
            localname = prefix.lower() + ":" + localname
        elif namespace and not qname:  # Expat
            for used_prefix, used_uri in self.namespaces_in_use.items():
                if used_prefix and used_uri == namespace:
                    localname = used_prefix + ":" + localname
                    break

        for (attr_namespace, attr_name), attr_value in attrs.items():
            attr_prefix = self._matchnamespaces.get((attr_namespace or "").lower(), "")
            if attr_prefix:
                attr_name = attr_prefix + ":" + attr_name
            attrsD[str(attr_name).lower()] = attr_value
        # Attributes keyed by their reported qname override the ones above.
        for attr_qname in attrs.getQNames():
            attrsD[str(attr_qname).lower()] = attrs.getValueByQName(attr_qname)

        self.unknown_starttag(str(localname).lower(), list(attrsD.items()))

    def characters(self, text):
        self.handle_data(text)

    def endElementNS(self, name, qname):
        """Translate a namespace-aware end tag into ``unknown_endtag``."""
        namespace, localname = name
        lowernamespace = str(namespace or "").lower()

        givenprefix = ""
        if qname and qname.find(":") > 0:
            givenprefix = qname.split(":")[0]

        prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
        if prefix:
            localname = prefix + ":" + localname
        elif namespace and not qname:  # Expat
            for used_prefix, used_uri in self.namespaces_in_use.items():
                if used_prefix and used_uri == namespace:
                    localname = used_prefix + ":" + localname
                    break

        self.unknown_endtag(str(localname).lower())

    def error(self, exc):
        """Remember *exc* and flag the document as not well-formed."""
        self.bozo = 1
        self.exc = exc

    # drv_libxml2 calls warning() in some cases
    warning = error

    def fatalError(self, exc):
        """Record *exc* like :meth:`error`, then re-raise it."""
        self.error(exc)
        raise exc
|
feedparser/py.typed
ADDED
|
File without changes
|