forkparser 2026.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
1
+ # Support for the iTunes format
2
+ # Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
3
+ # Copyright 2002-2008 Mark Pilgrim
4
+ # All rights reserved.
5
+ #
6
+ # This file is a part of feedparser.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without
9
+ # modification, are permitted provided that the following conditions are met:
10
+ #
11
+ # * Redistributions of source code must retain the above copyright notice,
12
+ # this list of conditions and the following disclaimer.
13
+ # * Redistributions in binary form must reproduce the above copyright notice,
14
+ # this list of conditions and the following disclaimer in the documentation
15
+ # and/or other materials provided with the distribution.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
18
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ from ..util import FeedParserDict
30
+
31
+
32
class Namespace:
    """Element handlers for the iTunes podcast namespace.

    Most handlers delegate to a same-named generic handler (for example
    ``_start_author``) or use helpers such as ``push``, ``pop``,
    ``_get_context`` and ``_add_tag``. None of those are defined in this
    class; they must be supplied by the class this one is combined with.
    """

    supported_namespaces = {
        # Canonical namespace
        "http://www.itunes.com/DTDs/PodCast-1.0.dtd": "itunes",
        # Extra namespace
        "http://example.com/DTDs/PodCast-1.0.dtd": "itunes",
    }

    # -- elements that simply reuse the generic handlers ---------------------

    def _start_itunes_author(self, attrs_d):
        """Treat ``itunes:author`` like a generic author element."""
        self._start_author(attrs_d)

    def _end_itunes_author(self):
        """Close ``itunes:author`` through the generic author handler."""
        self._end_author()

    def _start_itunes_name(self, attrs_d):
        """Treat ``itunes:name`` like a generic name element."""
        self._start_name(attrs_d)

    def _end_itunes_name(self):
        """Close ``itunes:name`` through the generic name handler."""
        self._end_name()

    def _start_itunes_email(self, attrs_d):
        """Treat ``itunes:email`` like a generic email element."""
        self._start_email(attrs_d)

    def _end_itunes_email(self):
        """Close ``itunes:email`` through the generic email handler."""
        self._end_email()

    def _start_itunes_subtitle(self, attrs_d):
        """Treat ``itunes:subtitle`` like a generic subtitle element."""
        self._start_subtitle(attrs_d)

    def _end_itunes_subtitle(self):
        """Close ``itunes:subtitle`` through the generic subtitle handler."""
        self._end_subtitle()

    def _start_itunes_summary(self, attrs_d):
        """Treat ``itunes:summary`` like a generic summary element."""
        self._start_summary(attrs_d)

    def _end_itunes_summary(self):
        """Close ``itunes:summary`` through the generic summary handler."""
        self._end_summary()

    # -- owner / publisher ----------------------------------------------------

    def _start_itunes_owner(self, attrs_d):
        """Enter publisher mode and open a ``publisher`` element."""
        self.inpublisher = 1
        self.push("publisher", 0)

    def _end_itunes_owner(self):
        """Close the ``publisher`` element and sync its author detail."""
        self.pop("publisher")
        self.inpublisher = 0
        self._sync_author_detail("publisher")

    # -- categories and keywords ----------------------------------------------

    def _start_itunes_category(self, attrs_d):
        """Record the ``text`` attribute as a tag, then open ``category``."""
        self._add_tag(attrs_d.get("text"), "http://www.itunes.com/", None)
        self.push("category", 1)

    def _end_itunes_category(self):
        """Close ``itunes:category`` through the generic category handler."""
        self._end_category()

    def _end_itunes_keywords(self):
        """Add one tag per non-blank, comma-separated keyword."""
        keywords = self.pop("itunes_keywords")
        for raw_term in keywords.split(","):
            term = raw_term.strip()
            if term:
                self._add_tag(term, "http://www.itunes.com/", None)

    # -- image / link ---------------------------------------------------------

    def _start_itunes_image(self, attrs_d):
        """Store the image location, preferring ``href`` over ``url``."""
        self.push("itunes_image", 0)
        location = attrs_d.get("href") or attrs_d.get("url")
        if location:
            self._get_context()["image"] = FeedParserDict({"href": location})

    # ``itunes:link`` is handled exactly like ``itunes:image``.
    _start_itunes_link = _start_itunes_image

    # -- flags ----------------------------------------------------------------

    def _end_itunes_block(self):
        """Store 1 when the element text is ``yes``/``Yes``, otherwise 0."""
        value = self.pop("itunes_block", 0)
        blocked = 1 if value in ("yes", "Yes") else 0
        self._get_context()["itunes_block"] = blocked

    def _end_itunes_explicit(self):
        """Store True for ``yes``, False for ``clean``, None for anything else.

        False and None are both falsy, so applications that only need to know
        whether the content is explicit can ignore the difference.
        """
        value = self.pop("itunes_explicit", 0)
        if value == "yes":
            explicit = True
        elif value == "clean":
            explicit = False
        else:
            explicit = None
        self._get_context()["itunes_explicit"] = explicit
@@ -0,0 +1,142 @@
1
+ # Support for the Media RSS format
2
+ # Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
3
+ # Copyright 2002-2008 Mark Pilgrim
4
+ # All rights reserved.
5
+ #
6
+ # This file is a part of feedparser.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without
9
+ # modification, are permitted provided that the following conditions are met:
10
+ #
11
+ # * Redistributions of source code must retain the above copyright notice,
12
+ # this list of conditions and the following disclaimer.
13
+ # * Redistributions in binary form must reproduce the above copyright notice,
14
+ # this list of conditions and the following disclaimer in the documentation
15
+ # and/or other materials provided with the distribution.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
18
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ from ..util import FeedParserDict
30
+
31
+
32
class Namespace:
    """Element handlers for the Media RSS (``media:``) namespace.

    The handlers use ``push``, ``pop``, ``_get_context`` and several generic
    ``_start_*``/``_end_*`` methods that are not defined in this class; they
    must be supplied by the class this one is combined with.
    """

    supported_namespaces = {
        # Canonical namespace
        "http://search.yahoo.com/mrss/": "media",
        # Old namespace (no trailing slash)
        "http://search.yahoo.com/mrss": "media",
    }

    def _start_media_category(self, attrs_d):
        """Open a category, filling in the Media RSS scheme when absent."""
        attrs_d.setdefault("scheme", "http://search.yahoo.com/mrss/category_schema")
        self._start_category(attrs_d)

    def _end_media_category(self):
        """Close ``media:category`` through the generic category handler."""
        self._end_category()

    def _end_media_keywords(self):
        """Add one tag per non-blank, comma-separated keyword."""
        for term in self.pop("media_keywords").split(","):
            if term.strip():
                self._add_tag(term.strip(), None, None)

    def _start_media_title(self, attrs_d):
        """Treat ``media:title`` like a generic title element."""
        self._start_title(attrs_d)

    def _end_media_title(self):
        """Close the title while leaving ``self.title_depth`` untouched."""
        # _end_title() may modify title_depth; restore the previous value.
        title_depth = self.title_depth
        self._end_title()
        self.title_depth = title_depth

    def _start_media_group(self, attrs_d):
        """Accept ``media:group`` without recording anything."""
        # don't do anything, but don't break the enclosed tags either
        pass

    def _start_media_rating(self, attrs_d):
        """Store the rating attributes (first one wins) and open ``rating``."""
        context = self._get_context()
        context.setdefault("media_rating", attrs_d)
        self.push("rating", 1)

    def _end_media_rating(self):
        """Attach non-blank element text as the rating's ``content``."""
        rating = self.pop("rating")
        if rating is not None and rating.strip():
            context = self._get_context()
            context["media_rating"]["content"] = rating

    def _start_media_credit(self, attrs_d):
        """Append the credit attributes to ``media_credit`` and open ``credit``."""
        context = self._get_context()
        context.setdefault("media_credit", [])
        context["media_credit"].append(attrs_d)
        self.push("credit", 1)

    def _end_media_credit(self):
        """Attach non-blank element text to the most recent credit."""
        credit = self.pop("credit")
        if credit is not None and credit.strip():
            context = self._get_context()
            context["media_credit"][-1]["content"] = credit

    def _start_media_description(self, attrs_d):
        """Treat ``media:description`` like a generic description element."""
        self._start_description(attrs_d)

    def _end_media_description(self):
        """Close ``media:description`` through the generic handler."""
        self._end_description()

    def _start_media_restriction(self, attrs_d):
        """Store the restriction attributes (first one wins) and open it."""
        context = self._get_context()
        context.setdefault("media_restriction", attrs_d)
        self.push("restriction", 1)

    def _end_media_restriction(self):
        """Store the element text as a list of lower-cased codes.

        The text is split on any run of whitespace, so repeated spaces, tabs
        or line breaks between codes never produce empty entries.
        """
        restriction = self.pop("restriction")
        if restriction is not None and restriction.strip():
            context = self._get_context()
            context["media_restriction"]["content"] = [
                cc.lower() for cc in restriction.split()
            ]

    def _start_media_license(self, attrs_d):
        """Store the license attributes (first one wins) and open ``license``."""
        context = self._get_context()
        context.setdefault("media_license", attrs_d)
        self.push("license", 1)

    def _end_media_license(self):
        """Attach non-blank element text as the license's ``content``."""
        license_ = self.pop("license")
        if license_ is not None and license_.strip():
            context = self._get_context()
            context["media_license"]["content"] = license_

    def _start_media_content(self, attrs_d):
        """Append the element's attributes to ``media_content``."""
        context = self._get_context()
        context.setdefault("media_content", [])
        context["media_content"].append(attrs_d)

    def _start_media_thumbnail(self, attrs_d):
        """Append the thumbnail attributes and open ``url`` for its text."""
        context = self._get_context()
        context.setdefault("media_thumbnail", [])
        self.push("url", 1)  # new
        context["media_thumbnail"].append(attrs_d)

    def _end_media_thumbnail(self):
        """Use the element text as ``url`` only when no attribute supplied one."""
        url = self.pop("url")
        context = self._get_context()
        if url is not None and url.strip():
            if "url" not in context["media_thumbnail"][-1]:
                context["media_thumbnail"][-1]["url"] = url

    def _start_media_player(self, attrs_d):
        """Open ``media_player`` and store a copy of its attributes."""
        self.push("media_player", 0)
        self._get_context()["media_player"] = FeedParserDict(attrs_d)

    def _end_media_player(self):
        """Attach the element text as the player's ``content``."""
        value = self.pop("media_player")
        context = self._get_context()
        context["media_player"]["content"] = value
@@ -0,0 +1,74 @@
1
+ # Support for the Podlove Simple Chapters format
2
+ # Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
3
+ # Copyright 2002-2008 Mark Pilgrim
4
+ # All rights reserved.
5
+ #
6
+ # This file is a part of feedparser.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without
9
+ # modification, are permitted provided that the following conditions are met:
10
+ #
11
+ # * Redistributions of source code must retain the above copyright notice,
12
+ # this list of conditions and the following disclaimer.
13
+ # * Redistributions in binary form must reproduce the above copyright notice,
14
+ # this list of conditions and the following disclaimer in the documentation
15
+ # and/or other materials provided with the distribution.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
18
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ import datetime
30
+ import re
31
+
32
+ from .. import util
33
+
34
+
35
class Namespace:
    """Element handlers for the Podlove Simple Chapters (``psc:``) namespace.

    ``_get_context`` and ``_get_attribute`` are not defined in this class;
    they must be supplied by the class this one is combined with.
    """

    supported_namespaces = {
        "http://podlove.org/simple-chapters": "psc",
    }

    def __init__(self):
        # Chapters are only captured while psc_chapters_flag is True.
        self.psc_chapters_flag = False
        super().__init__()

    def _start_psc_chapters(self, attrs_d):
        """Begin capturing chapters unless the context already has some."""
        context = self._get_context()
        if "psc_chapters" in context:
            return
        self.psc_chapters_flag = True
        attrs_d["chapters"] = []
        context["psc_chapters"] = util.FeedParserDict(attrs_d)

    def _end_psc_chapters(self):
        """Stop capturing chapters."""
        self.psc_chapters_flag = False

    def _start_psc_chapter(self, attrs_d):
        """Append one chapter, adding a parsed form of its ``start`` value."""
        if not self.psc_chapters_flag:
            return
        raw_start = self._get_attribute(attrs_d, "start")
        attrs_d["start_parsed"] = _parse_psc_chapter_start(raw_start)

        chapter_list = self._get_context()["psc_chapters"]["chapters"]
        chapter_list.append(util.FeedParserDict(attrs_d))
62
+
63
+
64
# Matches "[HH:]MM:SS[.mmm]": optional two-digit hours, two-digit minutes and
# seconds, and an optional three-digit millisecond part.
format_ = re.compile(r"^((\d{2}):)?(\d{2}):(\d{2})(\.(\d{3}))?$")


def _parse_psc_chapter_start(start):
    """Convert a chapter start string into a ``datetime.timedelta``.

    :param start: text in the form ``[HH:]MM:SS[.mmm]``; may be ``None`` when
        the chapter element has no ``start`` attribute.
    :returns: the offset as a ``timedelta``, or ``None`` when *start* is not
        a string or does not match the expected form.
    """
    # A missing attribute arrives as None; re.match() would raise TypeError.
    if not isinstance(start, str):
        return None

    match = format_.match(start)
    if match is None:
        return None

    _, hours, minutes, seconds, _, millis = match.groups()
    return datetime.timedelta(
        hours=int(hours or 0),
        minutes=int(minutes),
        seconds=int(seconds),
        milliseconds=int(millis or 0),
    )
File without changes
@@ -0,0 +1,135 @@
1
+ # The JSON feed parser
2
+ # Copyright 2017 Beat Bolli
3
+ # All rights reserved.
4
+ #
5
+ # This file is a part of feedparser.
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without modification,
8
+ # are permitted provided that the following conditions are met:
9
+ #
10
+ # * Redistributions of source code must retain the above copyright notice,
11
+ # this list of conditions and the following disclaimer.
12
+ # * Redistributions in binary form must reproduce the above copyright notice,
13
+ # this list of conditions and the following disclaimer in the documentation
14
+ # and/or other materials provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
17
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26
+ # POSSIBILITY OF SUCH DAMAGE.
27
+
28
+ import json
29
+
30
+ from ..datetimes import _parse_date
31
+ from ..sanitizer import sanitize_html
32
+ from ..util import FeedParserDict
33
+
34
+
35
class JSONParser:
    """Parser for JSON Feed documents.

    After ``feed()`` has run, ``version`` holds the short version label,
    ``feeddata`` the feed-level fields and ``entries`` one dictionary per
    item.
    """

    # Maps the document's "version" URL to a short version label.
    VERSIONS = {
        "https://jsonfeed.org/version/1": "json1",
        "https://jsonfeed.org/version/1.1": "json11",
    }
    # (JSON Feed key, result key) pairs copied verbatim at feed level.
    FEED_FIELDS = (
        ("title", "title"),
        ("icon", "image"),
        ("home_page_url", "link"),
        ("description", "description"),
    )
    # (JSON Feed key, result key) pairs copied verbatim for each item.
    ITEM_FIELDS = (
        ("title", "title"),
        ("id", "guid"),
        ("url", "link"),
        ("summary", "summary"),
        ("external_url", "source"),
    )

    def __init__(self, baseuri=None, baselang=None, encoding=None):
        """Set up an empty parser.

        :param baseuri: stored as ``self.baseuri`` (``""`` when falsy).
        :param baselang: stored as ``self.lang`` (``None`` when falsy).
        :param encoding: character encoding handed to the HTML sanitizer;
            defaults to ``"utf-8"``.
        """
        self.baseuri = baseuri or ""
        self.lang = baselang or None
        self.encoding = encoding or "utf-8"  # character encoding

        self.version = None
        self.feeddata = FeedParserDict()
        self.namespacesInUse = []
        self.entries = []

    def feed(self, file):
        """Parse a JSON Feed document from a file-like object.

        :param file: object accepted by ``json.load``.
        :raises ValueError: when the "version" value is not in ``VERSIONS``.
        :raises KeyError: when the document has no "items" member.
        """
        data = json.load(file)

        v = data.get("version", "")
        try:
            self.version = self.VERSIONS[v]
        except KeyError:
            raise ValueError("Unrecognized JSONFeed version '%s'" % v)

        for src, dst in self.FEED_FIELDS:
            if src in data:
                self.feeddata[dst] = data[src]
        if "author" in data:
            self.parse_author(data["author"], self.feeddata)
        # TODO: hubs; expired has no RSS equivalent

        self.entries = [self.parse_entry(e) for e in data["items"]]

    def parse_entry(self, e):
        """Convert one JSON Feed item into an entry dictionary.

        :param e: the item as a mapping.
        :returns: a ``FeedParserDict`` holding the fields that were present.
        """
        entry = FeedParserDict()
        for src, dst in self.ITEM_FIELDS:
            if src in e:
                entry[dst] = e[src]

        # Plain text takes precedence over HTML when both are present.
        if "content_text" in e:
            entry["content"] = c = FeedParserDict()
            c["value"] = e["content_text"]
            c["type"] = "text"
        elif "content_html" in e:
            entry["content"] = c = FeedParserDict()
            c["value"] = sanitize_html(
                e["content_html"], self.encoding, "application/json"
            )
            c["type"] = "html"

        if "date_published" in e:
            entry["published"] = e["date_published"]
            entry["published_parsed"] = _parse_date(e["date_published"])
        # The JSON Feed field is "date_modified"; test for the same key that
        # is read so a present value is used and an absent one is skipped.
        if "date_modified" in e:
            entry["updated"] = e["date_modified"]
            entry["updated_parsed"] = _parse_date(e["date_modified"])

        if "tags" in e:
            entry["category"] = e["tags"]

        if "author" in e:
            self.parse_author(e["author"], entry)

        if "attachments" in e:
            entry["enclosures"] = [self.parse_attachment(a) for a in e["attachments"]]

        return entry

    @staticmethod
    def parse_author(parent, dest):
        """Copy author information from *parent* into *dest*.

        ``dest["author_detail"]`` is always created. A ``mailto:`` URL is
        stored as ``email`` (without the scheme); any other URL as ``href``.
        """
        dest["author_detail"] = detail = FeedParserDict()
        if "name" in parent:
            dest["author"] = detail["name"] = parent["name"]
        if "url" in parent:
            if parent["url"].startswith("mailto:"):
                detail["email"] = parent["url"][7:]
            else:
                detail["href"] = parent["url"]

    @staticmethod
    def parse_attachment(attachment):
        """Convert one attachment into an enclosure dictionary.

        :raises KeyError: when "url" or "mime_type" is missing.
        """
        enc = FeedParserDict()
        enc["href"] = attachment["url"]
        enc["type"] = attachment["mime_type"]
        if "size_in_bytes" in attachment:
            enc["length"] = attachment["size_in_bytes"]
        return enc
@@ -0,0 +1,75 @@
1
+ # The loose feed parser that interfaces with an SGML parsing library
2
+ # Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
3
+ # Copyright 2002-2008 Mark Pilgrim
4
+ # All rights reserved.
5
+ #
6
+ # This file is a part of feedparser.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without modification,
9
+ # are permitted provided that the following conditions are met:
10
+ #
11
+ # * Redistributions of source code must retain the above copyright notice,
12
+ # this list of conditions and the following disclaimer.
13
+ # * Redistributions in binary form must reproduce the above copyright notice,
14
+ # this list of conditions and the following disclaimer in the documentation
15
+ # and/or other materials provided with the distribution.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
18
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+
30
class LooseXMLParser:
    """Helpers for the loose (SGML-based) feed parser.

    ``contentparams`` is ``None`` here; ``decode_entities`` calls ``.get`` on
    it, so it must hold a mapping by the time that method runs.
    """

    contentparams = None

    def __init__(self, baseuri=None, baselang=None, encoding=None, entities=None):
        """Store the parsing options, substituting defaults for falsy values."""
        self.baseuri = baseuri or ""
        self.lang = baselang or None
        self.encoding = encoding or "utf-8"  # character encoding
        self.entities = entities or {}
        super().__init__()

    @staticmethod
    def _normalize_attributes(kv):
        """Return a normalized ``(name, value)`` pair.

        The name is lower-cased; the value is lower-cased only for ``rel``
        and ``type``. ``&amp;`` in the value is decoded to ``&``.
        """
        name, value = kv[0].lower(), kv[1]
        if name in ("rel", "type"):
            value = value.lower()
        # the sgml parser doesn't handle entities in attributes, nor
        # does it pass the attribute values through as unicode, while
        # strict xml parsers do -- account for this difference
        return name, value.replace("&amp;", "&")

    def decode_entities(self, element, data):
        """Normalize character references in *data*.

        Numeric references for ``< > & " '`` are first rewritten to their
        named forms. When the current content type does not end in ``xml``
        the named forms, plus the numeric reference for ``/``, are then
        replaced by the literal characters.
        """
        numeric_to_named = (
            ("&#60;", "&lt;"),
            ("&#x3c;", "&lt;"),
            ("&#x3C;", "&lt;"),
            ("&#62;", "&gt;"),
            ("&#x3e;", "&gt;"),
            ("&#x3E;", "&gt;"),
            ("&#38;", "&amp;"),
            ("&#x26;", "&amp;"),
            ("&#34;", "&quot;"),
            ("&#x22;", "&quot;"),
            ("&#39;", "&apos;"),
            ("&#x27;", "&apos;"),
        )
        for reference, named in numeric_to_named:
            data = data.replace(reference, named)

        if self.contentparams.get("type", "xml").endswith("xml"):
            return data

        # Order matters: "&amp;" is decoded after "&lt;"/"&gt;" and before
        # the remaining entities.
        named_to_literal = (
            ("&lt;", "<"),
            ("&gt;", ">"),
            ("&amp;", "&"),
            ("&quot;", '"'),
            ("&apos;", "'"),
            ("&#x2f;", "/"),
            ("&#x2F;", "/"),
        )
        for entity, literal in named_to_literal:
            data = data.replace(entity, literal)
        return data

    @staticmethod
    def strattrs(attrs):
        """Render ``(name, value)`` pairs as `` name="value"`` text.

        Double quotes inside a value are written as ``&quot;``.
        """
        rendered = []
        for name, value in attrs:
            escaped = value.replace('"', "&quot;")
            rendered.append(' {}="{}"'.format(name, escaped))
        return "".join(rendered)
@@ -0,0 +1,141 @@
1
+ # The strict feed parser that interfaces with an XML parsing library
2
+ # Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
3
+ # Copyright 2002-2008 Mark Pilgrim
4
+ # All rights reserved.
5
+ #
6
+ # This file is a part of feedparser.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without modification,
9
+ # are permitted provided that the following conditions are met:
10
+ #
11
+ # * Redistributions of source code must retain the above copyright notice,
12
+ # this list of conditions and the following disclaimer.
13
+ # * Redistributions in binary form must reproduce the above copyright notice,
14
+ # this list of conditions and the following disclaimer in the documentation
15
+ # and/or other materials provided with the distribution.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
18
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27
+ # POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ from ..exceptions import UndeclaredNamespace
30
+
31
+
32
class StrictXMLParser:
    """SAX-style callbacks for the strict (XML library based) feed parser.

    The callbacks use ``track_namespace``, ``_matchnamespaces``,
    ``namespaces_in_use``, ``unknown_starttag``, ``unknown_endtag`` and
    ``handle_data``. None of these are defined in this class; they must be
    supplied by the class this one is combined with.
    """

    def __init__(self, baseuri, baselang, encoding):
        """Reset the error state and store the parsing options."""
        self.bozo = 0
        self.exc = None
        self.decls = {}
        self.baseuri = baseuri or ""
        self.lang = baselang
        self.encoding = encoding
        super().__init__()

    @staticmethod
    def _normalize_attributes(kv):
        """Lower-case the name, and the value too for ``rel``/``type``."""
        name, value = kv[0].lower(), kv[1]
        if name in ("rel", "type"):
            value = value.lower()
        return name, value

    def startPrefixMapping(self, prefix, uri):
        """Track a namespace declaration; remember XLink ones for the next tag."""
        if not uri:
            return
        # Jython uses '' instead of None; standardize on None
        prefix = prefix or None
        self.track_namespace(prefix, uri)
        if prefix and uri == "http://www.w3.org/1999/xlink":
            self.decls["xmlns:" + prefix] = uri

    def startElementNS(self, name, qname, attrs):
        """Resolve the element's prefixed name and forward it with its attributes.

        :raises UndeclaredNamespace: when *qname* carries a prefix that maps
            to no known namespace and is not in ``namespaces_in_use``.
        """
        namespace, localname = name
        lowernamespace = str(namespace or "").lower()
        if "backend.userland.com/rss" in lowernamespace:
            # match any backend.userland.com namespace
            namespace = "http://backend.userland.com/rss"
            lowernamespace = namespace

        givenprefix = qname.split(":")[0] if qname and qname.find(":") > 0 else None
        prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
        unresolved = prefix is None or (prefix == "" and lowernamespace == "")
        if givenprefix and unresolved and givenprefix not in self.namespaces_in_use:
            raise UndeclaredNamespace(
                "'%s' is not associated with a namespace" % givenprefix
            )
        localname = str(localname).lower()

        # qname implementation is horribly broken in Python 2.1 (it
        # doesn't report any), and slightly broken in Python 2.2 (it
        # doesn't report the xml: namespace). So we match up namespaces
        # with a known list first, and then possibly override them with
        # the qnames the SAX parser gives us (if indeed it gives us any
        # at all). Thanks to MatejC for helping me test this and
        # tirelessly telling me that it didn't work yet.
        attrsD, self.decls = self.decls, {}
        inline_roots = (
            ("math", "http://www.w3.org/1998/Math/MathML"),
            ("svg", "http://www.w3.org/2000/svg"),
        )
        if (localname, namespace) in inline_roots:
            attrsD["xmlns"] = namespace

        if prefix:
            localname = prefix.lower() + ":" + localname
        elif namespace and not qname:  # Expat
            declared = next(
                (
                    known_prefix
                    for known_prefix, known_uri in self.namespaces_in_use.items()
                    if known_prefix and known_uri == namespace
                ),
                None,
            )
            if declared is not None:
                localname = declared + ":" + localname

        # Namespace-derived names first; qnames reported by the parser are
        # applied afterwards and overwrite entries with the same key.
        for (attr_namespace, attr_name), attr_value in attrs.items():
            attr_prefix = self._matchnamespaces.get((attr_namespace or "").lower(), "")
            if attr_prefix:
                attr_name = attr_prefix + ":" + attr_name
            attrsD[str(attr_name).lower()] = attr_value
        for attr_qname in attrs.getQNames():
            attrsD[str(attr_qname).lower()] = attrs.getValueByQName(attr_qname)

        self.unknown_starttag(str(localname).lower(), list(attrsD.items()))

    def characters(self, text):
        """Forward character data to ``handle_data``."""
        self.handle_data(text)

    def endElementNS(self, name, qname):
        """Resolve the element's prefixed name and forward the end tag."""
        namespace, localname = name
        lowernamespace = str(namespace or "").lower()
        givenprefix = qname.split(":")[0] if qname and qname.find(":") > 0 else ""
        prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
        if prefix:
            localname = prefix + ":" + localname
        elif namespace and not qname:  # Expat
            declared = next(
                (
                    known_prefix
                    for known_prefix, known_uri in self.namespaces_in_use.items()
                    if known_prefix and known_uri == namespace
                ),
                None,
            )
            if declared is not None:
                localname = declared + ":" + localname
        self.unknown_endtag(str(localname).lower())

    def error(self, exc):
        """Record *exc* and set the ``bozo`` flag."""
        self.bozo = 1
        self.exc = exc

    # drv_libxml2 calls warning() in some cases
    warning = error

    def fatalError(self, exc):
        """Record *exc* like ``error`` does, then re-raise it."""
        self.error(exc)
        raise exc
feedparser/py.typed ADDED
File without changes