forkparser 2026.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- feedparser/__init__.py +66 -0
- feedparser/api.py +376 -0
- feedparser/datetimes/__init__.py +73 -0
- feedparser/datetimes/asctime.py +80 -0
- feedparser/datetimes/greek.py +90 -0
- feedparser/datetimes/hungarian.py +66 -0
- feedparser/datetimes/iso8601.py +160 -0
- feedparser/datetimes/korean.py +94 -0
- feedparser/datetimes/perforce.py +63 -0
- feedparser/datetimes/rfc822.py +179 -0
- feedparser/datetimes/w3dtf.py +128 -0
- feedparser/encodings.py +649 -0
- feedparser/exceptions.py +55 -0
- feedparser/html.py +350 -0
- feedparser/http.py +74 -0
- feedparser/mixin.py +838 -0
- feedparser/namespaces/__init__.py +0 -0
- feedparser/namespaces/_base.py +547 -0
- feedparser/namespaces/admin.py +53 -0
- feedparser/namespaces/cc.py +70 -0
- feedparser/namespaces/dc.py +138 -0
- feedparser/namespaces/georss.py +682 -0
- feedparser/namespaces/itunes.py +113 -0
- feedparser/namespaces/mediarss.py +142 -0
- feedparser/namespaces/psc.py +74 -0
- feedparser/parsers/__init__.py +0 -0
- feedparser/parsers/json.py +135 -0
- feedparser/parsers/loose.py +75 -0
- feedparser/parsers/strict.py +141 -0
- feedparser/py.typed +0 -0
- feedparser/sanitizer.py +978 -0
- feedparser/sgml.py +98 -0
- feedparser/urls.py +233 -0
- feedparser/util.py +157 -0
- forkparser-2026.1.0.dist-info/METADATA +75 -0
- forkparser-2026.1.0.dist-info/RECORD +38 -0
- forkparser-2026.1.0.dist-info/WHEEL +4 -0
- forkparser-2026.1.0.dist-info/licenses/LICENSE +65 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
2
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is a part of feedparser.
|
|
6
|
+
#
|
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
|
8
|
+
# modification, are permitted provided that the following conditions are met:
|
|
9
|
+
#
|
|
10
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
11
|
+
# this list of conditions and the following disclaimer.
|
|
12
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
# and/or other materials provided with the distribution.
|
|
15
|
+
#
|
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
17
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
19
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
20
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
21
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
22
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
23
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
24
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
25
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
26
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
27
|
+
|
|
28
|
+
import re
|
|
29
|
+
|
|
30
|
+
from .w3dtf import _parse_date_w3dtf
|
|
31
|
+
|
|
32
|
+
# Unicode strings for Hungarian date strings
|
|
33
|
+
_hungarian_months = {
    "janu\u00e1r": "01",  # e1 in iso-8859-2
    "febru\u00e1r": "02",  # e1 in iso-8859-2
    "febru\u00e1ri": "02",  # legacy spelling, kept for backward compatibility
    "m\u00e1rcius": "03",  # e1 in iso-8859-2
    "\u00e1prilis": "04",  # e1 in iso-8859-2
    "m\u00e1jus": "05",  # e1 in iso-8859-2
    "m\u00e1ujus": "05",  # legacy misspelling, kept for backward compatibility
    "j\u00fanius": "06",  # fa in iso-8859-2
    "j\u00falius": "07",  # fa in iso-8859-2
    "augusztus": "08",
    "szeptember": "09",
    "okt\u00f3ber": "10",  # f3 in iso-8859-2
    "november": "11",
    "december": "12",
}

# Groups: year, month name, day, hour, minute, UTC offset (sign included),
# UTC offset (sign excluded).
_hungarian_date_format_re = re.compile(
    r"(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})([+-](\d{,2}:\d{2}))"
)


def _parse_date_hungarian(date_string):
    """Parse a string according to a Hungarian 8-bit date format.

    The expected shape is ``YYYY-<month name>-DTH:MM+HH:MM``. The month name
    is translated to its number, single-digit day and hour values are
    zero-padded, and the rebuilt string is handed to the W3DTF parser.

    :param str date_string: the date/time string to interpret
    :returns: whatever ``_parse_date_w3dtf`` returns for the rebuilt string,
        or None if the string does not match or the month name is unknown
    """
    m = _hungarian_date_format_re.match(date_string)
    if not m:
        return None
    month = _hungarian_months.get(m.group(2))
    if month is None:
        return None
    day = m.group(3)
    if len(day) == 1:
        day = "0" + day
    hour = m.group(4)
    if len(hour) == 1:
        hour = "0" + hour
    w3dtfdate = f"{m.group(1)}-{month}-{day}T{hour}:{m.group(5)}{m.group(6)}"
    return _parse_date_w3dtf(w3dtfdate)
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
2
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is a part of feedparser.
|
|
6
|
+
#
|
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
|
8
|
+
# modification, are permitted provided that the following conditions are met:
|
|
9
|
+
#
|
|
10
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
11
|
+
# this list of conditions and the following disclaimer.
|
|
12
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
# and/or other materials provided with the distribution.
|
|
15
|
+
#
|
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
17
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
19
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
20
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
21
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
22
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
23
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
24
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
25
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
26
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
27
|
+
|
|
28
|
+
import re
|
|
29
|
+
import time
|
|
30
|
+
|
|
31
|
+
# ISO-8601 date parsing routines written by Fazal Majid.
|
|
32
|
+
# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601
|
|
33
|
+
# parser is beyond the scope of feedparser and would be a worthwhile addition
|
|
34
|
+
# to the Python library.
|
|
35
|
+
# A single regular expression cannot parse ISO 8601 date formats into groups
|
|
36
|
+
# as the standard is highly irregular (for instance is 030104 2003-01-04 or
|
|
37
|
+
# 0301-04-01), so we use templates instead.
|
|
38
|
+
# Please note the order in templates is significant because we need a
|
|
39
|
+
# greedy match.
|
|
40
|
+
_iso8601_tmpl = [
    "YYYY-?MM-?DD",
    "YYYY-0MM?-?DD",
    "YYYY-MM",
    "YYYY-?OOO",
    "YY-?MM-?DD",
    "YY-?OOO",
    "YYYY",
    "-YY-?MM",
    "-OOO",
    "-YY",
    "--MM-?DD",
    "--MM",
    "---DD",
    "CC",
    "",
]

# Expand each template into a regular expression and append the optional
# time-of-day / time zone suffix that is shared by every template.
_iso8601_re = [
    tmpl.replace("YYYY", r"(?P<year>\d{4})")
    .replace("YY", r"(?P<year>\d\d)")
    .replace("MM", r"(?P<month>[01]\d)")
    .replace("DD", r"(?P<day>[0123]\d)")
    .replace("OOO", r"(?P<ordinal>[0123]\d\d)")
    .replace("CC", r"(?P<century>\d\d$)")
    + r"(T?(?P<hour>\d{2}):(?P<minute>\d{2})"
    + r"(:(?P<second>\d{2}))?"
    + r"(\.(?P<fracsecond>\d+))?"
    + r"(?P<tz>[+-](?P<tzhour>\d{2})(:(?P<tzmin>\d{2}))?|Z)?)?"
    for tmpl in _iso8601_tmpl
]
_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re]


def _parse_date_iso8601(date_string):
    """Parse a variety of ISO-8601-compatible formats like 20040105.

    The templates are tried in order and the first one that matches wins.
    Missing date fields default to the current UTC date (or to 1 when a
    larger field was given); missing time fields default to 0. A numeric
    time zone offset is folded into the time so the result is in UTC.

    :param str date_string: the date/time string to interpret
    :returns: a UTC time tuple, or None if nothing matched or the resulting
        date cannot be represented
    :rtype: time.struct_time | None
    """
    # Imported locally: only ``re`` and ``time`` are imported at module level.
    import datetime

    m = None
    for _iso8601_match in _iso8601_matches:
        m = _iso8601_match(date_string)
        if m:
            break
    # The final (empty) template matches anything with a zero-length span.
    if not m or m.span() == (0, 0):
        return None
    params = m.groupdict()
    now = time.gmtime()

    ordinal = int(params.get("ordinal") or 0)

    year = params.get("year")
    if not year:
        year = now.tm_year
    elif len(year) == 2:
        # ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993
        year = now.tm_year // 100 * 100 + int(year)
    else:
        year = int(year)

    month = params.get("month")
    if not month:
        # Ordinal dates are expressed as "day N of January" and rely on
        # the overflow normalization performed below.
        month = 1 if ordinal else now.tm_mon
    month = int(month)

    day = params.get("day")
    if day:
        day = int(day)
    elif ordinal:
        day = ordinal
    elif params.get("century") or params.get("year") or params.get("month"):
        day = 1
    else:
        day = now.tm_mday

    # special case of the century - is the first year of the 21st century
    # 2000 or 2001 ? The debate goes on...
    if "century" in params:
        year = (int(params["century"]) - 1) * 100 + 1

    # in ISO 8601 most fields are optional
    hour = int(params.get("hour") or 0)
    minute = int(params.get("minute") or 0)
    second = int(params.get("second") or 0)

    # ISO 8601 time zone adjustments
    tz = params.get("tz")
    if tz and tz != "Z":
        tzhour = int(params.get("tzhour") or 0)
        tzmin = int(params.get("tzmin") or 0)
        if tz[0] == "-":
            hour += tzhour
            minute += tzmin
        elif tz[0] == "+":
            hour -= tzhour
            minute -= tzmin
        else:
            return None

    # Normalize out-of-range fields with plain calendar arithmetic rather
    # than time.mktime()/time.localtime(): those interpret the fields in the
    # host's local time zone, which shifts times that fall into a DST gap
    # and can report tm_isdst=1 for what is a UTC value.
    year += (month - 1) // 12
    month = (month - 1) % 12 + 1
    try:
        stamp = datetime.datetime(year, month, 1) + datetime.timedelta(
            days=day - 1, hours=hour, minutes=minute, seconds=second
        )
    except (OverflowError, ValueError):
        # The year is outside the range datetime can represent.
        return None
    return stamp.utctimetuple()
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
2
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is a part of feedparser.
|
|
6
|
+
#
|
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
|
8
|
+
# modification, are permitted provided that the following conditions are met:
|
|
9
|
+
#
|
|
10
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
11
|
+
# this list of conditions and the following disclaimer.
|
|
12
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
# and/or other materials provided with the distribution.
|
|
15
|
+
#
|
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
17
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
19
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
20
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
21
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
22
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
23
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
24
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
25
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
26
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
27
|
+
|
|
28
|
+
import re
|
|
29
|
+
|
|
30
|
+
from .w3dtf import _parse_date_w3dtf
|
|
31
|
+
|
|
32
|
+
# 8-bit date handling routines written by ytrewq1.
|
|
33
|
+
_korean_year = "\ub144"  # b3e2 in euc-kr
_korean_month = "\uc6d4"  # bff9 in euc-kr
_korean_day = "\uc77c"  # c0cf in euc-kr
_korean_am = "\uc624\uc804"  # bfc0 c0fc in euc-kr
_korean_pm = "\uc624\ud6c4"  # bfc0 c8c4 in euc-kr

# OnBlog format: year, month, and day are each followed by their Korean
# suffix character and then whitespace, followed by HH:MM:SS.
_korean_onblog_date_re = re.compile(
    r"(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})"
    % (_korean_year, _korean_month, _korean_day)
)

# Nate format: YYYY-MM-DD, the Korean AM or PM marker, then H:M:S.
_korean_nate_date_re = re.compile(
    r"(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})"
    % (_korean_am, _korean_pm)
)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _parse_date_onblog(dateString):
    """Parse a string according to the OnBlog 8-bit date format.

    The matched fields are rearranged into a W3DTF string with a fixed
    ``+09:00`` offset and handed to the W3DTF parser.

    :param str dateString: the date/time string to interpret
    :returns: whatever ``_parse_date_w3dtf`` returns for the rebuilt string,
        or None if the string does not match the OnBlog pattern
    """
    m = _korean_onblog_date_re.match(dateString)
    if not m:
        return None
    year, month, day, hour, minute, second = m.groups()
    w3dtfdate = f"{year}-{month}-{day}T{hour}:{minute}:{second}+09:00"
    return _parse_date_w3dtf(w3dtfdate)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _parse_date_nate(dateString):
    """Parse a string according to the Nate 8-bit date format.

    The hour is given on a 12-hour clock with a Korean AM/PM marker; it is
    converted to a 24-hour value, the fields are rearranged into a W3DTF
    string with a fixed ``+09:00`` offset, and that string is handed to the
    W3DTF parser.

    :param str dateString: the date/time string to interpret
    :returns: whatever ``_parse_date_w3dtf`` returns for the rebuilt string,
        or None if the string does not match the Nate pattern
    """
    m = _korean_nate_date_re.match(dateString)
    if not m:
        return None
    year, month, day, ampm, hour_text, minute, second = m.groups()
    try:
        hour = int(hour_text)
    except ValueError:
        # The pattern permits an empty hour field.
        return None
    if ampm == _korean_pm:
        # 12 PM is noon and must stay 12; adding 12 would produce hour 24.
        if hour < 12:
            hour += 12
    elif hour == 12:
        # 12 AM is midnight.
        hour = 0
    w3dtfdate = f"{year}-{month}-{day}T{hour:02d}:{minute}:{second}+09:00"
    return _parse_date_w3dtf(w3dtfdate)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
2
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is a part of feedparser.
|
|
6
|
+
#
|
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
|
8
|
+
# modification, are permitted provided that the following conditions are met:
|
|
9
|
+
#
|
|
10
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
11
|
+
# this list of conditions and the following disclaimer.
|
|
12
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
# and/or other materials provided with the distribution.
|
|
15
|
+
#
|
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
17
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
19
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
20
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
21
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
22
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
23
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
24
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
25
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
26
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
27
|
+
|
|
28
|
+
import email.utils
|
|
29
|
+
import re
|
|
30
|
+
import time
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Example input: "Fri, 2006/09/15 08:19:53 EDT"
_perforce_date_re = re.compile(
    r"(\w{,3}), (\d{,4})/(\d{,2})/(\d{2}) (\d{,2}):(\d{2}):(\d{2}) (\w{,3})"
)

_perforce_month_names = (
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
)


def _parse_date_perforce(date_string):
    """Parse a date in ``Www, yyyy/mm/dd hh:mm:ss TTT`` format.

    The fields are rearranged into an RFC 822-style string, which is then
    parsed with ``email.utils.parsedate_tz``.

    :param str date_string: the date/time string to interpret
    :returns: a UTC time tuple, or None if the string does not match, the
        month is not in the range 1-12, or the rebuilt string cannot be parsed
    :rtype: time.struct_time | None
    """
    m = _perforce_date_re.search(date_string)
    if m is None:
        return None
    dow, year, month, day, hour, minute, second, tz = m.groups()
    try:
        month_number = int(month)
    except ValueError:
        # The pattern permits an empty month field.
        return None
    # Reject out-of-range months explicitly: 0 would otherwise index the
    # table from the end ("Dec") and anything above 12 would raise IndexError.
    if not 1 <= month_number <= 12:
        return None
    month_name = _perforce_month_names[month_number - 1]
    new_date_string = f"{dow}, {day} {month_name} {year} {hour}:{minute}:{second} {tz}"
    tm = email.utils.parsedate_tz(new_date_string)
    if not tm:
        return None
    return time.gmtime(email.utils.mktime_tz(tm))
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
2
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is a part of feedparser.
|
|
6
|
+
#
|
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
|
8
|
+
# modification, are permitted provided that the following conditions are met:
|
|
9
|
+
#
|
|
10
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
11
|
+
# this list of conditions and the following disclaimer.
|
|
12
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
# and/or other materials provided with the distribution.
|
|
15
|
+
#
|
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
17
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
19
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
20
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
21
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
22
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
23
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
24
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
25
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
26
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
27
|
+
|
|
28
|
+
import datetime
|
|
29
|
+
|
|
30
|
+
# Offsets, in whole hours, for the time zone names that are recognized.
timezone_names = {
    "ut": 0,
    "gmt": 0,
    "z": 0,
    "adt": -3,
    "ast": -4,
    "at": -4,
    "edt": -4,
    "est": -5,
    "et": -5,
    "cdt": -5,
    "cst": -6,
    "ct": -6,
    "mdt": -6,
    "mst": -7,
    "mt": -7,
    "pdt": -7,
    "pst": -8,
    "pt": -8,
    "a": -1,
    "n": 1,
    "m": -12,
    "y": 12,
    "met": 1,
    "mest": 2,
}
day_names = {"mon", "tue", "wed", "thu", "fri", "sat", "sun"}
months = {
    "jan": 1,
    "feb": 2,
    "mar": 3,
    "apr": 4,
    "may": 5,
    "jun": 6,
    "jul": 7,
    "aug": 8,
    "sep": 9,
    "oct": 10,
    "nov": 11,
    "dec": 12,
}


def _parse_date_rfc822(date):
    """Parse RFC 822 dates and times
    http://tools.ietf.org/html/rfc822#section-5

    There are some formatting differences that are accounted for:
    1. Years may be two or four digits.
    2. The month and day can be swapped.
    3. Additional timezone names are supported.
    4. A default time and timezone are assumed if only a date is present.

    :param str date: a date/time string that will be converted to a time tuple
    :returns: a UTC time tuple, or None
    :rtype: time.struct_time | None
    """

    parts = date.lower().split()
    if len(parts) < 5:
        # Assume that the time and timezone are missing
        parts.extend(("00:00:00", "0000"))
    # Remove the day name
    if parts[0][:3] in day_names:
        # Comma without spaces:
        # 'Fri,24 Nov 2023 18:28:36 -0000'
        if "," in parts[0] and parts[0][-1] != ",":
            parts.insert(1, parts[0].rpartition(",")[2])
        parts = parts[1:]
    if len(parts) < 5:
        # If there are still fewer than five parts, there's not enough
        # information to interpret this.
        return None

    # Handle the day and month name.
    month = months.get(parts[1][:3])
    try:
        day = int(parts[0])
    except ValueError:
        # Check if the day and month are swapped.
        if months.get(parts[0][:3]):
            try:
                day = int(parts[1])
            except ValueError:
                return None
            month = months.get(parts[0][:3])
        else:
            return None
    if not month:
        return None

    # Handle the year.
    try:
        year = int(parts[2])
    except ValueError:
        return None
    # Normalize two-digit years:
    # Anything in the 90's is interpreted as 1990 and on.
    # Anything 89 or less is interpreted as 2089 or before.
    if len(parts[2]) <= 2:
        year += (1900, 2000)[year < 90]

    # Handle the time (default to 00:00:00).
    time_parts = parts[3].split(":")
    time_parts.extend(("0",) * (3 - len(time_parts)))
    try:
        (hour, minute, second) = (int(i) for i in time_parts)
    except ValueError:
        return None

    # Handle the timezone information, if any (default to +0000).
    # Strip 'Etc/' from the timezone.
    if parts[4].startswith("etc/"):
        parts[4] = parts[4][4:]
    # Normalize timezones that start with 'gmt':
    # GMT-05:00 => -0500
    # GMT => GMT
    if parts[4].startswith("gmt"):
        parts[4] = "".join(parts[4][3:].split(":")) or "gmt"
    # Handle timezones like '-0500', '+0500', and 'EST'
    if parts[4] and parts[4][0] in ("-", "+"):
        # The minutes start one character later when a colon separates
        # them from the hours ('-05:00' versus '-0500').
        minutes_start = 4 if ":" in parts[4] else 3
        try:
            timezone_hours = int(parts[4][1:3])
            timezone_minutes = int(parts[4][minutes_start:])
        except ValueError:
            return None
        if parts[4].startswith("-"):
            timezone_hours *= -1
            timezone_minutes *= -1
    else:
        # Unrecognized names fall back to an offset of zero.
        timezone_hours = timezone_names.get(parts[4], 0)
        timezone_minutes = 0

    # Create the datetime object and timezone delta objects
    try:
        stamp = datetime.datetime(year, month, day, hour, minute, second)
    except ValueError:
        return None
    delta = datetime.timedelta(0, 0, 0, 0, timezone_minutes, timezone_hours)

    # Return the date and timestamp in a UTC 9-tuple
    try:
        return (stamp - delta).utctimetuple()
    except (OverflowError, ValueError):
        # IronPython throws ValueErrors instead of OverflowErrors
        return None
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
|
|
2
|
+
# Copyright 2002-2008 Mark Pilgrim
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This file is a part of feedparser.
|
|
6
|
+
#
|
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
|
8
|
+
# modification, are permitted provided that the following conditions are met:
|
|
9
|
+
#
|
|
10
|
+
# * Redistributions of source code must retain the above copyright notice,
|
|
11
|
+
# this list of conditions and the following disclaimer.
|
|
12
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
# and/or other materials provided with the distribution.
|
|
15
|
+
#
|
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
17
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
19
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
20
|
+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
21
|
+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
22
|
+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
23
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
24
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
25
|
+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
26
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
|
27
|
+
|
|
28
|
+
import datetime
|
|
29
|
+
|
|
30
|
+
# Offsets, in whole hours, for the time zone names that are recognized.
timezonenames = {
    "ut": 0,
    "gmt": 0,
    "z": 0,
    "adt": -3,
    "ast": -4,
    "at": -4,
    "edt": -4,
    "est": -5,
    "et": -5,
    "cdt": -5,
    "cst": -6,
    "ct": -6,
    "mdt": -6,
    "mst": -7,
    "mt": -7,
    "pdt": -7,
    "pst": -8,
    "pt": -8,
    "a": -1,
    "n": 1,
    "m": -12,
    "y": 12,
}
# W3 date and time format parser
# http://www.w3.org/TR/NOTE-datetime
# Also supports MSSQL-style datetimes as defined at:
# http://msdn.microsoft.com/en-us/library/ms186724.aspx
# (basically, allow a space as a date/time/timezone separator)


def _parse_date_w3dtf(datestr):
    """Parse a W3DTF (or MSSQL-style) date/time string.

    Missing month and day values default to 1, and missing time fields
    default to 0. Numeric offsets may be written with or without a colon
    (``+05:30`` or ``+0530``); unrecognized zone names are treated as UTC.

    :param str datestr: the date/time string to interpret
    :returns: a UTC time tuple, or None if the string cannot be interpreted
    :rtype: time.struct_time | None
    """
    if not datestr.strip():
        return None
    parts = datestr.lower().split("t")
    if len(parts) == 1:
        # This may be a date only, or may be an MSSQL-style date
        parts = parts[0].split()
        if len(parts) == 1:
            # Treat this as a date only
            parts.append("00:00:00z")
    elif len(parts) > 2:
        return None
    date_fields = parts[0].split("-", 2)
    if not date_fields or len(date_fields[0]) != 4:
        return None
    # Ensure that `date_fields` has 3 elements. Using '1' sets the default
    # month to January and the default day to the 1st of the month.
    date_fields.extend(["1"] * (3 - len(date_fields)))
    try:
        year, month, day = (int(i) for i in date_fields)
    except ValueError:
        # `date_fields` may contain non-integer strings.
        return None
    if parts[1].endswith("z"):
        parts[1] = parts[1][:-1]
        parts.append("z")
    # Append the numeric timezone offset, if any, to parts.
    # If this is an MSSQL-style date then parts[2] already contains
    # the timezone information, so `append()` will not affect it.
    # Add 1 to each value so that if `find()` returns -1 it will be
    # treated as False.
    loc = parts[1].find("-") + 1 or parts[1].find("+") + 1 or len(parts[1]) + 1
    loc = loc - 1
    parts.append(parts[1][loc:])
    parts[1] = parts[1][:loc]
    time_fields = parts[1].split(":", 2)
    # Ensure that `time_fields` has 3 elements. Using '0' means that the
    # minutes and seconds, if missing, will default to 0.
    time_fields.extend(["0"] * (3 - len(time_fields)))
    tz = parts[2]
    if tz[:1] in ("-", "+"):
        offset = tz[1:]
        if ":" in offset:
            tzhour_text, _, tzmin_text = offset.partition(":")
        else:
            # No colon: the first two digits are hours and the rest are
            # minutes, so '+0530' keeps its 30 minutes.
            tzhour_text, tzmin_text = offset[:2], offset[2:] or "0"
        try:
            tzhour = int(tzhour_text)
            tzmin = int(tzmin_text)
        except ValueError:
            return None
        if tz.startswith("-"):
            tzhour = tzhour * -1
            tzmin = tzmin * -1
    else:
        tzhour = timezonenames.get(tz, 0)
        tzmin = 0
    try:
        hour, minute, second = (int(float(i)) for i in time_fields)
    except (OverflowError, ValueError):
        # int() raises OverflowError for infinite values such as 'inf'.
        return None
    # Create the datetime object and timezone delta objects
    try:
        stamp = datetime.datetime(year, month, day, hour, minute, second)
    except ValueError:
        return None
    delta = datetime.timedelta(0, 0, 0, 0, tzmin, tzhour)
    # Return the date and timestamp in a UTC 9-tuple
    try:
        return (stamp - delta).utctimetuple()
    except (OverflowError, ValueError):
        # IronPython throws ValueErrors instead of OverflowErrors
        return None
|