forkparser 2026.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
+ # Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
2
+ # Copyright 2002-2008 Mark Pilgrim
3
+ # All rights reserved.
4
+ #
5
+ # This file is a part of feedparser.
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions are met:
9
+ #
10
+ # * Redistributions of source code must retain the above copyright notice,
11
+ # this list of conditions and the following disclaimer.
12
+ # * Redistributions in binary form must reproduce the above copyright notice,
13
+ # this list of conditions and the following disclaimer in the documentation
14
+ # and/or other materials provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
17
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26
+ # POSSIBILITY OF SUCH DAMAGE.
27
+
28
+ import re
29
+
30
+ from .w3dtf import _parse_date_w3dtf
31
+
32
+ # Unicode strings for Hungarian date strings
33
+ _hungarian_months = {
34
+ "janu\u00e1r": "01", # e1 in iso-8859-2
35
+ "febru\u00e1ri": "02", # e1 in iso-8859-2
36
+ "m\u00e1rcius": "03", # e1 in iso-8859-2
37
+ "\u00e1prilis": "04", # e1 in iso-8859-2
38
+ "m\u00e1ujus": "05", # e1 in iso-8859-2
39
+ "j\u00fanius": "06", # fa in iso-8859-2
40
+ "j\u00falius": "07", # fa in iso-8859-2
41
+ "augusztus": "08",
42
+ "szeptember": "09",
43
+ "okt\u00f3ber": "10", # f3 in iso-8859-2
44
+ "november": "11",
45
+ "december": "12",
46
+ }
47
+
48
+ _hungarian_date_format_re = re.compile(
49
+ r"(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})([+-](\d{,2}:\d{2}))"
50
+ )
51
+
52
+
53
def _parse_date_hungarian(date_string):
    """Parse a string according to a Hungarian 8-bit date format.

    The string is matched against ``_hungarian_date_format_re``, the month
    name is translated through ``_hungarian_months``, and the rebuilt
    W3DTF-style string is handed to ``_parse_date_w3dtf``.

    :param str date_string: the date text to interpret
    :returns: whatever ``_parse_date_w3dtf`` returns, or None when the
        string does not match or the month name is unknown
    """
    found = _hungarian_date_format_re.match(date_string)
    if found is None:
        return None
    year, month_name, day, hour, minute, zone = found.group(1, 2, 3, 4, 5, 6)
    month = _hungarian_months.get(month_name)
    if month is None:
        return None
    # Only single-digit values are padded; an empty field is passed through
    # untouched, exactly as matched.
    day = "0" + day if len(day) == 1 else day
    hour = "0" + hour if len(hour) == 1 else hour
    return _parse_date_w3dtf(f"{year}-{month}-{day}T{hour}:{minute}{zone}")
@@ -0,0 +1,160 @@
1
+ # Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
2
+ # Copyright 2002-2008 Mark Pilgrim
3
+ # All rights reserved.
4
+ #
5
+ # This file is a part of feedparser.
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions are met:
9
+ #
10
+ # * Redistributions of source code must retain the above copyright notice,
11
+ # this list of conditions and the following disclaimer.
12
+ # * Redistributions in binary form must reproduce the above copyright notice,
13
+ # this list of conditions and the following disclaimer in the documentation
14
+ # and/or other materials provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
17
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26
+ # POSSIBILITY OF SUCH DAMAGE.
27
+
28
+ import re
29
+ import time
30
+
31
+ # ISO-8601 date parsing routines written by Fazal Majid.
32
+ # The ISO 8601 standard is very convoluted and irregular - a full ISO 8601
33
+ # parser is beyond the scope of feedparser and would be a worthwhile addition
34
+ # to the Python library.
35
+ # A single regular expression cannot parse ISO 8601 date formats into groups
36
+ # as the standard is highly irregular (for instance is 030104 2003-01-04 or
37
+ # 0301-04-01), so we use templates instead.
38
+ # Please note the order in templates is significant because we need a
39
+ # greedy match.
40
+ _iso8601_tmpl = [
41
+ "YYYY-?MM-?DD",
42
+ "YYYY-0MM?-?DD",
43
+ "YYYY-MM",
44
+ "YYYY-?OOO",
45
+ "YY-?MM-?DD",
46
+ "YY-?OOO",
47
+ "YYYY",
48
+ "-YY-?MM",
49
+ "-OOO",
50
+ "-YY",
51
+ "--MM-?DD",
52
+ "--MM",
53
+ "---DD",
54
+ "CC",
55
+ "",
56
+ ]
57
+
58
+ _iso8601_re = [
59
+ tmpl.replace("YYYY", r"(?P<year>\d{4})")
60
+ .replace("YY", r"(?P<year>\d\d)")
61
+ .replace("MM", r"(?P<month>[01]\d)")
62
+ .replace("DD", r"(?P<day>[0123]\d)")
63
+ .replace("OOO", r"(?P<ordinal>[0123]\d\d)")
64
+ .replace("CC", r"(?P<century>\d\d$)")
65
+ + r"(T?(?P<hour>\d{2}):(?P<minute>\d{2})"
66
+ + r"(:(?P<second>\d{2}))?"
67
+ + r"(\.(?P<fracsecond>\d+))?"
68
+ + r"(?P<tz>[+-](?P<tzhour>\d{2})(:(?P<tzmin>\d{2}))?|Z)?)?"
69
+ for tmpl in _iso8601_tmpl
70
+ ]
71
+ _iso8601_matches = [re.compile(regex).match for regex in _iso8601_re]
72
+
73
+
74
def _parse_date_iso8601(date_string):
    """Parse a variety of ISO-8601-compatible formats like 20040105.

    The precompiled templates in ``_iso8601_matches`` are tried in order and
    the first one that matches is used. A missing year, month or day is
    taken from ``time.gmtime()`` (with the day defaulting to 1 when a
    century, year or month was given); missing time and offset fields
    default to zero.

    :param str date_string: the text to interpret
    :returns: ``time.localtime(time.mktime(...))`` of the offset-adjusted
        fields, or None when nothing in the string could be matched
    :rtype: time.struct_time | None
    """
    attempts = (matcher(date_string) for matcher in _iso8601_matches)
    found = next((hit for hit in attempts if hit), None)
    # The all-optional templates can "match" zero characters; treat that
    # the same as no match at all.
    if found is None or found.span() == (0, 0):
        return None
    params = found.groupdict()

    ordinal = int(params.get("ordinal") or 0)

    year_text = params.get("year")
    if not year_text:
        year = time.gmtime().tm_year
    elif len(year_text) == 2:
        # ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993
        year = time.gmtime().tm_year // 100 * 100 + int(year_text)
    else:
        year = int(year_text)

    month_text = params.get("month")
    if month_text:
        month = int(month_text)
    elif ordinal:
        # ordinals are NOT normalized by mktime, we simulate them
        # by setting month=1, day=ordinal
        month = 1
    else:
        month = time.gmtime().tm_mon

    day_text = params.get("day")
    if day_text:
        day = int(day_text)
    elif ordinal:
        # see above
        day = ordinal
    elif params.get("century") or year_text or month_text:
        day = 1
    else:
        day = time.gmtime().tm_mday

    # special case of the century - is the first year of the 21st century
    # 2000 or 2001 ? The debate goes on...
    if "century" in params:
        year = (int(params["century"]) - 1) * 100 + 1

    # in ISO 8601 most fields are optional
    hour = int(params.get("hour") or 0)
    minute = int(params.get("minute") or 0)
    second = int(float(params.get("second") or 0))

    # ISO 8601 time zone adjustments: a negative offset is added and a
    # positive one subtracted; mktime() normalizes any overflow below.
    tz = params.get("tz")
    if tz and tz != "Z":
        if tz[0] == "-":
            direction = 1
        elif tz[0] == "+":
            direction = -1
        else:
            return None
        hour += direction * int(params.get("tzhour") or 0)
        minute += direction * int(params.get("tzmin") or 0)

    # weekday is normalized by mktime(), we can ignore it (0);
    # -1 leaves the daylight-savings flag undetermined.
    fields = (year, month, day, hour, minute, second, 0, ordinal, -1)
    # Python's time.mktime() is a wrapper around the ANSI C mktime(3c)
    # which is guaranteed to normalize d/m/y/h/m/s.
    # Many implementations have bugs, but we'll pretend they don't.
    return time.localtime(time.mktime(fields))
@@ -0,0 +1,94 @@
1
+ # Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
2
+ # Copyright 2002-2008 Mark Pilgrim
3
+ # All rights reserved.
4
+ #
5
+ # This file is a part of feedparser.
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions are met:
9
+ #
10
+ # * Redistributions of source code must retain the above copyright notice,
11
+ # this list of conditions and the following disclaimer.
12
+ # * Redistributions in binary form must reproduce the above copyright notice,
13
+ # this list of conditions and the following disclaimer in the documentation
14
+ # and/or other materials provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
17
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26
+ # POSSIBILITY OF SUCH DAMAGE.
27
+
28
+ import re
29
+
30
+ from .w3dtf import _parse_date_w3dtf
31
+
32
+ # 8-bit date handling routines written by ytrewq1.
33
+ _korean_year = "\ub144" # b3e2 in euc-kr
34
+ _korean_month = "\uc6d4" # bff9 in euc-kr
35
+ _korean_day = "\uc77c" # c0cf in euc-kr
36
+ _korean_am = "\uc624\uc804" # bfc0 c0fc in euc-kr
37
+ _korean_pm = "\uc624\ud6c4" # bfc0 c8c4 in euc-kr
38
+
39
+ _korean_onblog_date_re = re.compile(
40
+ r"(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})"
41
+ % (_korean_year, _korean_month, _korean_day)
42
+ )
43
+
44
+ _korean_nate_date_re = re.compile(
45
+ r"(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})"
46
+ % (_korean_am, _korean_pm)
47
+ )
48
+
49
+
50
def _parse_date_onblog(dateString):
    """Parse a string according to the OnBlog 8-bit date format.

    The six captured fields are reassembled into a W3DTF-style string with
    a fixed ``+09:00`` offset and passed to ``_parse_date_w3dtf``.

    :param str dateString: the date text to interpret
    :returns: whatever ``_parse_date_w3dtf`` returns, or None when the
        string does not match the OnBlog pattern
    """
    found = _korean_onblog_date_re.match(dateString)
    if found is None:
        return None
    year, month, day, hour, minute, second = found.groups()
    return _parse_date_w3dtf(
        f"{year}-{month}-{day}T{hour}:{minute}:{second}+09:00"
    )
68
+
69
+
70
def _parse_date_nate(dateString):
    """Parse a string according to the Nate 8-bit date format.

    The format carries a 12-hour clock with a Korean AM/PM marker. The hour
    is converted to a 24-hour value, the fields are reassembled into a
    W3DTF-style string with a fixed ``+09:00`` offset, and the result is
    passed to ``_parse_date_w3dtf``.

    :param str dateString: the date text to interpret
    :returns: whatever ``_parse_date_w3dtf`` returns, or None when the
        string does not match the Nate pattern or has no hour digits
    """
    m = _korean_nate_date_re.match(dateString)
    if not m:
        return None
    year, month, day, ampm, hour_text, minute, second = m.groups()
    # The pattern allows zero digits for the hour; int("") would raise
    # ValueError instead of reporting "not parseable".
    if not hour_text:
        return None
    hour = int(hour_text)
    # 12-hour clock: "12" is the start of the half-day, so 12 AM is 00:xx
    # and 12 PM is 12:xx. Adding 12 blindly turned 12 PM into the invalid
    # hour 24 and left 12 AM at noon.
    if hour == 12:
        hour = 0
    if ampm == _korean_pm:
        hour += 12
    w3dtfdate = f"{year}-{month}-{day}T{hour:02d}:{minute}:{second}+09:00"
    return _parse_date_w3dtf(w3dtfdate)
@@ -0,0 +1,63 @@
1
+ # Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
2
+ # Copyright 2002-2008 Mark Pilgrim
3
+ # All rights reserved.
4
+ #
5
+ # This file is a part of feedparser.
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions are met:
9
+ #
10
+ # * Redistributions of source code must retain the above copyright notice,
11
+ # this list of conditions and the following disclaimer.
12
+ # * Redistributions in binary form must reproduce the above copyright notice,
13
+ # this list of conditions and the following disclaimer in the documentation
14
+ # and/or other materials provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
17
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26
+ # POSSIBILITY OF SUCH DAMAGE.
27
+
28
+ import email.utils
29
+ import re
30
+ import time
31
+
32
+
33
+ def _parse_date_perforce(date_string):
34
+ """parse a date in yyyy/mm/dd hh:mm:ss TTT format"""
35
+ # Fri, 2006/09/15 08:19:53 EDT
36
+ _my_date_pattern = re.compile(
37
+ r"(\w{,3}), (\d{,4})/(\d{,2})/(\d{2}) (\d{,2}):(\d{2}):(\d{2}) (\w{,3})"
38
+ )
39
+
40
+ m = _my_date_pattern.search(date_string)
41
+ if m is None:
42
+ return None
43
+ dow, year, month, day, hour, minute, second, tz = m.groups()
44
+ months = [
45
+ "Jan",
46
+ "Feb",
47
+ "Mar",
48
+ "Apr",
49
+ "May",
50
+ "Jun",
51
+ "Jul",
52
+ "Aug",
53
+ "Sep",
54
+ "Oct",
55
+ "Nov",
56
+ "Dec",
57
+ ]
58
+ new_date_string = (
59
+ f"{dow}, {day} {months[int(month) - 1]} {year} {hour}:{minute}:{second} {tz}"
60
+ )
61
+ tm = email.utils.parsedate_tz(new_date_string)
62
+ if tm:
63
+ return time.gmtime(email.utils.mktime_tz(tm))
@@ -0,0 +1,179 @@
1
+ # Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
2
+ # Copyright 2002-2008 Mark Pilgrim
3
+ # All rights reserved.
4
+ #
5
+ # This file is a part of feedparser.
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions are met:
9
+ #
10
+ # * Redistributions of source code must retain the above copyright notice,
11
+ # this list of conditions and the following disclaimer.
12
+ # * Redistributions in binary form must reproduce the above copyright notice,
13
+ # this list of conditions and the following disclaimer in the documentation
14
+ # and/or other materials provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
17
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26
+ # POSSIBILITY OF SUCH DAMAGE.
27
+
28
+ import datetime
29
+
30
+ timezone_names = {
31
+ "ut": 0,
32
+ "gmt": 0,
33
+ "z": 0,
34
+ "adt": -3,
35
+ "ast": -4,
36
+ "at": -4,
37
+ "edt": -4,
38
+ "est": -5,
39
+ "et": -5,
40
+ "cdt": -5,
41
+ "cst": -6,
42
+ "ct": -6,
43
+ "mdt": -6,
44
+ "mst": -7,
45
+ "mt": -7,
46
+ "pdt": -7,
47
+ "pst": -8,
48
+ "pt": -8,
49
+ "a": -1,
50
+ "n": 1,
51
+ "m": -12,
52
+ "y": 12,
53
+ "met": 1,
54
+ "mest": 2,
55
+ }
56
+ day_names = {"mon", "tue", "wed", "thu", "fri", "sat", "sun"}
57
+ months = {
58
+ "jan": 1,
59
+ "feb": 2,
60
+ "mar": 3,
61
+ "apr": 4,
62
+ "may": 5,
63
+ "jun": 6,
64
+ "jul": 7,
65
+ "aug": 8,
66
+ "sep": 9,
67
+ "oct": 10,
68
+ "nov": 11,
69
+ "dec": 12,
70
+ }
71
+
72
+
73
+ def _parse_date_rfc822(date):
74
+ """Parse RFC 822 dates and times
75
+ http://tools.ietf.org/html/rfc822#section-5
76
+
77
+ There are some formatting differences that are accounted for:
78
+ 1. Years may be two or four digits.
79
+ 2. The month and day can be swapped.
80
+ 3. Additional timezone names are supported.
81
+ 4. A default time and timezone are assumed if only a date is present.
82
+
83
+ :param str date: a date/time string that will be converted to a time tuple
84
+ :returns: a UTC time tuple, or None
85
+ :rtype: time.struct_time | None
86
+ """
87
+
88
+ parts = date.lower().split()
89
+ if len(parts) < 5:
90
+ # Assume that the time and timezone are missing
91
+ parts.extend(("00:00:00", "0000"))
92
+ # Remove the day name
93
+ if parts[0][:3] in day_names:
94
+ # Comma without spaces:
95
+ # 'Fri,24 Nov 2023 18:28:36 -0000'
96
+ if "," in parts[0] and parts[0][-1] != ",":
97
+ parts.insert(1, parts[0].rpartition(",")[2])
98
+ parts = parts[1:]
99
+ if len(parts) < 5:
100
+ # If there are still fewer than five parts, there's not enough
101
+ # information to interpret this.
102
+ return None
103
+
104
+ # Handle the day and month name.
105
+ month = months.get(parts[1][:3])
106
+ try:
107
+ day = int(parts[0])
108
+ except ValueError:
109
+ # Check if the day and month are swapped.
110
+ if months.get(parts[0][:3]):
111
+ try:
112
+ day = int(parts[1])
113
+ except ValueError:
114
+ return None
115
+ month = months.get(parts[0][:3])
116
+ else:
117
+ return None
118
+ if not month:
119
+ return None
120
+
121
+ # Handle the year.
122
+ try:
123
+ year = int(parts[2])
124
+ except ValueError:
125
+ return None
126
+ # Normalize two-digit years:
127
+ # Anything in the 90's is interpreted as 1990 and on.
128
+ # Anything 89 or less is interpreted as 2089 or before.
129
+ if len(parts[2]) <= 2:
130
+ year += (1900, 2000)[year < 90]
131
+
132
+ # Handle the time (default to 00:00:00).
133
+ time_parts = parts[3].split(":")
134
+ time_parts.extend(("0",) * (3 - len(time_parts)))
135
+ try:
136
+ (hour, minute, second) = (int(i) for i in time_parts)
137
+ except ValueError:
138
+ return None
139
+
140
+ # Handle the timezone information, if any (default to +0000).
141
+ # Strip 'Etc/' from the timezone.
142
+ if parts[4].startswith("etc/"):
143
+ parts[4] = parts[4][4:]
144
+ # Normalize timezones that start with 'gmt':
145
+ # GMT-05:00 => -0500
146
+ # GMT => GMT
147
+ if parts[4].startswith("gmt"):
148
+ parts[4] = "".join(parts[4][3:].split(":")) or "gmt"
149
+ # Handle timezones like '-0500', '+0500', and 'EST'
150
+ if parts[4] and parts[4][0] in ("-", "+"):
151
+ try:
152
+ if ":" in parts[4]:
153
+ timezone_hours = int(parts[4][1:3])
154
+ timezone_minutes = int(parts[4][4:])
155
+ else:
156
+ timezone_hours = int(parts[4][1:3])
157
+ timezone_minutes = int(parts[4][3:])
158
+ except ValueError:
159
+ return None
160
+ if parts[4].startswith("-"):
161
+ timezone_hours *= -1
162
+ timezone_minutes *= -1
163
+ else:
164
+ timezone_hours = timezone_names.get(parts[4], 0)
165
+ timezone_minutes = 0
166
+
167
+ # Create the datetime object and timezone delta objects
168
+ try:
169
+ stamp = datetime.datetime(year, month, day, hour, minute, second)
170
+ except ValueError:
171
+ return None
172
+ delta = datetime.timedelta(0, 0, 0, 0, timezone_minutes, timezone_hours)
173
+
174
+ # Return the date and timestamp in a UTC 9-tuple
175
+ try:
176
+ return (stamp - delta).utctimetuple()
177
+ except (OverflowError, ValueError):
178
+ # IronPython throws ValueErrors instead of OverflowErrors
179
+ return None
@@ -0,0 +1,128 @@
1
+ # Copyright 2010-2025 Kurt McKee <contactme@kurtmckee.org>
2
+ # Copyright 2002-2008 Mark Pilgrim
3
+ # All rights reserved.
4
+ #
5
+ # This file is a part of feedparser.
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions are met:
9
+ #
10
+ # * Redistributions of source code must retain the above copyright notice,
11
+ # this list of conditions and the following disclaimer.
12
+ # * Redistributions in binary form must reproduce the above copyright notice,
13
+ # this list of conditions and the following disclaimer in the documentation
14
+ # and/or other materials provided with the distribution.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
17
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26
+ # POSSIBILITY OF SUCH DAMAGE.
27
+
28
+ import datetime
29
+
30
+ timezonenames = {
31
+ "ut": 0,
32
+ "gmt": 0,
33
+ "z": 0,
34
+ "adt": -3,
35
+ "ast": -4,
36
+ "at": -4,
37
+ "edt": -4,
38
+ "est": -5,
39
+ "et": -5,
40
+ "cdt": -5,
41
+ "cst": -6,
42
+ "ct": -6,
43
+ "mdt": -6,
44
+ "mst": -7,
45
+ "mt": -7,
46
+ "pdt": -7,
47
+ "pst": -8,
48
+ "pt": -8,
49
+ "a": -1,
50
+ "n": 1,
51
+ "m": -12,
52
+ "y": 12,
53
+ }
54
+ # W3 date and time format parser
55
+ # http://www.w3.org/TR/NOTE-datetime
56
+ # Also supports MSSQL-style datetimes as defined at:
57
+ # http://msdn.microsoft.com/en-us/library/ms186724.aspx
58
+ # (basically, allow a space as a date/time/timezone separator)
59
+
60
+
61
+ def _parse_date_w3dtf(datestr):
62
+ if not datestr.strip():
63
+ return None
64
+ parts = datestr.lower().split("t")
65
+ if len(parts) == 1:
66
+ # This may be a date only, or may be an MSSQL-style date
67
+ parts = parts[0].split()
68
+ if len(parts) == 1:
69
+ # Treat this as a date only
70
+ parts.append("00:00:00z")
71
+ elif len(parts) > 2:
72
+ return None
73
+ date = parts[0].split("-", 2)
74
+ if not date or len(date[0]) != 4:
75
+ return None
76
+ # Ensure that `date` has 3 elements. Using '1' sets the default
77
+ # month to January and the default day to the 1st of the month.
78
+ date.extend(["1"] * (3 - len(date)))
79
+ try:
80
+ year, month, day = (int(i) for i in date)
81
+ except ValueError:
82
+ # `date` may have more than 3 elements or may contain
83
+ # non-integer strings.
84
+ return None
85
+ if parts[1].endswith("z"):
86
+ parts[1] = parts[1][:-1]
87
+ parts.append("z")
88
+ # Append the numeric timezone offset, if any, to parts.
89
+ # If this is an MSSQL-style date then parts[2] already contains
90
+ # the timezone information, so `append()` will not affect it.
91
+ # Add 1 to each value so that if `find()` returns -1 it will be
92
+ # treated as False.
93
+ loc = parts[1].find("-") + 1 or parts[1].find("+") + 1 or len(parts[1]) + 1
94
+ loc = loc - 1
95
+ parts.append(parts[1][loc:])
96
+ parts[1] = parts[1][:loc]
97
+ time = parts[1].split(":", 2)
98
+ # Ensure that time has 3 elements. Using '0' means that the
99
+ # minutes and seconds, if missing, will default to 0.
100
+ time.extend(["0"] * (3 - len(time)))
101
+ if parts[2][:1] in ("-", "+"):
102
+ try:
103
+ tzhour = int(parts[2][1:3])
104
+ tzmin = int(parts[2][4:])
105
+ except ValueError:
106
+ return None
107
+ if parts[2].startswith("-"):
108
+ tzhour = tzhour * -1
109
+ tzmin = tzmin * -1
110
+ else:
111
+ tzhour = timezonenames.get(parts[2], 0)
112
+ tzmin = 0
113
+ try:
114
+ hour, minute, second = (int(float(i)) for i in time)
115
+ except ValueError:
116
+ return None
117
+ # Create the datetime object and timezone delta objects
118
+ try:
119
+ stamp = datetime.datetime(year, month, day, hour, minute, second)
120
+ except ValueError:
121
+ return None
122
+ delta = datetime.timedelta(0, 0, 0, 0, tzmin, tzhour)
123
+ # Return the date and timestamp in a UTC 9-tuple
124
+ try:
125
+ return (stamp - delta).utctimetuple()
126
+ except (OverflowError, ValueError):
127
+ # IronPython throws ValueErrors instead of OverflowErrors
128
+ return None