plain 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plain/assets/README.md +5 -5
- plain/cli/README.md +2 -4
- plain/cli/cli.py +6 -6
- plain/csrf/middleware.py +0 -1
- plain/exceptions.py +1 -21
- plain/forms/fields.py +13 -3
- plain/forms/forms.py +0 -1
- plain/http/multipartparser.py +0 -2
- plain/http/request.py +18 -41
- plain/internal/files/base.py +1 -29
- plain/internal/handlers/wsgi.py +18 -1
- plain/paginator.py +0 -4
- plain/preflight/urls.py +0 -7
- plain/urls/resolvers.py +1 -1
- plain/utils/cache.py +0 -202
- plain/utils/encoding.py +0 -105
- plain/utils/functional.py +0 -7
- plain/utils/html.py +1 -276
- plain/utils/http.py +2 -189
- plain/utils/inspect.py +0 -35
- plain/utils/safestring.py +0 -3
- plain/utils/text.py +0 -253
- plain/validators.py +0 -11
- {plain-0.24.0.dist-info → plain-0.25.0.dist-info}/METADATA +1 -1
- {plain-0.24.0.dist-info → plain-0.25.0.dist-info}/RECORD +28 -32
- plain/utils/_os.py +0 -52
- plain/utils/dateformat.py +0 -330
- plain/utils/dates.py +0 -76
- plain/utils/email.py +0 -12
- {plain-0.24.0.dist-info → plain-0.25.0.dist-info}/WHEEL +0 -0
- {plain-0.24.0.dist-info → plain-0.25.0.dist-info}/entry_points.txt +0 -0
- {plain-0.24.0.dist-info → plain-0.25.0.dist-info}/licenses/LICENSE +0 -0
plain/utils/encoding.py
CHANGED
@@ -1,6 +1,4 @@
|
|
1
|
-
import codecs
|
2
1
|
import datetime
|
3
|
-
import locale
|
4
2
|
from decimal import Decimal
|
5
3
|
from types import NoneType
|
6
4
|
from urllib.parse import quote
|
@@ -127,109 +125,6 @@ _hextobyte.update(
|
|
127
125
|
)
|
128
126
|
|
129
127
|
|
130
|
-
def uri_to_iri(uri):
|
131
|
-
"""
|
132
|
-
Convert a Uniform Resource Identifier(URI) into an Internationalized
|
133
|
-
Resource Identifier(IRI).
|
134
|
-
|
135
|
-
This is the algorithm from RFC 3987 Section 3.2, excluding step 4.
|
136
|
-
|
137
|
-
Take an URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Plain/') and return
|
138
|
-
a string containing the encoded result (e.g. '/I%20♥%20Plain/').
|
139
|
-
"""
|
140
|
-
if uri is None:
|
141
|
-
return uri
|
142
|
-
uri = force_bytes(uri)
|
143
|
-
# Fast selective unquote: First, split on '%' and then starting with the
|
144
|
-
# second block, decode the first 2 bytes if they represent a hex code to
|
145
|
-
# decode. The rest of the block is the part after '%AB', not containing
|
146
|
-
# any '%'. Add that to the output without further processing.
|
147
|
-
bits = uri.split(b"%")
|
148
|
-
if len(bits) == 1:
|
149
|
-
iri = uri
|
150
|
-
else:
|
151
|
-
parts = [bits[0]]
|
152
|
-
append = parts.append
|
153
|
-
hextobyte = _hextobyte
|
154
|
-
for item in bits[1:]:
|
155
|
-
hex = item[:2]
|
156
|
-
if hex in hextobyte:
|
157
|
-
append(hextobyte[item[:2]])
|
158
|
-
append(item[2:])
|
159
|
-
else:
|
160
|
-
append(b"%")
|
161
|
-
append(item)
|
162
|
-
iri = b"".join(parts)
|
163
|
-
return repercent_broken_unicode(iri).decode()
|
164
|
-
|
165
|
-
|
166
|
-
def escape_uri_path(path):
|
167
|
-
"""
|
168
|
-
Escape the unsafe characters from the path portion of a Uniform Resource
|
169
|
-
Identifier (URI).
|
170
|
-
"""
|
171
|
-
# These are the "reserved" and "unreserved" characters specified in RFC
|
172
|
-
# 3986 Sections 2.2 and 2.3:
|
173
|
-
# reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
|
174
|
-
# unreserved = alphanum | mark
|
175
|
-
# mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
|
176
|
-
# The list of safe characters here is constructed subtracting ";", "=",
|
177
|
-
# and "?" according to RFC 3986 Section 3.3.
|
178
|
-
# The reason for not subtracting and escaping "/" is that we are escaping
|
179
|
-
# the entire path, not a path segment.
|
180
|
-
return quote(path, safe="/:@&+$,-_.!~*'()")
|
181
|
-
|
182
|
-
|
183
128
|
def punycode(domain):
|
184
129
|
"""Return the Punycode of the given domain if it's non-ASCII."""
|
185
130
|
return domain.encode("idna").decode("ascii")
|
186
|
-
|
187
|
-
|
188
|
-
def repercent_broken_unicode(path):
|
189
|
-
"""
|
190
|
-
As per RFC 3987 Section 3.2, step three of converting a URI into an IRI,
|
191
|
-
repercent-encode any octet produced that is not part of a strictly legal
|
192
|
-
UTF-8 octet sequence.
|
193
|
-
"""
|
194
|
-
while True:
|
195
|
-
try:
|
196
|
-
path.decode()
|
197
|
-
except UnicodeDecodeError as e:
|
198
|
-
# CVE-2019-14235: A recursion shouldn't be used since the exception
|
199
|
-
# handling uses massive amounts of memory
|
200
|
-
repercent = quote(path[e.start : e.end], safe=b"/#%[]=:;$&()+,!?*@'~")
|
201
|
-
path = path[: e.start] + repercent.encode() + path[e.end :]
|
202
|
-
else:
|
203
|
-
return path
|
204
|
-
|
205
|
-
|
206
|
-
def filepath_to_uri(path):
|
207
|
-
"""Convert a file system path to a URI portion that is suitable for
|
208
|
-
inclusion in a URL.
|
209
|
-
|
210
|
-
Encode certain chars that would normally be recognized as special chars
|
211
|
-
for URIs. Do not encode the ' character, as it is a valid character
|
212
|
-
within URIs. See the encodeURIComponent() JavaScript function for details.
|
213
|
-
"""
|
214
|
-
if path is None:
|
215
|
-
return path
|
216
|
-
# I know about `os.sep` and `os.altsep` but I want to leave
|
217
|
-
# some flexibility for hardcoding separators.
|
218
|
-
return quote(str(path).replace("\\", "/"), safe="/~!*()'")
|
219
|
-
|
220
|
-
|
221
|
-
def get_system_encoding():
|
222
|
-
"""
|
223
|
-
The encoding for the character type functions. Fallback to 'ascii' if the
|
224
|
-
#encoding is unsupported by Python or could not be determined. See tickets
|
225
|
-
#10335 and #5846.
|
226
|
-
"""
|
227
|
-
try:
|
228
|
-
encoding = locale.getlocale()[1] or "ascii"
|
229
|
-
codecs.lookup(encoding)
|
230
|
-
except Exception:
|
231
|
-
encoding = "ascii"
|
232
|
-
return encoding
|
233
|
-
|
234
|
-
|
235
|
-
DEFAULT_LOCALE_ENCODING = get_system_encoding()
|
plain/utils/functional.py
CHANGED
@@ -205,13 +205,6 @@ def _lazy_proxy_unpickle(func, args, kwargs, *resultclasses):
|
|
205
205
|
return lazy(func, *resultclasses)(*args, **kwargs)
|
206
206
|
|
207
207
|
|
208
|
-
def lazystr(text):
|
209
|
-
"""
|
210
|
-
Shortcut for the common case of a lazy callable that returns str.
|
211
|
-
"""
|
212
|
-
return lazy(str, str)(text)
|
213
|
-
|
214
|
-
|
215
208
|
def keep_lazy(*resultclasses):
|
216
209
|
"""
|
217
210
|
A decorator that allows a function to be called with one or more lazy
|
plain/utils/html.py
CHANGED
@@ -2,16 +2,10 @@
|
|
2
2
|
|
3
3
|
import html
|
4
4
|
import json
|
5
|
-
import re
|
6
5
|
from html.parser import HTMLParser
|
7
|
-
from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit
|
8
6
|
|
9
|
-
from plain.utils.encoding import punycode
|
10
7
|
from plain.utils.functional import Promise, keep_lazy, keep_lazy_text
|
11
|
-
from plain.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
|
12
|
-
from plain.utils.regex_helper import _lazy_re_compile
|
13
|
-
from plain.utils.safestring import SafeData, SafeString, mark_safe
|
14
|
-
from plain.utils.text import normalize_newlines
|
8
|
+
from plain.utils.safestring import SafeString, mark_safe
|
15
9
|
|
16
10
|
|
17
11
|
@keep_lazy(SafeString)
|
@@ -27,31 +21,6 @@ def escape(text):
|
|
27
21
|
return SafeString(html.escape(str(text)))
|
28
22
|
|
29
23
|
|
30
|
-
_js_escapes = {
|
31
|
-
ord("\\"): "\\u005C",
|
32
|
-
ord("'"): "\\u0027",
|
33
|
-
ord('"'): "\\u0022",
|
34
|
-
ord(">"): "\\u003E",
|
35
|
-
ord("<"): "\\u003C",
|
36
|
-
ord("&"): "\\u0026",
|
37
|
-
ord("="): "\\u003D",
|
38
|
-
ord("-"): "\\u002D",
|
39
|
-
ord(";"): "\\u003B",
|
40
|
-
ord("`"): "\\u0060",
|
41
|
-
ord("\u2028"): "\\u2028",
|
42
|
-
ord("\u2029"): "\\u2029",
|
43
|
-
}
|
44
|
-
|
45
|
-
# Escape every ASCII character with a value less than 32.
|
46
|
-
_js_escapes.update((ord("%c" % z), f"\\u{z:04X}") for z in range(32)) # noqa: UP031
|
47
|
-
|
48
|
-
|
49
|
-
@keep_lazy(SafeString)
|
50
|
-
def escapejs(value):
|
51
|
-
"""Hex encode characters for use in JavaScript strings."""
|
52
|
-
return mark_safe(str(value).translate(_js_escapes))
|
53
|
-
|
54
|
-
|
55
24
|
_json_script_escapes = {
|
56
25
|
ord(">"): "\\u003E",
|
57
26
|
ord("<"): "\\u003C",
|
@@ -105,39 +74,6 @@ def format_html(format_string, *args, **kwargs):
|
|
105
74
|
return mark_safe(format_string.format(*args_safe, **kwargs_safe))
|
106
75
|
|
107
76
|
|
108
|
-
def format_html_join(sep, format_string, args_generator):
|
109
|
-
"""
|
110
|
-
A wrapper of format_html, for the common case of a group of arguments that
|
111
|
-
need to be formatted using the same format string, and then joined using
|
112
|
-
'sep'. 'sep' is also passed through conditional_escape.
|
113
|
-
|
114
|
-
'args_generator' should be an iterator that returns the sequence of 'args'
|
115
|
-
that will be passed to format_html.
|
116
|
-
|
117
|
-
Example:
|
118
|
-
|
119
|
-
format_html_join('\n', "<li>{} {}</li>", ((u.first_name, u.last_name)
|
120
|
-
for u in users))
|
121
|
-
"""
|
122
|
-
return mark_safe(
|
123
|
-
conditional_escape(sep).join(
|
124
|
-
format_html(format_string, *args) for args in args_generator
|
125
|
-
)
|
126
|
-
)
|
127
|
-
|
128
|
-
|
129
|
-
@keep_lazy_text
|
130
|
-
def linebreaks(value, autoescape=False):
|
131
|
-
"""Convert newlines into <p> and <br>s."""
|
132
|
-
value = normalize_newlines(value)
|
133
|
-
paras = re.split("\n{2,}", str(value))
|
134
|
-
if autoescape:
|
135
|
-
paras = ["<p>{}</p>".format(escape(p).replace("\n", "<br>")) for p in paras]
|
136
|
-
else:
|
137
|
-
paras = ["<p>{}</p>".format(p.replace("\n", "<br>")) for p in paras]
|
138
|
-
return "\n\n".join(paras)
|
139
|
-
|
140
|
-
|
141
77
|
class MLStripper(HTMLParser):
|
142
78
|
def __init__(self):
|
143
79
|
super().__init__(convert_charrefs=False)
|
@@ -182,217 +118,6 @@ def strip_tags(value):
|
|
182
118
|
return value
|
183
119
|
|
184
120
|
|
185
|
-
@keep_lazy_text
|
186
|
-
def strip_spaces_between_tags(value):
|
187
|
-
"""Return the given HTML with spaces between tags removed."""
|
188
|
-
return re.sub(r">\s+<", "><", str(value))
|
189
|
-
|
190
|
-
|
191
|
-
def smart_urlquote(url):
|
192
|
-
"""Quote a URL if it isn't already quoted."""
|
193
|
-
|
194
|
-
def unquote_quote(segment):
|
195
|
-
segment = unquote(segment)
|
196
|
-
# Tilde is part of RFC 3986 Section 2.3 Unreserved Characters,
|
197
|
-
# see also https://bugs.python.org/issue16285
|
198
|
-
return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~")
|
199
|
-
|
200
|
-
# Handle IDN before quoting.
|
201
|
-
try:
|
202
|
-
scheme, netloc, path, query, fragment = urlsplit(url)
|
203
|
-
except ValueError:
|
204
|
-
# invalid IPv6 URL (normally square brackets in hostname part).
|
205
|
-
return unquote_quote(url)
|
206
|
-
|
207
|
-
try:
|
208
|
-
netloc = punycode(netloc) # IDN -> ACE
|
209
|
-
except UnicodeError: # invalid domain part
|
210
|
-
return unquote_quote(url)
|
211
|
-
|
212
|
-
if query:
|
213
|
-
# Separately unquoting key/value, so as to not mix querystring separators
|
214
|
-
# included in query values. See #22267.
|
215
|
-
query_parts = [
|
216
|
-
(unquote(q[0]), unquote(q[1]))
|
217
|
-
for q in parse_qsl(query, keep_blank_values=True)
|
218
|
-
]
|
219
|
-
# urlencode will take care of quoting
|
220
|
-
query = urlencode(query_parts)
|
221
|
-
|
222
|
-
path = unquote_quote(path)
|
223
|
-
fragment = unquote_quote(fragment)
|
224
|
-
|
225
|
-
return urlunsplit((scheme, netloc, path, query, fragment))
|
226
|
-
|
227
|
-
|
228
|
-
class Urlizer:
|
229
|
-
"""
|
230
|
-
Convert any URLs in text into clickable links.
|
231
|
-
|
232
|
-
Work on http://, https://, www. links, and also on links ending in one of
|
233
|
-
the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
|
234
|
-
Links can have trailing punctuation (periods, commas, close-parens) and
|
235
|
-
leading punctuation (opening parens) and it'll still do the right thing.
|
236
|
-
"""
|
237
|
-
|
238
|
-
trailing_punctuation_chars = ".,:;!"
|
239
|
-
wrapping_punctuation = [("(", ")"), ("[", "]")]
|
240
|
-
|
241
|
-
simple_url_re = _lazy_re_compile(r"^https?://\[?\w", re.IGNORECASE)
|
242
|
-
simple_url_2_re = _lazy_re_compile(
|
243
|
-
r"^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$", re.IGNORECASE
|
244
|
-
)
|
245
|
-
word_split_re = _lazy_re_compile(r"""([\s<>"']+)""")
|
246
|
-
|
247
|
-
mailto_template = "mailto:{local}@{domain}"
|
248
|
-
url_template = '<a href="{href}"{attrs}>{url}</a>'
|
249
|
-
|
250
|
-
def __call__(self, text, trim_url_limit=None, nofollow=False, autoescape=False):
|
251
|
-
"""
|
252
|
-
If trim_url_limit is not None, truncate the URLs in the link text
|
253
|
-
longer than this limit to trim_url_limit - 1 characters and append an
|
254
|
-
ellipsis.
|
255
|
-
|
256
|
-
If nofollow is True, give the links a rel="nofollow" attribute.
|
257
|
-
|
258
|
-
If autoescape is True, autoescape the link text and URLs.
|
259
|
-
"""
|
260
|
-
safe_input = isinstance(text, SafeData)
|
261
|
-
|
262
|
-
words = self.word_split_re.split(str(text))
|
263
|
-
return "".join(
|
264
|
-
[
|
265
|
-
self.handle_word(
|
266
|
-
word,
|
267
|
-
safe_input=safe_input,
|
268
|
-
trim_url_limit=trim_url_limit,
|
269
|
-
nofollow=nofollow,
|
270
|
-
autoescape=autoescape,
|
271
|
-
)
|
272
|
-
for word in words
|
273
|
-
]
|
274
|
-
)
|
275
|
-
|
276
|
-
def handle_word(
|
277
|
-
self,
|
278
|
-
word,
|
279
|
-
*,
|
280
|
-
safe_input,
|
281
|
-
trim_url_limit=None,
|
282
|
-
nofollow=False,
|
283
|
-
autoescape=False,
|
284
|
-
):
|
285
|
-
if "." in word or "@" in word or ":" in word:
|
286
|
-
# lead: Punctuation trimmed from the beginning of the word.
|
287
|
-
# middle: State of the word.
|
288
|
-
# trail: Punctuation trimmed from the end of the word.
|
289
|
-
lead, middle, trail = self.trim_punctuation(word)
|
290
|
-
# Make URL we want to point to.
|
291
|
-
url = None
|
292
|
-
nofollow_attr = ' rel="nofollow"' if nofollow else ""
|
293
|
-
if self.simple_url_re.match(middle):
|
294
|
-
url = smart_urlquote(html.unescape(middle))
|
295
|
-
elif self.simple_url_2_re.match(middle):
|
296
|
-
url = smart_urlquote(f"http://{html.unescape(middle)}")
|
297
|
-
elif ":" not in middle and self.is_email_simple(middle):
|
298
|
-
local, domain = middle.rsplit("@", 1)
|
299
|
-
try:
|
300
|
-
domain = punycode(domain)
|
301
|
-
except UnicodeError:
|
302
|
-
return word
|
303
|
-
url = self.mailto_template.format(local=local, domain=domain)
|
304
|
-
nofollow_attr = ""
|
305
|
-
# Make link.
|
306
|
-
if url:
|
307
|
-
trimmed = self.trim_url(middle, limit=trim_url_limit)
|
308
|
-
if autoescape and not safe_input:
|
309
|
-
lead, trail = escape(lead), escape(trail)
|
310
|
-
trimmed = escape(trimmed)
|
311
|
-
middle = self.url_template.format(
|
312
|
-
href=escape(url),
|
313
|
-
attrs=nofollow_attr,
|
314
|
-
url=trimmed,
|
315
|
-
)
|
316
|
-
return mark_safe(f"{lead}{middle}{trail}")
|
317
|
-
else:
|
318
|
-
if safe_input:
|
319
|
-
return mark_safe(word)
|
320
|
-
elif autoescape:
|
321
|
-
return escape(word)
|
322
|
-
elif safe_input:
|
323
|
-
return mark_safe(word)
|
324
|
-
elif autoescape:
|
325
|
-
return escape(word)
|
326
|
-
return word
|
327
|
-
|
328
|
-
def trim_url(self, x, *, limit):
|
329
|
-
if limit is None or len(x) <= limit:
|
330
|
-
return x
|
331
|
-
return f"{x[: max(0, limit - 1)]}…"
|
332
|
-
|
333
|
-
def trim_punctuation(self, word):
|
334
|
-
"""
|
335
|
-
Trim trailing and wrapping punctuation from `word`. Return the items of
|
336
|
-
the new state.
|
337
|
-
"""
|
338
|
-
lead, middle, trail = "", word, ""
|
339
|
-
# Continue trimming until middle remains unchanged.
|
340
|
-
trimmed_something = True
|
341
|
-
while trimmed_something:
|
342
|
-
trimmed_something = False
|
343
|
-
# Trim wrapping punctuation.
|
344
|
-
for opening, closing in self.wrapping_punctuation:
|
345
|
-
if middle.startswith(opening):
|
346
|
-
middle = middle.removeprefix(opening)
|
347
|
-
lead += opening
|
348
|
-
trimmed_something = True
|
349
|
-
# Keep parentheses at the end only if they're balanced.
|
350
|
-
if (
|
351
|
-
middle.endswith(closing)
|
352
|
-
and middle.count(closing) == middle.count(opening) + 1
|
353
|
-
):
|
354
|
-
middle = middle.removesuffix(closing)
|
355
|
-
trail = closing + trail
|
356
|
-
trimmed_something = True
|
357
|
-
# Trim trailing punctuation (after trimming wrapping punctuation,
|
358
|
-
# as encoded entities contain ';'). Unescape entities to avoid
|
359
|
-
# breaking them by removing ';'.
|
360
|
-
middle_unescaped = html.unescape(middle)
|
361
|
-
stripped = middle_unescaped.rstrip(self.trailing_punctuation_chars)
|
362
|
-
if middle_unescaped != stripped:
|
363
|
-
punctuation_count = len(middle_unescaped) - len(stripped)
|
364
|
-
trail = middle[-punctuation_count:] + trail
|
365
|
-
middle = middle[:-punctuation_count]
|
366
|
-
trimmed_something = True
|
367
|
-
return lead, middle, trail
|
368
|
-
|
369
|
-
@staticmethod
|
370
|
-
def is_email_simple(value):
|
371
|
-
"""Return True if value looks like an email address."""
|
372
|
-
# An @ must be in the middle of the value.
|
373
|
-
if "@" not in value or value.startswith("@") or value.endswith("@"):
|
374
|
-
return False
|
375
|
-
try:
|
376
|
-
p1, p2 = value.split("@")
|
377
|
-
except ValueError:
|
378
|
-
# value contains more than one @.
|
379
|
-
return False
|
380
|
-
# Dot must be in p2 (e.g. example.com)
|
381
|
-
if "." not in p2 or p2.startswith("."):
|
382
|
-
return False
|
383
|
-
return True
|
384
|
-
|
385
|
-
|
386
|
-
urlizer = Urlizer()
|
387
|
-
|
388
|
-
|
389
|
-
@keep_lazy_text
|
390
|
-
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
391
|
-
return urlizer(
|
392
|
-
text, trim_url_limit=trim_url_limit, nofollow=nofollow, autoescape=autoescape
|
393
|
-
)
|
394
|
-
|
395
|
-
|
396
121
|
def avoid_wrapping(value):
|
397
122
|
"""
|
398
123
|
Avoid text wrapping in the middle of a phrase by adding non-breaking
|
plain/utils/http.py
CHANGED
@@ -1,40 +1,9 @@
|
|
1
|
-
import base64
|
2
|
-
import datetime
|
3
|
-
import re
|
4
|
-
import unicodedata
|
5
|
-
from binascii import Error as BinasciiError
|
6
1
|
from email.utils import formatdate
|
7
|
-
from urllib.parse import quote, unquote, urlparse
|
2
|
+
from urllib.parse import quote, unquote
|
8
3
|
from urllib.parse import urlencode as original_urlencode
|
9
4
|
|
10
5
|
from plain.utils.datastructures import MultiValueDict
|
11
|
-
from plain.utils.regex_helper import _lazy_re_compile
|
12
6
|
|
13
|
-
# Based on RFC 9110 Appendix A.
|
14
|
-
ETAG_MATCH = _lazy_re_compile(
|
15
|
-
r"""
|
16
|
-
\A( # start of string and capture group
|
17
|
-
(?:W/)? # optional weak indicator
|
18
|
-
" # opening quote
|
19
|
-
[^"]* # any sequence of non-quote characters
|
20
|
-
" # end quote
|
21
|
-
)\Z # end of string and capture group
|
22
|
-
""",
|
23
|
-
re.X,
|
24
|
-
)
|
25
|
-
|
26
|
-
MONTHS = "jan feb mar apr may jun jul aug sep oct nov dec".split()
|
27
|
-
__D = r"(?P<day>[0-9]{2})"
|
28
|
-
__D2 = r"(?P<day>[ 0-9][0-9])"
|
29
|
-
__M = r"(?P<mon>\w{3})"
|
30
|
-
__Y = r"(?P<year>[0-9]{4})"
|
31
|
-
__Y2 = r"(?P<year>[0-9]{2})"
|
32
|
-
__T = r"(?P<hour>[0-9]{2}):(?P<min>[0-9]{2}):(?P<sec>[0-9]{2})"
|
33
|
-
RFC1123_DATE = _lazy_re_compile(rf"^\w{{3}}, {__D} {__M} {__Y} {__T} GMT$")
|
34
|
-
RFC850_DATE = _lazy_re_compile(rf"^\w{{6,9}}, {__D}-{__M}-{__Y2} {__T} GMT$")
|
35
|
-
ASCTIME_DATE = _lazy_re_compile(rf"^\w{{3}} {__M} {__D2} {__T} {__Y}$")
|
36
|
-
|
37
|
-
RFC3986_GENDELIMS = ":/?#[]@"
|
38
7
|
RFC3986_SUBDELIMS = "!$&'()*+,;="
|
39
8
|
|
40
9
|
|
@@ -93,57 +62,6 @@ def http_date(epoch_seconds=None):
|
|
93
62
|
return formatdate(epoch_seconds, usegmt=True)
|
94
63
|
|
95
64
|
|
96
|
-
def parse_http_date(date):
|
97
|
-
"""
|
98
|
-
Parse a date format as specified by HTTP RFC 9110 Section 5.6.7.
|
99
|
-
|
100
|
-
The three formats allowed by the RFC are accepted, even if only the first
|
101
|
-
one is still in widespread use.
|
102
|
-
|
103
|
-
Return an integer expressed in seconds since the epoch, in UTC.
|
104
|
-
"""
|
105
|
-
# email.utils.parsedate() does the job for RFC 1123 dates; unfortunately
|
106
|
-
# RFC 9110 makes it mandatory to support RFC 850 dates too. So we roll
|
107
|
-
# our own RFC-compliant parsing.
|
108
|
-
for regex in RFC1123_DATE, RFC850_DATE, ASCTIME_DATE:
|
109
|
-
m = regex.match(date)
|
110
|
-
if m is not None:
|
111
|
-
break
|
112
|
-
else:
|
113
|
-
raise ValueError(f"{date!r} is not in a valid HTTP date format")
|
114
|
-
try:
|
115
|
-
tz = datetime.UTC
|
116
|
-
year = int(m["year"])
|
117
|
-
if year < 100:
|
118
|
-
current_year = datetime.datetime.now(tz=tz).year
|
119
|
-
current_century = current_year - (current_year % 100)
|
120
|
-
if year - (current_year % 100) > 50:
|
121
|
-
# year that appears to be more than 50 years in the future are
|
122
|
-
# interpreted as representing the past.
|
123
|
-
year += current_century - 100
|
124
|
-
else:
|
125
|
-
year += current_century
|
126
|
-
month = MONTHS.index(m["mon"].lower()) + 1
|
127
|
-
day = int(m["day"])
|
128
|
-
hour = int(m["hour"])
|
129
|
-
min = int(m["min"])
|
130
|
-
sec = int(m["sec"])
|
131
|
-
result = datetime.datetime(year, month, day, hour, min, sec, tzinfo=tz)
|
132
|
-
return int(result.timestamp())
|
133
|
-
except Exception as exc:
|
134
|
-
raise ValueError(f"{date!r} is not a valid date") from exc
|
135
|
-
|
136
|
-
|
137
|
-
def parse_http_date_safe(date):
|
138
|
-
"""
|
139
|
-
Same as parse_http_date, but return None if the input is invalid.
|
140
|
-
"""
|
141
|
-
try:
|
142
|
-
return parse_http_date(date)
|
143
|
-
except Exception:
|
144
|
-
pass
|
145
|
-
|
146
|
-
|
147
65
|
# Base 36 functions: useful for generating compact URLs
|
148
66
|
|
149
67
|
|
@@ -174,51 +92,6 @@ def int_to_base36(i):
|
|
174
92
|
return b36
|
175
93
|
|
176
94
|
|
177
|
-
def urlsafe_base64_encode(s):
|
178
|
-
"""
|
179
|
-
Encode a bytestring to a base64 string for use in URLs. Strip any trailing
|
180
|
-
equal signs.
|
181
|
-
"""
|
182
|
-
return base64.urlsafe_b64encode(s).rstrip(b"\n=").decode("ascii")
|
183
|
-
|
184
|
-
|
185
|
-
def urlsafe_base64_decode(s):
|
186
|
-
"""
|
187
|
-
Decode a base64 encoded string. Add back any trailing equal signs that
|
188
|
-
might have been stripped.
|
189
|
-
"""
|
190
|
-
s = s.encode()
|
191
|
-
try:
|
192
|
-
return base64.urlsafe_b64decode(s.ljust(len(s) + len(s) % 4, b"="))
|
193
|
-
except (LookupError, BinasciiError) as e:
|
194
|
-
raise ValueError(e)
|
195
|
-
|
196
|
-
|
197
|
-
def parse_etags(etag_str):
|
198
|
-
"""
|
199
|
-
Parse a string of ETags given in an If-None-Match or If-Match header as
|
200
|
-
defined by RFC 9110. Return a list of quoted ETags, or ['*'] if all ETags
|
201
|
-
should be matched.
|
202
|
-
"""
|
203
|
-
if etag_str.strip() == "*":
|
204
|
-
return ["*"]
|
205
|
-
else:
|
206
|
-
# Parse each ETag individually, and return any that are valid.
|
207
|
-
etag_matches = (ETAG_MATCH.match(etag.strip()) for etag in etag_str.split(","))
|
208
|
-
return [match[1] for match in etag_matches if match]
|
209
|
-
|
210
|
-
|
211
|
-
def quote_etag(etag_str):
|
212
|
-
"""
|
213
|
-
If the provided string is already a quoted ETag, return it. Otherwise, wrap
|
214
|
-
the string in quotes, making it a strong ETag.
|
215
|
-
"""
|
216
|
-
if ETAG_MATCH.match(etag_str):
|
217
|
-
return etag_str
|
218
|
-
else:
|
219
|
-
return f'"{etag_str}"'
|
220
|
-
|
221
|
-
|
222
95
|
def is_same_domain(host, pattern):
|
223
96
|
"""
|
224
97
|
Return ``True`` if the host is either an exact match or a match
|
@@ -239,66 +112,6 @@ def is_same_domain(host, pattern):
|
|
239
112
|
)
|
240
113
|
|
241
114
|
|
242
|
-
def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
|
243
|
-
"""
|
244
|
-
Return ``True`` if the url uses an allowed host and a safe scheme.
|
245
|
-
|
246
|
-
Always return ``False`` on an empty url.
|
247
|
-
|
248
|
-
If ``require_https`` is ``True``, only 'https' will be considered a valid
|
249
|
-
scheme, as opposed to 'http' and 'https' with the default, ``False``.
|
250
|
-
|
251
|
-
Note: "True" doesn't entail that a URL is "safe". It may still be e.g.
|
252
|
-
quoted incorrectly. Ensure to also use plain.utils.encoding.iri_to_uri()
|
253
|
-
on the path component of untrusted URLs.
|
254
|
-
"""
|
255
|
-
if url is not None:
|
256
|
-
url = url.strip()
|
257
|
-
if not url:
|
258
|
-
return False
|
259
|
-
if allowed_hosts is None:
|
260
|
-
allowed_hosts = set()
|
261
|
-
elif isinstance(allowed_hosts, str):
|
262
|
-
allowed_hosts = {allowed_hosts}
|
263
|
-
# Chrome treats \ completely as / in paths but it could be part of some
|
264
|
-
# basic auth credentials so we need to check both URLs.
|
265
|
-
return _url_has_allowed_host_and_scheme(
|
266
|
-
url, allowed_hosts, require_https=require_https
|
267
|
-
) and _url_has_allowed_host_and_scheme(
|
268
|
-
url.replace("\\", "/"), allowed_hosts, require_https=require_https
|
269
|
-
)
|
270
|
-
|
271
|
-
|
272
|
-
def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
|
273
|
-
# Chrome considers any URL with more than two slashes to be absolute, but
|
274
|
-
# urlparse is not so flexible. Treat any url with three slashes as unsafe.
|
275
|
-
if url.startswith("///"):
|
276
|
-
return False
|
277
|
-
try:
|
278
|
-
url_info = urlparse(url)
|
279
|
-
except ValueError: # e.g. invalid IPv6 addresses
|
280
|
-
return False
|
281
|
-
# Forbid URLs like http:///example.com - with a scheme, but without a hostname.
|
282
|
-
# In that URL, example.com is not the hostname but, a path component. However,
|
283
|
-
# Chrome will still consider example.com to be the hostname, so we must not
|
284
|
-
# allow this syntax.
|
285
|
-
if not url_info.netloc and url_info.scheme:
|
286
|
-
return False
|
287
|
-
# Forbid URLs that start with control characters. Some browsers (like
|
288
|
-
# Chrome) ignore quite a few control characters at the start of a
|
289
|
-
# URL and might consider the URL as scheme relative.
|
290
|
-
if unicodedata.category(url[0])[0] == "C":
|
291
|
-
return False
|
292
|
-
scheme = url_info.scheme
|
293
|
-
# Consider URLs without a scheme (e.g. //example.com/p) to be http.
|
294
|
-
if not url_info.scheme and url_info.netloc:
|
295
|
-
scheme = "http"
|
296
|
-
valid_schemes = ["https"] if require_https else ["http", "https"]
|
297
|
-
return (not url_info.netloc or url_info.netloc in allowed_hosts) and (
|
298
|
-
not scheme or scheme in valid_schemes
|
299
|
-
)
|
300
|
-
|
301
|
-
|
302
115
|
def escape_leading_slashes(url):
|
303
116
|
"""
|
304
117
|
If redirecting to an absolute path (two leading slashes), a slash must be
|
@@ -347,7 +160,7 @@ def parse_header_parameters(line):
|
|
347
160
|
value = value[1:-1]
|
348
161
|
value = value.replace("\\\\", "\\").replace('\\"', '"')
|
349
162
|
if has_encoding:
|
350
|
-
encoding, lang, value = value.split("'")
|
163
|
+
encoding, _, value = value.split("'")
|
351
164
|
value = unquote(value, encoding=encoding)
|
352
165
|
pdict[name] = value
|
353
166
|
return key, pdict
|