plain 0.24.1__py3-none-any.whl → 0.26.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
plain/utils/html.py CHANGED
@@ -2,16 +2,10 @@

  import html
  import json
- import re
  from html.parser import HTMLParser
- from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit

- from plain.utils.encoding import punycode
  from plain.utils.functional import Promise, keep_lazy, keep_lazy_text
- from plain.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
- from plain.utils.regex_helper import _lazy_re_compile
- from plain.utils.safestring import SafeData, SafeString, mark_safe
- from plain.utils.text import normalize_newlines
+ from plain.utils.safestring import SafeString, mark_safe


  @keep_lazy(SafeString)
@@ -27,31 +21,6 @@ def escape(text):
      return SafeString(html.escape(str(text)))


- _js_escapes = {
-     ord("\\"): "\\u005C",
-     ord("'"): "\\u0027",
-     ord('"'): "\\u0022",
-     ord(">"): "\\u003E",
-     ord("<"): "\\u003C",
-     ord("&"): "\\u0026",
-     ord("="): "\\u003D",
-     ord("-"): "\\u002D",
-     ord(";"): "\\u003B",
-     ord("`"): "\\u0060",
-     ord("\u2028"): "\\u2028",
-     ord("\u2029"): "\\u2029",
- }
-
- # Escape every ASCII character with a value less than 32.
- _js_escapes.update((ord("%c" % z), f"\\u{z:04X}") for z in range(32))  # noqa: UP031
-
-
- @keep_lazy(SafeString)
- def escapejs(value):
-     """Hex encode characters for use in JavaScript strings."""
-     return mark_safe(str(value).translate(_js_escapes))
-
-
  _json_script_escapes = {
      ord(">"): "\\u003E",
      ord("<"): "\\u003C",
@@ -105,39 +74,6 @@ def format_html(format_string, *args, **kwargs):
      return mark_safe(format_string.format(*args_safe, **kwargs_safe))


- def format_html_join(sep, format_string, args_generator):
-     """
-     A wrapper of format_html, for the common case of a group of arguments that
-     need to be formatted using the same format string, and then joined using
-     'sep'. 'sep' is also passed through conditional_escape.
-
-     'args_generator' should be an iterator that returns the sequence of 'args'
-     that will be passed to format_html.
-
-     Example:
-
-       format_html_join('\n', "<li>{} {}</li>", ((u.first_name, u.last_name)
-           for u in users))
-     """
-     return mark_safe(
-         conditional_escape(sep).join(
-             format_html(format_string, *args) for args in args_generator
-         )
-     )
-
-
- @keep_lazy_text
- def linebreaks(value, autoescape=False):
-     """Convert newlines into <p> and <br>s."""
-     value = normalize_newlines(value)
-     paras = re.split("\n{2,}", str(value))
-     if autoescape:
-         paras = ["<p>{}</p>".format(escape(p).replace("\n", "<br>")) for p in paras]
-     else:
-         paras = ["<p>{}</p>".format(p.replace("\n", "<br>")) for p in paras]
-     return "\n\n".join(paras)
-
-
  class MLStripper(HTMLParser):
      def __init__(self):
          super().__init__(convert_charrefs=False)
@@ -182,217 +118,6 @@ def strip_tags(value):
      return value


- @keep_lazy_text
- def strip_spaces_between_tags(value):
-     """Return the given HTML with spaces between tags removed."""
-     return re.sub(r">\s+<", "><", str(value))
-
-
- def smart_urlquote(url):
-     """Quote a URL if it isn't already quoted."""
-
-     def unquote_quote(segment):
-         segment = unquote(segment)
-         # Tilde is part of RFC 3986 Section 2.3 Unreserved Characters,
-         # see also https://bugs.python.org/issue16285
-         return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~")
-
-     # Handle IDN before quoting.
-     try:
-         scheme, netloc, path, query, fragment = urlsplit(url)
-     except ValueError:
-         # invalid IPv6 URL (normally square brackets in hostname part).
-         return unquote_quote(url)
-
-     try:
-         netloc = punycode(netloc)  # IDN -> ACE
-     except UnicodeError:  # invalid domain part
-         return unquote_quote(url)
-
-     if query:
-         # Separately unquoting key/value, so as to not mix querystring separators
-         # included in query values. See #22267.
-         query_parts = [
-             (unquote(q[0]), unquote(q[1]))
-             for q in parse_qsl(query, keep_blank_values=True)
-         ]
-         # urlencode will take care of quoting
-         query = urlencode(query_parts)
-
-     path = unquote_quote(path)
-     fragment = unquote_quote(fragment)
-
-     return urlunsplit((scheme, netloc, path, query, fragment))
-
-
- class Urlizer:
-     """
-     Convert any URLs in text into clickable links.
-
-     Work on http://, https://, www. links, and also on links ending in one of
-     the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
-     Links can have trailing punctuation (periods, commas, close-parens) and
-     leading punctuation (opening parens) and it'll still do the right thing.
-     """
-
-     trailing_punctuation_chars = ".,:;!"
-     wrapping_punctuation = [("(", ")"), ("[", "]")]
-
-     simple_url_re = _lazy_re_compile(r"^https?://\[?\w", re.IGNORECASE)
-     simple_url_2_re = _lazy_re_compile(
-         r"^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$", re.IGNORECASE
-     )
-     word_split_re = _lazy_re_compile(r"""([\s<>"']+)""")
-
-     mailto_template = "mailto:{local}@{domain}"
-     url_template = '<a href="{href}"{attrs}>{url}</a>'
-
-     def __call__(self, text, trim_url_limit=None, nofollow=False, autoescape=False):
-         """
-         If trim_url_limit is not None, truncate the URLs in the link text
-         longer than this limit to trim_url_limit - 1 characters and append an
-         ellipsis.
-
-         If nofollow is True, give the links a rel="nofollow" attribute.
-
-         If autoescape is True, autoescape the link text and URLs.
-         """
-         safe_input = isinstance(text, SafeData)
-
-         words = self.word_split_re.split(str(text))
-         return "".join(
-             [
-                 self.handle_word(
-                     word,
-                     safe_input=safe_input,
-                     trim_url_limit=trim_url_limit,
-                     nofollow=nofollow,
-                     autoescape=autoescape,
-                 )
-                 for word in words
-             ]
-         )
-
-     def handle_word(
-         self,
-         word,
-         *,
-         safe_input,
-         trim_url_limit=None,
-         nofollow=False,
-         autoescape=False,
-     ):
-         if "." in word or "@" in word or ":" in word:
-             # lead: Punctuation trimmed from the beginning of the word.
-             # middle: State of the word.
-             # trail: Punctuation trimmed from the end of the word.
-             lead, middle, trail = self.trim_punctuation(word)
-             # Make URL we want to point to.
-             url = None
-             nofollow_attr = ' rel="nofollow"' if nofollow else ""
-             if self.simple_url_re.match(middle):
-                 url = smart_urlquote(html.unescape(middle))
-             elif self.simple_url_2_re.match(middle):
-                 url = smart_urlquote(f"http://{html.unescape(middle)}")
-             elif ":" not in middle and self.is_email_simple(middle):
-                 local, domain = middle.rsplit("@", 1)
-                 try:
-                     domain = punycode(domain)
-                 except UnicodeError:
-                     return word
-                 url = self.mailto_template.format(local=local, domain=domain)
-                 nofollow_attr = ""
-             # Make link.
-             if url:
-                 trimmed = self.trim_url(middle, limit=trim_url_limit)
-                 if autoescape and not safe_input:
-                     lead, trail = escape(lead), escape(trail)
-                     trimmed = escape(trimmed)
-                 middle = self.url_template.format(
-                     href=escape(url),
-                     attrs=nofollow_attr,
-                     url=trimmed,
-                 )
-                 return mark_safe(f"{lead}{middle}{trail}")
-             else:
-                 if safe_input:
-                     return mark_safe(word)
-                 elif autoescape:
-                     return escape(word)
-         elif safe_input:
-             return mark_safe(word)
-         elif autoescape:
-             return escape(word)
-         return word
-
-     def trim_url(self, x, *, limit):
-         if limit is None or len(x) <= limit:
-             return x
-         return f"{x[: max(0, limit - 1)]}…"
-
-     def trim_punctuation(self, word):
-         """
-         Trim trailing and wrapping punctuation from `word`. Return the items of
-         the new state.
-         """
-         lead, middle, trail = "", word, ""
-         # Continue trimming until middle remains unchanged.
-         trimmed_something = True
-         while trimmed_something:
-             trimmed_something = False
-             # Trim wrapping punctuation.
-             for opening, closing in self.wrapping_punctuation:
-                 if middle.startswith(opening):
-                     middle = middle.removeprefix(opening)
-                     lead += opening
-                     trimmed_something = True
-                 # Keep parentheses at the end only if they're balanced.
-                 if (
-                     middle.endswith(closing)
-                     and middle.count(closing) == middle.count(opening) + 1
-                 ):
-                     middle = middle.removesuffix(closing)
-                     trail = closing + trail
-                     trimmed_something = True
-             # Trim trailing punctuation (after trimming wrapping punctuation,
-             # as encoded entities contain ';'). Unescape entities to avoid
-             # breaking them by removing ';'.
-             middle_unescaped = html.unescape(middle)
-             stripped = middle_unescaped.rstrip(self.trailing_punctuation_chars)
-             if middle_unescaped != stripped:
-                 punctuation_count = len(middle_unescaped) - len(stripped)
-                 trail = middle[-punctuation_count:] + trail
-                 middle = middle[:-punctuation_count]
-                 trimmed_something = True
-         return lead, middle, trail
-
-     @staticmethod
-     def is_email_simple(value):
-         """Return True if value looks like an email address."""
-         # An @ must be in the middle of the value.
-         if "@" not in value or value.startswith("@") or value.endswith("@"):
-             return False
-         try:
-             p1, p2 = value.split("@")
-         except ValueError:
-             # value contains more than one @.
-             return False
-         # Dot must be in p2 (e.g. example.com)
-         if "." not in p2 or p2.startswith("."):
-             return False
-         return True
-
-
- urlizer = Urlizer()
-
-
- @keep_lazy_text
- def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
-     return urlizer(
-         text, trim_url_limit=trim_url_limit, nofollow=nofollow, autoescape=autoescape
-     )
-
-
  def avoid_wrapping(value):
      """
      Avoid text wrapping in the middle of a phrase by adding non-breaking
plain/utils/http.py CHANGED
@@ -1,40 +1,9 @@
- import base64
- import datetime
- import re
- import unicodedata
- from binascii import Error as BinasciiError
  from email.utils import formatdate
- from urllib.parse import quote, unquote, urlparse
+ from urllib.parse import quote, unquote
  from urllib.parse import urlencode as original_urlencode

  from plain.utils.datastructures import MultiValueDict
- from plain.utils.regex_helper import _lazy_re_compile

- # Based on RFC 9110 Appendix A.
- ETAG_MATCH = _lazy_re_compile(
-     r"""
-     \A(      # start of string and capture group
-     (?:W/)?  # optional weak indicator
-     "        # opening quote
-     [^"]*    # any sequence of non-quote characters
-     "        # end quote
-     )\Z      # end of string and capture group
-     """,
-     re.X,
- )
-
- MONTHS = "jan feb mar apr may jun jul aug sep oct nov dec".split()
- __D = r"(?P<day>[0-9]{2})"
- __D2 = r"(?P<day>[ 0-9][0-9])"
- __M = r"(?P<mon>\w{3})"
- __Y = r"(?P<year>[0-9]{4})"
- __Y2 = r"(?P<year>[0-9]{2})"
- __T = r"(?P<hour>[0-9]{2}):(?P<min>[0-9]{2}):(?P<sec>[0-9]{2})"
- RFC1123_DATE = _lazy_re_compile(rf"^\w{{3}}, {__D} {__M} {__Y} {__T} GMT$")
- RFC850_DATE = _lazy_re_compile(rf"^\w{{6,9}}, {__D}-{__M}-{__Y2} {__T} GMT$")
- ASCTIME_DATE = _lazy_re_compile(rf"^\w{{3}} {__M} {__D2} {__T} {__Y}$")
-
- RFC3986_GENDELIMS = ":/?#[]@"
  RFC3986_SUBDELIMS = "!$&'()*+,;="

@@ -93,57 +62,6 @@ def http_date(epoch_seconds=None):
      return formatdate(epoch_seconds, usegmt=True)


- def parse_http_date(date):
-     """
-     Parse a date format as specified by HTTP RFC 9110 Section 5.6.7.
-
-     The three formats allowed by the RFC are accepted, even if only the first
-     one is still in widespread use.
-
-     Return an integer expressed in seconds since the epoch, in UTC.
-     """
-     # email.utils.parsedate() does the job for RFC 1123 dates; unfortunately
-     # RFC 9110 makes it mandatory to support RFC 850 dates too. So we roll
-     # our own RFC-compliant parsing.
-     for regex in RFC1123_DATE, RFC850_DATE, ASCTIME_DATE:
-         m = regex.match(date)
-         if m is not None:
-             break
-     else:
-         raise ValueError(f"{date!r} is not in a valid HTTP date format")
-     try:
-         tz = datetime.UTC
-         year = int(m["year"])
-         if year < 100:
-             current_year = datetime.datetime.now(tz=tz).year
-             current_century = current_year - (current_year % 100)
-             if year - (current_year % 100) > 50:
-                 # year that appears to be more than 50 years in the future are
-                 # interpreted as representing the past.
-                 year += current_century - 100
-             else:
-                 year += current_century
-         month = MONTHS.index(m["mon"].lower()) + 1
-         day = int(m["day"])
-         hour = int(m["hour"])
-         min = int(m["min"])
-         sec = int(m["sec"])
-         result = datetime.datetime(year, month, day, hour, min, sec, tzinfo=tz)
-         return int(result.timestamp())
-     except Exception as exc:
-         raise ValueError(f"{date!r} is not a valid date") from exc
-
-
- def parse_http_date_safe(date):
-     """
-     Same as parse_http_date, but return None if the input is invalid.
-     """
-     try:
-         return parse_http_date(date)
-     except Exception:
-         pass
-
-
  # Base 36 functions: useful for generating compact URLs

@@ -174,51 +92,6 @@ def int_to_base36(i):
      return b36


- def urlsafe_base64_encode(s):
-     """
-     Encode a bytestring to a base64 string for use in URLs. Strip any trailing
-     equal signs.
-     """
-     return base64.urlsafe_b64encode(s).rstrip(b"\n=").decode("ascii")
-
-
- def urlsafe_base64_decode(s):
-     """
-     Decode a base64 encoded string. Add back any trailing equal signs that
-     might have been stripped.
-     """
-     s = s.encode()
-     try:
-         return base64.urlsafe_b64decode(s.ljust(len(s) + len(s) % 4, b"="))
-     except (LookupError, BinasciiError) as e:
-         raise ValueError(e)
-
-
- def parse_etags(etag_str):
-     """
-     Parse a string of ETags given in an If-None-Match or If-Match header as
-     defined by RFC 9110. Return a list of quoted ETags, or ['*'] if all ETags
-     should be matched.
-     """
-     if etag_str.strip() == "*":
-         return ["*"]
-     else:
-         # Parse each ETag individually, and return any that are valid.
-         etag_matches = (ETAG_MATCH.match(etag.strip()) for etag in etag_str.split(","))
-         return [match[1] for match in etag_matches if match]
-
-
- def quote_etag(etag_str):
-     """
-     If the provided string is already a quoted ETag, return it. Otherwise, wrap
-     the string in quotes, making it a strong ETag.
-     """
-     if ETAG_MATCH.match(etag_str):
-         return etag_str
-     else:
-         return f'"{etag_str}"'
-
-
  def is_same_domain(host, pattern):
      """
      Return ``True`` if the host is either an exact match or a match
@@ -239,66 +112,6 @@ def is_same_domain(host, pattern):
      )


- def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
-     """
-     Return ``True`` if the url uses an allowed host and a safe scheme.
-
-     Always return ``False`` on an empty url.
-
-     If ``require_https`` is ``True``, only 'https' will be considered a valid
-     scheme, as opposed to 'http' and 'https' with the default, ``False``.
-
-     Note: "True" doesn't entail that a URL is "safe". It may still be e.g.
-     quoted incorrectly. Ensure to also use plain.utils.encoding.iri_to_uri()
-     on the path component of untrusted URLs.
-     """
-     if url is not None:
-         url = url.strip()
-     if not url:
-         return False
-     if allowed_hosts is None:
-         allowed_hosts = set()
-     elif isinstance(allowed_hosts, str):
-         allowed_hosts = {allowed_hosts}
-     # Chrome treats \ completely as / in paths but it could be part of some
-     # basic auth credentials so we need to check both URLs.
-     return _url_has_allowed_host_and_scheme(
-         url, allowed_hosts, require_https=require_https
-     ) and _url_has_allowed_host_and_scheme(
-         url.replace("\\", "/"), allowed_hosts, require_https=require_https
-     )
-
-
- def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
-     # Chrome considers any URL with more than two slashes to be absolute, but
-     # urlparse is not so flexible. Treat any url with three slashes as unsafe.
-     if url.startswith("///"):
-         return False
-     try:
-         url_info = urlparse(url)
-     except ValueError:  # e.g. invalid IPv6 addresses
-         return False
-     # Forbid URLs like http:///example.com - with a scheme, but without a hostname.
-     # In that URL, example.com is not the hostname but, a path component. However,
-     # Chrome will still consider example.com to be the hostname, so we must not
-     # allow this syntax.
-     if not url_info.netloc and url_info.scheme:
-         return False
-     # Forbid URLs that start with control characters. Some browsers (like
-     # Chrome) ignore quite a few control characters at the start of a
-     # URL and might consider the URL as scheme relative.
-     if unicodedata.category(url[0])[0] == "C":
-         return False
-     scheme = url_info.scheme
-     # Consider URLs without a scheme (e.g. //example.com/p) to be http.
-     if not url_info.scheme and url_info.netloc:
-         scheme = "http"
-     valid_schemes = ["https"] if require_https else ["http", "https"]
-     return (not url_info.netloc or url_info.netloc in allowed_hosts) and (
-         not scheme or scheme in valid_schemes
-     )
-
-
  def escape_leading_slashes(url):
      """
      If redirecting to an absolute path (two leading slashes), a slash must be
@@ -347,7 +160,7 @@ def parse_header_parameters(line):
                  value = value[1:-1]
                  value = value.replace("\\\\", "\\").replace('\\"', '"')
              if has_encoding:
-                 encoding, lang, value = value.split("'")
+                 encoding, _, value = value.split("'")
                  value = unquote(value, encoding=encoding)
          pdict[name] = value
      return key, pdict
plain/utils/inspect.py CHANGED
@@ -25,49 +25,14 @@ def get_func_args(func):
      ]


- def get_func_full_args(func):
-     """
-     Return a list of (argument name, default value) tuples. If the argument
-     does not have a default value, omit it in the tuple. Arguments such as
-     *args and **kwargs are also included.
-     """
-     params = _get_callable_parameters(func)
-     args = []
-     for param in params:
-         name = param.name
-         # Ignore 'self'
-         if name == "self":
-             continue
-         if param.kind == inspect.Parameter.VAR_POSITIONAL:
-             name = "*" + name
-         elif param.kind == inspect.Parameter.VAR_KEYWORD:
-             name = "**" + name
-         if param.default != inspect.Parameter.empty:
-             args.append((name, param.default))
-         else:
-             args.append((name,))
-     return args
-
-
  def func_accepts_kwargs(func):
      """Return True if function 'func' accepts keyword arguments **kwargs."""
      return any(p for p in _get_callable_parameters(func) if p.kind == p.VAR_KEYWORD)


- def func_accepts_var_args(func):
-     """
-     Return True if function 'func' accepts positional arguments *args.
-     """
-     return any(p for p in _get_callable_parameters(func) if p.kind == p.VAR_POSITIONAL)
-
-
  def method_has_no_args(meth):
      """Return True if a method only accepts 'self'."""
      count = len(
          [p for p in _get_callable_parameters(meth) if p.kind == p.POSITIONAL_OR_KEYWORD]
      )
      return count == 0 if inspect.ismethod(meth) else count == 1
-
-
- def func_supports_parameter(func, name):
-     return any(param.name == name for param in _get_callable_parameters(func))
@@ -1,7 +1,6 @@
  import os
  import sys
  from importlib import import_module
- from importlib.util import find_spec as importlib_find


  def cached_import(module_path, class_name):
@@ -33,24 +32,6 @@ def import_string(dotted_path):
          ) from err


- def module_has_submodule(package, module_name):
-     """See if 'module' is in 'package'."""
-     try:
-         package_name = package.__name__
-         package_path = package.__path__
-     except AttributeError:
-         # package isn't a package.
-         return False
-
-     full_module_name = package_name + "." + module_name
-     try:
-         return importlib_find(full_module_name, package_path) is not None
-     except ModuleNotFoundError:
-         # When module_name is an invalid dotted path, Python raises
-         # ModuleNotFoundError.
-         return False
-
-
  def module_dir(module):
      """
      Find the name of the directory that contains a module, if possible.
plain/utils/safestring.py CHANGED
@@ -44,9 +44,6 @@ class SafeString(str, SafeData):
          return self


- SafeText = SafeString  # For backwards compatibility since Plain 2.0.
-
-
  def _safety_decorator(safety_marker, func):
      @wraps(func)
      def wrapper(*args, **kwargs):