omlish 0.0.0.dev220__py3-none-any.whl → 0.0.0.dev222__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. omlish/__about__.py +2 -2
  2. omlish/algorithm/all.py +13 -0
  3. omlish/algorithm/distribute.py +46 -0
  4. omlish/algorithm/toposort.py +26 -0
  5. omlish/algorithm/unify.py +31 -0
  6. omlish/collections/__init__.py +0 -2
  7. omlish/collections/utils.py +0 -46
  8. omlish/graphs/trees.py +2 -1
  9. omlish/http/coro/server.py +42 -33
  10. omlish/http/{simple.py → coro/simple.py} +17 -17
  11. omlish/specs/irc/__init__.py +0 -0
  12. omlish/specs/irc/format/LICENSE +11 -0
  13. omlish/specs/irc/format/__init__.py +61 -0
  14. omlish/specs/irc/format/consts.py +6 -0
  15. omlish/specs/irc/format/errors.py +30 -0
  16. omlish/specs/irc/format/message.py +18 -0
  17. omlish/specs/irc/format/nuh.py +52 -0
  18. omlish/specs/irc/format/parsing.py +155 -0
  19. omlish/specs/irc/format/rendering.py +150 -0
  20. omlish/specs/irc/format/tags.py +99 -0
  21. omlish/specs/irc/format/utils.py +27 -0
  22. omlish/specs/irc/numerics/__init__.py +0 -0
  23. omlish/specs/irc/numerics/formats.py +94 -0
  24. omlish/specs/irc/numerics/numerics.py +808 -0
  25. omlish/specs/irc/numerics/types.py +59 -0
  26. {omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/METADATA +1 -1
  27. {omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/RECORD +32 -15
  28. omlish/docker/oci/data.py +0 -71
  29. omlish/docker/oci/media.py +0 -124
  30. /omlish/{docker/oci → algorithm}/__init__.py +0 -0
  31. {omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/LICENSE +0 -0
  32. {omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/WHEEL +0 -0
  33. {omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/entry_points.txt +0 -0
  34. {omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,155 @@
1
+ import typing as ta
2
+
3
+ from .consts import MAX_LEN_CLIENT_TAG_DATA
4
+ from .consts import MAX_LEN_TAG_DATA
5
+ from .errors import BadCharactersError
6
+ from .errors import LineEmptyError
7
+ from .errors import TagsTooLongError
8
+ from .message import Message
9
+ from .tags import parse_tags
10
+ from .utils import is_ascii
11
+ from .utils import trim_initial_spaces
12
+ from .utils import truncate_utf8_safe
13
+
14
+
15
class ParsedLine(ta.NamedTuple):
    """Outcome of parsing one raw line: the decoded message plus a truncation flag."""

    # The message built from the line's tags, source, command and parameters.
    message: Message

    # True when the line body exceeded the requested limit and was shortened before parsing.
    truncated: bool = False
19
+
20
+
21
def parse_line_(
        line: str,
        *,
        max_tag_data_length: int | None = None,
        truncate_len: int | None = None,
) -> ParsedLine:
    """
    Parse one raw IRC line into a `Message`, reporting whether the body had to be truncated.

    Args:
        line: The raw line, with or without its trailing '\\n' / '\\r\\n'.
        max_tag_data_length: If given, the maximum accepted length of the tag data (the text between the leading '@'
            and the first space).
        truncate_len: If given, the maximum line length including the 2-character terminator; the part of the line
            after the tags is shortened to fit.

    Returns:
        A `ParsedLine` holding the message and a `truncated` flag.

    Raises:
        LineEmptyError: If the line is empty, has no command, ends right after its tags or source, or if
            `truncate_len` leaves no room for any content.
        BadCharactersError: If the line contains NUL, CR or LF, or the command is not ASCII.
        TagsTooLongError: If the tag data exceeds `max_tag_data_length`.
    """

    # A trailing '\n' or '\r\n' is not part of the message itself.
    line = line.removesuffix('\n').removesuffix('\r')

    # The terminator counts against the limit whether or not it was still attached to the input.
    if truncate_len is not None:
        if truncate_len <= 2:
            raise LineEmptyError
        truncate_len -= 2

    # None of the three forbidden characters may remain inside the line.
    if '\x00' in line or '\n' in line or '\r' in line:
        raise BadCharactersError

    if not line:
        raise LineEmptyError

    #

    # Tags: '@' <tag data> ' '
    tags: ta.Mapping[str, str] | None = None
    if line.startswith('@'):
        raw_tags, tag_sep, after_tags = line[1:].partition(' ')
        if not tag_sep:
            raise LineEmptyError
        # NOTE(review): len() counts characters here, not encoded bytes - confirm this matches the intended limit.
        if max_tag_data_length is not None and len(raw_tags) > max_tag_data_length:
            raise TagsTooLongError
        tags = parse_tags(raw_tags)
        # Continue with whatever follows the tags and their separating space.
        line = after_tags

    #

    # Shorten the remainder if a limit was requested.
    truncated = False
    if truncate_len is not None and len(line) > truncate_len:
        line = truncate_utf8_safe(line, truncate_len)
        truncated = True

    line = trim_initial_spaces(line)

    # Source: ':' <source> ' '
    source: str | None = None
    if line.startswith(':'):
        source_text, source_sep, after_source = line[1:].partition(' ')
        if not source_sep:
            raise LineEmptyError
        source = source_text
        line = after_source

    # Modern: "These message parts, and parameters themselves, are separated by one or more ASCII SPACE characters"
    line = trim_initial_spaces(line)

    # Command: everything up to the next space, or the whole remainder.
    base_command, _, rest = line.partition(' ')
    if not base_command:
        raise LineEmptyError
    # Technically this must be either letters or a 3-digit numeric:
    if not is_ascii(base_command):
        raise BadCharactersError

    # Commands are normalized to uppercase.
    command = base_command.upper()

    # Parameters: space-separated, with an optional final ':'-prefixed trailing parameter that takes the rest.
    params: list[str] = []
    rest = trim_initial_spaces(rest)
    while rest:
        if rest.startswith(':'):
            params.append(rest[1:])
            break
        param, _, rest = rest.partition(' ')
        params.append(param)
        rest = trim_initial_spaces(rest)

    #

    return ParsedLine(
        Message(
            source=source,
            command=command,
            params=params,
            tags=tags,
        ),
        truncated=truncated,
    )
127
+
128
+
129
def parse_line(
        line: str,
        *,
        max_tag_data_length: int | None = None,
        truncate_len: int | None = None,
) -> Message:
    """
    Parse one raw IRC line and return only the message, discarding the truncation flag.

    Takes the same keyword arguments as `parse_line_` and raises whatever it raises.
    """

    parsed = parse_line_(
        line,
        max_tag_data_length=max_tag_data_length,
        truncate_len=truncate_len,
    )
    return parsed.message
140
+
141
+
142
def parse_line_strict(
        line: str,
        from_client: bool,
        truncate_len: int | None,
) -> ParsedLine:
    """
    Parse a line with a tag-data length limit enforced.

    Args:
        line: The raw line to parse.
        from_client: Selects the limit: `MAX_LEN_CLIENT_TAG_DATA` when true, `MAX_LEN_TAG_DATA` otherwise.
        truncate_len: Passed through to `parse_line_`; `None` disables truncation.

    Returns:
        The `ParsedLine` produced by `parse_line_`.
    """

    tag_data_limit = MAX_LEN_CLIENT_TAG_DATA if from_client else MAX_LEN_TAG_DATA

    return parse_line_(
        line,
        max_tag_data_length=tag_data_limit,
        truncate_len=truncate_len,
    )
@@ -0,0 +1,150 @@
1
+ import typing as ta
2
+
3
+ from .consts import MAX_LEN_CLIENT_TAG_DATA
4
+ from .consts import MAX_LEN_SERVER_TAG_DATA
5
+ from .consts import MAX_LEN_TAGS_FROM_CLIENT
6
+ from .errors import BadCharactersError
7
+ from .errors import BadParamError
8
+ from .errors import CommandMissingError
9
+ from .errors import InvalidTagContentError
10
+ from .errors import TagsTooLongError
11
+ from .message import Message
12
+ from .tags import escape_tag_value
13
+ from .tags import validate_tag_name
14
+ from .tags import validate_tag_value
15
+ from .utils import find_utf8_truncation_point
16
+
17
+
18
def param_requires_trailing(param: str) -> bool:
    """Return whether `param` is empty, starts with ':' or contains a space."""

    if not param:
        return True
    return param.startswith(':') or ' ' in param
20
+
21
+
22
class RenderedLine(ta.NamedTuple):
    """Outcome of rendering a message: the raw bytes plus a truncation flag."""

    # The rendered line as bytes.
    raw: bytes

    # Whether the line was shortened to fit a length limit.
    truncated: bool = False
26
+
27
+
28
def render_line_(
        msg: Message,
        *,
        tag_limit: int | None = None,
        client_only_tag_data_limit: int | None = None,
        server_added_tag_data_limit: int | None = None,
        truncate_len: int | None = None,
) -> RenderedLine:
    """
    Render a message to wire bytes terminated by '\\r\\n'.

    Args:
        msg: The message to render.
        tag_limit: If given, the maximum size in bytes of the whole tag section, including the leading '@' and the
            trailing space.
        client_only_tag_data_limit: If given, the maximum size in bytes of the rendered `msg.client_only_tags`.
        server_added_tag_data_limit: If given, the maximum size in bytes of the rendered `msg.tags`.
        truncate_len: If given, the maximum size in bytes of the line after the tag section, including the 2-byte
            terminator; longer lines are cut.

    Returns:
        A `RenderedLine` with the bytes and whether truncation happened.

    Raises:
        CommandMissingError: If the message has no command.
        InvalidTagContentError: If any tag name or value fails validation.
        TagsTooLongError: If any of the given tag limits is exceeded.
        BadParamError: If a parameter other than the last one would need trailing syntax.
        BadCharactersError: If the rendered line contains NUL, CR or LF before the terminator.
    """

    if not msg.command:
        raise CommandMissingError

    out = bytearray()
    regular_len = 0
    client_only_len = 0

    # Write the tags, measuring the regular and client-only tag data separately.
    if msg.tags or msg.client_only_tags:
        out.extend(b'@')
        wrote_any = False
        bad_tag = False

        def emit(tags: ta.Mapping[str, str]) -> None:
            nonlocal wrote_any, bad_tag
            for name, value in tags.items():
                # Remember the failure but keep writing; the error is raised once all tags are written.
                if not validate_tag_name(name) or not validate_tag_value(value):
                    bad_tag = True
                if wrote_any:
                    out.extend(b';')
                out.extend(name.encode('utf-8'))
                if value:
                    out.extend(b'=')
                    out.extend(escape_tag_value(value).encode('utf-8'))
                wrote_any = True

        emit(msg.tags or {})
        # The leading '@' is not counted.
        regular_len = len(out) - 1
        emit(msg.client_only_tags or {})
        client_only_len = (len(out) - 1) - regular_len
        if regular_len:
            # Semicolon between regular and client-only tags is not counted
            client_only_len -= 1
        out.extend(b' ')
        if bad_tag:
            raise InvalidTagContentError

    len_tags = len(out)

    if tag_limit is not None and len_tags > tag_limit:
        raise TagsTooLongError

    client_only_over = client_only_tag_data_limit is not None and client_only_len > client_only_tag_data_limit
    server_added_over = server_added_tag_data_limit is not None and regular_len > server_added_tag_data_limit
    if client_only_over or server_added_over:
        raise TagsTooLongError

    if msg.source:
        out.extend(b':')
        out.extend(msg.source.encode('utf-8'))
        out.extend(b' ')

    out.extend(msg.command.encode('utf-8'))

    last_index = len(msg.params) - 1
    for i, param in enumerate(msg.params):
        out.extend(b' ')
        needs_trailing = param_requires_trailing(param)

        if i == last_index:
            if needs_trailing or msg.force_trailing:
                out.extend(b':')
        elif needs_trailing:
            # Only the final parameter may use trailing syntax.
            raise BadParamError

        out.extend(param.encode('utf-8'))

    # Truncate if desired; leave 2 bytes over for \r\n:
    truncated = False
    if truncate_len is not None:
        body_budget = truncate_len - 2
        if body_budget < len(out) - len_tags:
            truncated = True
            cut = find_utf8_truncation_point(out, len_tags + body_budget)
            del out[cut:]

    # Everything before the terminator must be free of the three forbidden bytes.
    if b'\x00' in out or b'\r' in out or b'\n' in out:
        raise BadCharactersError

    out.extend(b'\r\n')

    return RenderedLine(
        raw=bytes(out),
        truncated=truncated,
    )
119
+
120
+
121
def render_line(msg: Message) -> bytes:
    """Render `msg` with no limits applied and return just the raw bytes."""

    rendered = render_line_(msg)
    return rendered.raw
123
+
124
+
125
def render_line_strict(
        msg: Message,
        from_client: bool,
        truncate_len: int | None,
) -> RenderedLine:
    """
    Render a message with tag-size limits enforced.

    Args:
        msg: The message to render.
        from_client: When true, a single overall tag limit is applied; otherwise separate limits are applied to the
            client-only and the server-added tag data.
        truncate_len: Passed through to `render_line_`; `None` disables truncation.

    Returns:
        The `RenderedLine` produced by `render_line_`.
    """

    limits: dict[str, int]
    if from_client:
        # enforce client max tags:
        # <client_max> (4096) :: '@' <tag_data 4094> ' '
        limits = dict(
            tag_limit=MAX_LEN_TAGS_FROM_CLIENT,
        )
    else:
        # on the server side, enforce separate client-only and server-added tag budgets:
        # "Servers MUST NOT add tag data exceeding 4094 bytes to messages."
        # <combined_max> (8191) :: '@' <tag_data 4094> ';' <tag_data 4094> ' '
        limits = dict(
            client_only_tag_data_limit=MAX_LEN_CLIENT_TAG_DATA,
            server_added_tag_data_limit=MAX_LEN_SERVER_TAG_DATA,
        )

    # Limits not set above fall back to render_line_'s defaults of None.
    return render_line_(
        msg,
        truncate_len=truncate_len,
        **limits,
    )
@@ -0,0 +1,99 @@
1
+ from .errors import InvalidTagContentError
2
+
3
+
4
# Characters that must be escaped inside a tag value, mapped to their escape sequences. The backslash entry comes
# first so that code applying these replacements in order does not re-escape the backslashes it just introduced.
TAG_VAL_TO_ESCAPE = {
    '\\': '\\\\',
    ';': '\\:',
    ' ': '\\s',
    '\r': '\\r',
    '\n': '\\n',
}


# Lookup for unescaping, keyed by the code point of the character that follows a backslash. Most characters map to
# themselves; the entries listed after the comprehension are the exceptions.
TAG_ESCAPE_CHAR_LOOKUP_TABLE = {
    **{code: chr(code) for code in range(256)},
    ord(':'): ';',
    ord('s'): ' ',
    ord('r'): '\r',
    ord('n'): '\n',
}
23
+
24
+
25
def escape_tag_value(in_string: str) -> str:
    """Return `in_string` with every character listed in `TAG_VAL_TO_ESCAPE` replaced by its escape sequence."""

    # A single pass over the input gives the same result as replacing each key in turn with the backslash first.
    return ''.join(TAG_VAL_TO_ESCAPE.get(ch, ch) for ch in in_string)
29
+
30
+
31
def unescape_tag_value(in_string: str) -> str:
    """
    Reverse tag-value escaping.

    Each backslash together with the character after it is replaced via `TAG_ESCAPE_CHAR_LOOKUP_TABLE`; characters
    without an entry stand for themselves. A lone backslash at the very end of the input is dropped.
    """

    # Fast path: nothing to unescape.
    if '\\' not in in_string:
        return in_string

    pieces: list[str] = []
    pos = 0
    end = len(in_string)
    while pos < end:
        backslash = in_string.find('\\', pos)
        if backslash == -1:
            pieces.append(in_string[pos:])
            break

        # Literal text before the backslash is kept unchanged.
        pieces.append(in_string[pos:backslash])

        if backslash == end - 1:
            # Trailing backslash, which we strip
            break

        escaped = in_string[backslash + 1]
        pieces.append(TAG_ESCAPE_CHAR_LOOKUP_TABLE.get(ord(escaped), escaped))
        pos = backslash + 2

    return ''.join(pieces)
52
+
53
+
54
def validate_tag_name(name: str) -> bool:
    """
    Return whether `name` is an acceptable tag name.

    A single leading '+' is allowed and ignored. What remains must be non-empty and consist only of ASCII letters,
    digits, and the characters '-', '.' and '/'.
    """

    body = name.removeprefix('+')
    if not body:
        return False

    # Let's err on the side of leniency here; allow -./ in any position
    allowed = '-./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    return all(ch in allowed for ch in body)
66
+
67
+
68
def validate_tag_value(value: str) -> bool:
    """Return whether `value` can be encoded as UTF-8 without losing any character."""

    try:
        value.encode('utf-8')
    except UnicodeEncodeError:
        # Something in the value (e.g. a lone surrogate) has no UTF-8 encoding.
        return False
    return True
71
+
72
+
73
def parse_tags(raw_tags: str) -> dict[str, str]:
    """
    Parse raw tag data ('name=value' pairs separated by ';') into a dict of unescaped values.

    A pair without '=' gets an empty value. Pairs whose name fails `validate_tag_name` are silently skipped.

    Raises:
        InvalidTagContentError: If a validly named tag has a value that fails `validate_tag_value`.
    """

    parsed: dict[str, str] = {}

    for tag_pair in raw_tags.split(';'):
        # With no '=' present, partition leaves the value empty (tag with no value).
        tag_name, _, tag_value = tag_pair.partition('=')

        # "Implementations [...] MUST NOT perform any validation that would reject the message if an invalid tag key
        # name is used."
        if not validate_tag_name(tag_name):
            continue

        if not validate_tag_value(tag_value):
            raise InvalidTagContentError

        parsed[tag_name] = unescape_tag_value(tag_value)

    return parsed
@@ -0,0 +1,27 @@
1
+ import itertools
2
+ import operator
3
+
4
+
5
def truncate_utf8_safe(string: str, length: int) -> str:
    """
    Truncate `string` so that its UTF-8 encoding is at most `length` bytes, never splitting a character.

    The limit is applied to the encoded bytes rather than to the character count, so multi-byte characters are
    counted at their real size. A character that would straddle the limit is dropped entirely.

    Args:
        string: The text to shorten.
        length: Maximum size of the result in UTF-8 bytes.

    Returns:
        The longest prefix of `string` that fits; `string` itself if it already fits.
    """

    # 'ignore' on encode drops characters that have no UTF-8 encoding; 'ignore' on decode drops the incomplete
    # sequence left at the end when the byte slice cuts through a multi-byte character.
    return string \
        .encode('utf-8', 'ignore')[:length] \
        .decode('utf-8', 'ignore')
9
+
10
+
11
+ def find_utf8_truncation_point(buf: bytes | bytearray, length: int) -> int:
12
+ if len(buf) < length:
13
+ raise ValueError(buf)
14
+ cs = itertools.accumulate(
15
+ (len(c.encode('utf-8')) for c in buf.decode('utf-8')),
16
+ operator.add,
17
+ initial=0,
18
+ )
19
+ return next(i for i, o in enumerate(cs) if o >= length)
20
+
21
+
22
def trim_initial_spaces(string: str) -> str:
    """Return `string` without its leading ASCII space characters; other whitespace is left in place."""

    return string.lstrip(' ')
24
+
25
+
26
def is_ascii(string: str) -> bool:
    """Return whether every character of `string` is in the ASCII range; the empty string counts as ASCII."""

    return string.isascii()
File without changes
@@ -0,0 +1,94 @@
1
+ import dataclasses as dc
2
+ import typing as ta
3
+
4
+ from .... import check
5
+ from .... import lang
6
+
7
+
8
+ FormatPart: ta.TypeAlias = ta.Union[str, 'Formats.Optional', 'Formats.Variadic']
9
+ FormatParts: ta.TypeAlias = ta.Sequence[FormatPart]
10
+
11
+
12
class Formats(lang.Namespace):
    """
    Part types and helpers for format strings made of literal text, `<name>` placeholders, `[...]` groups and
    `{...}` groups.
    """

    @dc.dataclass(frozen=True)
    class Name:
        # The text found between '<' and '>'.
        name: str

    @dc.dataclass(frozen=True)
    class Optional:
        # The parts found between '[' and ']'.
        body: FormatParts

    @dc.dataclass(frozen=True)
    class Variadic:
        # The parts found between '{' and '}'.
        body: FormatParts

    #

    # Group class for each (opening, closing) delimiter pair, and the inverse mapping used when rendering.
    _PARTS_BY_DELIMITERS: ta.Mapping[tuple[str, str], type] = {
        ('[', ']'): Optional,
        ('{', '}'): Variadic,
    }

    _DELIMITERS_BY_PARTS: ta.Mapping[type, tuple[str, str]] = {
        part_cls: delims
        for delims, part_cls in _PARTS_BY_DELIMITERS.items()
    }

    #

    @staticmethod
    def split_parts(s: str) -> FormatParts:
        """Split a format string into literal strings, `Name` placeholders and nested `Optional`/`Variadic` groups."""

        # Each frame is (opening delimiter, parts collected so far); the root frame has an empty delimiter.
        frames: list[tuple[str, list]] = [('', [])]

        pos = 0
        while pos < len(s):
            hit = lang.find_any(s, '{}[]<', pos)

            if hit < 0:
                # No further markers: the rest is literal text, and no group may still be open.
                check.state(not frames[-1][0])
                frames[-1][1].append(s[pos:])
                break

            if hit != pos:
                # Literal text preceding the marker.
                frames[-1][1].append(s[pos:hit])

            marker = s[hit]
            if marker == '<':
                close = s.index('>', hit)
                frames[-1][1].append(Formats.Name(s[hit + 1:close]))
                pos = close + 1

            elif marker in '{[':
                frames.append((marker, []))
                pos = hit + 1

            elif marker in '}]':
                # Close the innermost group; a mismatched delimiter pair has no entry in the mapping.
                opener, body = frames.pop()
                part_cls = Formats._PARTS_BY_DELIMITERS[(opener, marker)]
                frames[-1][1].append(part_cls(body))
                pos = hit + 1

            else:
                raise RuntimeError

        # Only the root frame may remain once the whole string is consumed.
        _, ret = check.single(frames)
        return ret

    #

    @staticmethod
    def render_parts(p: FormatPart | FormatParts) -> ta.Iterator[str]:
        """Yield the string fragments that make up the textual form of a part or a sequence of parts."""

        # Strings are checked first: they are iterable, but must be emitted whole.
        if isinstance(p, str):
            yield p

        elif isinstance(p, Formats.Name):
            yield from ('<', p.name, '>')

        elif isinstance(p, (Formats.Optional, Formats.Variadic)):
            opener, closer = Formats._DELIMITERS_BY_PARTS[type(p)]
            yield opener
            yield from Formats.render_parts(p.body)
            yield closer

        else:
            # Anything else is treated as a sequence of parts.
            for child in p:
                yield from Formats.render_parts(child)