omlish 0.0.0.dev220__py3-none-any.whl → 0.0.0.dev222__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. omlish/__about__.py +2 -2
  2. omlish/algorithm/all.py +13 -0
  3. omlish/algorithm/distribute.py +46 -0
  4. omlish/algorithm/toposort.py +26 -0
  5. omlish/algorithm/unify.py +31 -0
  6. omlish/collections/__init__.py +0 -2
  7. omlish/collections/utils.py +0 -46
  8. omlish/graphs/trees.py +2 -1
  9. omlish/http/coro/server.py +42 -33
  10. omlish/http/{simple.py → coro/simple.py} +17 -17
  11. omlish/specs/irc/__init__.py +0 -0
  12. omlish/specs/irc/format/LICENSE +11 -0
  13. omlish/specs/irc/format/__init__.py +61 -0
  14. omlish/specs/irc/format/consts.py +6 -0
  15. omlish/specs/irc/format/errors.py +30 -0
  16. omlish/specs/irc/format/message.py +18 -0
  17. omlish/specs/irc/format/nuh.py +52 -0
  18. omlish/specs/irc/format/parsing.py +155 -0
  19. omlish/specs/irc/format/rendering.py +150 -0
  20. omlish/specs/irc/format/tags.py +99 -0
  21. omlish/specs/irc/format/utils.py +27 -0
  22. omlish/specs/irc/numerics/__init__.py +0 -0
  23. omlish/specs/irc/numerics/formats.py +94 -0
  24. omlish/specs/irc/numerics/numerics.py +808 -0
  25. omlish/specs/irc/numerics/types.py +59 -0
  26. {omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/METADATA +1 -1
  27. {omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/RECORD +32 -15
  28. omlish/docker/oci/data.py +0 -71
  29. omlish/docker/oci/media.py +0 -124
  30. /omlish/{docker/oci → algorithm}/__init__.py +0 -0
  31. {omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/LICENSE +0 -0
  32. {omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/WHEEL +0 -0
  33. {omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/entry_points.txt +0 -0
  34. {omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,155 @@
1
+ import typing as ta
2
+
3
+ from .consts import MAX_LEN_CLIENT_TAG_DATA
4
+ from .consts import MAX_LEN_TAG_DATA
5
+ from .errors import BadCharactersError
6
+ from .errors import LineEmptyError
7
+ from .errors import TagsTooLongError
8
+ from .message import Message
9
+ from .tags import parse_tags
10
+ from .utils import is_ascii
11
+ from .utils import trim_initial_spaces
12
+ from .utils import truncate_utf8_safe
13
+
14
+
15
class ParsedLine(ta.NamedTuple):
    """Result of parsing one raw IRC line."""

    # The message built from the line's tags, source, command and parameters.
    message: Message

    # True when the parser shortened the line to fit `truncate_len` before parsing it.
    truncated: bool = False
19
+
20
+
21
def parse_line_(
        line: str,
        *,
        max_tag_data_length: int | None = None,
        truncate_len: int | None = None,
) -> ParsedLine:
    """
    Parse one raw IRC protocol line into a `Message`.

    Args:
        line: The raw line. A trailing LF and then a trailing CR are removed if present.
        max_tag_data_length: If given, the maximum allowed length of the tag data found between the leading '@' and
            the first space.
        truncate_len: If given, the overall line budget including the 2-character line terminator. The part of the
            line that follows the tags is shortened to `truncate_len - 2` when it is longer than that.

    Returns:
        A `ParsedLine` holding the message and a flag telling whether the line was shortened.

    Raises:
        LineEmptyError: If the line is empty, `truncate_len` is 2 or less, the tag or source section is not followed
            by a space, or no command is present.
        BadCharactersError: If the line contains NUL, CR or LF, or the command is not ASCII.
        TagsTooLongError: If the tag data is longer than `max_tag_data_length`.

    Errors raised by `parse_tags` propagate unchanged.
    """

    # A line terminator may still be attached: drop a trailing LF, then a trailing CR.
    line = line.removesuffix('\n').removesuffix('\r')

    # The terminator counts against the budget whether or not it was still present on the input.
    body_limit: int | None = None
    if truncate_len is not None:
        if truncate_len <= 2:
            raise LineEmptyError
        body_limit = truncate_len - 2

    # None of the three forbidden characters may appear inside the line.
    for forbidden in '\x00\n\r':
        if forbidden in line:
            raise BadCharactersError

    if not line:
        raise LineEmptyError

    #

    # Tags: everything between a leading '@' and the first space.
    tags: ta.Mapping[str, str] | None = None
    if line.startswith('@'):
        tag_section, space, remainder = line.partition(' ')
        if not space:
            raise LineEmptyError
        raw_tags = tag_section[1:]
        if max_tag_data_length is not None and len(raw_tags) > max_tag_data_length:
            raise TagsTooLongError
        tags = parse_tags(raw_tags)
        line = remainder

    #

    # The budget applies to what is left once the tags have been removed.
    truncated = False
    if body_limit is not None and len(line) > body_limit:
        truncated = True
        line = truncate_utf8_safe(line, body_limit)

    line = trim_initial_spaces(line)

    # Source: everything between a leading ':' and the next space.
    source: str | None = None
    if line.startswith(':'):
        prefix, space, remainder = line.partition(' ')
        if not space:
            raise LineEmptyError
        source = prefix[1:]
        line = remainder

    # Modern: "These message parts, and parameters themselves, are separated by one or more ASCII SPACE characters"
    line = trim_initial_spaces(line)

    # Command: the next space-delimited word; whatever follows it holds the parameters.
    base_command, _, line = line.partition(' ')
    if not base_command:
        raise LineEmptyError
    # Technically this must be either letters or a 3-digit numeric:
    if not is_ascii(base_command):
        raise BadCharactersError
    command = base_command.upper()

    # Parameters: space-separated words, optionally ending with a ':'-prefixed trailing parameter that keeps its spaces.
    params: list[str] = []
    line = trim_initial_spaces(line)
    while line:
        if line.startswith(':'):
            params.append(line[1:])
            break
        word, _, line = line.partition(' ')
        params.append(word)
        line = trim_initial_spaces(line)

    #

    return ParsedLine(
        Message(
            source=source,
            command=command,
            params=params,
            tags=tags,
        ),
        truncated=truncated,
    )
127
+
128
+
129
def parse_line(
        line: str,
        *,
        max_tag_data_length: int | None = None,
        truncate_len: int | None = None,
) -> Message:
    """
    Parse one raw IRC line and return only the resulting message.

    Takes the same arguments as `parse_line_` and forwards them unchanged; the truncation flag of the result is
    discarded.
    """

    parsed = parse_line_(
        line,
        max_tag_data_length=max_tag_data_length,
        truncate_len=truncate_len,
    )
    return parsed.message
140
+
141
+
142
def parse_line_strict(
        line: str,
        from_client: bool,
        truncate_len: int | None,
) -> ParsedLine:
    """
    Parse one raw IRC line with a tag-data length limit enforced.

    Args:
        line: The raw line.
        from_client: Selects the limit: `MAX_LEN_CLIENT_TAG_DATA` when true, `MAX_LEN_TAG_DATA` otherwise.
        truncate_len: Passed through to `parse_line_`.

    Returns:
        The `ParsedLine` produced by `parse_line_`.
    """

    tag_data_limit = MAX_LEN_CLIENT_TAG_DATA if from_client else MAX_LEN_TAG_DATA
    return parse_line_(
        line,
        max_tag_data_length=tag_data_limit,
        truncate_len=truncate_len,
    )
@@ -0,0 +1,150 @@
1
+ import typing as ta
2
+
3
+ from .consts import MAX_LEN_CLIENT_TAG_DATA
4
+ from .consts import MAX_LEN_SERVER_TAG_DATA
5
+ from .consts import MAX_LEN_TAGS_FROM_CLIENT
6
+ from .errors import BadCharactersError
7
+ from .errors import BadParamError
8
+ from .errors import CommandMissingError
9
+ from .errors import InvalidTagContentError
10
+ from .errors import TagsTooLongError
11
+ from .message import Message
12
+ from .tags import escape_tag_value
13
+ from .tags import validate_tag_name
14
+ from .tags import validate_tag_value
15
+ from .utils import find_utf8_truncation_point
16
+
17
+
18
def param_requires_trailing(param: str) -> bool:
    """Return whether `param` is empty, starts with ':' or contains a space, and so must be sent as a trailing param."""

    return not param or param.startswith(':') or ' ' in param
20
+
21
+
22
class RenderedLine(ta.NamedTuple):
    """Result of rendering a message to its wire form."""

    # The encoded line, ending with CR LF.
    raw: bytes

    # True when the rendered line was shortened to fit `truncate_len`.
    truncated: bool = False
26
+
27
+
28
def render_line_(
        msg: Message,
        *,
        tag_limit: int | None = None,
        client_only_tag_data_limit: int | None = None,
        server_added_tag_data_limit: int | None = None,
        truncate_len: int | None = None,
) -> RenderedLine:
    """
    Render a message to its UTF-8 wire form, terminated by CR LF.

    Args:
        msg: The message to render.
        tag_limit: If given, the maximum size of the whole tag section, counting the leading '@' and the trailing
            space.
        client_only_tag_data_limit: If given, the maximum size of the rendered client-only tag data.
        server_added_tag_data_limit: If given, the maximum size of the rendered regular tag data.
        truncate_len: If given, the budget for everything after the tag section, including the 2-byte terminator.

    Returns:
        A `RenderedLine` with the encoded bytes and a flag telling whether the line was shortened.

    Raises:
        CommandMissingError: If the message has no command.
        InvalidTagContentError: If any tag name or value fails validation.
        TagsTooLongError: If any of the given tag limits is exceeded.
        BadParamError: If a parameter other than the last one would need to be a trailing parameter.
        BadCharactersError: If the rendered line contains NUL, CR or LF before its terminator.
    """

    if not msg.command:
        raise CommandMissingError

    out = bytearray()
    regular_tags_len = 0
    client_only_tags_len = 0

    # Write the tags, measuring the regular and client-only portions separately for the budget checks below.
    if msg.tags or msg.client_only_tags:
        out += b'@'
        need_separator = False
        invalid_tag_seen = False

        def emit_tags(tags: ta.Mapping[str, str]) -> None:
            nonlocal need_separator, invalid_tag_seen
            for name, value in tags.items():
                # Remember the failure, but keep writing; the error is raised once the section is complete.
                if not validate_tag_name(name) or not validate_tag_value(value):
                    invalid_tag_seen = True
                if need_separator:
                    out.extend(b';')
                out.extend(name.encode('utf-8'))
                if value:
                    out.extend(b'=')
                    out.extend(escape_tag_value(value).encode('utf-8'))
                need_separator = True

        emit_tags(msg.tags or {})
        regular_tags_len = len(out) - 1
        emit_tags(msg.client_only_tags or {})
        client_only_tags_len = (len(out) - 1) - regular_tags_len
        if regular_tags_len:
            # Semicolon between regular and client-only tags is not counted
            client_only_tags_len -= 1
        out += b' '
        if invalid_tag_seen:
            raise InvalidTagContentError

    tags_end = len(out)
    if tag_limit is not None and tags_end > tag_limit:
        raise TagsTooLongError
    if client_only_tag_data_limit is not None and client_only_tags_len > client_only_tag_data_limit:
        raise TagsTooLongError
    if server_added_tag_data_limit is not None and regular_tags_len > server_added_tag_data_limit:
        raise TagsTooLongError

    if msg.source:
        out += b':'
        out += msg.source.encode('utf-8')
        out += b' '

    out += msg.command.encode('utf-8')

    for index, param in enumerate(msg.params):
        out += b' '
        needs_trailing = param_requires_trailing(param)
        is_last = index == len(msg.params) - 1

        if (needs_trailing or msg.force_trailing) and is_last:
            out += b':'
        elif needs_trailing and not is_last:
            raise BadParamError

        out += param.encode('utf-8')

    # Truncate if desired; leave 2 bytes over for \r\n:
    truncated = False
    if truncate_len is not None:
        body_budget = truncate_len - 2
        if body_budget < len(out) - tags_end:
            truncated = True
            out = out[:find_utf8_truncation_point(out, tags_end + body_budget)]

    out += b'\r\n'

    # Everything before the terminator must be free of the three forbidden bytes.
    payload = out[:-2]
    for forbidden in (b'\x00', b'\r', b'\n'):
        if forbidden in payload:
            raise BadCharactersError

    return RenderedLine(
        raw=bytes(out),
        truncated=truncated,
    )
119
+
120
+
121
def render_line(msg: Message) -> bytes:
    """Render `msg` with no limits applied and return only the encoded bytes."""

    rendered = render_line_(msg)
    return rendered.raw
123
+
124
+
125
def render_line_strict(
        msg: Message,
        from_client: bool,
        truncate_len: int | None,
) -> RenderedLine:
    """
    Render a message with tag size limits enforced.

    Args:
        msg: The message to render.
        from_client: When true, only the overall client tag-section limit is applied; otherwise the separate
            client-only and server-added tag data limits are applied.
        truncate_len: Passed through to `render_line_`.

    Returns:
        The `RenderedLine` produced by `render_line_`.
    """

    if from_client:
        # enforce client max tags:
        # <client_max> (4096) :: '@' <tag_data 4094> ' '
        return render_line_(
            msg,
            tag_limit=MAX_LEN_TAGS_FROM_CLIENT,
            client_only_tag_data_limit=None,
            server_added_tag_data_limit=None,
            truncate_len=truncate_len,
        )

    # on the server side, enforce separate client-only and server-added tag budgets:
    # "Servers MUST NOT add tag data exceeding 4094 bytes to messages."
    # <combined_max> (8191) :: '@' <tag_data 4094> ';' <tag_data 4094> ' '
    return render_line_(
        msg,
        tag_limit=None,
        client_only_tag_data_limit=MAX_LEN_CLIENT_TAG_DATA,
        server_added_tag_data_limit=MAX_LEN_SERVER_TAG_DATA,
        truncate_len=truncate_len,
    )
@@ -0,0 +1,99 @@
1
+ from .errors import InvalidTagContentError
2
+
3
+
4
# Characters that need escaping inside a tag value, mapped to their escape sequences. The backslash entry is listed
# first, which matters to code that applies the replacements one after another in this order.
TAG_VAL_TO_ESCAPE = {
    '\\': r'\\',
    ';': r'\:',
    ' ': r'\s',
    '\r': r'\r',
    '\n': r'\n',
}


# Code point following a backslash -> the character it stands for. Most characters stand for themselves; the four
# entries listed explicitly are the exceptions.
TAG_ESCAPE_CHAR_LOOKUP_TABLE = {
    **{code: chr(code) for code in range(256)},
    ord(':'): ';',
    ord('s'): ' ',
    ord('r'): '\r',
    ord('n'): '\n',
}
23
+
24
+
25
def escape_tag_value(in_string: str) -> str:
    """Return `in_string` with every character listed in `TAG_VAL_TO_ESCAPE` replaced by its escape sequence."""

    # One pass over the input: each original character is mapped at most once, so backslashes produced by an escape
    # sequence are never escaped again.
    return in_string.translate(str.maketrans(TAG_VAL_TO_ESCAPE))
29
+
30
+
31
def unescape_tag_value(in_string: str) -> str:
    """
    Reverse tag value escaping.

    Each backslash is removed and the character after it is replaced by its entry in `TAG_ESCAPE_CHAR_LOOKUP_TABLE`
    (or kept as-is when it has none). A backslash that is the last character of the input is dropped.
    """

    if '\\' not in in_string:
        return in_string

    out: list[str] = []
    chars = iter(in_string)
    for ch in chars:
        if ch != '\\':
            out.append(ch)
            continue

        escaped = next(chars, None)
        if escaped is None:
            # Trailing backslash, which we strip
            break
        out.append(TAG_ESCAPE_CHAR_LOOKUP_TABLE.get(ord(escaped), escaped))

    return ''.join(out)
52
+
53
+
54
def validate_tag_name(name: str) -> bool:
    """
    Return whether `name` is an acceptable tag name.

    A single leading '+' is allowed and ignored; the rest must be non-empty and consist only of ASCII letters, ASCII
    digits, '-', '.' and '/'.
    """

    key = name.removeprefix('+')
    if not key:
        return False

    # Let's err on the side of leniency here; allow -./ (45-47) in any position
    return all(c in '-./' or (c.isascii() and c.isalnum()) for c in key)
66
+
67
+
68
def validate_tag_value(value: str) -> bool:
    """Return whether `value` can be encoded as UTF-8 without losing any character."""

    try:
        value.encode('utf-8')
    except UnicodeEncodeError:
        return False
    return True
71
+
72
+
73
def parse_tags(raw_tags: str) -> dict[str, str]:
    """
    Parse the tag section of a line (without the leading '@') into a dict of unescaped values.

    Tags are separated by ';' and a tag's value follows its first '='; a tag without '=' gets an empty value. Tags
    whose name fails `validate_tag_name` are skipped.

    Raises:
        InvalidTagContentError: If a tag with a valid name has a value that fails `validate_tag_value`.
    """

    parsed: dict[str, str] = {}

    for pair in raw_tags.split(';'):
        name, _, raw_value = pair.partition('=')

        # "Implementations [...] MUST NOT perform any validation that would reject the message if an invalid tag key
        # name is used."
        if not validate_tag_name(name):
            continue

        if not validate_tag_value(raw_value):
            raise InvalidTagContentError
        parsed[name] = unescape_tag_value(raw_value)

    return parsed
@@ -0,0 +1,27 @@
1
+ import itertools
2
+ import operator
3
+
4
+
5
def truncate_utf8_safe(string: str, length: int) -> str:
    """
    Shorten `string` so that its UTF-8 encoding is at most `length` bytes, never splitting a code point.

    The limit is applied to the encoded bytes rather than to the number of characters, so a string containing
    multi-byte characters cannot come back larger than the byte budget. A code point that would straddle the limit is
    dropped entirely. Characters that cannot be encoded as UTF-8 are dropped as well.

    Args:
        string: The text to shorten.
        length: The maximum size, in bytes, of the UTF-8 encoding of the result.

    Returns:
        The longest prefix of `string` (minus unencodable characters) that fits in `length` bytes.
    """

    encoded = string.encode('utf-8', 'ignore')
    # Cutting the bytes may leave an incomplete sequence at the end; 'ignore' discards just that remnant.
    return encoded[:length].decode('utf-8', 'ignore')
9
+
10
+
11
+ def find_utf8_truncation_point(buf: bytes | bytearray, length: int) -> int:
12
+ if len(buf) < length:
13
+ raise ValueError(buf)
14
+ cs = itertools.accumulate(
15
+ (len(c.encode('utf-8')) for c in buf.decode('utf-8')),
16
+ operator.add,
17
+ initial=0,
18
+ )
19
+ return next(i for i, o in enumerate(cs) if o >= length)
20
+
21
+
22
def trim_initial_spaces(string: str) -> str:
    """Return `string` without its leading ASCII space characters; other whitespace is left alone."""

    if not string.startswith(' '):
        return string
    return string.lstrip(' ')
24
+
25
+
26
def is_ascii(string: str) -> bool:
    """Return whether every character of `string` is below code point 128; true for the empty string."""

    return string.isascii()
File without changes
@@ -0,0 +1,94 @@
1
+ import dataclasses as dc
2
+ import typing as ta
3
+
4
+ from .... import check
5
+ from .... import lang
6
+
7
+
8
+ FormatPart: ta.TypeAlias = ta.Union[str, 'Formats.Optional', 'Formats.Variadic']
9
+ FormatParts: ta.TypeAlias = ta.Sequence[FormatPart]
10
+
11
+
12
# Namespace holding the format part types together with helpers that split a format string into parts and render
# parts back into text.
class Formats(lang.Namespace):
    @dc.dataclass(frozen=True)
    class Name:
        # The text found between '<' and '>'.
        name: str

    @dc.dataclass(frozen=True)
    class Optional:
        # The parts found between '[' and ']'.
        body: FormatParts

    @dc.dataclass(frozen=True)
    class Variadic:
        # The parts found between '{' and '}'.
        body: FormatParts

    #

    # Group type for each (opening, closing) delimiter pair, and the reverse lookup.
    _PARTS_BY_DELIMITERS: ta.Mapping[tuple[str, str], type] = {
        ('[', ']'): Optional,
        ('{', '}'): Variadic,
    }

    _DELIMITERS_BY_PARTS: ta.Mapping[type, tuple[str, str]] = {
        part_cls: delims
        for delims, part_cls in _PARTS_BY_DELIMITERS.items()
    }

    #

    @staticmethod
    def split_parts(s: str) -> FormatParts:
        """
        Split a format string into literal text, `<name>` placeholders and nested `[...]` / `{...}` groups.

        A closing delimiter that does not pair with the innermost open group fails the `_PARTS_BY_DELIMITERS` lookup,
        a '<' without a following '>' fails in `str.index`, and a group left open at the end fails one of the `check`
        calls.
        """

        # One frame per open group: (opening delimiter, parts collected so far). The root frame has no delimiter.
        frames: list[tuple[str, list]] = [('', [])]

        pos = 0
        while pos < len(s):
            hit = lang.find_any(s, '{}[]<', pos)

            if hit < 0:
                # No more markup: the rest is literal text, which is only accepted at the root level.
                check.state(not frames[-1][0])
                frames[-1][1].append(s[pos:])
                break

            if hit != pos:
                frames[-1][1].append(s[pos:hit])

            delim = s[hit]
            if delim == '<':
                close = s.index('>', hit)
                frames[-1][1].append(Formats.Name(s[hit + 1:close]))
                pos = close + 1

            elif delim in '{[':
                frames.append((delim, []))
                pos = hit + 1

            elif delim in '}]':
                opener, body = frames.pop()
                part_cls = Formats._PARTS_BY_DELIMITERS[(opener, delim)]
                frames[-1][1].append(part_cls(body))
                pos = hit + 1

            else:
                raise RuntimeError

        _, ret = check.single(frames)
        return ret

    #

    @staticmethod
    def render_parts(p: FormatPart | FormatParts) -> ta.Iterator[str]:
        """Yield the text fragments that spell out a part, or a sequence of parts, in format-string syntax."""

        if isinstance(p, str):
            yield p
            return

        if isinstance(p, Formats.Name):
            yield from ('<', p.name, '>')
            return

        if isinstance(p, (Formats.Optional, Formats.Variadic)):
            opener, closer = Formats._DELIMITERS_BY_PARTS[type(p)]
            yield opener
            yield from Formats.render_parts(p.body)
            yield closer
            return

        # Anything else is treated as a sequence of parts.
        for child in p:
            yield from Formats.render_parts(child)