PyPI - omlish - Versions diffs - 0.0.0.dev220__py3-none-any.whl → 0.0.0.dev222__py3-none-any.whl - Mend

omlish 0.0.0.dev220__py3-none-any.whl → 0.0.0.dev222__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

omlish/__about__.py +2 -2
omlish/algorithm/all.py +13 -0
omlish/algorithm/distribute.py +46 -0
omlish/algorithm/toposort.py +26 -0
omlish/algorithm/unify.py +31 -0
omlish/collections/__init__.py +0 -2
omlish/collections/utils.py +0 -46
omlish/graphs/trees.py +2 -1
omlish/http/coro/server.py +42 -33
omlish/http/{simple.py → coro/simple.py} +17 -17
omlish/specs/irc/__init__.py +0 -0
omlish/specs/irc/format/LICENSE +11 -0
omlish/specs/irc/format/__init__.py +61 -0
omlish/specs/irc/format/consts.py +6 -0
omlish/specs/irc/format/errors.py +30 -0
omlish/specs/irc/format/message.py +18 -0
omlish/specs/irc/format/nuh.py +52 -0
omlish/specs/irc/format/parsing.py +155 -0
omlish/specs/irc/format/rendering.py +150 -0
omlish/specs/irc/format/tags.py +99 -0
omlish/specs/irc/format/utils.py +27 -0
omlish/specs/irc/numerics/__init__.py +0 -0
omlish/specs/irc/numerics/formats.py +94 -0
omlish/specs/irc/numerics/numerics.py +808 -0
omlish/specs/irc/numerics/types.py +59 -0
{omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/METADATA +1 -1
{omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/RECORD +32 -15
omlish/docker/oci/data.py +0 -71
omlish/docker/oci/media.py +0 -124
/omlish/{docker/oci → algorithm}/__init__.py +0 -0
{omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/LICENSE +0 -0
{omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/WHEEL +0 -0
{omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/entry_points.txt +0 -0
{omlish-0.0.0.dev220.dist-info → omlish-0.0.0.dev222.dist-info}/top_level.txt +0 -0

omlish/specs/irc/format/parsing.py ADDED Viewed

@@ -0,0 +1,155 @@
+import typing as ta
+from .consts import MAX_LEN_CLIENT_TAG_DATA
+from .consts import MAX_LEN_TAG_DATA
+from .errors import BadCharactersError
+from .errors import LineEmptyError
+from .errors import TagsTooLongError
+from .message import Message
+from .tags import parse_tags
+from .utils import is_ascii
+from .utils import trim_initial_spaces
+from .utils import truncate_utf8_safe
class ParsedLine(ta.NamedTuple):
    """Outcome of parsing a single line: the message plus a truncation flag."""

    # The parsed message.
    message: Message

    # True when the line body was longer than the requested limit and was cut down before parsing.
    truncated: bool = False
def parse_line_(
        line: str,
        *,
        max_tag_data_length: int | None = None,
        truncate_len: int | None = None,
) -> ParsedLine:
    """
    Parse one raw IRC line into a `ParsedLine`.

    One trailing LF (optionally preceded by one CR) is stripped, then the optional `@tags` section, the optional
    `:source` prefix, the command (upper-cased) and the parameters are split off in that order.

    Args:
        line: The raw line, with or without its line terminator.
        max_tag_data_length: If given, the maximum allowed length of the tag section (without the leading '@').
        truncate_len: If given, the maximum length of the line after the tag section, including 2 units reserved for
            the line terminator. Longer bodies are truncated and reported via `ParsedLine.truncated`.

    Returns:
        The parsed message and whether the body had to be truncated.

    Raises:
        LineEmptyError: If the line is empty, has no command, has tags or a source but nothing after them, or if
            `truncate_len` leaves no room for a body (`truncate_len <= 2`).
        BadCharactersError: If the line contains NUL, CR or LF, or if the command is not ASCII.
        TagsTooLongError: If the tag section is longer than `max_tag_data_length`.

    Anything raised by `parse_tags` propagates unchanged.
    """

    # Drop the terminator: one '\n', then one '\r' (so both "\n" and "\r\n" endings are handled).
    line = line.removesuffix('\n').removesuffix('\r')

    # The terminator counts against the limit whether it was stripped here or by the caller.
    body_limit: int | None = None
    if truncate_len is not None:
        if truncate_len <= 2:
            raise LineEmptyError
        body_limit = truncate_len - 2

    # The 3 forbidden characters may not appear anywhere in what is left.
    if '\x00' in line or '\n' in line or '\r' in line:
        raise BadCharactersError

    if not line:
        raise LineEmptyError

    #

    # Tags: '@' <raw tags> ' ' <rest>
    tags: ta.Mapping[str, str] | None = None
    if line.startswith('@'):
        raw_tags, space, rest = line[1:].partition(' ')
        if not space:
            # Tags with nothing after them.
            raise LineEmptyError

        # NOTE(review): len() counts characters here, not encoded bytes - confirm that is the intended unit.
        if max_tag_data_length is not None and len(raw_tags) > max_tag_data_length:
            raise TagsTooLongError

        tags = parse_tags(raw_tags)
        line = rest

    #

    # Truncation applies to everything after the tag section.
    # NOTE(review): the comparison is in characters as well - confirm against the byte-oriented limit.
    truncated = body_limit is not None and len(line) > body_limit
    if truncated:
        line = truncate_utf8_safe(line, body_limit)

    line = trim_initial_spaces(line)

    # Source: ':' <source> ' ' <rest>
    source: str | None = None
    if line.startswith(':'):
        found_source, space, rest = line[1:].partition(' ')
        if not space:
            raise LineEmptyError
        source = found_source
        line = rest

    # Modern: "These message parts, and parameters themselves, are separated by one or more ASCII SPACE characters"
    line = trim_initial_spaces(line)

    # Command: everything up to the next space (or the whole remainder).
    base_command, _, line = line.partition(' ')
    if not base_command:
        raise LineEmptyError

    # Technically this must be either letters or a 3-digit numeric; only ASCII-ness is enforced.
    if not is_ascii(base_command):
        raise BadCharactersError

    command = base_command.upper()

    # Parameters: space separated, with an optional final ':'-introduced trailing parameter that may contain spaces.
    params: list[str] = []
    while True:
        line = trim_initial_spaces(line)
        if not line:
            break

        if line.startswith(':'):
            params.append(line[1:])
            break

        param, _, line = line.partition(' ')
        params.append(param)

    #

    return ParsedLine(
        Message(
            source=source,
            command=command,
            params=params,
            tags=tags,
        ),
        truncated=truncated,
    )
def parse_line(
        line: str,
        *,
        max_tag_data_length: int | None = None,
        truncate_len: int | None = None,
) -> Message:
    """Parse `line` with `parse_line_` and return only the message, discarding the truncation flag."""

    parsed = parse_line_(
        line,
        max_tag_data_length=max_tag_data_length,
        truncate_len=truncate_len,
    )
    return parsed.message
def parse_line_strict(
        line: str,
        from_client: bool,
        truncate_len: int | None,
) -> ParsedLine:
    """
    Parse `line` with a tag-length limit chosen by its origin.

    Lines from a client are limited to `MAX_LEN_CLIENT_TAG_DATA` of tag data, all others to `MAX_LEN_TAG_DATA`.
    `truncate_len` is passed through to `parse_line_` unchanged.
    """

    return parse_line_(
        line,
        max_tag_data_length=MAX_LEN_CLIENT_TAG_DATA if from_client else MAX_LEN_TAG_DATA,
        truncate_len=truncate_len,
    )

omlish/specs/irc/format/rendering.py ADDED Viewed

@@ -0,0 +1,150 @@
+import typing as ta
+from .consts import MAX_LEN_CLIENT_TAG_DATA
+from .consts import MAX_LEN_SERVER_TAG_DATA
+from .consts import MAX_LEN_TAGS_FROM_CLIENT
+from .errors import BadCharactersError
+from .errors import BadParamError
+from .errors import CommandMissingError
+from .errors import InvalidTagContentError
+from .errors import TagsTooLongError
+from .message import Message
+from .tags import escape_tag_value
+from .tags import validate_tag_name
+from .tags import validate_tag_value
+from .utils import find_utf8_truncation_point
def param_requires_trailing(param: str) -> bool:
    """Return True if `param` is empty, contains a space, or starts with ':'."""

    if not param:
        return True
    return param.startswith(':') or ' ' in param
class RenderedLine(ta.NamedTuple):
    """Outcome of rendering a message: the wire bytes plus a truncation flag."""

    # The rendered line as bytes.
    raw: bytes

    # True when the rendered line had to be shortened to fit the requested length.
    truncated: bool = False
def render_line_(
        msg: Message,
        *,
        tag_limit: int | None = None,
        client_only_tag_data_limit: int | None = None,
        server_added_tag_data_limit: int | None = None,
        truncate_len: int | None = None,
) -> RenderedLine:
    """
    Render `msg` to its wire form, terminated by CRLF.

    Args:
        msg: The message to render; it must have a command.
        tag_limit: If given, the maximum byte length of the whole tag section, including the leading '@' and the
            trailing space.
        client_only_tag_data_limit: If given, the maximum byte length of the client-only tag data.
        server_added_tag_data_limit: If given, the maximum byte length of the regular tag data.
        truncate_len: If given, the maximum byte length of the line after the tag section, including the 2 bytes of
            the line terminator. Longer lines are cut and reported via `RenderedLine.truncated`.

    Returns:
        The rendered bytes and whether they were truncated.

    Raises:
        CommandMissingError: If the message has no command.
        InvalidTagContentError: If any tag name or value fails validation.
        TagsTooLongError: If any of the tag limits is exceeded.
        BadParamError: If a parameter other than the last one would need the trailing form.
        BadCharactersError: If the rendered line contains NUL, CR or LF.
    """

    if not msg.command:
        raise CommandMissingError

    out = bytearray()

    len_regular_tags = 0
    len_client_only_tags = 0

    # Tag section; the sizes of the regular and client-only parts are tracked separately for the budget checks.
    if msg.tags or msg.client_only_tags:
        out.extend(b'@')

        need_separator = False
        pending_error = None

        def emit_tags(tags: ta.Mapping[str, str]) -> None:
            nonlocal need_separator, pending_error

            for name, value in tags.items():
                if not validate_tag_name(name) or not validate_tag_value(value):
                    # Remember the failure but keep writing; it is raised once the whole section is out.
                    pending_error = InvalidTagContentError

                if need_separator:
                    out.extend(b';')

                out.extend(name.encode('utf-8'))

                # Empty values are written as a bare tag name.
                if value:
                    out.extend(b'=')
                    out.extend(escape_tag_value(value).encode('utf-8'))

                need_separator = True

        emit_tags(msg.tags or {})
        len_regular_tags = len(out) - 1  # Everything so far except the leading '@'

        emit_tags(msg.client_only_tags or {})
        len_client_only_tags = len(out) - 1 - len_regular_tags
        if len_regular_tags:
            # Semicolon between regular and client-only tags is not counted
            len_client_only_tags -= 1

        out.extend(b' ')

        if pending_error:
            raise pending_error

    len_tags = len(out)

    if tag_limit is not None and len_tags > tag_limit:
        raise TagsTooLongError

    if client_only_tag_data_limit is not None and len_client_only_tags > client_only_tag_data_limit:
        raise TagsTooLongError

    if server_added_tag_data_limit is not None and len_regular_tags > server_added_tag_data_limit:
        raise TagsTooLongError

    if msg.source:
        out.extend(b':')
        out.extend(msg.source.encode('utf-8'))
        out.extend(b' ')

    out.extend(msg.command.encode('utf-8'))

    last_index = len(msg.params) - 1
    for index, param in enumerate(msg.params):
        out.extend(b' ')

        needs_trailing = param_requires_trailing(param)
        wants_colon = needs_trailing or msg.force_trailing

        if index == last_index:
            if wants_colon:
                out.extend(b':')
        elif needs_trailing:
            # Only the final parameter can be written in trailing form.
            raise BadParamError

        out.extend(param.encode('utf-8'))

    # Truncate if desired; leave 2 bytes over for \r\n. The tag section does not count against the limit.
    truncated = False
    if truncate_len is not None:
        body_budget = truncate_len - 2
        if body_budget < len(out) - len_tags:
            truncated = True
            cut = find_utf8_truncation_point(out, len_tags + body_budget)
            del out[cut:]

    # The forbidden bytes are checked on the line without its terminator.
    if b'\x00' in out or b'\r' in out or b'\n' in out:
        raise BadCharactersError

    out.extend(b'\r\n')

    return RenderedLine(
        raw=bytes(out),
        truncated=truncated,
    )
def render_line(msg: Message) -> bytes:
    """Render `msg` with no tag limits and no truncation, returning only the raw bytes."""

    rendered = render_line_(msg)
    return rendered.raw
def render_line_strict(
        msg: Message,
        from_client: bool,
        truncate_len: int | None,
) -> RenderedLine:
    """
    Render `msg` with the tag limits that apply to its origin.

    For a client-originated message only the overall tag section limit is enforced; otherwise the client-only and
    server-added tag data are each held to their own budget. `truncate_len` is passed through unchanged.
    """

    if from_client:
        # enforce client max tags:
        # <client_max>   (4096)  :: '@' <tag_data 4094> ' '
        return render_line_(
            msg,
            tag_limit=MAX_LEN_TAGS_FROM_CLIENT,
            client_only_tag_data_limit=None,
            server_added_tag_data_limit=None,
            truncate_len=truncate_len,
        )

    # on the server side, enforce separate client-only and server-added tag budgets:
    # "Servers MUST NOT add tag data exceeding 4094 bytes to messages."
    # <combined_max> (8191)  :: '@' <tag_data 4094> ';' <tag_data 4094> ' '
    return render_line_(
        msg,
        tag_limit=None,
        client_only_tag_data_limit=MAX_LEN_CLIENT_TAG_DATA,
        server_added_tag_data_limit=MAX_LEN_SERVER_TAG_DATA,
        truncate_len=truncate_len,
    )

omlish/specs/irc/format/tags.py ADDED Viewed

@@ -0,0 +1,99 @@
+from .errors import InvalidTagContentError
# Raw character -> escaped form, used when writing tag values
TAG_VAL_TO_ESCAPE = {
    '\\': '\\\\',
    ';': '\\:',
    ' ': '\\s',
    '\r': '\\r',
    '\n': '\\n',
}

# Code point of the character following a backslash -> the character it stands for. Most characters stand for
# themselves; the 4 entries at the end are the exceptions.
TAG_ESCAPE_CHAR_LOOKUP_TABLE = {
    **{i: chr(i) for i in range(256)},
    ord(':'): ';',
    ord('s'): ' ',
    ord('r'): '\r',
    ord('n'): '\n',
}


def escape_tag_value(in_string: str) -> str:
    """Return `in_string` with every character listed in `TAG_VAL_TO_ESCAPE` replaced by its escaped form."""

    # A single pass is enough: no escape sequence produces a character that itself needs escaping, other than the
    # backslash that introduces it.
    return ''.join(TAG_VAL_TO_ESCAPE.get(c, c) for c in in_string)


def unescape_tag_value(in_string: str) -> str:
    """
    Reverse `escape_tag_value`.

    A backslash followed by a character is replaced through `TAG_ESCAPE_CHAR_LOOKUP_TABLE` (characters not in the
    table stand for themselves); a lone backslash at the very end is dropped.
    """

    if '\\' not in in_string:
        return in_string

    out: list[str] = []
    chars = iter(in_string)
    for c in chars:
        if c != '\\':
            out.append(c)
            continue

        escaped = next(chars, None)
        if escaped is None:
            # Trailing backslash, which we strip
            break

        out.append(TAG_ESCAPE_CHAR_LOOKUP_TABLE.get(ord(escaped), escaped))

    return ''.join(out)
def validate_tag_name(name: str) -> bool:
    """
    Return True if `name` is an acceptable tag name.

    A single leading '+' is allowed and ignored; what remains must be non-empty and consist only of ASCII letters,
    digits, and the characters '-', '.' and '/'.
    """

    # Let's err on the side of leniency here; allow -./ (45-47) in any position
    allowed = (
        '-./'
        '0123456789'
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        'abcdefghijklmnopqrstuvwxyz'
    )

    body = name.removeprefix('+')
    if not body:
        return False

    return all(c in allowed for c in body)
def validate_tag_value(value: str) -> bool:
    """Return True if `value` can be encoded as UTF-8 without losing anything."""

    # Equivalent to comparing `value` against an encode/decode round trip that ignores errors: the round trip only
    # differs from the input when some character cannot be encoded.
    try:
        value.encode('utf-8')
    except UnicodeEncodeError:
        return False
    return True
def parse_tags(raw_tags: str) -> dict[str, str]:
    """
    Parse a ';'-separated tag section (without its leading '@') into a name -> value dict.

    Each entry is either `name` or `name=value`; a missing value becomes the empty string, and values are unescaped
    before being stored. Entries whose name does not validate are skipped silently.

    Raises:
        InvalidTagContentError: If an entry with a valid name has a value that does not validate.
    """

    parsed: dict[str, str] = {}

    for tag_pair in raw_tags.split(';'):
        # Everything after the first '=' is the value; no '=' means a tag with no value.
        tag_name, _, tag_value = tag_pair.partition('=')

        # "Implementations [...] MUST NOT perform any validation that would reject the message if an invalid tag key
        # name is used."
        if not validate_tag_name(tag_name):
            continue

        if not validate_tag_value(tag_value):
            raise InvalidTagContentError

        parsed[tag_name] = unescape_tag_value(tag_value)

    return parsed

omlish/specs/irc/format/utils.py ADDED Viewed

@@ -0,0 +1,27 @@
+import itertools
+import operator
def truncate_utf8_safe(string: str, length: int) -> str:
    """
    Truncate `string` so that its UTF-8 encoding is at most `length` bytes, never splitting a character.

    The limit is applied to the encoded bytes rather than to the characters: a multi-byte character that would
    straddle the limit is dropped whole. Characters that cannot be encoded as UTF-8 are dropped as well.

    Args:
        string: The text to truncate.
        length: The maximum size of the result, in UTF-8 encoded bytes.

    Returns:
        The longest prefix of `string` that fits, or `string` itself if it already fits.
    """

    encoded = string.encode('utf-8', 'ignore')

    # Cutting the bytes may leave an incomplete sequence at the end; 'ignore' discards it when decoding.
    return encoded[:length].decode('utf-8', 'ignore')
+def find_utf8_truncation_point(buf: bytes | bytearray, length: int) -> int:
+    if len(buf) < length:
+        raise ValueError(buf)
+    cs = itertools.accumulate(
+        (len(c.encode('utf-8')) for c in buf.decode('utf-8')),
+        operator.add,
+        initial=0,
+    )
+    return next(i for i, o in enumerate(cs) if o >= length)
def trim_initial_spaces(string: str) -> str:
    """Return `string` without its leading ASCII space characters; other whitespace is left alone."""

    without_leading_spaces = string.lstrip(' ')
    return without_leading_spaces
def is_ascii(string: str) -> bool:
    """Return True if every character of `string` is in the ASCII range (true for the empty string)."""

    return string.isascii()

omlish/specs/irc/numerics/__init__.py ADDED Viewed

File without changes

omlish/specs/irc/numerics/formats.py ADDED Viewed

@@ -0,0 +1,94 @@
+import dataclasses as dc
+import typing as ta
+from .... import check
+from .... import lang
+FormatPart: ta.TypeAlias = ta.Union[str, 'Formats.Optional', 'Formats.Variadic']
+FormatParts: ta.TypeAlias = ta.Sequence[FormatPart]
class Formats(lang.Namespace):
    """
    Parsing and rendering of format strings made of literal text, `<name>` placeholders, `[...]` optional groups
    and `{...}` variadic groups. Groups may nest.
    """

    @dc.dataclass(frozen=True)
    class Name:
        # The text between '<' and '>'.
        name: str

    @dc.dataclass(frozen=True)
    class Optional:
        # The parts found between '[' and ']'.
        body: FormatParts

    @dc.dataclass(frozen=True)
    class Variadic:
        # The parts found between '{' and '}'.
        body: FormatParts

    #

    # (opening delimiter, closing delimiter) -> the part class it denotes, and the reverse mapping.
    _PARTS_BY_DELIMITERS: ta.Mapping[tuple[str, str], type] = {
        ('[', ']'): Optional,
        ('{', '}'): Variadic,
    }

    _DELIMITERS_BY_PARTS: ta.Mapping[type, tuple[str, str]] = {v: k for k, v in _PARTS_BY_DELIMITERS.items()}

    #

    @staticmethod
    def split_parts(s: str) -> FormatParts:
        """
        Split the format string `s` into its parts.

        Literal runs are returned as `str`, `<name>` as `Formats.Name`, and bracketed groups as `Formats.Optional` /
        `Formats.Variadic` holding their own nested parts.
        """

        # Each stack entry is (opening delimiter, parts collected so far); the bottom entry is the top level and has
        # an empty delimiter.
        stack: list[tuple[str, list]] = [('', [])]

        pos = 0
        end = len(s)
        while pos < end:
            hit = lang.find_any(s, '{}[]<', pos)

            if hit < 0:
                # No more delimiters: the rest is literal text, and no group may still be open.
                check.state(not stack[-1][0])
                stack[-1][1].append(s[pos:])
                break

            if hit != pos:
                # Literal text in front of the delimiter.
                stack[-1][1].append(s[pos:hit])

            delim = s[hit]

            if delim == '<':
                close = s.index('>', hit)
                stack[-1][1].append(Formats.Name(s[hit + 1:close]))
                pos = close + 1
                continue

            if delim in '{[':
                stack.append((delim, []))

            elif delim in '}]':
                # Close the innermost group and attach it to its parent.
                opener, body = stack.pop()
                part_cls = Formats._PARTS_BY_DELIMITERS[(opener, delim)]
                stack[-1][1].append(part_cls(body))

            else:
                raise RuntimeError

            pos = hit + 1

        # Only the top-level entry may remain.
        _, ret = check.single(stack)
        return ret

    #

    @staticmethod
    def render_parts(p: FormatPart | FormatParts) -> ta.Iterator[str]:
        """Yield the string fragments that, joined together, spell out `p` in format-string syntax."""

        # `str` has to be tested first: it is iterable and would otherwise fall into the final branch.
        if isinstance(p, str):
            yield p
            return

        if isinstance(p, Formats.Name):
            yield from ('<', p.name, '>')
            return

        if isinstance(p, (Formats.Optional, Formats.Variadic)):
            opener, closer = Formats._DELIMITERS_BY_PARTS[type(p)]
            yield opener
            yield from Formats.render_parts(p.body)
            yield closer
            return

        # Anything else is treated as a sequence of parts.
        for child in p:
            yield from Formats.render_parts(child)

omlish 0.0.0.dev220__py3-none-any.whl → 0.0.0.dev222__py3-none-any.whl

omlish 0.0.0.dev220__py3-none-any.whl → 0.0.0.dev222__py3-none-any.whl