PyPI - omlish - Versions diffs - 0.0.0.dev46__py3-none-any.whl → 0.0.0.dev47__py3-none-any.whl - Mend

omlish 0.0.0.dev46__py3-none-any.whl → 0.0.0.dev47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

omlish/.manifests.json +12 -0
omlish/__about__.py +2 -2
omlish/specs/__init__.py +0 -1
omlish/specs/jmespath/LICENSE +16 -0
omlish/specs/jmespath/__init__.py +20 -0
omlish/specs/jmespath/__main__.py +11 -0
omlish/specs/jmespath/ast.py +90 -0
omlish/specs/jmespath/cli.py +64 -0
omlish/specs/jmespath/exceptions.py +116 -0
omlish/specs/jmespath/functions.py +372 -0
omlish/specs/jmespath/lexer.py +312 -0
omlish/specs/jmespath/parser.py +587 -0
omlish/specs/jmespath/visitor.py +344 -0
{omlish-0.0.0.dev46.dist-info → omlish-0.0.0.dev47.dist-info}/METADATA +1 -1
{omlish-0.0.0.dev46.dist-info → omlish-0.0.0.dev47.dist-info}/RECORD +19 -9
{omlish-0.0.0.dev46.dist-info → omlish-0.0.0.dev47.dist-info}/LICENSE +0 -0
{omlish-0.0.0.dev46.dist-info → omlish-0.0.0.dev47.dist-info}/WHEEL +0 -0
{omlish-0.0.0.dev46.dist-info → omlish-0.0.0.dev47.dist-info}/entry_points.txt +0 -0
{omlish-0.0.0.dev46.dist-info → omlish-0.0.0.dev47.dist-info}/top_level.txt +0 -0

omlish/specs/jmespath/functions.py ADDED Viewed

@@ -0,0 +1,372 @@
+import inspect
+import json
+import math
+from . import exceptions
# Name of a Python type (as given by ``type(value).__name__``) -> name of the JMESPath type it represents.
TYPES_MAP = {
    'bool': 'boolean',
    'list': 'array',
    'dict': 'object',
    'NoneType': 'null',
    'unicode': 'string',
    'str': 'string',
    'float': 'number',
    'int': 'number',
    'long': 'number',
    'OrderedDict': 'object',
    '_Projection': 'array',
    '_Expression': 'expref',
}

# Name of a JMESPath type -> tuple of the Python type names that represent it. Built by inverting TYPES_MAP so the two
# tables cannot drift apart; keys and tuple members keep TYPES_MAP's insertion order.
REVERSE_TYPES_MAP = {}
for _py_name, _jmespath_name in TYPES_MAP.items():
    REVERSE_TYPES_MAP[_jmespath_name] = (*REVERSE_TYPES_MAP.get(_jmespath_name, ()), _py_name)
del _py_name, _jmespath_name
def signature(*arguments):
    """Return a decorator that records ``arguments`` on a function as its ``signature`` attribute.

    Each positional argument is one parameter spec. The decorated function itself is returned unchanged apart from the
    new attribute.
    """

    def _attach(target):
        setattr(target, 'signature', arguments)
        return target

    return _attach
class FunctionRegistry(type):
    """Metaclass that builds a ``FUNCTION_TABLE`` for every class it creates.

    The table maps a function name to ``{'function': <callable>, 'signature': <specs>}``.
    """

    def __init__(cls, name, bases, attrs):
        cls._populate_function_table()
        super().__init__(name, bases, attrs)

    def _populate_function_table(cls):
        """Collect every ``_func_*`` function that carries a ``signature`` attribute.

        The ``_func_`` prefix is stripped to form the table key, e.g. ``_func_max_by`` is registered as ``max_by``.
        Functions without a ``signature`` attribute are ignored.
        """
        prefix = '_func_'
        table = {}
        for attr_name, member in inspect.getmembers(cls, predicate=inspect.isfunction):
            spec = getattr(member, 'signature', None)
            if spec is None or not attr_name.startswith(prefix):
                continue
            table[attr_name[len(prefix):]] = {
                'function': member,
                'signature': spec,
            }
        cls.FUNCTION_TABLE = table
class Functions(metaclass=FunctionRegistry):
    """Built-in JMESPath functions together with their argument validation.

    Every ``_func_<name>`` method decorated with ``@signature`` is collected into ``FUNCTION_TABLE`` by the
    ``FunctionRegistry`` metaclass and can be invoked by ``<name>`` through ``call_function``.
    """

    FUNCTION_TABLE: dict = {}  # noqa

    def call_function(self, function_name, resolved_args):
        """Validate ``resolved_args`` against the named function's signature and call it.

        Raises ``exceptions.UnknownFunctionError`` when no function of that name is registered; arity and type
        violations are raised by ``_validate_arguments``.
        """
        try:
            spec = self.FUNCTION_TABLE[function_name]
        except KeyError:
            raise exceptions.UnknownFunctionError(f'Unknown function: {function_name}()') from None
        function = spec['function']
        signature = spec['signature']
        self._validate_arguments(resolved_args, signature, function_name)
        return function(self, *resolved_args)

    def _validate_arguments(self, args, signature, function_name):
        """Check the argument count, then the argument types.

        A signature whose last spec has ``variadic`` set accepts any number of arguments that is at least the number
        of specs; any other signature requires exactly one argument per spec.
        """
        if signature and signature[-1].get('variadic'):
            if len(args) < len(signature):
                raise exceptions.VariadicArityError(len(signature), len(args), function_name)
        elif len(args) != len(signature):
            raise exceptions.ArityError(len(signature), len(args), function_name)
        return self._type_check(args, signature, function_name)

    def _type_check(self, actual, signature, function_name):
        """Type-check each argument against its spec; an empty ``types`` list accepts anything.

        For a variadic signature the arguments beyond the declared specs are checked against the last spec, so that
        e.g. every argument of ``merge()`` must be an object rather than only the first one.
        """
        for index, spec in enumerate(signature):
            allowed_types = spec['types']
            if allowed_types:
                self._type_check_single(actual[index], allowed_types, function_name)
        if signature and signature[-1].get('variadic'):
            trailing_types = signature[-1]['types']
            if trailing_types:
                for extra in actual[len(signature):]:
                    self._type_check_single(extra, trailing_types, function_name)

    def _type_check_single(self, current, types, function_name):
        """Check one value against a list of allowed JMESPath type names (optionally ``array-<subtype>``)."""
        # Type checking involves checking the top level type, and in the case of arrays, potentially checking the types
        # of each element.
        allowed_types, allowed_subtypes = self._get_allowed_pytypes(types)
        # We're not using isinstance() on purpose. The type model for jmespath does not map 1-1 with python types
        # (booleans are considered integers in python for example).
        actual_typename = type(current).__name__
        if actual_typename not in allowed_types:
            raise exceptions.JmespathTypeError(
                function_name,
                current,
                self._convert_to_jmespath_type(actual_typename),
                types,
            )
        # If we're dealing with a list type, we can have additional restrictions on the type of the list elements (for
        # example a function can require a list of numbers or a list of strings). Arrays are the only types that can
        # have subtypes.
        if allowed_subtypes:
            self._subtype_check(current, allowed_subtypes, types, function_name)

    def _get_allowed_pytypes(self, types):
        """Translate JMESPath type names into Python type names.

        Returns ``(allowed_types, allowed_subtypes)``: a flat list of acceptable Python type names for the value
        itself, and one tuple of acceptable element type names per ``<type>-<subtype>`` entry in ``types``.
        """
        allowed_types: list = []
        allowed_subtypes: list = []
        for t in types:
            type_, sep, subtype = t.partition('-')
            if sep:
                allowed_subtypes.append(REVERSE_TYPES_MAP[subtype])
            allowed_types.extend(REVERSE_TYPES_MAP[type_])
        return allowed_types, allowed_subtypes

    def _subtype_check(self, current, allowed_subtypes, types, function_name):
        """Check that every element of ``current`` belongs to one permitted element type.

        With a single permitted subtype every element must match it. With several, the first element selects which
        subtype applies and all remaining elements must match that same subtype; an empty array always passes.
        """
        if not allowed_subtypes:
            return
        if len(allowed_subtypes) == 1:
            # The easy case, we know up front what type we need to validate.
            allowed = allowed_subtypes[0]
        elif current:
            # Dynamic type validation.  Based on the first type we see, we validate that the remaining types match.
            first = type(current[0]).__name__
            for subtypes in allowed_subtypes:
                if first in subtypes:
                    allowed = subtypes
                    break
            else:
                raise exceptions.JmespathTypeError(
                    function_name,
                    current[0],
                    self._convert_to_jmespath_type(first),
                    types,
                )
        else:
            return
        for element in current:
            actual_typename = type(element).__name__
            if actual_typename not in allowed:
                # Report the JMESPath type name, matching what _type_check_single reports for top-level mismatches.
                raise exceptions.JmespathTypeError(
                    function_name,
                    element,
                    self._convert_to_jmespath_type(actual_typename),
                    types,
                )

    @signature({'types': ['number']})
    def _func_abs(self, arg):
        """Return the absolute value of a number."""
        return abs(arg)

    @signature({'types': ['array-number']})
    def _func_avg(self, arg):
        """Return the arithmetic mean of a number array, or None for an empty array."""
        if arg:
            return sum(arg) / len(arg)
        else:
            return None

    @signature({'types': [], 'variadic': True})
    def _func_not_null(self, *arguments):
        """Return the first argument that is not None, or None when there is none."""
        for argument in arguments:
            if argument is not None:
                return argument
        return None

    @signature({'types': []})
    def _func_to_array(self, arg):
        """Return lists unchanged; wrap any other value in a one-element list."""
        if isinstance(arg, list):
            return arg
        else:
            return [arg]

    @signature({'types': []})
    def _func_to_string(self, arg):
        """Return strings unchanged; serialise anything else as compact JSON (falling back to ``str`` per value)."""
        if isinstance(arg, str):
            return arg
        else:
            return json.dumps(arg, separators=(',', ':'), default=str)

    @signature({'types': []})
    def _func_to_number(self, arg):
        """Return numbers unchanged, parse strings as int then float, and return None for everything else."""
        if arg is None or isinstance(arg, (list, dict, bool)):
            return None
        if isinstance(arg, (int, float)):
            return arg
        for convert in (int, float):
            try:
                return convert(arg)
            except (ValueError, TypeError):
                # ValueError: the text is not numeric. TypeError: the value is not something int()/float() accept at
                # all (the signature admits any type), which is likewise "not a number" rather than a crash.
                continue
        return None

    @signature({'types': ['array', 'string']}, {'types': []})
    def _func_contains(self, subject, search):
        """Return whether ``search`` is an element of the array, or a substring of the string, ``subject``."""
        return search in subject

    @signature({'types': ['string', 'array', 'object']})
    def _func_length(self, arg):
        """Return ``len()`` of a string, array or object."""
        return len(arg)

    @signature({'types': ['string']}, {'types': ['string']})
    def _func_ends_with(self, search, suffix):
        """Return whether the string ``search`` ends with ``suffix``."""
        return search.endswith(suffix)

    @signature({'types': ['string']}, {'types': ['string']})
    def _func_starts_with(self, search, suffix):
        """Return whether the string ``search`` starts with ``suffix`` (the second argument is the prefix)."""
        return search.startswith(suffix)

    @signature({'types': ['array', 'string']})
    def _func_reverse(self, arg):
        """Return a reversed copy of a string or array."""
        if isinstance(arg, str):
            return arg[::-1]
        else:
            return list(reversed(arg))

    @signature({'types': ['number']})
    def _func_ceil(self, arg):
        """Return ``math.ceil`` of a number."""
        return math.ceil(arg)

    @signature({'types': ['number']})
    def _func_floor(self, arg):
        """Return ``math.floor`` of a number."""
        return math.floor(arg)

    @signature({'types': ['string']}, {'types': ['array-string']})
    def _func_join(self, separator, array):
        """Join an array of strings with ``separator``."""
        return separator.join(array)

    @signature({'types': ['expref']}, {'types': ['array']})
    def _func_map(self, expref, arg):
        """Evaluate the expression reference against every element and return the list of results."""
        return [expref.visit(expref.expression, element) for element in arg]

    @signature({'types': ['array-number', 'array-string']})
    def _func_max(self, arg):
        """Return the largest element, or None for an empty array."""
        if arg:
            return max(arg)
        else:
            return None

    @signature({'types': ['object'], 'variadic': True})
    def _func_merge(self, *arguments):
        """Merge objects left to right into a new dict; later keys override earlier ones."""
        merged = {}
        for arg in arguments:
            merged.update(arg)
        return merged

    @signature({'types': ['array-number', 'array-string']})
    def _func_min(self, arg):
        """Return the smallest element, or None for an empty array."""
        if arg:
            return min(arg)
        else:
            return None

    @signature({'types': ['array-string', 'array-number']})
    def _func_sort(self, arg):
        """Return a sorted copy of an array of strings or of numbers."""
        return sorted(arg)

    @signature({'types': ['array-number']})
    def _func_sum(self, arg):
        """Return the sum of a number array (0 for an empty array)."""
        return sum(arg)

    @signature({'types': ['object']})
    def _func_keys(self, arg):
        """Return the object's keys as a list."""
        # To be consistent with .values() should we also return the indices of a list?
        return list(arg.keys())

    @signature({'types': ['object']})
    def _func_values(self, arg):
        """Return the object's values as a list."""
        return list(arg.values())

    @signature({'types': []})
    def _func_type(self, arg):
        """Return the JMESPath type name of ``arg``, or None when it is none of the recognised Python types."""
        # str and bool are tested before the generic numeric check because bool is a subclass of int.
        if isinstance(arg, str):
            return 'string'
        elif isinstance(arg, bool):
            return 'boolean'
        elif isinstance(arg, list):
            return 'array'
        elif isinstance(arg, dict):
            return 'object'
        elif isinstance(arg, (float, int)):
            return 'number'
        elif arg is None:
            return 'null'
        else:
            return None

    @signature({'types': ['array']}, {'types': ['expref']})
    def _func_sort_by(self, array, expref):
        """Return ``array`` sorted by the value the expression reference yields for each element."""
        if not array:
            return array
        # sort_by allows the expref to resolve to either a number or a string, so we have some special logic to handle
        # this. We evaluate the first array element and verify that it's either a string or a number.  We then create a
        # key function that validates that type, which requires that remaining array elements resolve to the same type
        # as the first element.
        required_type = self._convert_to_jmespath_type(type(expref.visit(expref.expression, array[0])).__name__)
        if required_type not in ['number', 'string']:
            raise exceptions.JmespathTypeError(
                'sort_by',
                array[0],
                required_type,
                ['string', 'number'],
            )
        keyfunc = self._create_key_func(expref, [required_type], 'sort_by')
        return sorted(array, key=keyfunc)

    @signature({'types': ['array']}, {'types': ['expref']})
    def _func_min_by(self, array, expref):
        """Return the element whose expression value is smallest, or None for an empty array."""
        keyfunc = self._create_key_func(
            expref,
            ['number', 'string'],
            'min_by',
        )
        if array:
            return min(array, key=keyfunc)
        else:
            return None

    @signature({'types': ['array']}, {'types': ['expref']})
    def _func_max_by(self, array, expref):
        """Return the element whose expression value is largest, or None for an empty array."""
        keyfunc = self._create_key_func(
            expref,
            ['number', 'string'],
            'max_by',
        )
        if array:
            return max(array, key=keyfunc)
        else:
            return None

    def _create_key_func(self, expref, allowed_types, function_name):
        """Build a sort key that evaluates ``expref`` on an element and rejects results of a disallowed type."""

        def keyfunc(x):
            result = expref.visit(expref.expression, x)
            actual_typename = type(result).__name__
            jmespath_type = self._convert_to_jmespath_type(actual_typename)
            # allowed_types is in term of jmespath types, not python types.
            if jmespath_type not in allowed_types:
                raise exceptions.JmespathTypeError(
                    function_name,
                    result,
                    jmespath_type,
                    allowed_types,
                )
            return result

        return keyfunc

    def _convert_to_jmespath_type(self, pyobject):
        """Map a Python type *name* to its JMESPath type name, or ``'unknown'`` when it has no mapping."""
        return TYPES_MAP.get(pyobject, 'unknown')

omlish/specs/jmespath/lexer.py ADDED Viewed

@@ -0,0 +1,312 @@
+import json
+import string
+import typing as ta
+import warnings
+from .exceptions import EmptyExpressionError
+from .exceptions import LexerError
class Lexer:
    """Hand-written tokenizer for JMESPath expressions.

    ``tokenize`` yields one dict per token with the keys ``type``, ``value``, ``start`` and ``end``. ``start`` is the
    offset of the token's first character in the expression and ``end`` is the offset one past its last character, for
    every token type.
    """

    START_IDENTIFIER: ta.AbstractSet[str] = set(string.ascii_letters + '_')
    VALID_IDENTIFIER: ta.AbstractSet[str] = set(string.ascii_letters + string.digits + '_')
    VALID_NUMBER: ta.AbstractSet[str] = set(string.digits)
    WHITESPACE: ta.AbstractSet[str] = set(' \t\n\r')
    # Single characters that always form a complete token on their own.
    SIMPLE_TOKENS: ta.Mapping[str, str] = {
        '.': 'dot',
        '*': 'star',
        ']': 'rbracket',
        ',': 'comma',
        ':': 'colon',
        '@': 'current',
        '(': 'lparen',
        ')': 'rparen',
        '{': 'lbrace',
        '}': 'rbrace',
    }

    def tokenize(self, expression):
        """Yield the tokens of ``expression``, ending with an ``eof`` token.

        Raises ``EmptyExpressionError`` for an empty expression and ``LexerError`` for characters or literals that
        cannot be tokenized. Because this is a generator, those errors surface while iterating.
        """
        self._initialize_for_expression(expression)
        while self._current is not None:
            if self._current in self.SIMPLE_TOKENS:
                yield {
                    'type': self.SIMPLE_TOKENS[self._current],
                    'value': self._current,
                    'start': self._position,
                    'end': self._position + 1,
                }
                self._next()
            elif self._current in self.START_IDENTIFIER:
                start = self._position
                buff = self._current
                while self._next() in self.VALID_IDENTIFIER:
                    buff += self._current
                yield {
                    'type': 'unquoted_identifier',
                    'value': buff,
                    'start': start,
                    'end': start + len(buff),
                }
            elif self._current in self.WHITESPACE:
                self._next()
            elif self._current == '[':
                start = self._position
                next_char = self._next()
                if next_char == ']':
                    self._next()
                    yield {
                        'type': 'flatten',
                        'value': '[]',
                        'start': start,
                        'end': start + 2,
                    }
                elif next_char == '?':
                    self._next()
                    yield {
                        'type': 'filter',
                        'value': '[?',
                        'start': start,
                        'end': start + 2,
                    }
                else:
                    # The lookahead character belongs to the next token; it is already the current character.
                    yield {
                        'type': 'lbracket',
                        'value': '[',
                        'start': start,
                        'end': start + 1,
                    }
            elif self._current == "'":
                yield self._consume_raw_string_literal()
            elif self._current == '|':
                yield self._match_or_else('|', 'or', 'pipe')
            elif self._current == '&':
                yield self._match_or_else('&', 'and', 'expref')
            elif self._current == '`':
                yield self._consume_literal()
            elif self._current in self.VALID_NUMBER:
                start = self._position
                buff = self._consume_number()
                yield {
                    'type': 'number',
                    'value': int(buff),
                    'start': start,
                    'end': start + len(buff),
                }
            elif self._current == '-':
                # Negative number.
                start = self._position
                buff = self._consume_number()
                if len(buff) > 1:
                    yield {
                        'type': 'number',
                        'value': int(buff),
                        'start': start,
                        'end': start + len(buff),
                    }
                else:
                    # A lone '-' with no digits after it.
                    raise LexerError(
                        lexer_position=start,
                        lexer_value=buff,
                        message=f"Unknown token '{buff}'",
                    )
            elif self._current == '"':
                yield self._consume_quoted_identifier()
            elif self._current == '<':
                yield self._match_or_else('=', 'lte', 'lt')
            elif self._current == '>':
                yield self._match_or_else('=', 'gte', 'gt')
            elif self._current == '!':
                yield self._match_or_else('=', 'ne', 'not')
            elif self._current == '=':
                # Remember where the '=' is before looking ahead: _next() only moves the position when it is not
                # already on the last character, so the position afterwards cannot be relied on.
                start = self._position
                if self._next() == '=':
                    self._next()
                    yield {
                        'type': 'eq',
                        'value': '==',
                        'start': start,
                        'end': start + 2,
                    }
                else:
                    raise LexerError(
                        lexer_position=start,
                        lexer_value='=',
                        message="Unknown token '='",
                    )
            else:
                raise LexerError(
                    lexer_position=self._position,
                    lexer_value=self._current,
                    message=f'Unknown token {self._current}',
                )
        yield {
            'type': 'eof',
            'value': '',
            'start': self._length,
            'end': self._length,
        }

    def _consume_number(self):
        """Consume the current character plus all digits that follow it and return them as a string."""
        buff = self._current
        while self._next() in self.VALID_NUMBER:
            buff += self._current
        return buff

    def _initialize_for_expression(self, expression):
        """Reset the cursor to the first character of ``expression``; an empty expression is rejected."""
        if not expression:
            raise EmptyExpressionError
        self._position = 0
        self._expression = expression
        self._chars = list(self._expression)
        self._current = self._chars[self._position]
        self._length = len(self._expression)

    def _next(self):
        """Advance one character and return it, or None at the end of the expression.

        When the cursor is already on the last character the position is left unchanged and only ``_current`` becomes
        None.
        """
        if self._position == self._length - 1:
            self._current = None
        else:
            self._position += 1
            self._current = self._chars[self._position]
        return self._current

    def _consume_until(self, delimiter):
        """Consume a delimited token and return the text between the delimiters.

        The cursor must be on the opening delimiter. A backslash keeps the character after it from ending the token;
        both the backslash and that character are kept in the returned text. Raises ``LexerError`` if the expression
        ends before the closing delimiter.
        """
        # Consume until the delimiter is reached, allowing for the delimiter to be escaped with "\".
        start = self._position
        buff = ''
        self._next()
        while self._current != delimiter:
            if self._current == '\\':
                buff += '\\'
                self._next()
            if self._current is None:
                # We're at the EOF.
                raise LexerError(
                    lexer_position=start,
                    lexer_value=self._expression[start:],
                    message=f'Unclosed {delimiter} delimiter',
                )
            buff += self._current
            self._next()
        # Skip the closing delimiter.
        self._next()
        return buff

    def _delimited_end(self):
        """Return the exclusive end offset of the delimited token that ``_consume_until`` just consumed."""
        # Normally the cursor now sits on the character right after the closing delimiter. If the delimiter was the
        # last character, _next() left the position on it and only cleared _current, so the token ends at the end of
        # the expression.
        if self._current is None:
            return self._length
        return self._position

    def _consume_literal(self):
        """Consume a backtick-delimited JSON literal and return its ``literal`` token.

        Text that is not valid JSON is retried as a JSON string (with a ``PendingDeprecationWarning``); if that fails
        too a ``LexerError`` is raised.
        """
        start = self._position
        lexeme = self._consume_until('`').replace('\\`', '`')
        try:
            # Assume it is valid JSON and attempt to parse.
            parsed_json = json.loads(lexeme)
        except ValueError:
            try:
                # Invalid JSON values should be converted to quoted JSON strings during the JEP-12 deprecation period.
                parsed_json = json.loads(f'"{lexeme.lstrip()}"')
                warnings.warn('deprecated string literal syntax', PendingDeprecationWarning)
            except ValueError as exc:
                raise LexerError(
                    lexer_position=start,
                    lexer_value=self._expression[start:],
                    message=f'Bad token {lexeme}',
                ) from exc
        return {
            'type': 'literal',
            'value': parsed_json,
            'start': start,
            'end': self._delimited_end(),
        }

    def _consume_quoted_identifier(self):
        """Consume a double-quoted identifier, decoding it as a JSON string, and return its token."""
        start = self._position
        lexeme = '"' + self._consume_until('"') + '"'
        try:
            value = json.loads(lexeme)
        except ValueError as e:
            # Keep only the leading description of the JSON error, dropping the position details after the colon.
            error_message = str(e).split(':')[0]
            raise LexerError(
                lexer_position=start,
                lexer_value=lexeme,
                message=error_message,
            ) from e
        return {
            'type': 'quoted_identifier',
            'value': value,
            'start': start,
            'end': self._delimited_end(),
        }

    def _consume_raw_string_literal(self):
        """Consume a single-quoted raw string and return it as a ``literal`` token."""
        start = self._position
        lexeme = self._consume_until("'").replace("\\'", "'")
        return {
            'type': 'literal',
            'value': lexeme,
            'start': start,
            'end': self._delimited_end(),
        }

    def _match_or_else(self, expected, match_type, else_type):
        """Tokenize a one- or two-character operator.

        If the character after the current one is ``expected`` both are consumed and a ``match_type`` token is
        returned; otherwise only the current character is consumed and an ``else_type`` token is returned.
        """
        start = self._position
        current = self._current
        next_char = self._next()
        if next_char == expected:
            self._next()
            return {
                'type': match_type,
                'value': current + next_char,
                'start': start,
                'end': start + 2,
            }
        return {
            'type': else_type,
            'value': current,
            'start': start,
            'end': start + 1,
        }

omlish 0.0.0.dev46__py3-none-any.whl → 0.0.0.dev47__py3-none-any.whl

omlish 0.0.0.dev46__py3-none-any.whl → 0.0.0.dev47__py3-none-any.whl