omlish 0.0.0.dev46__py3-none-any.whl → 0.0.0.dev47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,372 @@
1
+ import inspect
2
+ import json
3
+ import math
4
+
5
+ from . import exceptions
6
+
7
+
8
# Lookup table keyed by a Python type's ``__name__``; the value is the JMESPath type name used for it.
TYPES_MAP = dict(
    bool='boolean',
    list='array',
    dict='object',
    NoneType='null',
    unicode='string',
    str='string',
    float='number',
    int='number',
    long='number',
    OrderedDict='object',
    _Projection='array',
    _Expression='expref',
)
23
+
24
+
25
# Lookup table keyed by JMESPath type name; the value is the tuple of Python type ``__name__`` strings accepted for it.
REVERSE_TYPES_MAP = dict(
    boolean=('bool',),
    array=('list', '_Projection'),
    object=('dict', 'OrderedDict'),
    null=('NoneType',),
    string=('unicode', 'str'),
    number=('float', 'int', 'long'),
    expref=('_Expression',),
)
35
+
36
+
37
def signature(*arguments):
    """Create a decorator that records argument specs on a function.

    The positional ``arguments`` are stored, unchanged and as a tuple, in the decorated function's ``signature``
    attribute. The function itself is returned; it is not wrapped.
    """

    def attach_specs(target):
        target.signature = arguments
        return target

    return attach_specs
42
+
43
+
44
class FunctionRegistry(type):
    """Metaclass that gathers a class's ``_func_*`` implementations into a ``FUNCTION_TABLE`` attribute."""

    def __init__(cls, name, bases, attrs):
        # The table is (re)built for every class created with this metaclass, before type.__init__ runs.
        cls._populate_function_table()
        super().__init__(name, bases, attrs)

    def _populate_function_table(cls):
        """Assign ``cls.FUNCTION_TABLE`` from the functions that ``inspect.getmembers`` reports for ``cls``.

        A function is registered only when its name starts with ``_func_`` and it has a ``signature`` attribute that
        is not None (as set by the ``@signature`` decorator). The key is the name with the prefix stripped, e.g.
        ``_func_max_by`` is registered as ``max_by``; the value holds the function and its signature.
        """
        prefix = '_func_'
        registered = {}

        for member_name, member in inspect.getmembers(cls, predicate=inspect.isfunction):
            specs = getattr(member, 'signature', None)
            if specs is None or not member_name.startswith(prefix):
                continue

            registered[member_name[len(prefix):]] = {
                'function': member,
                'signature': specs,
            }

        cls.FUNCTION_TABLE = registered
66
+
67
+
68
class Functions(metaclass=FunctionRegistry):
    """Built-in JMESPath functions together with the arity and type validation applied before each call.

    Methods named ``_func_<name>`` that carry a ``@signature`` are collected into ``FUNCTION_TABLE`` by the
    ``FunctionRegistry`` metaclass and are dispatched by name through ``call_function``.
    """

    FUNCTION_TABLE: dict = {}  # noqa

    def call_function(self, function_name, resolved_args):
        """Validate ``resolved_args`` against the named function's signature, then call it.

        Raises ``exceptions.UnknownFunctionError`` when ``function_name`` is not in ``FUNCTION_TABLE``. Arity and type
        errors are raised by ``_validate_arguments``.
        """
        try:
            spec = self.FUNCTION_TABLE[function_name]
        except KeyError:
            raise exceptions.UnknownFunctionError(f'Unknown function: {function_name}()')  # noqa

        function = spec['function']
        signature = spec['signature']

        self._validate_arguments(resolved_args, signature, function_name)

        # The table stores plain functions, so the instance is passed explicitly.
        return function(self, *resolved_args)

    def _validate_arguments(self, args, signature, function_name):
        """Check the argument count, then the argument types.

        When the last spec is marked ``variadic`` at least ``len(signature)`` arguments are required
        (``VariadicArityError`` otherwise); in every other case the count must match exactly (``ArityError``).
        """
        if signature and signature[-1].get('variadic'):
            if len(args) < len(signature):
                raise exceptions.VariadicArityError(len(signature), len(args), function_name)

        elif len(args) != len(signature):
            raise exceptions.ArityError(len(signature), len(args), function_name)

        return self._type_check(args, signature, function_name)

    def _type_check(self, actual, signature, function_name):
        """Type-check every argument against its spec; an empty ``types`` list means "any type".

        Arguments beyond ``len(signature)`` only exist for variadic functions; they are checked against the last
        (variadic) spec so that, for example, every argument of ``merge`` must be an object.
        """
        for spec, value in zip(signature, actual):
            allowed_types = spec['types']
            if allowed_types:
                self._type_check_single(value, allowed_types, function_name)

        if signature and signature[-1].get('variadic'):
            variadic_types = signature[-1]['types']
            if variadic_types:
                for value in actual[len(signature):]:
                    self._type_check_single(value, variadic_types, function_name)

    def _type_check_single(self, current, types, function_name):
        """Raise ``JmespathTypeError`` unless ``current`` matches one of the JMESPath ``types``."""
        # Type checking involves checking the top level type, and in the case of arrays, potentially checking the types
        # of each element.
        allowed_types, allowed_subtypes = self._get_allowed_pytypes(types)

        # We're not using isinstance() on purpose. The type model for jmespath does not map 1-1 with python types
        # (booleans are considered integers in python for example).
        actual_typename = type(current).__name__
        if actual_typename not in allowed_types:
            raise exceptions.JmespathTypeError(
                function_name,
                current,
                self._convert_to_jmespath_type(actual_typename),
                types,
            )

        # If we're dealing with a list type, we can have additional restrictions on the type of the list elements (for
        # example a function can require a list of numbers or a list of strings). Arrays are the only types that can
        # have subtypes.
        if allowed_subtypes:
            self._subtype_check(current, allowed_subtypes, types, function_name)

    def _get_allowed_pytypes(self, types):
        """Translate JMESPath type names into Python type names.

        Returns ``(allowed_types, allowed_subtypes)``: a flat list of acceptable Python type names for the value
        itself, and a list with one tuple of acceptable element type names per ``<type>-<subtype>`` entry (for
        example ``array-number``).
        """
        allowed_types: list = []
        allowed_subtypes: list = []

        for t in types:
            type_, sep, subtype = t.partition('-')
            if sep:
                allowed_subtypes.append(REVERSE_TYPES_MAP[subtype])

            allowed_types.extend(REVERSE_TYPES_MAP[type_])

        return allowed_types, allowed_subtypes

    def _subtype_check(self, current, allowed_subtypes, types, function_name):
        """Raise ``JmespathTypeError`` unless every element of ``current`` has an allowed element type.

        With exactly one allowed subtype group every element must belong to it. With several groups the first element
        selects the group and all remaining elements must belong to that same group; an empty ``current`` passes.
        The actual type reported in the error is the JMESPath type name, as in ``_type_check_single``.
        """
        if len(allowed_subtypes) == 1:
            # The easy case, we know up front what type we need to validate.
            allowed = allowed_subtypes[0]

        elif len(allowed_subtypes) > 1 and current:
            # Dynamic type validation. Based on the first type we see, we validate that the remaining types match.
            first = type(current[0]).__name__
            for subtypes in allowed_subtypes:
                if first in subtypes:
                    allowed = subtypes
                    break
            else:
                raise exceptions.JmespathTypeError(
                    function_name,
                    current[0],
                    self._convert_to_jmespath_type(first),
                    types,
                )

        else:
            # Nothing to validate: no subtype restriction, or an empty array with several candidate subtypes.
            return

        for element in current:
            actual_typename = type(element).__name__
            if actual_typename not in allowed:
                raise exceptions.JmespathTypeError(
                    function_name,
                    element,
                    self._convert_to_jmespath_type(actual_typename),
                    types,
                )

    @signature({'types': ['number']})
    def _func_abs(self, arg):
        """Return the absolute value of a number."""
        return abs(arg)

    @signature({'types': ['array-number']})
    def _func_avg(self, arg):
        """Return the arithmetic mean of an array of numbers, or None for an empty array."""
        if arg:
            return sum(arg) / len(arg)
        else:
            return None

    @signature({'types': [], 'variadic': True})
    def _func_not_null(self, *arguments):
        """Return the first argument that is not None, or None when all of them are."""
        for argument in arguments:
            if argument is not None:
                return argument
        return None

    @signature({'types': []})
    def _func_to_array(self, arg):
        """Return ``arg`` unchanged when it is a list, otherwise wrap it in a one-element list."""
        if isinstance(arg, list):
            return arg
        else:
            return [arg]

    @signature({'types': []})
    def _func_to_string(self, arg):
        """Return ``arg`` unchanged when it is a string, otherwise its compact JSON encoding."""
        if isinstance(arg, str):
            return arg
        else:
            # default=str: values json cannot encode natively are stringified rather than raising.
            return json.dumps(arg, separators=(',', ':'), default=str)

    @signature({'types': []})
    def _func_to_number(self, arg):
        """Convert ``arg`` to a number, returning None when that is not possible.

        Lists, dicts, booleans and None give None; ints and floats are returned unchanged; anything else is tried
        with ``int()`` and then ``float()``. Values that those constructors reject with either ``ValueError`` or
        ``TypeError`` (for instance an expression reference) give None instead of propagating the error.
        """
        # bool is tested before int/float because bool is a subclass of int in Python.
        if arg is None or isinstance(arg, (list, dict, bool)):
            return None

        if isinstance(arg, (int, float)):
            return arg

        for convert in (int, float):
            try:
                return convert(arg)
            except (ValueError, TypeError):
                continue

        return None

    @signature({'types': ['array', 'string']}, {'types': []})
    def _func_contains(self, subject, search):
        """Return whether ``search`` is contained in the array or string ``subject`` (Python ``in`` semantics)."""
        return search in subject

    @signature({'types': ['string', 'array', 'object']})
    def _func_length(self, arg):
        """Return ``len(arg)``."""
        return len(arg)

    @signature({'types': ['string']}, {'types': ['string']})
    def _func_ends_with(self, search, suffix):
        """Return whether the string ``search`` ends with ``suffix``."""
        return search.endswith(suffix)

    @signature({'types': ['string']}, {'types': ['string']})
    def _func_starts_with(self, search, suffix):
        """Return whether the string ``search`` starts with ``suffix`` (the prefix to look for)."""
        return search.startswith(suffix)

    @signature({'types': ['array', 'string']})
    def _func_reverse(self, arg):
        """Return a reversed copy of a string or array."""
        if isinstance(arg, str):
            return arg[::-1]
        else:
            return list(reversed(arg))

    @signature({'types': ['number']})
    def _func_ceil(self, arg):
        """Return ``math.ceil(arg)``."""
        return math.ceil(arg)

    @signature({'types': ['number']})
    def _func_floor(self, arg):
        """Return ``math.floor(arg)``."""
        return math.floor(arg)

    @signature({'types': ['string']}, {'types': ['array-string']})
    def _func_join(self, separator, array):
        """Join an array of strings with ``separator``."""
        return separator.join(array)

    @signature({'types': ['expref']}, {'types': ['array']})
    def _func_map(self, expref, arg):
        """Evaluate the expression reference against each element of ``arg`` and return the list of results."""
        return [expref.visit(expref.expression, element) for element in arg]

    @signature({'types': ['array-number', 'array-string']})
    def _func_max(self, arg):
        """Return the largest element of the array, or None for an empty array."""
        if arg:
            return max(arg)
        else:
            return None

    @signature({'types': ['object'], 'variadic': True})
    def _func_merge(self, *arguments):
        """Merge the given objects into a new dict; keys from later arguments override earlier ones."""
        merged = {}
        for arg in arguments:
            merged.update(arg)
        return merged

    @signature({'types': ['array-number', 'array-string']})
    def _func_min(self, arg):
        """Return the smallest element of the array, or None for an empty array."""
        if arg:
            return min(arg)
        else:
            return None

    @signature({'types': ['array-string', 'array-number']})
    def _func_sort(self, arg):
        """Return a new sorted list of the array's elements."""
        return sorted(arg)

    @signature({'types': ['array-number']})
    def _func_sum(self, arg):
        """Return the sum of an array of numbers."""
        return sum(arg)

    @signature({'types': ['object']})
    def _func_keys(self, arg):
        """Return the object's keys as a list."""
        # To be consistent with .values() should we also return the indices of a list?
        return list(arg.keys())

    @signature({'types': ['object']})
    def _func_values(self, arg):
        """Return the object's values as a list."""
        return list(arg.values())

    @signature({'types': []})
    def _func_type(self, arg):
        """Return the JMESPath type name of ``arg``, or None when it is none of the recognised types."""
        if isinstance(arg, str):
            return 'string'
        # bool must be tested before the number check because bool is a subclass of int.
        elif isinstance(arg, bool):
            return 'boolean'
        elif isinstance(arg, list):
            return 'array'
        elif isinstance(arg, dict):
            return 'object'
        elif isinstance(arg, (float, int)):
            return 'number'
        elif arg is None:
            return 'null'
        else:
            return None

    @signature({'types': ['array']}, {'types': ['expref']})
    def _func_sort_by(self, array, expref):
        """Return ``array`` sorted by the value the expression reference yields for each element.

        An empty array is returned as is. The key of the first element must be a number or a string
        (``JmespathTypeError`` otherwise), and every other key must have that same type.
        """
        if not array:
            return array

        # sort_by allows for the expref to be either a number or a string, so we have some special logic to handle
        # this. We evaluate the first array element and verify that it's either a string or a number. We then create a
        # key function that validates that type, which requires that remaining array elements resolve to the same type
        # as the first element.
        required_type = self._convert_to_jmespath_type(type(expref.visit(expref.expression, array[0])).__name__)
        if required_type not in ['number', 'string']:
            raise exceptions.JmespathTypeError(
                'sort_by',
                array[0],
                required_type,
                ['string', 'number'],
            )

        keyfunc = self._create_key_func(expref, [required_type], 'sort_by')

        return sorted(array, key=keyfunc)

    @signature({'types': ['array']}, {'types': ['expref']})
    def _func_min_by(self, array, expref):
        """Return the element whose expression value is smallest, or None for an empty array."""
        keyfunc = self._create_key_func(
            expref,
            ['number', 'string'],
            'min_by',
        )

        if array:
            return min(array, key=keyfunc)
        else:
            return None

    @signature({'types': ['array']}, {'types': ['expref']})
    def _func_max_by(self, array, expref):
        """Return the element whose expression value is largest, or None for an empty array."""
        keyfunc = self._create_key_func(
            expref,
            ['number', 'string'],
            'max_by',
        )

        if array:
            return max(array, key=keyfunc)
        else:
            return None

    def _create_key_func(self, expref, allowed_types, function_name):
        """Build a key function that evaluates ``expref`` on an element and validates the result's type.

        The returned callable raises ``JmespathTypeError`` (reported under ``function_name``) when the evaluated
        value's JMESPath type is not in ``allowed_types``; otherwise it returns the evaluated value.
        """

        def keyfunc(x):
            result = expref.visit(expref.expression, x)
            actual_typename = type(result).__name__

            jmespath_type = self._convert_to_jmespath_type(actual_typename)
            # allowed_types is in term of jmespath types, not python types.
            if jmespath_type not in allowed_types:
                raise exceptions.JmespathTypeError(function_name, result, jmespath_type, allowed_types)

            return result

        return keyfunc

    def _convert_to_jmespath_type(self, pyobject):
        """Map a Python type name (a string, despite the parameter name) to its JMESPath name, else 'unknown'."""
        return TYPES_MAP.get(pyobject, 'unknown')
@@ -0,0 +1,312 @@
1
+ import json
2
+ import string
3
+ import typing as ta
4
+ import warnings
5
+
6
+ from .exceptions import EmptyExpressionError
7
+ from .exceptions import LexerError
8
+
9
+
10
class Lexer:
    """Character-by-character tokenizer for JMESPath expressions.

    ``tokenize`` yields dict tokens with ``type``, ``value``, ``start`` and ``end`` keys. ``start`` is the offset of
    the token's first character in the expression and ``end`` is the offset just past its last character, for every
    token type. Scanner state (``_position``, ``_current``, ...) is stored on the instance.
    """

    START_IDENTIFIER: ta.AbstractSet[str] = set(string.ascii_letters + '_')
    VALID_IDENTIFIER: ta.AbstractSet[str] = set(string.ascii_letters + string.digits + '_')

    VALID_NUMBER: ta.AbstractSet[str] = set(string.digits)

    WHITESPACE: ta.AbstractSet[str] = set(' \t\n\r')

    # Single-character tokens that need no lookahead.
    SIMPLE_TOKENS: ta.Mapping[str, str] = {
        '.': 'dot',
        '*': 'star',
        ']': 'rbracket',
        ',': 'comma',
        ':': 'colon',
        '@': 'current',
        '(': 'lparen',
        ')': 'rparen',
        '{': 'lbrace',
        '}': 'rbrace',
    }

    def tokenize(self, expression):
        """Yield the tokens of ``expression``, ending with an ``eof`` token.

        Raises ``EmptyExpressionError`` for an empty expression and ``LexerError`` for characters or sequences that
        do not form a token. This is a generator, so errors surface while iterating.
        """
        self._initialize_for_expression(expression)
        while self._current is not None:
            if self._current in self.SIMPLE_TOKENS:
                yield {
                    'type': self.SIMPLE_TOKENS[self._current],
                    'value': self._current,
                    'start': self._position,
                    'end': self._position + 1,
                }
                self._next()

            elif self._current in self.START_IDENTIFIER:
                start = self._position

                buff = self._current
                while self._next() in self.VALID_IDENTIFIER:
                    buff += self._current

                yield {
                    'type': 'unquoted_identifier',
                    'value': buff,
                    'start': start,
                    'end': start + len(buff),
                }

            elif self._current in self.WHITESPACE:
                self._next()

            elif self._current == '[':
                # '[' may open a flatten ('[]'), a filter ('[?') or a plain bracket; one character of lookahead
                # decides which.
                start = self._position

                next_char = self._next()
                if next_char == ']':
                    self._next()
                    yield {
                        'type': 'flatten',
                        'value': '[]',
                        'start': start,
                        'end': start + 2,
                    }

                elif next_char == '?':
                    self._next()
                    yield {
                        'type': 'filter',
                        'value': '[?',
                        'start': start,
                        'end': start + 2,
                    }

                else:
                    yield {
                        'type': 'lbracket',
                        'value': '[',
                        'start': start,
                        'end': start + 1,
                    }

            elif self._current == "'":
                yield self._consume_raw_string_literal()

            elif self._current == '|':
                yield self._match_or_else('|', 'or', 'pipe')

            elif self._current == '&':
                yield self._match_or_else('&', 'and', 'expref')

            elif self._current == '`':
                yield self._consume_literal()

            elif self._current in self.VALID_NUMBER:
                start = self._position

                buff = self._consume_number()
                yield {
                    'type': 'number',
                    'value': int(buff),
                    'start': start,
                    'end': start + len(buff),
                }

            elif self._current == '-':
                # Negative number.
                start = self._position

                buff = self._consume_number()
                if len(buff) > 1:
                    yield {
                        'type': 'number',
                        'value': int(buff),
                        'start': start,
                        'end': start + len(buff),
                    }

                else:
                    # A lone '-' with no digits after it is not a token.
                    raise LexerError(
                        lexer_position=start,
                        lexer_value=buff,
                        message=f"Unknown token '{buff}'",
                    )

            elif self._current == '"':
                yield self._consume_quoted_identifier()

            elif self._current == '<':
                yield self._match_or_else('=', 'lte', 'lt')

            elif self._current == '>':
                yield self._match_or_else('=', 'gte', 'gt')

            elif self._current == '!':
                yield self._match_or_else('=', 'ne', 'not')

            elif self._current == '=':
                start = self._position

                if self._next() == '=':
                    yield {
                        'type': 'eq',
                        'value': '==',
                        'start': start,
                        'end': start + 2,
                    }
                    self._next()

                else:
                    # The error points at the '=' itself. At EOF _next() leaves the position on the '=', otherwise
                    # it has moved one past it; in both cases that is ``start``.
                    raise LexerError(
                        lexer_position=start,
                        lexer_value='=',
                        message="Unknown token '='",
                    )

            else:
                raise LexerError(
                    lexer_position=self._position,
                    lexer_value=self._current,
                    message=f'Unknown token {self._current}',
                )

        yield {
            'type': 'eof',
            'value': '',
            'start': self._length,
            'end': self._length,
        }

    def _consume_number(self):
        """Return the current character plus every digit that directly follows it.

        The current character itself is not checked, which lets the caller start on a leading '-'.
        """
        buff = self._current
        while self._next() in self.VALID_NUMBER:
            buff += self._current
        return buff

    def _initialize_for_expression(self, expression):
        """Reset the scanner state to the start of ``expression``; raise ``EmptyExpressionError`` if it is empty."""
        if not expression:
            raise EmptyExpressionError
        self._position = 0
        self._expression = expression
        self._chars = list(self._expression)
        self._current = self._chars[self._position]
        self._length = len(self._expression)

    def _next(self):
        """Advance one character and return it, or None at the end of the expression.

        At the last character the position is left unchanged and only ``_current`` becomes None.
        """
        if self._position == self._length - 1:
            self._current = None
        else:
            self._position += 1
            self._current = self._chars[self._position]
        return self._current

    def _consume_until(self, delimiter):
        """Return the raw text between the current (opening) delimiter and the next unescaped ``delimiter``.

        Backslashes are kept in the returned text together with the character they escape. The scanner is left just
        past the closing delimiter. Raises ``LexerError`` when the expression ends first.
        """
        # Consume until the delimiter is reached, allowing for the delimiter to be escaped with "\".
        start = self._position

        parts = []
        self._next()
        while self._current != delimiter:
            if self._current == '\\':
                # Keep the backslash and take the following character unconditionally.
                parts.append('\\')
                self._next()

            if self._current is None:
                # We're at the EOF.
                raise LexerError(
                    lexer_position=start,
                    lexer_value=self._expression[start:],
                    message=f'Unclosed {delimiter} delimiter',
                )

            parts.append(self._current)
            self._next()

        # Skip the closing delimiter.
        self._next()
        return ''.join(parts)

    def _consume_literal(self):
        """Consume a backtick-delimited JSON literal and return its ``literal`` token.

        Text that is not valid JSON is retried as a JSON string (with a ``PendingDeprecationWarning``); if that also
        fails a ``LexerError`` is raised.
        """
        start = self._position

        raw = self._consume_until('`')
        lexeme = raw.replace('\\`', '`')
        try:
            # Assume it is valid JSON and attempt to parse.
            parsed_json = json.loads(lexeme)
        except ValueError:
            try:
                # Invalid JSON values should be converted to quoted JSON strings during the JEP-12 deprecation period.
                parsed_json = json.loads('"%s"' % lexeme.lstrip())  # noqa
                warnings.warn('deprecated string literal syntax', PendingDeprecationWarning)
            except ValueError:
                raise LexerError(  # noqa
                    lexer_position=start,
                    lexer_value=self._expression[start:],
                    message=f'Bad token {lexeme}',
                )

        return {
            'type': 'literal',
            'value': parsed_json,
            'start': start,
            # The token spans the raw text plus both backticks. Derived from the text rather than from _position,
            # which does not advance past the final character of the expression.
            'end': start + len(raw) + 2,
        }

    def _consume_quoted_identifier(self):
        """Consume a double-quoted identifier, decode it as a JSON string and return its token.

        Raises ``LexerError`` (with the first part of the JSON error message) when the text is not a valid JSON
        string.
        """
        start = self._position

        # lexeme includes both quotes, so its length is the token's length in the expression.
        lexeme = '"' + self._consume_until('"') + '"'
        try:
            value = json.loads(lexeme)
        except ValueError as e:
            error_message = str(e).split(':')[0]
            raise LexerError(  # noqa
                lexer_position=start,
                lexer_value=lexeme,
                message=error_message,
            )

        return {
            'type': 'quoted_identifier',
            'value': value,
            'start': start,
            'end': start + len(lexeme),
        }

    def _consume_raw_string_literal(self):
        """Consume a single-quoted raw string and return its ``literal`` token (only ``\\'`` is unescaped)."""
        start = self._position

        raw = self._consume_until("'")
        return {
            'type': 'literal',
            'value': raw.replace("\\'", "'"),
            'start': start,
            # Raw text plus the two quote characters.
            'end': start + len(raw) + 2,
        }

    def _match_or_else(self, expected, match_type, else_type):
        """Return a two-character token if the next character is ``expected``, else a one-character token.

        The scanner is left on the first character after the returned token.
        """
        start = self._position

        current = self._current
        next_char = self._next()
        if next_char == expected:
            self._next()
            return {
                'type': match_type,
                'value': current + next_char,
                'start': start,
                'end': start + 2,
            }

        return {
            'type': else_type,
            'value': current,
            'start': start,
            'end': start + 1,
        }