omlish 0.0.0.dev46__py3-none-any.whl → 0.0.0.dev48__py3-none-any.whl
This diff shows the content of publicly released package versions as published to their respective public registries. It is provided for informational purposes only and reflects the changes between the two versions.
- omlish/.manifests.json +12 -0
- omlish/__about__.py +2 -2
- omlish/bootstrap/harness.py +1 -2
- omlish/lang/objects.py +5 -2
- omlish/specs/__init__.py +0 -1
- omlish/specs/jmespath/LICENSE +16 -0
- omlish/specs/jmespath/__init__.py +41 -0
- omlish/specs/jmespath/__main__.py +11 -0
- omlish/specs/jmespath/ast.py +114 -0
- omlish/specs/jmespath/cli.py +68 -0
- omlish/specs/jmespath/exceptions.py +140 -0
- omlish/specs/jmespath/functions.py +593 -0
- omlish/specs/jmespath/lexer.py +389 -0
- omlish/specs/jmespath/parser.py +664 -0
- omlish/specs/jmespath/scope.py +35 -0
- omlish/specs/jmespath/visitor.py +429 -0
- omlish/specs/jsonschema/keywords/__init__.py +6 -0
- omlish/specs/jsonschema/keywords/base.py +1 -1
- omlish/specs/jsonschema/keywords/core.py +1 -1
- omlish/specs/jsonschema/keywords/metadata.py +1 -1
- omlish/specs/jsonschema/keywords/parse.py +68 -35
- omlish/specs/jsonschema/keywords/validation.py +1 -1
- {omlish-0.0.0.dev46.dist-info → omlish-0.0.0.dev48.dist-info}/METADATA +1 -1
- {omlish-0.0.0.dev46.dist-info → omlish-0.0.0.dev48.dist-info}/RECORD +28 -17
- {omlish-0.0.0.dev46.dist-info → omlish-0.0.0.dev48.dist-info}/LICENSE +0 -0
- {omlish-0.0.0.dev46.dist-info → omlish-0.0.0.dev48.dist-info}/WHEEL +0 -0
- {omlish-0.0.0.dev46.dist-info → omlish-0.0.0.dev48.dist-info}/entry_points.txt +0 -0
- {omlish-0.0.0.dev46.dist-info → omlish-0.0.0.dev48.dist-info}/top_level.txt +0 -0
omlish/specs/jmespath/lexer.py (new file):

```diff
@@ -0,0 +1,389 @@
+import json
+import string
+import typing as ta
+import warnings
+
+from .exceptions import EmptyExpressionError
+from .exceptions import LexerError
+
+
+class Lexer:
+    START_IDENTIFIER: ta.AbstractSet[str] = set(string.ascii_letters + '_')
+    VALID_IDENTIFIER: ta.AbstractSet[str] = set(string.ascii_letters + string.digits + '_')
+
+    VALID_NUMBER: ta.AbstractSet[str] = set(string.digits)
+
+    WHITESPACE: ta.AbstractSet[str] = set(' \t\n\r')
+
+    SIMPLE_TOKENS: ta.Mapping[str, str] = {
+        '.': 'dot',
+        '*': 'star',
+        ']': 'rbracket',
+        ',': 'comma',
+        ':': 'colon',
+        '@': 'current',
+        '(': 'lparen',
+        ')': 'rparen',
+        '{': 'lbrace',
+        '}': 'rbrace',
+        '+': 'plus',
+        '%': 'modulo',
+        '\u2212': 'minus',
+        '\u00d7': 'multiply',
+        '\u00f7': 'divide',
+    }
+
+    def __init__(self):
+        self._enable_legacy_literals = False
+
+    def tokenize(self, expression, options=None):
+        if options is not None:
+            self._enable_legacy_literals = options.enable_legacy_literals
+
+        self._initialize_for_expression(expression)
+        while self._current is not None:
+            if self._current in self.SIMPLE_TOKENS:
+                yield {
+                    'type': self.SIMPLE_TOKENS[self._current],
+                    'value': self._current,
+                    'start': self._position,
+                    'end': self._position + 1,
+                }
+                self._next()
+
+            elif self._current in self.START_IDENTIFIER:
+                start = self._position
+
+                buff = self._current
+                while self._next() in self.VALID_IDENTIFIER:
+                    buff += self._current
+
+                yield {
+                    'type': 'unquoted_identifier',
+                    'value': buff,
+                    'start': start,
+                    'end': start + len(buff),
+                }
+
+            elif self._current in self.WHITESPACE:
+                self._next()
+
+            elif self._current == '[':
+                start = self._position
+
+                next_char = self._next()
+                if next_char == ']':
+                    self._next()
+                    yield {
+                        'type': 'flatten',
+                        'value': '[]',
+                        'start': start,
+                        'end': start + 2,
+                    }
+
+                elif next_char == '?':
+                    self._next()
+                    yield {
+                        'type': 'filter',
+                        'value': '[?',
+                        'start': start,
+                        'end': start + 2,
+                    }
+
+                else:
+                    yield {
+                        'type': 'lbracket',
+                        'value': '[',
+                        'start': start,
+                        'end': start + 1,
+                    }
+
+            elif self._current == "'":
+                yield self._consume_raw_string_literal()
+
+            elif self._current == '|':
+                yield self._match_or_else('|', 'or', 'pipe')
+
+            elif self._current == '&':
+                yield self._match_or_else('&', 'and', 'expref')
+
+            elif self._current == '`':
+                yield self._consume_literal()
+
+            elif self._current in self.VALID_NUMBER:
+                start = self._position
+
+                buff = self._consume_number()
+                yield {
+                    'type': 'number',
+                    'value': int(buff),
+                    'start': start,
+                    'end': start + len(buff),
+                }
+
+            elif self._current == '-':
+                if not self._peek_is_next_digit():
+                    self._next()
+                    yield {
+                        'type': 'minus',
+                        'value': '-',
+                        'start': self._position - 1,
+                        'end': self._position,
+                    }
+                else:
+                    # Negative number.
+                    start = self._position
+                    buff = self._consume_number()
+                    if len(buff) > 1:
+                        yield {
+                            'type': 'number',
+                            'value': int(buff),
+                            'start': start,
+                            'end': start + len(buff),
+                        }
+                    else:
+                        raise LexerError(
+                            lexer_position=start,
+                            lexer_value=buff,
+                            message=f"Unknown token '{buff}'")
+
+            elif self._current == '/':
+                self._next()
+                if self._current == '/':
+                    self._next()
+                    yield {
+                        'type': 'div',
+                        'value': '//',
+                        'start': self._position - 1,
+                        'end': self._position,
+                    }
+                else:
+                    yield {
+                        'type': 'divide',
+                        'value': '/',
+                        'start': self._position,
+                        'end': self._position + 1,
+                    }
+
+            elif self._current == '"':
+                yield self._consume_quoted_identifier()
+
+            elif self._current == '<':
+                yield self._match_or_else('=', 'lte', 'lt')
+
+            elif self._current == '>':
+                yield self._match_or_else('=', 'gte', 'gt')
+
+            elif self._current == '!':
+                yield self._match_or_else('=', 'ne', 'not')
+
+            elif self._current == '=':
+                yield self._match_or_else('=', 'eq', 'assign')
+
+            elif self._current == '$':
+                if self._peek_may_be_valid_unquoted_identifier():
+                    yield self._consume_variable()
+                else:
+                    yield {
+                        'type': 'root',
+                        'value': self._current,
+                        'start': self._position,
+                        'end': self._position + 1,
+                    }
+                    self._next()
+            else:
+                raise LexerError(
+                    lexer_position=self._position,
+                    lexer_value=self._current,
+                    message=f'Unknown token {self._current}',
+                )
+
+        yield {
+            'type': 'eof',
+            'value': '',
+            'start': self._length,
+            'end': self._length,
+        }
+
+    def _consume_number(self):
+        start = self._position  # noqa
+
+        buff = self._current
+        while self._next() in self.VALID_NUMBER:
+            buff += self._current
+        return buff
+
+    def _consume_variable(self):
+        start = self._position
+
+        buff = self._current
+        self._next()
+        if self._current not in self.START_IDENTIFIER:
+            raise LexerError(
+                lexer_position=start,
+                lexer_value=self._current,
+                message=f'Invalid variable starting character {self._current}',
+            )
+
+        buff += self._current
+        while self._next() in self.VALID_IDENTIFIER:
+            buff += self._current
+
+        return {
+            'type': 'variable',
+            'value': buff,
+            'start': start,
+            'end': start + len(buff),
+        }
+
+    def _peek_may_be_valid_unquoted_identifier(self):
+        if (self._position == self._length - 1):
+            return False
+        else:
+            nxt = self._chars[self._position + 1]
+            return nxt in self.START_IDENTIFIER
+
+    def _peek_is_next_digit(self):
+        if (self._position == self._length - 1):
+            return False
+        else:
+            nxt = self._chars[self._position + 1]
+            return nxt in self.VALID_NUMBER
+
+    def _initialize_for_expression(self, expression):
+        if not expression:
+            raise EmptyExpressionError
+        self._position = 0
+        self._expression = expression
+        self._chars = list(self._expression)
+        self._current = self._chars[self._position]
+        self._length = len(self._expression)
+
+    def _next(self):
+        if self._position == self._length - 1:
+            self._current = None
+        else:
+            self._position += 1
+            self._current = self._chars[self._position]
+        return self._current
+
+    def _consume_until(self, delimiter):
+        # Consume until the delimiter is reached, allowing for the delimiter to be escaped with "\".
+        start = self._position
+
+        buff = ''
+        self._next()
+        while self._current != delimiter:
+            if self._current == '\\':
+                buff += '\\'
+                self._next()
+
+            if self._current is None:
+                # We're at the EOF.
+                raise LexerError(
+                    lexer_position=start,
+                    lexer_value=self._expression[start:],
+                    message=f'Unclosed {delimiter} delimiter',
+                )
+
+            buff += self._current
+            self._next()
+
+        # Skip the closing delimiter.
+        self._next()
+        return buff
+
+    def _consume_literal(self):
+        start = self._position
+
+        token = self._consume_until('`')
+        lexeme = token.replace('\\`', '`')
+        parsed_json = None
+        try:
+            # Assume it is valid JSON and attempt to parse.
+            parsed_json = json.loads(lexeme)
+        except ValueError:
+            error = LexerError(
+                lexer_position=start,
+                lexer_value=self._expression[start:],
+                message=f'Bad token `{token}`',
+            )
+
+            if not self._enable_legacy_literals:
+                raise error  # noqa
+
+            try:
+                # Invalid JSON values should be converted to quoted JSON strings during the JEP-12 deprecation period.
+                parsed_json = json.loads('"%s"' % lexeme.lstrip())  # noqa
+                warnings.warn('deprecated string literal syntax', DeprecationWarning)
+            except ValueError:
+                raise LexerError(  # noqa
+                    lexer_position=start,
+                    lexer_value=self._expression[start:],
+                    message=f'Bad token {lexeme}',
+                )
+
+        token_len = self._position - start
+        return {
+            'type': 'literal',
+            'value': parsed_json,
+            'start': start,
+            'end': token_len,
+        }
+
+    def _consume_quoted_identifier(self):
+        start = self._position
+
+        lexeme = '"' + self._consume_until('"') + '"'
+        try:
+            token_len = self._position - start
+            return {
+                'type': 'quoted_identifier',
+                'value': json.loads(lexeme),
+                'start': start,
+                'end': token_len,
+            }
+
+        except ValueError as e:
+            error_message = str(e).split(':')[0]
+            raise LexerError(  # noqa
+                lexer_position=start,
+                lexer_value=lexeme,
+                message=error_message,
+            )
+
+    def _consume_raw_string_literal(self):
+        start = self._position
+
+        lexeme = self._consume_until("'") \
+            .replace("\\'", "'") \
+            .replace('\\\\', '\\')
+
+        token_len = self._position - start
+        return {
+            'type': 'literal',
+            'value': lexeme,
+            'start': start,
+            'end': token_len,
+        }
+
+    def _match_or_else(self, expected, match_type, else_type):
+        start = self._position
+
+        current = self._current
+        next_char = self._next()
+        if next_char == expected:
+            self._next()
+            return {
+                'type': match_type,
+                'value': current + next_char,
+                'start': start,
+                'end': start + 1,
+            }
+
+        return {
+            'type': else_type,
+            'value': current,
+            'start': start,
+            'end': start,
+        }
```
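For reference, a minimal sketch of driving the new lexer (assuming the wheel is installed so the module is importable as `omlish.specs.jmespath.lexer`; the expression and printed fields are illustrative only):

```python
from omlish.specs.jmespath.lexer import Lexer

# tokenize() is a generator: it yields one dict per token, each carrying
# 'type', 'value', 'start', and 'end' keys, and finishes with an 'eof' token.
for tok in Lexer().tokenize('foo.bar[0] != `null`'):
    print(tok['type'], repr(tok['value']))
```

Note that per the `SIMPLE_TOKENS` table above, the lexer also accepts the Unicode arithmetic operators `\u2212`, `\u00d7`, and `\u00f7`, which lex to `minus`, `multiply`, and `divide` tokens respectively, alongside the ASCII `+`, `-`, `/`, and `%` forms.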