omlish 0.0.0.dev46__py3-none-any.whl → 0.0.0.dev48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,389 @@
1
+ import json
2
+ import string
3
+ import typing as ta
4
+ import warnings
5
+
6
+ from .exceptions import EmptyExpressionError
7
+ from .exceptions import LexerError
8
+
9
+
10
class Lexer:
    """Tokenizer for JMESPath-style expressions.

    ``tokenize`` is a generator that yields token dicts of the form
    ``{'type': ..., 'value': ..., 'start': ..., 'end': ...}``, where ``start``
    and ``end`` are character offsets into the source expression. The stream
    is always terminated by an ``eof`` token.
    """

    # Characters that may begin an unquoted identifier.
    START_IDENTIFIER: ta.AbstractSet[str] = set(string.ascii_letters + '_')
    # Characters that may appear after the first character of an identifier.
    VALID_IDENTIFIER: ta.AbstractSet[str] = set(string.ascii_letters + string.digits + '_')

    VALID_NUMBER: ta.AbstractSet[str] = set(string.digits)

    WHITESPACE: ta.AbstractSet[str] = set(' \t\n\r')

    # Single characters that map one-to-one onto a token type.
    SIMPLE_TOKENS: ta.Mapping[str, str] = {
        '.': 'dot',
        '*': 'star',
        ']': 'rbracket',
        ',': 'comma',
        ':': 'colon',
        '@': 'current',
        '(': 'lparen',
        ')': 'rparen',
        '{': 'lbrace',
        '}': 'rbrace',
        '+': 'plus',
        '%': 'modulo',
        '\u2212': 'minus',     # unicode minus sign
        '\u00d7': 'multiply',  # unicode multiplication sign
        '\u00f7': 'divide',    # unicode division sign
    }

    def __init__(self) -> None:
        # When enabled, invalid JSON inside backtick literals is retried as a
        # quoted JSON string (deprecated behavior, see _consume_literal).
        self._enable_legacy_literals = False

    def tokenize(self, expression, options=None):
        """Yield token dicts for ``expression``.

        Args:
            expression: the expression string to tokenize.
            options: optional object whose ``enable_legacy_literals`` attribute
                toggles the deprecated string-literal fallback.

        Raises:
            EmptyExpressionError: if ``expression`` is falsy.
            LexerError: on any unrecognized or malformed token.
        """

        if options is not None:
            self._enable_legacy_literals = options.enable_legacy_literals

        self._initialize_for_expression(expression)
        while self._current is not None:
            if self._current in self.SIMPLE_TOKENS:
                yield {
                    'type': self.SIMPLE_TOKENS[self._current],
                    'value': self._current,
                    'start': self._position,
                    'end': self._position + 1,
                }
                self._next()

            elif self._current in self.START_IDENTIFIER:
                start = self._position

                buff = self._current
                while self._next() in self.VALID_IDENTIFIER:
                    buff += self._current

                yield {
                    'type': 'unquoted_identifier',
                    'value': buff,
                    'start': start,
                    'end': start + len(buff),
                }

            elif self._current in self.WHITESPACE:
                self._next()

            elif self._current == '[':
                start = self._position

                # '[]' is flatten, '[?' opens a filter, a bare '[' an index.
                next_char = self._next()
                if next_char == ']':
                    self._next()
                    yield {
                        'type': 'flatten',
                        'value': '[]',
                        'start': start,
                        'end': start + 2,
                    }

                elif next_char == '?':
                    self._next()
                    yield {
                        'type': 'filter',
                        'value': '[?',
                        'start': start,
                        'end': start + 2,
                    }

                else:
                    yield {
                        'type': 'lbracket',
                        'value': '[',
                        'start': start,
                        'end': start + 1,
                    }

            elif self._current == "'":
                yield self._consume_raw_string_literal()

            elif self._current == '|':
                yield self._match_or_else('|', 'or', 'pipe')

            elif self._current == '&':
                yield self._match_or_else('&', 'and', 'expref')

            elif self._current == '`':
                yield self._consume_literal()

            elif self._current in self.VALID_NUMBER:
                start = self._position

                buff = self._consume_number()
                yield {
                    'type': 'number',
                    'value': int(buff),
                    'start': start,
                    'end': start + len(buff),
                }

            elif self._current == '-':
                if not self._peek_is_next_digit():
                    self._next()
                    yield {
                        'type': 'minus',
                        'value': '-',
                        'start': self._position - 1,
                        'end': self._position,
                    }
                else:
                    # Negative number.
                    start = self._position
                    buff = self._consume_number()
                    if len(buff) > 1:
                        yield {
                            'type': 'number',
                            'value': int(buff),
                            'start': start,
                            'end': start + len(buff),
                        }
                    else:
                        # A lone '-' that _peek_is_next_digit misjudged.
                        raise LexerError(
                            lexer_position=start,
                            lexer_value=buff,
                            message=f"Unknown token '{buff}'")

            elif self._current == '/':
                self._next()
                if self._current == '/':
                    self._next()
                    # NOTE(review): start/end here span only the second '/' of
                    # '//' — confirm whether token positions matter to callers.
                    yield {
                        'type': 'div',
                        'value': '//',
                        'start': self._position - 1,
                        'end': self._position,
                    }
                else:
                    yield {
                        'type': 'divide',
                        'value': '/',
                        'start': self._position,
                        'end': self._position + 1,
                    }

            elif self._current == '"':
                yield self._consume_quoted_identifier()

            elif self._current == '<':
                yield self._match_or_else('=', 'lte', 'lt')

            elif self._current == '>':
                yield self._match_or_else('=', 'gte', 'gt')

            elif self._current == '!':
                yield self._match_or_else('=', 'ne', 'not')

            elif self._current == '=':
                yield self._match_or_else('=', 'eq', 'assign')

            elif self._current == '$':
                # '$name' is a variable reference; a bare '$' is the root node.
                if self._peek_may_be_valid_unquoted_identifier():
                    yield self._consume_variable()
                else:
                    yield {
                        'type': 'root',
                        'value': self._current,
                        'start': self._position,
                        'end': self._position + 1,
                    }
                    self._next()
            else:
                # Fixed: message previously contained a stray ')' after the
                # token; now matches the quoting of the other error messages.
                raise LexerError(
                    lexer_position=self._position,
                    lexer_value=self._current,
                    message=f"Unknown token '{self._current}'",
                )

        # Always terminate the stream with an eof token.
        yield {
            'type': 'eof',
            'value': '',
            'start': self._length,
            'end': self._length,
        }

    def _consume_number(self):
        """Consume a run of digits beginning at the current char (which may be '-')."""

        buff = self._current
        while self._next() in self.VALID_NUMBER:
            buff += self._current
        return buff

    def _consume_variable(self):
        """Consume a '$name' variable reference; the current char must be '$'."""

        start = self._position

        buff = self._current
        self._next()
        if self._current not in self.START_IDENTIFIER:
            raise LexerError(
                lexer_position=start,
                lexer_value=self._current,
                message=f'Invalid variable starting character {self._current}',
            )

        buff += self._current
        while self._next() in self.VALID_IDENTIFIER:
            buff += self._current

        return {
            'type': 'variable',
            'value': buff,
            'start': start,
            'end': start + len(buff),
        }

    def _peek_may_be_valid_unquoted_identifier(self):
        # True if the char after the current one may start an identifier.
        if (self._position == self._length - 1):
            return False
        else:
            nxt = self._chars[self._position + 1]
            return nxt in self.START_IDENTIFIER

    def _peek_is_next_digit(self):
        # True if the char after the current one is a digit.
        if (self._position == self._length - 1):
            return False
        else:
            nxt = self._chars[self._position + 1]
            return nxt in self.VALID_NUMBER

    def _initialize_for_expression(self, expression):
        # Reset all cursor state for a fresh expression.
        if not expression:
            raise EmptyExpressionError
        self._position = 0
        self._expression = expression
        self._chars = list(self._expression)
        self._current = self._chars[self._position]
        self._length = len(self._expression)

    def _next(self):
        # Advance one char; sets and returns None once input is exhausted.
        if self._position == self._length - 1:
            self._current = None
        else:
            self._position += 1
            self._current = self._chars[self._position]
        return self._current

    def _consume_until(self, delimiter):
        # Consume until the delimiter is reached, allowing for the delimiter to be escaped with "\".
        start = self._position

        buff = ''
        self._next()
        while self._current != delimiter:
            if self._current == '\\':
                buff += '\\'
                self._next()

            if self._current is None:
                # We're at the EOF.
                raise LexerError(
                    lexer_position=start,
                    lexer_value=self._expression[start:],
                    message=f'Unclosed {delimiter} delimiter',
                )

            buff += self._current
            self._next()

        # Skip the closing delimiter.
        self._next()
        return buff

    def _consume_literal(self):
        """Consume a backtick-delimited JSON literal into a 'literal' token."""

        start = self._position

        token = self._consume_until('`')
        lexeme = token.replace('\\`', '`')
        parsed_json = None
        try:
            # Assume it is valid JSON and attempt to parse.
            parsed_json = json.loads(lexeme)
        except ValueError:
            # Fixed: message previously contained a leftover '%s ' placeholder
            # inside the f-string, which was emitted verbatim to the user.
            error = LexerError(
                lexer_position=start,
                lexer_value=self._expression[start:],
                message=f'Bad token `{token}`',
            )

            if not self._enable_legacy_literals:
                raise error  # noqa

            try:
                # Invalid JSON values should be converted to quoted JSON strings during the JEP-12 deprecation period.
                parsed_json = json.loads('"%s"' % lexeme.lstrip())  # noqa
                warnings.warn('deprecated string literal syntax', DeprecationWarning)
            except ValueError:
                raise LexerError(  # noqa
                    lexer_position=start,
                    lexer_value=self._expression[start:],
                    message=f'Bad token {lexeme}',
                )

        # NOTE(review): 'end' is set to the token *length*, not an absolute
        # offset, mirroring the upstream lexer — confirm before changing.
        token_len = self._position - start
        return {
            'type': 'literal',
            'value': parsed_json,
            'start': start,
            'end': token_len,
        }

    def _consume_quoted_identifier(self):
        """Consume a double-quoted identifier; its value is JSON-decoded."""

        start = self._position

        lexeme = '"' + self._consume_until('"') + '"'
        try:
            # NOTE(review): as in _consume_literal, 'end' is a length rather
            # than an absolute offset — kept for upstream compatibility.
            token_len = self._position - start
            return {
                'type': 'quoted_identifier',
                'value': json.loads(lexeme),
                'start': start,
                'end': token_len,
            }

        except ValueError as e:
            error_message = str(e).split(':')[0]
            raise LexerError(  # noqa
                lexer_position=start,
                lexer_value=lexeme,
                message=error_message,
            )

    def _consume_raw_string_literal(self):
        """Consume a single-quoted raw string literal into a 'literal' token."""

        start = self._position

        # Only \' and \\ are unescaped; all other escapes stay verbatim.
        lexeme = self._consume_until("'") \
            .replace("\\'", "'") \
            .replace('\\\\', '\\')

        token_len = self._position - start
        return {
            'type': 'literal',
            'value': lexeme,
            'start': start,
            'end': token_len,
        }

    def _match_or_else(self, expected, match_type, else_type):
        """Two-char lookahead: 'x' + expected -> match_type, else else_type."""

        start = self._position

        current = self._current
        next_char = self._next()
        if next_char == expected:
            self._next()
            # NOTE(review): 'end' offsets here (start+1 / start) look one
            # short, but mirror the upstream lexer — kept for compatibility.
            return {
                'type': match_type,
                'value': current + next_char,
                'start': start,
                'end': start + 1,
            }

        return {
            'type': else_type,
            'value': current,
            'start': start,
            'end': start,
        }