scylla-cqlsh 6.0.30__cp314-cp314-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cqlshlib/pylexotron.py ADDED
@@ -0,0 +1,562 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Pylexotron uses Python's re.Scanner class (via SaferScanner) as a simple regex-based tokenizer for BNF production rules."""
+
+ import re
+ import inspect
+ import sys
+ from typing import Union
+
+ from cqlshlib.saferscanner import SaferScanner
+
+
+ class LexingError(Exception):
+
+     @classmethod
+     def from_text(cls, rulestr, unmatched, msg='Lexing error'):
+         # locate the first unlexable character, then raise with a line/column
+         # position and a snippet of the offending text
+         bad_char = len(rulestr) - len(unmatched)
+         linenum = rulestr[:bad_char].count('\n') + 1
+         charnum = len(rulestr[:bad_char].rsplit('\n', 1)[-1]) + 1
+         snippet_start = max(0, min(len(rulestr), bad_char - 10))
+         snippet_end = max(0, min(len(rulestr), bad_char + 10))
+         msg += " (Error at: '...%s...')" % (rulestr[snippet_start:snippet_end],)
+         raise cls(linenum, charnum, msg)
+
+     def __init__(self, linenum, charnum, msg='Lexing error'):
+         self.linenum = linenum
+         self.charnum = charnum
+         self.msg = msg
+         self.args = (linenum, charnum, msg)
+
+     def __str__(self):
+         return '%s at line %d, char %d' % (self.msg, self.linenum, self.charnum)
+
+
+ # A Hint wraps placeholder completion text (e.g. '<identifier>'), as produced
+ # by RegexRule below; it can be told apart from literal string completions
+ # via is_hint().
+ class Hint:
+
+     def __init__(self, text):
+         self.text = text
+
+     def __hash__(self):
+         return hash((id(self.__class__), self.text))
+
+     def __eq__(self, other):
+         return isinstance(other, self.__class__) and other.text == self.text
+
+     def __repr__(self):
+         return '%s(%r)' % (self.__class__, self.text)
+
+
+ def is_hint(obj):
+     return isinstance(obj, Hint)
+
+
+ class ParseContext:
+     """
+     These are meant to be immutable, although it would be something of a
+     pain to enforce that in Python.
+     """
+
+     def __init__(self, ruleset, bindings, matched, remainder, productionname):
+         self.ruleset = ruleset
+         self.bindings = bindings
+         self.matched = matched
+         self.remainder = remainder
+         self.productionname = productionname
+
+     def get_production_by_name(self, name):
+         return self.ruleset[name]
+
+     def get_completer(self, symname):
+         return self.ruleset[(self.productionname, symname)]
+
+     def get_binding(self, name, default=None):
+         return self.bindings.get(name, default)
+
+     def with_binding(self, name, val):
+         newbinds = self.bindings.copy()
+         newbinds[name] = val
+         return self.__class__(self.ruleset, newbinds, self.matched,
+                               self.remainder, self.productionname)
+
+     def with_match(self, num):
+         return self.__class__(self.ruleset, self.bindings,
+                               self.matched + self.remainder[:num],
+                               self.remainder[num:], self.productionname)
+
+     def with_production_named(self, newname):
+         return self.__class__(self.ruleset, self.bindings, self.matched,
+                               self.remainder, newname)
+
+     def extract_orig(self, tokens=None):
+         if tokens is None:
+             tokens = self.matched
+         if not tokens:
+             return ''
+         orig = self.bindings.get('*SRC*', None)
+         if orig is None:
+             # pretty much just guess
+             return ' '.join([t[1] for t in tokens])
+         # low end of span for first token, to high end of span for last token
+         orig_text = orig[tokens[0][2][0]:tokens[-1][2][1]]
+         return orig_text
+
+     def __repr__(self):
+         return '<%s matched=%r remainder=%r prodname=%r bindings=%r>' \
+             % (self.__class__.__name__, self.matched, self.remainder, self.productionname, self.bindings)
+
+
+ # Base class for all pattern elements of a production rule. match() takes a
+ # ParseContext and a mutable completions set (or None) and returns the list
+ # of contexts still viable after consuming input.
+ class Matcher:
+
+     def __init__(self, arg):
+         self.arg = arg
+
+     def match(self, ctxt, completions):
+         raise NotImplementedError
+
+     def match_with_results(self, ctxt, completions):
+         # like match(), but pairs each resulting context with the tokens it consumed
+         matched_before = len(ctxt.matched)
+         newctxts = self.match(ctxt, completions)
+         return [(newctxt, newctxt.matched[matched_before:]) for newctxt in newctxts]
+
+     @staticmethod
+     def try_registered_completion(ctxt, symname, completions):
+         """Try a completer registered for symname; return True if it supplied
+         the completions."""
+         debugging = ctxt.get_binding('*DEBUG*', False)
+         if ctxt.remainder or completions is None:
+             return False
+         try:
+             completer = ctxt.get_completer(symname)
+         except KeyError:
+             return False
+         if debugging:
+             print("Trying completer %r with %r" % (completer, ctxt))
+         try:
+             new_compls = completer(ctxt)
+         except Exception:
+             if debugging:
+                 import traceback
+                 traceback.print_exc()
+             return False
+         if debugging:
+             print("got %r" % (new_compls,))
+         completions.update(new_compls)
+         return True
+
+     def __repr__(self):
+         return '%s(%r)' % (self.__class__.__name__, self.arg)
+
+
+ class Choice(Matcher):
+     """Matches if any one of its alternatives matches ('a | b')."""
+
+     def match(self, ctxt, completions):
+         foundctxts = []
+         for each in self.arg:
+             subctxts = each.match(ctxt, completions)
+             foundctxts.extend(subctxts)
+         return foundctxts
+
+
+ class OneOrNone(Matcher):
+     """Matches its argument zero times or once ('x?')."""
+
+     def match(self, ctxt, completions):
+         return [ctxt] + list(self.arg.match(ctxt, completions))
+
+
+ class Repeat(Matcher):
+     """Matches its argument zero or more times ('x*')."""
+
+     def match(self, ctxt, completions):
+         found = [ctxt]
+         ctxts = [ctxt]
+         while True:
+             new_ctxts = []
+             for each in ctxts:
+                 new_ctxts.extend(self.arg.match(each, completions))
+             if not new_ctxts:
+                 return found
+             found.extend(new_ctxts)
+             ctxts = new_ctxts
+
+
+ class RuleReference(Matcher):
+     """Matches a named production from the ruleset ('<rulename>')."""
+
+     def match(self, ctxt, completions):
+         prevname = ctxt.productionname
+         try:
+             rule = ctxt.get_production_by_name(self.arg)
+         except KeyError:
+             raise ValueError("Can't look up production rule named %r" % (self.arg,))
+         output = rule.match(ctxt.with_production_named(self.arg), completions)
+         return [c.with_production_named(prevname) for c in output]
+
+
+ class RuleSeries(Matcher):
+     """Matches a sequence of pattern pieces, in order."""
+
+     def match(self, ctxt, completions):
+         ctxts = [ctxt]
+         for patpiece in self.arg:
+             new_ctxts = []
+             for each in ctxts:
+                 new_ctxts.extend(patpiece.match(each, completions))
+             if not new_ctxts:
+                 return ()
+             ctxts = new_ctxts
+         return ctxts
+
+
+ class NamedSymbol(Matcher):
+     """Matches its argument and binds the matched text under a name ('name=x')."""
+
+     def __init__(self, name, arg):
+         Matcher.__init__(self, arg)
+         self.name = name
+
+     def match(self, ctxt, completions):
+         pass_in_compls = completions
+         if self.try_registered_completion(ctxt, self.name, completions):
+             # don't collect other completions under this; use a dummy
+             pass_in_compls = set()
+         results = self.arg.match_with_results(ctxt, pass_in_compls)
+         return [c.with_binding(self.name, ctxt.extract_orig(matchtoks))
+                 for (c, matchtoks) in results]
+
+     def __repr__(self):
+         return '%s(%r, %r)' % (self.__class__.__name__, self.name, self.arg)
+
+
+ class NamedCollector(NamedSymbol):
+     """Like NamedSymbol, but accumulates repeated matches into a tuple ('[name]=x')."""
+
+     def match(self, ctxt, completions):
+         pass_in_compls = completions
+         if self.try_registered_completion(ctxt, self.name, completions):
+             # don't collect other completions under this; use a dummy
+             pass_in_compls = set()
+         output = []
+         for ctxt, matchtoks in self.arg.match_with_results(ctxt, pass_in_compls):
+             oldval = ctxt.get_binding(self.name, ())
+             output.append(ctxt.with_binding(self.name, oldval + (ctxt.extract_orig(matchtoks),)))
+         return output
+
+
+ class TerminalMatcher(Matcher):
+     """A matcher that consumes a single token; its pattern() supplies the
+     regex for the lexer built by ParsingRuleSet.make_lexer()."""
+
+     def match(self, ctxt, completions):
+         raise NotImplementedError
+
+     def pattern(self):
+         raise NotImplementedError
+
+
+ class RegexRule(TerminalMatcher):
+
+     def __init__(self, pat):
+         TerminalMatcher.__init__(self, pat)
+         self.regex = pat
+         self.re = re.compile(pat + '$', re.IGNORECASE | re.DOTALL)
+
+     def match(self, ctxt, completions):
+         if ctxt.remainder:
+             if self.re.match(ctxt.remainder[0][1]):
+                 return [ctxt.with_match(1)]
+         elif completions is not None:
+             # no literal completion to offer; show a placeholder hint instead
+             completions.add(Hint('<%s>' % ctxt.productionname))
+         return []
+
+     def pattern(self):
+         return self.regex
+
+
+ class TextMatch(TerminalMatcher):
+     """Case-insensitive match against a quoted literal from the grammar."""
+     alpha_re = re.compile(r'[a-zA-Z]')
+
+     def __init__(self, text):
+         try:
+             # text arrives still quoted (e.g. '"SELECT"'); eval() unquotes it
+             TerminalMatcher.__init__(self, eval(text))
+         except SyntaxError:
+             print("bad syntax %r" % (text,))
+
+     def match(self, ctxt, completions):
+         if ctxt.remainder:
+             if self.arg.lower() == ctxt.remainder[0][1].lower():
+                 return [ctxt.with_match(1)]
+         elif completions is not None:
+             completions.add(self.arg)
+         return []
+
+     def pattern(self):
+         # can't use (?i) here; Scanner component regex flags won't be applied
+         def ignorecaseify(matchobj):
+             val = matchobj.group(0)
+             return '[%s%s]' % (val.upper(), val.lower())
+
+         return self.alpha_re.sub(ignorecaseify, re.escape(self.arg))
+
+
+ class CaseMatch(TextMatch):
+     """Case-sensitive variant of TextMatch ('@"literal"' in the grammar)."""
+
+     def match(self, ctxt, completions):
+         if ctxt.remainder:
+             if self.arg == ctxt.remainder[0][1]:
+                 return [ctxt.with_match(1)]
+         elif completions is not None:
+             completions.add(self.arg)
+         return []
+
+     def pattern(self):
+         return re.escape(self.arg)
+
+
+ class WordMatch(TextMatch):
+     """TextMatch bounded by \\b so it only matches whole words."""
+
+     def pattern(self):
+         return r'\b' + TextMatch.pattern(self) + r'\b'
+
+
+ class CaseWordMatch(CaseMatch):
+
+     def pattern(self):
+         return r'\b' + CaseMatch.pattern(self) + r'\b'
+
+
+ class TerminalTypeMatcher(Matcher):
+     """Matches a single token by the token type the lexer assigned to it."""
+
+     def __init__(self, tokentype, submatcher):
+         Matcher.__init__(self, tokentype)
+         self.tokentype = tokentype
+         self.submatcher = submatcher
+
+     def match(self, ctxt, completions):
+         if ctxt.remainder:
+             if ctxt.remainder[0][0] == self.tokentype:
+                 return [ctxt.with_match(1)]
+         elif completions is not None:
+             self.submatcher.match(ctxt, completions)
+         return []
+
+     def __repr__(self):
+         return '%s(%r, %r)' % (self.__class__.__name__, self.tokentype, self.submatcher)
+
+
+ class ParsingRuleSet:
+     """Parses the BNF rule definitions used by cql3handling.syntax_rules. Backus-Naur Form consists of:
+     - production rules of the form Left-Hand-Side ::= Right-Hand-Side, where the LHS is a non-terminal;
+     - non-terminal symbols, which reference other productions;
+     - terminal symbols, each of which is a single token.
+     """
+
+     RuleSpecScanner = SaferScanner([
+         (r'::=', lambda s, t: t),  # BNF rule definition
+         (r'\[[a-z0-9_]+\]=', lambda s, t: ('named_collector', t[1:-2])),
+         (r'[a-z0-9_]+=', lambda s, t: ('named_symbol', t[:-1])),
+         (r'/(\[\^?.[^]]*\]|[^/]|\\.)*/', lambda s, t: ('regex', t[1:-1].replace(r'\/', '/'))),
+         (r'"([^"]|\\.)*"', lambda s, t: ('string_literal', t)),
+         (r'<[^>]*>', lambda s, t: ('reference', t[1:-1])),
+         (r'\bJUNK\b', lambda s, t: ('junk', t)),
+         (r'[@()|?*;]', lambda s, t: t),
+         (r'\s+', None),  # whitespace
+         (r'#[^\n]*', None),  # comment to end of line
+     ], re.IGNORECASE | re.DOTALL | re.UNICODE)
+
+     def __init__(self):
+         self.ruleset = {}
+         self.scanner = None
+         self.terminals = []
+
+     @classmethod
+     def from_rule_defs(cls, rule_defs):
+         prs = cls()
+         prs.ruleset, prs.terminals = cls.parse_rules(rule_defs)
+         return prs
+
+     @classmethod
+     def parse_rules(cls, rulestr):
+         tokens, unmatched = cls.RuleSpecScanner.scan(rulestr)
+         if unmatched:
+             raise LexingError.from_text(rulestr, unmatched, msg="Syntax rules are unparsable")
+         rules = {}
+         terminals = []
+         tokeniter = iter(tokens)
+         for t in tokeniter:
+             if isinstance(t, tuple) and t[0] in ('reference', 'junk'):
+                 assign = next(tokeniter)
+                 if assign != '::=':
+                     raise ValueError('Unexpected token %r; expected "::="' % (assign,))
+                 name = t[1]
+                 production = cls.read_rule_tokens_until(';', tokeniter)
+                 if isinstance(production, TerminalMatcher):
+                     # a rule whose whole production is a terminal becomes a
+                     # token type for the lexer built in make_lexer()
+                     terminals.append((name, production))
+                     production = TerminalTypeMatcher(name, production)
+                 rules[name] = production
+             else:
+                 raise ValueError('Unexpected token %r; expected name' % (t,))
+         return rules, terminals
+
+     @staticmethod
+     def mkrule(pieces):
+         if isinstance(pieces, (tuple, list)):
+             if len(pieces) == 1:
+                 return pieces[0]
+             return RuleSeries(pieces)
+         return pieces
+
+     @classmethod
+     def read_rule_tokens_until(cls, endtoks: Union[str, int], tokeniter):
+         """Consume tokens from tokeniter and build a matcher tree. endtoks is
+         either the closing token(s) to stop at, or an int giving the number of
+         top-level tokens to consume."""
+         if isinstance(endtoks, str):
+             endtoks = (endtoks,)
+         counttarget = None
+         if isinstance(endtoks, int):
+             counttarget = endtoks
+             endtoks = ()
+         countsofar = 0
+         myrules = []
+         mybranches = [myrules]
+         for t in tokeniter:
+             countsofar += 1
+             if t in endtoks:
+                 if len(mybranches) == 1:
+                     return cls.mkrule(mybranches[0])
+                 return Choice(list(map(cls.mkrule, mybranches)))
+             if isinstance(t, tuple):
+                 if t[0] == 'reference':
+                     t = RuleReference(t[1])
+                 elif t[0] == 'string_literal':
+                     if t[1][1].isalnum() or t[1][1] == '_':
+                         t = WordMatch(t[1])
+                     else:
+                         t = TextMatch(t[1])
+                 elif t[0] == 'regex':
+                     t = RegexRule(t[1])
+                 elif t[0] == 'named_collector':
+                     t = NamedCollector(t[1], cls.read_rule_tokens_until(1, tokeniter))
+                 elif t[0] == 'named_symbol':
+                     t = NamedSymbol(t[1], cls.read_rule_tokens_until(1, tokeniter))
+             elif t == '(':
+                 t = cls.read_rule_tokens_until(')', tokeniter)
+             elif t == '?':
+                 t = OneOrNone(myrules.pop(-1))
+             elif t == '*':
+                 t = Repeat(myrules.pop(-1))
+             elif t == '@':
+                 val = next(tokeniter)
+                 if not isinstance(val, tuple) or val[0] != 'string_literal':
+                     raise ValueError("Unexpected token %r following '@'" % (val,))
+                 t = CaseMatch(val[1])
+             elif t == '|':
+                 myrules = []
+                 mybranches.append(myrules)
+                 continue
+             else:
+                 raise ValueError('Unparseable rule token %r after %r' % (t, myrules[-1]))
+             myrules.append(t)
+             if countsofar == counttarget:
+                 if len(mybranches) == 1:
+                     return cls.mkrule(mybranches[0])
+                 return Choice(list(map(cls.mkrule, mybranches)))
+         raise ValueError('Unexpected end of rule tokens')
+
+     def append_rules(self, rulestr):
+         rules, terminals = self.parse_rules(rulestr)
+         self.ruleset.update(rules)
+         self.terminals.extend(terminals)
+         if terminals:
+             self.scanner = None  # recreate it if/when necessary
+
+     def register_completer(self, func, rulename, symname):
+         self.ruleset[(rulename, symname)] = func
+
+     def make_lexer(self):
+         def make_handler(name):
+             if name == 'JUNK':
+                 # JUNK tokens (e.g. whitespace) are matched but dropped
+                 return None
+             return lambda s, t: (name, t, s.match.span())
+
+         regexes = [(p.pattern(), make_handler(name)) for (name, p) in self.terminals]
+         return SaferScanner(regexes, re.IGNORECASE | re.DOTALL | re.UNICODE).scan
+
+     def lex(self, text):
+         if self.scanner is None:
+             self.scanner = self.make_lexer()
+         tokens, unmatched = self.scanner(text)
+         if unmatched:
+             raise LexingError.from_text(text, unmatched, 'text could not be lexed')
+         return tokens
+
+     def parse(self, startsymbol, tokens, init_bindings=None):
+         if init_bindings is None:
+             init_bindings = {}
+         ctxt = ParseContext(self.ruleset, init_bindings, (), tuple(tokens), startsymbol)
+         pattern = self.ruleset[startsymbol]
+         return pattern.match(ctxt, None)
+
+     def whole_match(self, startsymbol, tokens, srcstr=None):
+         bindings = {}
+         if srcstr is not None:
+             bindings['*SRC*'] = srcstr
+         for val in self.parse(startsymbol, tokens, init_bindings=bindings):
+             if not val.remainder:
+                 return val
+
+     def lex_and_parse(self, text, startsymbol='Start'):
+         return self.parse(startsymbol, self.lex(text), init_bindings={'*SRC*': text})
+
+     def lex_and_whole_match(self, text, startsymbol='Start'):
+         tokens = self.lex(text)
+         return self.whole_match(startsymbol, tokens, srcstr=text)
+
+     def complete(self, startsymbol, tokens, init_bindings=None):
+         if init_bindings is None:
+             init_bindings = {}
+         ctxt = ParseContext(self.ruleset, init_bindings, (), tuple(tokens), startsymbol)
+         pattern = self.ruleset[startsymbol]
+         if init_bindings.get('*DEBUG*', False):
+             completions = Debugotron(stream=sys.stderr)
+         else:
+             completions = set()
+         pattern.match(ctxt, completions)
+         return completions
+
+
+ class Debugotron(set):
+     """A completions set that prints a short stack trace each time something
+     is added; used by complete() when the '*DEBUG*' binding is set."""
+     depth = 10
+
+     def __init__(self, initializer=(), stream=sys.stdout):
+         set.__init__(self, initializer)
+         self.stream = stream
+
+     def add(self, item):
+         self._note_addition(item)
+         set.add(self, item)
+
+     def _note_addition(self, item):
+         self.stream.write("\nitem %r added by:\n" % (item,))
+         frame = inspect.currentframe().f_back.f_back
+         for i in range(self.depth):
+             name = frame.f_code.co_name
+             filename = frame.f_code.co_filename
+             lineno = frame.f_lineno
+             if 'self' in frame.f_locals:
+                 clsobj = frame.f_locals['self']
+                 line = '%s.%s() (%s:%d)' % (clsobj, name, filename, lineno)
+             else:
+                 line = '%s (%s:%d)' % (name, filename, lineno)
+             self.stream.write('  - %s\n' % (line,))
+             if i == 0 and 'ctxt' in frame.f_locals:
+                 self.stream.write('    - %s\n' % (frame.f_locals['ctxt'],))
+             frame = frame.f_back
+
+     def update(self, items):
+         if items:
+             self._note_addition(items)
+         set.update(self, items)
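
A minimal usage sketch of the machinery above (the grammar, rule names, and inputs are invented for illustration; they are not cqlsh's real cql3handling rules). Terminal rules, whose whole production is a single regex or literal, become the lexer's token types via make_lexer(); a JUNK rule is lexed but dropped, which is how whitespace is skipped; and complete() gathers candidate completions once the token stream is exhausted:

    from cqlshlib.pylexotron import ParsingRuleSet

    # hypothetical toy grammar; <Start> is the default entry point used by
    # lex_and_parse()/lex_and_whole_match()
    rules = r'''
    JUNK ::= /[ \t\r\n]+/ ;
    <identifier> ::= /[a-z][a-z0-9_]*/ ;
    <semicolon> ::= ";" ;
    <Start> ::= "SHOW" what=( "HOST" | "VERSION" ) ";" ;
    '''

    prs = ParsingRuleSet.from_rule_defs(rules)

    # whole-statement parse: returns a ParseContext carrying named-symbol bindings
    ctxt = prs.lex_and_whole_match('SHOW HOST;')
    print(ctxt.get_binding('what'))                 # -> 'HOST'

    # completion: with input exhausted after SHOW, each viable literal adds itself
    print(prs.complete('Start', prs.lex('SHOW ')))  # -> {'HOST', 'VERSION'} (set order varies)

    # a completer registered for (rulename, symname) takes precedence
    prs.register_completer(lambda ctxt: ['HOST'], 'Start', 'what')
    print(prs.complete('Start', prs.lex('SHOW ')))  # -> {'HOST'}
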
cqlshlib/saferscanner.py ADDED
@@ -0,0 +1,91 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # SaferScanner is just like re.Scanner, but it neuters any grouping in the lexicon
+ # regular expressions and throws an error on group references, named groups, or
+ # regex in-pattern flags. Any of those can break correct operation of Scanner.
+
+ import re
+ from sre_constants import BRANCH, SUBPATTERN, GROUPREF, GROUPREF_IGNORE, GROUPREF_EXISTS
+ from sys import version_info
+
+ # Python 3.11 moved the sre internals to re._parser/re._compiler; fall back
+ # to the old module attributes on earlier versions.
+ try:
+     sre_parse = re._parser
+     sre_compile = re._compiler
+ except AttributeError:
+     sre_parse = re.sre_parse
+     sre_compile = re.sre_compile
+
+
+
+ class SaferScannerBase(re.Scanner):
+
+     @classmethod
+     def subpat(cls, phrase, flags):
+         return cls.scrub_sub(sre_parse.parse(phrase, flags), flags)
+
+     @classmethod
+     def scrub_sub(cls, sub, flags):
+         scrubbedsub = []
+         seqtypes = (type(()), type([]))
+         for op, arg in sub.data:
+             if type(arg) in seqtypes:
+                 arg = [cls.scrub_sub(a, flags) if isinstance(a, sre_parse.SubPattern) else a
+                        for a in arg]
+             if op in (BRANCH, SUBPATTERN):
+                 # clear the group id so the (sub)pattern no longer captures
+                 arg = [None] + arg[1:]
+             if op in (GROUPREF, GROUPREF_IGNORE, GROUPREF_EXISTS):
+                 raise ValueError("Group references not allowed in SaferScanner lexicon")
+             scrubbedsub.append((op, arg))
+         if sub.pattern.groupdict:
+             raise ValueError("Named captures not allowed in SaferScanner lexicon")
+         if sub.pattern.flags ^ flags:
+             raise ValueError("RE flag setting not allowed in SaferScanner lexicon (%s)" % (bin(sub.pattern.flags),))
+         return sre_parse.SubPattern(sub.pattern, scrubbedsub)
+
+
+ class Py36SaferScanner(SaferScannerBase):
+
+     def __init__(self, lexicon, flags=0):
+         self.lexicon = lexicon
+         p = []
+         s = sre_parse.Pattern()
+         s.flags = flags
+         for phrase, action in lexicon:
+             # wrap each phrase in its own capture group; re.Scanner.scan()
+             # dispatches on which group matched
+             gid = s.opengroup()
+             p.append(sre_parse.SubPattern(s, [(SUBPATTERN, (gid, 0, 0, sre_parse.parse(phrase, flags))), ]))
+             s.closegroup(gid, p[-1])
+         p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
+         self.p = p
+         self.scanner = sre_compile.compile(p)
+
+
+ class Py38SaferScanner(SaferScannerBase):
+
+     def __init__(self, lexicon, flags=0):
+         self.lexicon = lexicon
+         p = []
+         s = sre_parse.State()  # sre_parse.Pattern was renamed to State in Python 3.8
+         s.flags = flags
+         for phrase, action in lexicon:
+             gid = s.opengroup()
+             p.append(sre_parse.SubPattern(s, [(SUBPATTERN, (gid, 0, 0, sre_parse.parse(phrase, flags))), ]))
+             s.closegroup(gid, p[-1])
+         p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
+         self.p = p
+         self.scanner = sre_compile.compile(p)
+
+
+ SaferScanner = Py38SaferScanner if version_info >= (3, 8) else Py36SaferScanner
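
A quick sketch of SaferScanner used the way pylexotron's make_lexer() uses it: as a drop-in for re.Scanner built from (pattern, action) pairs, where a None action discards the match. The lexicon and input below are invented. Note that in the Py36/Py38 constructors shown here the lexicon phrases are parsed directly; the group-scrubbing in subpat()/scrub_sub() applies only where that classmethod is invoked, so lexicon regexes should still avoid capturing constructs:

    import re
    from cqlshlib.saferscanner import SaferScanner

    scan = SaferScanner([
        (r'[a-z][a-z0-9_]*', lambda s, t: ('name', t, s.match.span())),
        (r'[0-9]+', lambda s, t: ('num', t, s.match.span())),
        (r'\s+', None),  # matched but discarded, like pylexotron's JUNK
    ], re.IGNORECASE | re.UNICODE).scan

    tokens, unmatched = scan('Foo 42 bar')
    print(tokens)     # -> [('name', 'Foo', (0, 3)), ('num', '42', (4, 6)), ('name', 'bar', (7, 10))]
    print(unmatched)  # -> '' (any trailing unlexable text would appear here)
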