scylla-cqlsh 6.0.30__cp314-cp314-musllinux_1_2_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- copyutil.cpython-314-aarch64-linux-musl.so +0 -0
- cqlsh/__init__.py +1 -0
- cqlsh/__main__.py +11 -0
- cqlsh/cqlsh.py +2751 -0
- cqlshlib/__init__.py +90 -0
- cqlshlib/_version.py +34 -0
- cqlshlib/authproviderhandling.py +176 -0
- cqlshlib/copyutil.py +2762 -0
- cqlshlib/cql3handling.py +1670 -0
- cqlshlib/cqlhandling.py +333 -0
- cqlshlib/cqlshhandling.py +314 -0
- cqlshlib/displaying.py +128 -0
- cqlshlib/formatting.py +601 -0
- cqlshlib/helptopics.py +190 -0
- cqlshlib/pylexotron.py +562 -0
- cqlshlib/saferscanner.py +91 -0
- cqlshlib/sslhandling.py +109 -0
- cqlshlib/tracing.py +90 -0
- cqlshlib/util.py +183 -0
- cqlshlib/wcwidth.py +379 -0
- scylla_cqlsh-6.0.30.dist-info/METADATA +108 -0
- scylla_cqlsh-6.0.30.dist-info/RECORD +26 -0
- scylla_cqlsh-6.0.30.dist-info/WHEEL +5 -0
- scylla_cqlsh-6.0.30.dist-info/entry_points.txt +2 -0
- scylla_cqlsh-6.0.30.dist-info/licenses/LICENSE.txt +204 -0
- scylla_cqlsh-6.0.30.dist-info/top_level.txt +3 -0
cqlshlib/pylexotron.py
ADDED
|
@@ -0,0 +1,562 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
"""Pylexotron uses Python's re.Scanner module as a simple regex-based tokenizer for BNF production rules"""
|
|
18
|
+
|
|
19
|
+
import re
|
|
20
|
+
import inspect
|
|
21
|
+
import sys
|
|
22
|
+
from typing import Union
|
|
23
|
+
|
|
24
|
+
from cqlshlib.saferscanner import SaferScanner
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class LexingError(Exception):
    """Raised when input text cannot be tokenized; records the error position."""

    @classmethod
    def from_text(cls, rulestr, unmatched, msg='Lexing error'):
        """Locate the failure inside *rulestr* and raise an instance.

        *unmatched* is the unconsumed tail of *rulestr*; the offending
        character sits where the two diverge.  A ~20-character snippet
        around it is appended to *msg*.
        """
        bad_char = len(rulestr) - len(unmatched)
        consumed = rulestr[:bad_char]
        linenum = consumed.count('\n') + 1
        charnum = len(consumed.rsplit('\n', 1)[-1]) + 1
        lo = max(0, min(len(rulestr), bad_char - 10))
        hi = max(0, min(len(rulestr), bad_char + 10))
        msg += " (Error at: '...%s...')" % (rulestr[lo:hi],)
        raise cls(linenum, charnum, msg)

    def __init__(self, linenum, charnum, msg='Lexing error'):
        self.linenum = linenum
        self.charnum = charnum
        self.msg = msg
        # Populate args explicitly so pickling/repr carry the position too.
        self.args = (linenum, charnum, msg)

    def __str__(self):
        return '%s at line %d, char %d' % (self.msg, self.linenum, self.charnum)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Hint:
    """A completion hint: placeholder text shown to the user (e.g. '<identifier>')
    rather than a literal candidate string."""

    def __init__(self, text):
        self.text = text

    def __hash__(self):
        # Include id(self.__class__) so Hint subclasses with the same text
        # hash differently, mirroring __eq__'s isinstance check.
        return hash((id(self.__class__), self.text))

    def __eq__(self, other):
        return isinstance(other, self.__class__) and other.text == self.text

    def __repr__(self):
        # Fixed: use the class *name*, not the class object, so this renders
        # as "Hint('x')" instead of "<class '...Hint'>('x')".
        return '%s(%r)' % (self.__class__.__name__, self.text)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def is_hint(obj):
    """Return True when *obj* is a completion Hint (placeholder text) rather
    than a literal completion string."""
    return isinstance(obj, Hint)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class ParseContext:
    """Immutable-by-convention snapshot of a parse in progress.

    Holds the rule set, the name bindings accumulated so far, the tokens
    already matched, the tokens still to consume, and the name of the
    production currently being matched.  "Mutating" methods return a new
    instance instead of changing this one.
    """

    def __init__(self, ruleset, bindings, matched, remainder, productionname):
        self.ruleset = ruleset
        self.bindings = bindings
        self.matched = matched
        self.remainder = remainder
        self.productionname = productionname

    def _clone(self, **overrides):
        # Internal helper: copy this context with selected fields replaced.
        fields = {'ruleset': self.ruleset, 'bindings': self.bindings,
                  'matched': self.matched, 'remainder': self.remainder,
                  'productionname': self.productionname}
        fields.update(overrides)
        return self.__class__(fields['ruleset'], fields['bindings'],
                              fields['matched'], fields['remainder'],
                              fields['productionname'])

    def get_production_by_name(self, name):
        """Look up a production rule; raises KeyError when *name* is unknown."""
        return self.ruleset[name]

    def get_completer(self, symname):
        """Look up the completer registered for (current production, symname)."""
        return self.ruleset[(self.productionname, symname)]

    def get_binding(self, name, default=None):
        """Return the value bound to *name*, or *default* when unbound."""
        return self.bindings.get(name, default)

    def with_binding(self, name, val):
        """Return a copy with *name* bound to *val* (bindings dict is copied)."""
        updated = dict(self.bindings)
        updated[name] = val
        return self._clone(bindings=updated)

    def with_match(self, num):
        """Return a copy with the next *num* remainder tokens moved to matched."""
        return self._clone(matched=self.matched + self.remainder[:num],
                           remainder=self.remainder[num:])

    def with_production_named(self, newname):
        """Return a copy whose current production name is *newname*."""
        return self._clone(productionname=newname)

    def extract_orig(self, tokens=None):
        """Reconstruct the source text covered by *tokens* (default: matched).

        When the '*SRC*' binding holds the original input, slice it from the
        start of the first token's span to the end of the last token's span;
        otherwise approximate by joining token texts with single spaces.
        """
        if tokens is None:
            tokens = self.matched
        if not tokens:
            return ''
        src = self.bindings.get('*SRC*', None)
        if src is None:
            # No original text recorded; pretty much just guess.
            return ' '.join(tok[1] for tok in tokens)
        return src[tokens[0][2][0]:tokens[-1][2][1]]

    def __repr__(self):
        return '<%s matched=%r remainder=%r prodname=%r bindings=%r>' \
            % (self.__class__.__name__, self.matched, self.remainder, self.productionname, self.bindings)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class Matcher:
    """Base class for grammar pattern elements.

    Subclasses implement match(), which takes a ParseContext plus an optional
    completion accumulator and returns a list of contexts — one for every way
    the pattern can consume tokens from the context's remainder.
    """

    def __init__(self, arg):
        self.arg = arg

    def match(self, ctxt, completions):
        raise NotImplementedError

    def match_with_results(self, ctxt, completions):
        """Like match(), but pair each result context with the tokens it consumed."""
        already = len(ctxt.matched)
        return [(result, result.matched[already:])
                for result in self.match(ctxt, completions)]

    @staticmethod
    def try_registered_completion(ctxt, symname, completions):
        """Run the completer registered for *symname*, when one applies.

        Only fires at end of input while completions are being collected.
        Returns True when a completer ran (its candidates are merged into
        *completions*); False otherwise.  Completer failures are swallowed
        (completion is best-effort) unless the '*DEBUG*' binding is set, in
        which case the traceback is printed.
        """
        debugging = ctxt.get_binding('*DEBUG*', False)
        if ctxt.remainder or completions is None:
            return False
        try:
            completer = ctxt.get_completer(symname)
        except KeyError:
            return False
        if debugging:
            print("Trying completer %r with %r" % (completer, ctxt))
        try:
            new_compls = completer(ctxt)
        except Exception:
            # A broken completer must not kill the parse.
            if debugging:
                import traceback
                traceback.print_exc()
            return False
        if debugging:
            print("got %r" % (new_compls,))
        completions.update(new_compls)
        return True

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.arg)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class Choice(Matcher):
    """Matches any one alternative from a list of sub-matchers (BNF '|')."""

    def match(self, ctxt, completions):
        # Every alternative contributes all of its resulting contexts.
        return [result
                for alternative in self.arg
                for result in alternative.match(ctxt, completions)]
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class OneOrNone(Matcher):
    """Matches the sub-pattern zero or one time (BNF '?')."""

    def match(self, ctxt, completions):
        # The unmodified context (zero occurrences) is always a valid outcome,
        # followed by every one-occurrence match.
        return [ctxt, *self.arg.match(ctxt, completions)]
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class Repeat(Matcher):
    """Matches the sub-pattern zero or more times (BNF '*')."""

    def match(self, ctxt, completions):
        # Breadth-first expansion: keep extending every context from the
        # previous round until the sub-pattern yields no new matches.
        # NOTE(review): assumes the sub-pattern always consumes at least one
        # token, otherwise this would not terminate — grammar responsibility.
        accumulated = [ctxt]
        frontier = [ctxt]
        while True:
            extended = []
            for candidate in frontier:
                extended.extend(self.arg.match(candidate, completions))
            if not extended:
                return accumulated
            accumulated.extend(extended)
            frontier = extended
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class RuleReference(Matcher):
    """Matches by delegating to a named production rule (BNF '<name>')."""

    def match(self, ctxt, completions):
        outer_name = ctxt.productionname
        try:
            target = ctxt.get_production_by_name(self.arg)
        except KeyError:
            raise ValueError("Can't look up production rule named %r" % (self.arg,))
        # Match under the referenced production's name, then restore the
        # caller's production name on every resulting context so completer
        # lookups keyed by production still resolve for the caller.
        inner = target.match(ctxt.with_production_named(self.arg), completions)
        return [result.with_production_named(outer_name) for result in inner]
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
class RuleSeries(Matcher):
    """Matches a sequence of sub-patterns, each consuming where the last left off."""

    def match(self, ctxt, completions):
        live = [ctxt]
        for piece in self.arg:
            advanced = []
            for candidate in live:
                advanced.extend(piece.match(candidate, completions))
            if not advanced:
                # One element failed in every branch: the whole series fails.
                return ()
            live = advanced
        return live
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class NamedSymbol(Matcher):
    """Wraps a sub-pattern and binds the text it matches to a name ('name=pat')."""

    def __init__(self, name, arg):
        Matcher.__init__(self, arg)
        self.name = name

    def match(self, ctxt, completions):
        inner_compls = completions
        if self.try_registered_completion(ctxt, self.name, completions):
            # A registered completer supplied candidates; route sub-pattern
            # completions into a throwaway set so they don't mix in.
            inner_compls = set()
        pairs = self.arg.match_with_results(ctxt, inner_compls)
        return [newctxt.with_binding(self.name, ctxt.extract_orig(toks))
                for newctxt, toks in pairs]

    def __repr__(self):
        return '%s(%r, %r)' % (self.__class__.__name__, self.name, self.arg)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
class NamedCollector(NamedSymbol):
    """Like NamedSymbol, but appends every match to a tuple binding ('[name]=pat')."""

    def match(self, ctxt, completions):
        inner_compls = completions
        if self.try_registered_completion(ctxt, self.name, completions):
            # Completer already produced candidates; discard sub-pattern ones.
            inner_compls = set()
        collected = []
        for newctxt, toks in self.arg.match_with_results(ctxt, inner_compls):
            sofar = newctxt.get_binding(self.name, ())
            collected.append(
                newctxt.with_binding(self.name, sofar + (newctxt.extract_orig(toks),)))
        return collected
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
class TerminalMatcher(Matcher):
    """Base for matchers that consume a single token and can emit a lexer regex."""

    def match(self, ctxt, completions):
        raise NotImplementedError

    def pattern(self):
        """Return the regex source used to tokenize this terminal."""
        raise NotImplementedError
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
class RegexRule(TerminalMatcher):
    """Terminal matching any single token whose text matches a regex."""

    def __init__(self, pat):
        TerminalMatcher.__init__(self, pat)
        self.regex = pat
        # Anchor at the end so the entire token text must match.
        self.re = re.compile(pat + '$', re.IGNORECASE | re.DOTALL)

    def match(self, ctxt, completions):
        if not ctxt.remainder:
            if completions is not None:
                # At end of input, offer the production name as a hint.
                completions.add(Hint('<%s>' % ctxt.productionname))
            return []
        if self.re.match(ctxt.remainder[0][1]):
            return [ctxt.with_match(1)]
        return []

    def pattern(self):
        return self.regex
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
class TextMatch(TerminalMatcher):
    """Terminal matching a literal word, case-insensitively.

    The constructor receives the *quoted* source form of the literal (e.g.
    '"select"') and eval()s it to get the actual text; the grammar strings
    are trusted compile-time input, not user data.
    """
    alpha_re = re.compile(r'[a-zA-Z]')

    def __init__(self, text):
        try:
            # NOTE(review): eval of a trusted grammar literal.  On bad syntax
            # the instance is left without .arg (original behavior preserved).
            TerminalMatcher.__init__(self, eval(text))
        except SyntaxError:
            print("bad syntax %r" % (text,))

    def match(self, ctxt, completions):
        if not ctxt.remainder:
            if completions is not None:
                completions.add(self.arg)
            return []
        if ctxt.remainder[0][1].lower() == self.arg.lower():
            return [ctxt.with_match(1)]
        return []

    def pattern(self):
        # can't use (?i) here- Scanner component regex flags won't be applied,
        # so expand each letter into a two-character class instead.
        def both_cases(matchobj):
            ch = matchobj.group(0)
            return '[%s%s]' % (ch.upper(), ch.lower())

        return self.alpha_re.sub(both_cases, re.escape(self.arg))
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
class CaseMatch(TextMatch):
    """Terminal matching a literal exactly, case-sensitively (BNF '@"..."')."""

    def match(self, ctxt, completions):
        if not ctxt.remainder:
            if completions is not None:
                completions.add(self.arg)
            return []
        if ctxt.remainder[0][1] == self.arg:
            return [ctxt.with_match(1)]
        return []

    def pattern(self):
        return re.escape(self.arg)
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
class WordMatch(TextMatch):
    """Case-insensitive literal anchored on word boundaries in the lexer."""

    def pattern(self):
        return r'\b%s\b' % (TextMatch.pattern(self),)
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
class CaseWordMatch(CaseMatch):
    """Case-sensitive literal anchored on word boundaries in the lexer."""

    def pattern(self):
        return r'\b%s\b' % (CaseMatch.pattern(self),)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
class TerminalTypeMatcher(Matcher):
    """Matches any single token of a given lexer token type.

    When completing at end of input, delegates to the underlying terminal
    matcher so it can contribute its hints or literal candidates.
    """

    def __init__(self, tokentype, submatcher):
        Matcher.__init__(self, tokentype)
        self.tokentype = tokentype
        self.submatcher = submatcher

    def match(self, ctxt, completions):
        if not ctxt.remainder:
            if completions is not None:
                self.submatcher.match(ctxt, completions)
            return []
        if ctxt.remainder[0][0] == self.tokentype:
            return [ctxt.with_match(1)]
        return []

    def __repr__(self):
        return '%s(%r, %r)' % (self.__class__.__name__, self.tokentype, self.submatcher)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
class ParsingRuleSet:
    """Define the BNF tokenization rules for cql3handling.syntax_rules. Backus-Naur Form consists of
    - Production rules in the form: Left-Hand-Side ::= Right-Hand-Side. The LHS is a non-terminal.
    - Productions or non-terminal symbols
    - Terminal symbols. Every terminal is a single token.
    """

    # Tokenizer for the rule-definition language itself (not for user input).
    RuleSpecScanner = SaferScanner([
        (r'::=', lambda s, t: t),  # BNF rule definition
        (r'\[[a-z0-9_]+\]=', lambda s, t: ('named_collector', t[1:-2])),
        (r'[a-z0-9_]+=', lambda s, t: ('named_symbol', t[:-1])),
        (r'/(\[\^?.[^]]*\]|[^/]|\\.)*/', lambda s, t: ('regex', t[1:-1].replace(r'\/', '/'))),
        (r'"([^"]|\\.)*"', lambda s, t: ('string_literal', t)),
        (r'<[^>]*>', lambda s, t: ('reference', t[1:-1])),
        (r'\bJUNK\b', lambda s, t: ('junk', t)),
        (r'[@()|?*;]', lambda s, t: t),
        (r'\s+', None),  # whitespace
        (r'#[^\n]*', None),  # comments
    ], re.IGNORECASE | re.DOTALL | re.UNICODE)

    def __init__(self):
        # name -> Matcher, plus (rulename, symname) -> completer entries.
        self.ruleset = {}
        # Lazily-built lexer over the registered terminals; see lex().
        self.scanner = None
        self.terminals = []

    @classmethod
    def from_rule_defs(cls, rule_defs):
        """Alternate constructor: build a rule set by parsing BNF text."""
        prs = cls()
        prs.ruleset, prs.terminals = cls.parse_rules(rule_defs)
        return prs

    @classmethod
    def parse_rules(cls, rulestr):
        """Parse BNF text into ({name: Matcher}, [(name, TerminalMatcher)]).

        Raises LexingError when the rule text cannot be tokenized, and
        ValueError on structurally malformed rules.
        """
        tokens, unmatched = cls.RuleSpecScanner.scan(rulestr)
        if unmatched:
            raise LexingError.from_text(rulestr, unmatched, msg="Syntax rules are unparsable")
        rules = {}
        terminals = []
        tokeniter = iter(tokens)
        for t in tokeniter:
            if isinstance(t, tuple) and t[0] in ('reference', 'junk'):
                assign = next(tokeniter)
                if assign != '::=':
                    raise ValueError('Unexpected token %r; expected "::="' % (assign,))
                name = t[1]
                production = cls.read_rule_tokens_until(';', tokeniter)
                if isinstance(production, TerminalMatcher):
                    # A rule whose whole body is one terminal becomes a lexer
                    # token type; matching then compares token types, not text.
                    terminals.append((name, production))
                    production = TerminalTypeMatcher(name, production)
                rules[name] = production
            else:
                raise ValueError('Unexpected token %r; expected name' % (t,))
        return rules, terminals

    @staticmethod
    def mkrule(pieces):
        """Collapse a list of matchers into one: a lone element, or a RuleSeries."""
        if isinstance(pieces, (tuple, list)):
            if len(pieces) == 1:
                return pieces[0]
            return RuleSeries(pieces)
        return pieces

    @classmethod
    def read_rule_tokens_until(cls, endtoks: Union[str, int], tokeniter):
        """Consume tokens from *tokeniter*, building a Matcher tree.

        *endtoks* is either a terminating token (';' or ')') or an int giving
        an exact number of tokens to consume — used to grab the single pattern
        following 'name=' / '[name]='.  Alternatives separated by '|' become a
        Choice.  Raises ValueError on malformed input or premature exhaustion.
        """
        if isinstance(endtoks, str):
            endtoks = (endtoks,)
        counttarget = None
        if isinstance(endtoks, int):
            counttarget = endtoks
            endtoks = ()
        countsofar = 0
        myrules = []
        mybranches = [myrules]
        for t in tokeniter:
            countsofar += 1
            if t in endtoks:
                if len(mybranches) == 1:
                    return cls.mkrule(mybranches[0])
                return Choice(list(map(cls.mkrule, mybranches)))
            if isinstance(t, tuple):
                if t[0] == 'reference':
                    t = RuleReference(t[1])
                elif t[0] == 'string_literal':
                    # t[1] is the quoted source; t[1][1] is its first real
                    # character.  Word characters get word-boundary anchoring.
                    if t[1][1].isalnum() or t[1][1] == '_':
                        t = WordMatch(t[1])
                    else:
                        t = TextMatch(t[1])
                elif t[0] == 'regex':
                    t = RegexRule(t[1])
                elif t[0] == 'named_collector':
                    t = NamedCollector(t[1], cls.read_rule_tokens_until(1, tokeniter))
                elif t[0] == 'named_symbol':
                    t = NamedSymbol(t[1], cls.read_rule_tokens_until(1, tokeniter))
            elif t == '(':
                t = cls.read_rule_tokens_until(')', tokeniter)
            elif t == '?':
                # Postfix operator: wraps the most recently parsed pattern.
                t = OneOrNone(myrules.pop(-1))
            elif t == '*':
                t = Repeat(myrules.pop(-1))
            elif t == '@':
                # '@' marks the following string literal as case-sensitive.
                val = next(tokeniter)
                if not isinstance(val, tuple) or val[0] != 'string_literal':
                    raise ValueError("Unexpected token %r following '@'" % (val,))
                t = CaseMatch(val[1])
            elif t == '|':
                # Start a new alternative branch.
                myrules = []
                mybranches.append(myrules)
                continue
            else:
                raise ValueError('Unparseable rule token %r after %r' % (t, myrules[-1]))
            myrules.append(t)
            if countsofar == counttarget:
                if len(mybranches) == 1:
                    return cls.mkrule(mybranches[0])
                return Choice(list(map(cls.mkrule, mybranches)))
        raise ValueError('Unexpected end of rule tokens')

    def append_rules(self, rulestr):
        """Parse *rulestr* and merge its rules and terminals into this set."""
        rules, terminals = self.parse_rules(rulestr)
        self.ruleset.update(rules)
        self.terminals.extend(terminals)
        if terminals:
            self.scanner = None  # recreate it if/when necessary

    def register_completer(self, func, rulename, symname):
        """Attach completer *func* for symbol *symname* inside rule *rulename*."""
        self.ruleset[(rulename, symname)] = func

    def make_lexer(self):
        """Build a scanner over the registered terminals.

        Each token is emitted as (name, text, span); JUNK terminals are
        dropped from the token stream.
        """
        def make_handler(name):
            if name == 'JUNK':
                return None
            return lambda s, t: (name, t, s.match.span())

        regexes = [(p.pattern(), make_handler(name)) for (name, p) in self.terminals]
        return SaferScanner(regexes, re.IGNORECASE | re.DOTALL | re.UNICODE).scan

    def lex(self, text):
        """Tokenize *text* with the terminal lexer; raises LexingError on leftovers."""
        if self.scanner is None:
            self.scanner = self.make_lexer()
        tokens, unmatched = self.scanner(text)
        if unmatched:
            raise LexingError.from_text(text, unmatched, 'text could not be lexed')
        return tokens

    def parse(self, startsymbol, tokens, init_bindings=None):
        """Match *tokens* against *startsymbol*; return all result contexts.

        No completion candidates are collected (completions passed as None).
        """
        if init_bindings is None:
            init_bindings = {}
        ctxt = ParseContext(self.ruleset, init_bindings, (), tuple(tokens), startsymbol)
        pattern = self.ruleset[startsymbol]
        return pattern.match(ctxt, None)

    def whole_match(self, startsymbol, tokens, srcstr=None):
        """Return the first parse that consumes every token, or None."""
        bindings = {}
        if srcstr is not None:
            bindings['*SRC*'] = srcstr
        for val in self.parse(startsymbol, tokens, init_bindings=bindings):
            if not val.remainder:
                return val

    def lex_and_parse(self, text, startsymbol='Start'):
        """Convenience wrapper: lex *text*, then parse from *startsymbol*."""
        return self.parse(startsymbol, self.lex(text), init_bindings={'*SRC*': text})

    def lex_and_whole_match(self, text, startsymbol='Start'):
        """Convenience wrapper: lex *text*, then find a whole-input parse."""
        tokens = self.lex(text)
        return self.whole_match(startsymbol, tokens, srcstr=text)

    def complete(self, startsymbol, tokens, init_bindings=None):
        """Collect completion candidates for *tokens* under *startsymbol*.

        With the '*DEBUG*' binding set, a Debugotron traces where each
        candidate was added from.
        """
        if init_bindings is None:
            init_bindings = {}
        ctxt = ParseContext(self.ruleset, init_bindings, (), tuple(tokens), startsymbol)
        pattern = self.ruleset[startsymbol]
        if init_bindings.get('*DEBUG*', False):
            completions = Debugotron(stream=sys.stderr)
        else:
            completions = set()
        pattern.match(ctxt, completions)
        return completions
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
class Debugotron(set):
    """A completions set that reports, with a short stack trace, every addition.

    Substituted for the plain completions set when the '*DEBUG*' binding is
    set, so grammar authors can see which matcher produced each candidate.
    """
    # Number of stack frames to print per addition.
    depth = 10

    def __init__(self, initializer=(), stream=sys.stdout):
        set.__init__(self, initializer)
        self.stream = stream

    def add(self, item):
        self._note_addition(item)
        set.add(self, item)

    def _note_addition(self, item):
        # Walk up the stack, starting two frames up (skipping this method and
        # its add()/update() caller), printing one line per frame.
        self.stream.write("\nitem %r added by:\n" % (item,))
        frame = inspect.currentframe().f_back.f_back
        for i in range(self.depth):
            name = frame.f_code.co_name
            filename = frame.f_code.co_filename
            lineno = frame.f_lineno
            if 'self' in frame.f_locals:
                clsobj = frame.f_locals['self']
                line = '%s.%s() (%s:%d)' % (clsobj, name, filename, lineno)
            else:
                line = '%s (%s:%d)' % (name, filename, lineno)
            self.stream.write(' - %s\n' % (line,))
            # For the immediate caller, also show its parse context if any.
            if i == 0 and 'ctxt' in frame.f_locals:
                self.stream.write(' - %s\n' % (frame.f_locals['ctxt'],))
            frame = frame.f_back
            # NOTE(review): no guard for frame becoming None before `depth`
            # frames; assumes the call stack is at least `depth` deep.

    def update(self, items):
        if items:
            self._note_addition(items)
        set.update(self, items)
|
cqlshlib/saferscanner.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
# SaferScanner is just like re.Scanner, but it neuters any grouping in the lexicon
|
|
18
|
+
# regular expressions and throws an error on group references, named groups, or
|
|
19
|
+
# regex in-pattern flags. Any of those can break correct operation of Scanner.
|
|
20
|
+
|
|
21
|
+
import re
|
|
22
|
+
from sre_constants import BRANCH, SUBPATTERN, GROUPREF, GROUPREF_IGNORE, GROUPREF_EXISTS
|
|
23
|
+
from sys import version_info
|
|
24
|
+
|
|
25
|
+
try:
|
|
26
|
+
sre_parse = re._parser
|
|
27
|
+
sre_compile = re._compiler
|
|
28
|
+
except AttributeError:
|
|
29
|
+
sre_parse = re.sre_parse
|
|
30
|
+
sre_compile = re.sre_compile
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SaferScannerBase(re.Scanner):
    """re.Scanner variant that rejects lexicon regexes able to break Scanner.

    Capturing groups are neutered, and group references, named groups, and
    in-pattern flag changes are rejected outright — any of those would
    desynchronize Scanner's group-number-to-action mapping.
    """

    @classmethod
    def subpat(cls, phrase, flags):
        """Parse *phrase* into a SubPattern and scrub it for safety."""
        return cls.scrub_sub(sre_parse.parse(phrase, flags), flags)

    @classmethod
    def scrub_sub(cls, sub, flags):
        """Recursively strip group numbering and reject unsafe constructs.

        Raises ValueError on group references, named captures, or flag
        settings that differ from *flags*.
        """
        scrubbedsub = []
        seqtypes = (type(()), type([]))
        for op, arg in sub.data:
            if type(arg) in seqtypes:
                # Recurse into nested sub-patterns inside compound args.
                arg = [cls.scrub_sub(a, flags) if isinstance(a, sre_parse.SubPattern) else a
                       for a in arg]
            if op in (BRANCH, SUBPATTERN):
                # Drop the group number so the construct becomes non-capturing.
                arg = [None] + arg[1:]
            if op in (GROUPREF, GROUPREF_IGNORE, GROUPREF_EXISTS):
                raise ValueError("Group references not allowed in SaferScanner lexicon")
            scrubbedsub.append((op, arg))
        if sub.pattern.groupdict:
            raise ValueError("Named captures not allowed in SaferScanner lexicon")
        if sub.pattern.flags ^ flags:
            raise ValueError("RE flag setting not allowed in SaferScanner lexicon (%s)" % (bin(sub.pattern.flags),))
        return sre_parse.SubPattern(sub.pattern, scrubbedsub)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class Py36SaferScanner(SaferScannerBase):
    """SaferScanner built against the pre-3.8 sre_parse API (Pattern class)."""

    def __init__(self, lexicon, flags=0):
        self.lexicon = lexicon
        p = []
        s = sre_parse.Pattern()
        s.flags = flags
        for phrase, action in lexicon:
            # Wrap each lexicon entry in its own numbered group so Scanner
            # can map the group that matched back to its action.
            gid = s.opengroup()
            p.append(sre_parse.SubPattern(s, [(SUBPATTERN, (gid, 0, 0, sre_parse.parse(phrase, flags))), ]))
            s.closegroup(gid, p[-1])
        # Combine all entries into a single alternation and compile once.
        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        self.p = p
        self.scanner = sre_compile.compile(p)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class Py38SaferScanner(SaferScannerBase):
    """SaferScanner built against the 3.8+ sre_parse API (State class)."""

    def __init__(self, lexicon, flags=0):
        self.lexicon = lexicon
        p = []
        s = sre_parse.State()
        s.flags = flags
        for phrase, action in lexicon:
            # Wrap each lexicon entry in its own numbered group so Scanner
            # can map the group that matched back to its action.
            gid = s.opengroup()
            p.append(sre_parse.SubPattern(s, [(SUBPATTERN, (gid, 0, 0, sre_parse.parse(phrase, flags))), ]))
            s.closegroup(gid, p[-1])
        # Combine all entries into a single alternation and compile once.
        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        self.p = p
        self.scanner = sre_compile.compile(p)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# sre_parse replaced Pattern with State in Python 3.8; pick the matching wrapper.
SaferScanner = Py38SaferScanner if version_info >= (3, 8) else Py36SaferScanner
|