okstra 0.31.0 → 0.32.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/package.json +1 -1
  2. package/runtime/BUILD.json +2 -2
  3. package/runtime/agents/SKILL.md +3 -3
  4. package/runtime/agents/workers/report-writer-worker.md +45 -67
  5. package/runtime/bin/okstra-render-final-report.py +101 -0
  6. package/runtime/bin/okstra-render-report-views.py +17 -10
  7. package/runtime/bin/okstra-token-usage.py +3 -1
  8. package/runtime/python/okstra_ctl/final_report_schema.py +253 -0
  9. package/runtime/python/okstra_ctl/render_final_report.py +201 -0
  10. package/runtime/python/okstra_ctl/report_views.py +108 -305
  11. package/runtime/python/okstra_ctl/wizard.py +16 -5
  12. package/runtime/python/okstra_token_usage/__init__.py +5 -1
  13. package/runtime/python/okstra_token_usage/cli.py +66 -36
  14. package/runtime/python/okstra_token_usage/report.py +148 -65
  15. package/runtime/python/okstra_vendor/__init__.py +37 -0
  16. package/runtime/python/okstra_vendor/jinja2/__init__.py +38 -0
  17. package/runtime/python/okstra_vendor/jinja2/_identifier.py +6 -0
  18. package/runtime/python/okstra_vendor/jinja2/async_utils.py +99 -0
  19. package/runtime/python/okstra_vendor/jinja2/bccache.py +408 -0
  20. package/runtime/python/okstra_vendor/jinja2/compiler.py +1998 -0
  21. package/runtime/python/okstra_vendor/jinja2/constants.py +20 -0
  22. package/runtime/python/okstra_vendor/jinja2/debug.py +191 -0
  23. package/runtime/python/okstra_vendor/jinja2/defaults.py +48 -0
  24. package/runtime/python/okstra_vendor/jinja2/environment.py +1672 -0
  25. package/runtime/python/okstra_vendor/jinja2/exceptions.py +166 -0
  26. package/runtime/python/okstra_vendor/jinja2/ext.py +870 -0
  27. package/runtime/python/okstra_vendor/jinja2/filters.py +1873 -0
  28. package/runtime/python/okstra_vendor/jinja2/idtracking.py +318 -0
  29. package/runtime/python/okstra_vendor/jinja2/lexer.py +868 -0
  30. package/runtime/python/okstra_vendor/jinja2/loaders.py +693 -0
  31. package/runtime/python/okstra_vendor/jinja2/meta.py +112 -0
  32. package/runtime/python/okstra_vendor/jinja2/nativetypes.py +130 -0
  33. package/runtime/python/okstra_vendor/jinja2/nodes.py +1206 -0
  34. package/runtime/python/okstra_vendor/jinja2/optimizer.py +48 -0
  35. package/runtime/python/okstra_vendor/jinja2/parser.py +1049 -0
  36. package/runtime/python/okstra_vendor/jinja2/py.typed +0 -0
  37. package/runtime/python/okstra_vendor/jinja2/runtime.py +1062 -0
  38. package/runtime/python/okstra_vendor/jinja2/sandbox.py +436 -0
  39. package/runtime/python/okstra_vendor/jinja2/tests.py +256 -0
  40. package/runtime/python/okstra_vendor/jinja2/utils.py +766 -0
  41. package/runtime/python/okstra_vendor/jinja2/visitor.py +92 -0
  42. package/runtime/python/okstra_vendor/markupsafe/__init__.py +396 -0
  43. package/runtime/python/okstra_vendor/markupsafe/_native.py +8 -0
  44. package/runtime/python/okstra_vendor/markupsafe/py.typed +0 -0
  45. package/runtime/schemas/final-report-v1.0.schema.json +1391 -0
  46. package/runtime/skills/okstra-report-writer/SKILL.md +29 -28
  47. package/runtime/templates/reports/final-report.template.md +370 -411
  48. package/runtime/templates/reports/report.css +12 -6
  49. package/runtime/validators/lib/fixtures.sh +7 -7
  50. package/runtime/validators/validate-report-views.py +24 -153
  51. package/runtime/validators/validate-run.py +102 -19
  52. package/src/install.mjs +20 -1
@@ -0,0 +1,868 @@
1
+ """Implements a Jinja / Python combination lexer. The ``Lexer`` class
2
+ is used to do some preprocessing. It filters out invalid operators like
3
+ the bitshift operators we don't allow in templates. It separates
4
+ template code and python code in expressions.
5
+ """
6
+
7
+ import re
8
+ import typing as t
9
+ from ast import literal_eval
10
+ from collections import deque
11
+ from sys import intern
12
+
13
+ from ._identifier import pattern as name_re
14
+ from .exceptions import TemplateSyntaxError
15
+ from .utils import LRUCache
16
+
17
+ if t.TYPE_CHECKING:
18
+ import typing_extensions as te
19
+
20
+ from .environment import Environment
21
+
22
# Cache for the lexers. It exists so that multiple environments with the
# same lexing configuration can share one lexer (see ``get_lexer``, which
# builds the cache key from the environment's syntax settings).
_lexer_cache: t.MutableMapping[t.Tuple, "Lexer"] = LRUCache(50)  # type: ignore

# static regular expressions

# One or more whitespace characters.
whitespace_re = re.compile(r"\s+")
# A single line break: "\r\n", "\r" or "\n".
newline_re = re.compile(r"(\r\n|\r|\n)")
# A single- or double-quoted string literal. Backslash escapes are accepted
# and, because of re.S, an escaped character may be a newline.
string_re = re.compile(
    r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S
)
# Integer literals in binary, octal, hexadecimal or decimal notation, with
# optional single "_" separators between digits. Matching is case-insensitive.
integer_re = re.compile(
    r"""
    (
        0b(_?[0-1])+ # binary
    |
        0o(_?[0-7])+ # octal
    |
        0x(_?[\da-f])+ # hex
    |
        [1-9](_?\d)* # decimal
    |
        0(_?0)* # decimal zero
    )
    """,
    re.IGNORECASE | re.VERBOSE,
)
# Float literals: digits followed by either an exponent (with an optional
# fractional part before it) or a mandatory fractional part. The lookbehind
# rejects a match that would start directly after a ".".
float_re = re.compile(
    r"""
    (?<!\.) # doesn't start with a .
    (\d+_)*\d+ # digits, possibly _ separated
    (
        (\.(\d+_)*\d+)? # optional fractional part
        e[+\-]?(\d+_)*\d+ # exponent part
    |
        \.(\d+_)*\d+ # required fractional part
    )
    """,
    re.IGNORECASE | re.VERBOSE,
)
61
+
62
# Intern the token type names and keep references to them. Interning allows
# the stream classes in this module to compare token types by identity
# (e.g. ``token.type is TOKEN_EOF``).
TOKEN_ADD = intern("add")
TOKEN_ASSIGN = intern("assign")
TOKEN_COLON = intern("colon")
TOKEN_COMMA = intern("comma")
TOKEN_DIV = intern("div")
TOKEN_DOT = intern("dot")
TOKEN_EQ = intern("eq")
TOKEN_FLOORDIV = intern("floordiv")
TOKEN_GT = intern("gt")
TOKEN_GTEQ = intern("gteq")
TOKEN_LBRACE = intern("lbrace")
TOKEN_LBRACKET = intern("lbracket")
TOKEN_LPAREN = intern("lparen")
TOKEN_LT = intern("lt")
TOKEN_LTEQ = intern("lteq")
TOKEN_MOD = intern("mod")
TOKEN_MUL = intern("mul")
TOKEN_NE = intern("ne")
TOKEN_PIPE = intern("pipe")
TOKEN_POW = intern("pow")
TOKEN_RBRACE = intern("rbrace")
TOKEN_RBRACKET = intern("rbracket")
TOKEN_RPAREN = intern("rparen")
TOKEN_SEMICOLON = intern("semicolon")
TOKEN_SUB = intern("sub")
TOKEN_TILDE = intern("tilde")
TOKEN_WHITESPACE = intern("whitespace")
TOKEN_FLOAT = intern("float")
TOKEN_INTEGER = intern("integer")
TOKEN_NAME = intern("name")
TOKEN_STRING = intern("string")
TOKEN_OPERATOR = intern("operator")
TOKEN_BLOCK_BEGIN = intern("block_begin")
TOKEN_BLOCK_END = intern("block_end")
TOKEN_VARIABLE_BEGIN = intern("variable_begin")
TOKEN_VARIABLE_END = intern("variable_end")
TOKEN_RAW_BEGIN = intern("raw_begin")
TOKEN_RAW_END = intern("raw_end")
TOKEN_COMMENT_BEGIN = intern("comment_begin")
TOKEN_COMMENT_END = intern("comment_end")
TOKEN_COMMENT = intern("comment")
TOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin")
TOKEN_LINESTATEMENT_END = intern("linestatement_end")
TOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin")
TOKEN_LINECOMMENT_END = intern("linecomment_end")
TOKEN_LINECOMMENT = intern("linecomment")
TOKEN_DATA = intern("data")
TOKEN_INITIAL = intern("initial")
TOKEN_EOF = intern("eof")
112
+
113
# Map the source text of every supported operator to its token type.
operators = {
    "+": TOKEN_ADD,
    "-": TOKEN_SUB,
    "/": TOKEN_DIV,
    "//": TOKEN_FLOORDIV,
    "*": TOKEN_MUL,
    "%": TOKEN_MOD,
    "**": TOKEN_POW,
    "~": TOKEN_TILDE,
    "[": TOKEN_LBRACKET,
    "]": TOKEN_RBRACKET,
    "(": TOKEN_LPAREN,
    ")": TOKEN_RPAREN,
    "{": TOKEN_LBRACE,
    "}": TOKEN_RBRACE,
    "==": TOKEN_EQ,
    "!=": TOKEN_NE,
    ">": TOKEN_GT,
    ">=": TOKEN_GTEQ,
    "<": TOKEN_LT,
    "<=": TOKEN_LTEQ,
    "=": TOKEN_ASSIGN,
    ".": TOKEN_DOT,
    ":": TOKEN_COLON,
    "|": TOKEN_PIPE,
    ",": TOKEN_COMMA,
    ";": TOKEN_SEMICOLON,
}

# Inverse lookup: token type -> operator text. The assertion guards against
# two operators accidentally sharing one token type.
reverse_operators = dict(zip(operators.values(), operators.keys()))
assert len(operators) == len(reverse_operators), "operators dropped"
# One alternation of all operators, longest first, so that a multi-character
# operator such as "**" is tried before its one-character prefix "*".
operator_re = re.compile(
    "("
    + "|".join(map(re.escape, sorted(operators, key=len, reverse=True)))
    + ")"
)
148
+
149
# Token types that ``Lexer.wrap`` skips entirely.
ignored_tokens = frozenset(
    (
        TOKEN_COMMENT_BEGIN,
        TOKEN_COMMENT,
        TOKEN_COMMENT_END,
        TOKEN_WHITESPACE,
        TOKEN_LINECOMMENT_BEGIN,
        TOKEN_LINECOMMENT_END,
        TOKEN_LINECOMMENT,
    )
)
# Token types that ``Lexer.tokeniter`` does not yield when their text is empty.
ignore_if_empty = frozenset(
    (TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT)
)
163
+
164
+
165
def _describe_token_type(token_type: str) -> str:
    """Return a human-readable description for a token type.

    Operator token types are described by their operator text. A fixed set
    of structural token types has dedicated descriptions. Any other token
    type is returned unchanged.
    """
    operator_text = reverse_operators.get(token_type)

    if operator_text is not None:
        return operator_text

    descriptions = {
        TOKEN_COMMENT_BEGIN: "begin of comment",
        TOKEN_COMMENT_END: "end of comment",
        TOKEN_COMMENT: "comment",
        TOKEN_LINECOMMENT: "comment",
        TOKEN_BLOCK_BEGIN: "begin of statement block",
        TOKEN_BLOCK_END: "end of statement block",
        TOKEN_VARIABLE_BEGIN: "begin of print statement",
        TOKEN_VARIABLE_END: "end of print statement",
        TOKEN_LINESTATEMENT_BEGIN: "begin of line statement",
        TOKEN_LINESTATEMENT_END: "end of line statement",
        TOKEN_DATA: "template data / text",
        TOKEN_EOF: "end of template",
    }
    return descriptions.get(token_type, token_type)
183
+
184
+
185
def describe_token(token: "Token") -> str:
    """Return a description of ``token``.

    A name token is described by its value; every other token is described
    by its type via ``_describe_token_type``.
    """
    is_name = token.type == TOKEN_NAME
    return token.value if is_name else _describe_token_type(token.type)
191
+
192
+
193
def describe_token_expr(expr: str) -> str:
    """Like `describe_token` but for token expressions.

    A token expression is either a bare token type or
    ``'token_type:token_value'``. For a name expression with a value the
    value is returned; otherwise the type part is described.
    """
    # Without a ":" the whole expression ends up in ``token_type`` and
    # ``separator`` is empty.
    token_type, separator, token_value = expr.partition(":")

    if separator and token_type == TOKEN_NAME:
        return token_value

    return _describe_token_type(token_type)
204
+
205
+
206
def count_newlines(value: str) -> int:
    """Return how many line breaks ``value`` contains.

    A line break is one match of ``newline_re``. This is useful for
    extensions that filter a stream.
    """
    return sum(1 for _ in newline_re.finditer(value))
211
+
212
+
213
def compile_rules(environment: "Environment") -> t.List[t.Tuple[str, str]]:
    """Build the ``(token_type, pattern)`` rules for the environment's start
    delimiters.

    The comment, block and variable start strings are always included. The
    line statement and line comment prefixes are added only when configured.
    The result is sorted in descending order of delimiter length, then token
    type, then pattern.
    """
    weighted = [
        (len(start), token_type, re.escape(start))
        for token_type, start in (
            (TOKEN_COMMENT_BEGIN, environment.comment_start_string),
            (TOKEN_BLOCK_BEGIN, environment.block_start_string),
            (TOKEN_VARIABLE_BEGIN, environment.variable_start_string),
        )
    ]

    statement_prefix = environment.line_statement_prefix

    if statement_prefix is not None:
        # Only horizontal whitespace may precede the prefix on its line.
        weighted.append(
            (
                len(statement_prefix),
                TOKEN_LINESTATEMENT_BEGIN,
                r"^[ \t\v]*" + re.escape(statement_prefix),
            )
        )

    comment_prefix = environment.line_comment_prefix

    if comment_prefix is not None:
        weighted.append(
            (
                len(comment_prefix),
                TOKEN_LINECOMMENT_BEGIN,
                r"(?:^|(?<=\S))[^\S\r\n]*" + re.escape(comment_prefix),
            )
        )

    weighted.sort(reverse=True)
    # Drop the length, which was only needed as the sort key.
    return [(token_type, pattern) for _, token_type, pattern in weighted]
252
+
253
+
254
class Failure:
    """Callable marker for a known lexing error.

    The `Lexer` places instances of this class in its rule tables. Calling
    an instance raises the configured error class with the stored message.
    """

    def __init__(
        self, message: str, cls: t.Type[TemplateSyntaxError] = TemplateSyntaxError
    ) -> None:
        # Text of the error to raise.
        self.message = message
        # Exception type to raise; `TemplateSyntaxError` unless overridden.
        self.error_class = cls

    def __call__(self, lineno: int, filename: t.Optional[str]) -> "te.NoReturn":
        """Raise the configured error for ``lineno`` and ``filename``."""
        # NOTE(review): other call sites in this module construct
        # TemplateSyntaxError as (message, lineno, name, filename); here
        # ``filename`` is passed as the third positional argument. Confirm
        # that this is intended.
        error = self.error_class(self.message, lineno, filename)
        raise error
267
+
268
+
269
class Token(t.NamedTuple):
    """A lexed token: the line it was found on, its type and its value."""

    lineno: int
    type: str
    value: str

    def __str__(self) -> str:
        return describe_token(self)

    def test(self, expr: str) -> bool:
        """Check this token against a token expression.

        ``expr`` is either a token type or ``'token_type:token_value'``.
        Only string types and values can be matched this way.
        """
        # Plain equality (not identity) on purpose: expressions handed to
        # test_any are usually not interned strings.
        if expr == self.type:
            return True

        wanted_type, separator, wanted_value = expr.partition(":")
        return (
            bool(separator)
            and wanted_type == self.type
            and wanted_value == self.value
        )

    def test_any(self, *iterable: str) -> bool:
        """Return whether any of the given token expressions matches."""
        return any(map(self.test, iterable))
295
+
296
+
297
class TokenStreamIterator:
    """Iterator over a `TokenStream`.

    Yields the stream's tokens one by one. When the end-of-file token is
    reached the stream is closed and iteration stops.
    """

    def __init__(self, stream: "TokenStream") -> None:
        self.stream = stream

    def __iter__(self) -> "TokenStreamIterator":
        return self

    def __next__(self) -> Token:
        stream = self.stream
        upcoming = stream.current

        if upcoming.type is not TOKEN_EOF:
            # Advance the underlying stream, then hand out the token that
            # was current before advancing.
            next(stream)
            return upcoming

        stream.close()
        raise StopIteration
317
+
318
+
319
class TokenStream:
    """An iterable stream of :class:`Token` objects.

    The parser does not iterate over the stream; it calls :func:`next` on it
    to move one token ahead. The active token is always available as
    :attr:`current`.
    """

    def __init__(
        self,
        generator: t.Iterable[Token],
        name: t.Optional[str],
        filename: t.Optional[str],
    ):
        self._iter = iter(generator)
        # Tokens pushed back onto the stream; served before ``_iter``.
        self._pushed: te.Deque[Token] = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        # Placeholder that is replaced by the first real token just below.
        self.current = Token(1, TOKEN_INITIAL, "")
        next(self)

    def __iter__(self) -> TokenStreamIterator:
        return TokenStreamIterator(self)

    def __bool__(self) -> bool:
        if self._pushed:
            return True

        return self.current.type is not TOKEN_EOF

    @property
    def eos(self) -> bool:
        """Whether the end of the stream has been reached."""
        return not self

    def push(self, token: Token) -> None:
        """Put ``token`` back so that it is served before unread tokens."""
        self._pushed.append(token)

    def look(self) -> Token:
        """Return the token after :attr:`current` without consuming it."""
        # Advance by one, remember what became current, then undo the
        # advance by pushing that token back and restoring the old one.
        active = next(self)
        upcoming = self.current
        self.push(upcoming)
        self.current = active
        return upcoming

    def skip(self, n: int = 1) -> None:
        """Go ``n`` tokens ahead."""
        for _ in range(n):
            next(self)

    def next_if(self, expr: str) -> t.Optional[Token]:
        """Advance and return the matched token if :attr:`current` passes
        the token test ``expr``; otherwise return `None` without advancing.
        """
        return next(self) if self.current.test(expr) else None

    def skip_if(self, expr: str) -> bool:
        """Like :meth:`next_if` but only reports whether a token matched."""
        return self.next_if(expr) is not None

    def __next__(self) -> Token:
        """Move one token ahead and return the token that was current.

        Use the built-in :func:`next` instead of calling this directly.
        """
        previous = self.current

        if self._pushed:
            self.current = self._pushed.popleft()
        elif previous.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()

        return previous

    def close(self) -> None:
        """Close the stream: make an EOF token current and drop the source."""
        self.current = Token(self.current.lineno, TOKEN_EOF, "")
        self._iter = iter(())
        self.closed = True

    def expect(self, expr: str) -> Token:
        """Require :attr:`current` to match ``expr``, then advance and
        return the matched token.

        ``expr`` accepts the same syntax as :meth:`jinja2.lexer.Token.test`.
        A `TemplateSyntaxError` is raised when the token does not match.
        """
        active = self.current

        if active.test(expr):
            return next(self)

        expected = describe_token_expr(expr)

        if active.type is TOKEN_EOF:
            raise TemplateSyntaxError(
                f"unexpected end of template, expected {expected!r}.",
                active.lineno,
                self.name,
                self.filename,
            )

        raise TemplateSyntaxError(
            f"expected token {expected!r}, got {describe_token(active)!r}",
            active.lineno,
            self.name,
            self.filename,
        )
426
+
427
+
428
def get_lexer(environment: "Environment") -> "Lexer":
    """Return a lexer for ``environment``, reusing a cached one if possible.

    The cache key is made of the environment's lexing-related settings, so
    environments that agree on all of them share one lexer.
    """
    cache_key = (
        environment.block_start_string,
        environment.block_end_string,
        environment.variable_start_string,
        environment.variable_end_string,
        environment.comment_start_string,
        environment.comment_end_string,
        environment.line_statement_prefix,
        environment.line_comment_prefix,
        environment.trim_blocks,
        environment.lstrip_blocks,
        environment.newline_sequence,
        environment.keep_trailing_newline,
    )
    cached = _lexer_cache.get(cache_key)

    if cached is not None:
        return cached

    fresh = Lexer(environment)
    _lexer_cache[cache_key] = fresh
    return fresh
450
+
451
+
452
class OptionalLStrip(tuple):  # type: ignore[type-arg]
    """Tuple subclass used as a marker.

    A rule whose tokens are an ``OptionalLStrip`` marks a point in the lexer
    state where lstrip may be applied.
    """

    __slots__ = ()

    # The positional arguments become the tuple's members. This override is
    # needed because the plain tuple constructor takes a single iterable.
    # Keyword arguments are accepted and ignored.
    def __new__(cls, *members, **kwargs):  # type: ignore
        return tuple.__new__(cls, members)
463
+
464
+
465
class _Rule(t.NamedTuple):
    """One lexing rule: a pattern, the token(s) it produces and an optional
    state command for the lexer's state stack.
    """

    # Compiled regex; ``Lexer.tokeniter`` matches it at the current position.
    pattern: t.Pattern[str]
    # Either a single token type for the whole match, or a tuple with one
    # entry per regex group. A tuple entry may also be the "#bygroup" marker
    # or a ``Failure`` instance.
    tokens: t.Union[str, t.Tuple[str, ...], t.Tuple[Failure]]
    # ``None`` (no state change), "#pop", "#bygroup", or the name of a state
    # to push onto the stack.
    command: t.Optional[str]
469
+
470
+
471
class Lexer:
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class, usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment: "Environment") -> None:
        """Build the per-state rule tables from the environment's delimiter
        and whitespace settings.

        Only ``lstrip_blocks``, ``newline_sequence`` and
        ``keep_trailing_newline`` are stored on the lexer; the environment
        itself is not kept.
        """
        # shortcuts
        e = re.escape

        def c(x: str) -> t.Pattern[str]:
            # All state rules are compiled multi-line and dot-matches-newline.
            return re.compile(x, re.M | re.S)

        # lexing rules for tags
        tag_rules: t.List[_Rule] = [
            _Rule(whitespace_re, TOKEN_WHITESPACE, None),
            _Rule(float_re, TOKEN_FLOAT, None),
            _Rule(integer_re, TOKEN_INTEGER, None),
            _Rule(name_re, TOKEN_NAME, None),
            _Rule(string_re, TOKEN_STRING, None),
            _Rule(operator_re, TOKEN_OPERATOR, None),
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = compile_rules(environment)

        block_start_re = e(environment.block_start_string)
        block_end_re = e(environment.block_end_string)
        comment_end_re = e(environment.comment_end_string)
        variable_end_re = e(environment.variable_end_string)

        # block suffix if trimming is enabled
        block_suffix_re = "\\n?" if environment.trim_blocks else ""

        self.lstrip_blocks = environment.lstrip_blocks

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        # Opening "raw" tag, with an optional "-" / "+" whitespace-control
        # sign after the block start and an optional "-" before the block end.
        root_raw_re = (
            rf"(?P<raw_begin>{block_start_re}(\-|\+|)\s*raw\s*"
            rf"(?:\-{block_end_re}\s*|{block_end_re}))"
        )
        # One named group per start delimiter; the group name is the token
        # type and also the name of the state that the "#bygroup" command
        # pushes in ``tokeniter``.
        root_parts_re = "|".join(
            [root_raw_re] + [rf"(?P<{n}>{r}(\-|\+|))" for n, r in root_tag_rules]
        )

        # global lexing rules
        self.rules: t.Dict[str, t.List[_Rule]] = {
            "root": [
                # directives
                _Rule(
                    c(rf"(.*?)(?:{root_parts_re})"),
                    OptionalLStrip(TOKEN_DATA, "#bygroup"),  # type: ignore
                    "#bygroup",
                ),
                # data
                _Rule(c(".+"), TOKEN_DATA, None),
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                _Rule(
                    c(
                        rf"(.*?)((?:\+{comment_end_re}|\-{comment_end_re}\s*"
                        rf"|{comment_end_re}{block_suffix_re}))"
                    ),
                    (TOKEN_COMMENT, TOKEN_COMMENT_END),
                    "#pop",
                ),
                _Rule(c(r"(.)"), (Failure("Missing end of comment tag"),), None),
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                _Rule(
                    c(
                        rf"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        rf"|{block_end_re}{block_suffix_re})"
                    ),
                    TOKEN_BLOCK_END,
                    "#pop",
                ),
            ]
            + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                _Rule(
                    c(rf"\-{variable_end_re}\s*|{variable_end_re}"),
                    TOKEN_VARIABLE_END,
                    "#pop",
                )
            ]
            + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                _Rule(
                    c(
                        rf"(.*?)((?:{block_start_re}(\-|\+|))\s*endraw\s*"
                        rf"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        rf"|{block_end_re}{block_suffix_re}))"
                    ),
                    OptionalLStrip(TOKEN_DATA, TOKEN_RAW_END),  # type: ignore
                    "#pop",
                ),
                _Rule(c(r"(.)"), (Failure("Missing end of raw directive"),), None),
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                _Rule(c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop")
            ]
            + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                _Rule(
                    c(r"(.*?)()(?=\n|$)"),
                    (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END),
                    "#pop",
                )
            ],
        }

    def _normalize_newlines(self, value: str) -> str:
        """Replace all newlines with the configured sequence in strings
        and template data.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(
        self,
        source: str,
        name: t.Optional[str] = None,
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> TokenStream:
        """Calls tokeniter + wrap and wraps the result in a token stream."""
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(
        self,
        stream: t.Iterable[t.Tuple[int, str, str]],
        name: t.Optional[str] = None,
        filename: t.Optional[str] = None,
    ) -> t.Iterator[Token]:
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.

        Tokens in ``ignored_tokens`` and the raw begin/end markers are
        dropped. Line statement begin/end become block begin/end. Template
        data and string contents get their newlines normalized. Strings are
        unescaped, integers and floats are converted to numbers, and
        operators are mapped to their specific token type.

        :raises TemplateSyntaxError: for a name that is not a valid
            identifier or a string literal that cannot be unescaped.
        """
        for lineno, token, value_str in stream:
            if token in ignored_tokens:
                continue

            value: t.Any = value_str

            if token == TOKEN_LINESTATEMENT_BEGIN:
                token = TOKEN_BLOCK_BEGIN
            elif token == TOKEN_LINESTATEMENT_END:
                token = TOKEN_BLOCK_END
            # we are not interested in those tokens in the parser
            elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
                continue
            elif token == TOKEN_DATA:
                value = self._normalize_newlines(value_str)
            elif token == "keyword":
                token = value_str
            elif token == TOKEN_NAME:
                value = value_str

                if not value.isidentifier():
                    raise TemplateSyntaxError(
                        "Invalid character in identifier", lineno, name, filename
                    )
            elif token == TOKEN_STRING:
                # try to unescape string
                try:
                    # [1:-1] drops the surrounding quotes. Non-ASCII
                    # characters are turned into backslash escapes first so
                    # that the "unicode-escape" decode restores them.
                    value = (
                        self._normalize_newlines(value_str[1:-1])
                        .encode("ascii", "backslashreplace")
                        .decode("unicode-escape")
                    )
                except Exception as e:
                    # Keep only the text after the last ":" of the message.
                    msg = str(e).split(":")[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename) from e
            elif token == TOKEN_INTEGER:
                # Base 0 lets int() honor the 0b / 0o / 0x prefixes.
                value = int(value_str.replace("_", ""), 0)
            elif token == TOKEN_FLOAT:
                # remove all "_" first to support more Python versions
                value = literal_eval(value_str.replace("_", ""))
            elif token == TOKEN_OPERATOR:
                token = operators[value_str]

            yield Token(lineno, token, value)

    def tokeniter(
        self,
        source: str,
        name: t.Optional[str],
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> t.Iterator[t.Tuple[int, str, str]]:
        """This method tokenizes the text and returns the tokens in a
        generator. Use this method if you just want to tokenize a template.

        Each yielded item is a ``(lineno, token_type, text)`` tuple. If
        ``state`` is given and is not ``"root"``, it must be ``"variable"``
        or ``"block"`` and lexing starts inside that kind of tag.

        :raises TemplateSyntaxError: on unbalanced brackets, on a rule that
            carries a ``Failure``, or when no rule matches before the end
            of the source.

        .. versionchanged:: 3.0
            Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line
            breaks.
        """
        # newline_re has one capturing group, so split() alternates between
        # text and separators; [::2] keeps only the text pieces.
        lines = newline_re.split(source)[::2]

        if not self.keep_trailing_newline and lines[-1] == "":
            del lines[-1]

        source = "\n".join(lines)
        pos = 0
        lineno = 1
        stack = ["root"]

        if state is not None and state != "root":
            assert state in ("variable", "block"), "invalid state"
            stack.append(state + "_begin")

        statetokens = self.rules[stack[-1]]
        source_length = len(source)
        # Closing brackets that are still expected, innermost last.
        balancing_stack: t.List[str] = []
        newlines_stripped = 0
        line_starting = True

        while True:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)

                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and tokens in (
                    TOKEN_VARIABLE_END,
                    TOKEN_BLOCK_END,
                    TOKEN_LINESTATEMENT_END,
                ):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    groups: t.Sequence[str] = m.groups()

                    if isinstance(tokens, OptionalLStrip):
                        # Rule supports lstrip. Match will look like
                        # text, block type, whitespace control, type, control, ...
                        text = groups[0]
                        # Skipping the text and first type, every other group is the
                        # whitespace control for each type. One of the groups will be
                        # -, +, or empty string instead of None.
                        strip_sign = next(g for g in groups[2::2] if g is not None)

                        if strip_sign == "-":
                            # Strip all whitespace between the text and the tag.
                            stripped = text.rstrip()
                            # Remember the removed newlines so that lineno
                            # stays correct when the text group is counted.
                            newlines_stripped = text[len(stripped) :].count("\n")
                            groups = [stripped, *groups[1:]]
                        elif (
                            # Not marked for preserving whitespace.
                            strip_sign != "+"
                            # lstrip is enabled.
                            and self.lstrip_blocks
                            # Not a variable expression.
                            and not m.groupdict().get(TOKEN_VARIABLE_BEGIN)
                        ):
                            # The start of text between the last newline and the tag.
                            l_pos = text.rfind("\n") + 1

                            if l_pos > 0 or line_starting:
                                # If there's only whitespace between the newline and the
                                # tag, strip it.
                                if whitespace_re.fullmatch(text, l_pos):
                                    groups = [text[:l_pos], *groups[1:]]

                    for idx, token in enumerate(tokens):
                        # failure group
                        if isinstance(token, Failure):
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == "#bygroup":
                            for key, value in m.groupdict().items():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count("\n")
                                    break
                            else:
                                raise RuntimeError(
                                    f"{regex!r} wanted to resolve the token dynamically"
                                    " but no group matched"
                                )
                        # normal group
                        else:
                            data = groups[idx]

                            if data or token not in ignore_if_empty:
                                yield lineno, token, data  # type: ignore[misc]

                            lineno += data.count("\n") + newlines_stripped
                            newlines_stripped = 0

                # a plain string token is yielded as-is.
                else:
                    data = m.group()

                    # update brace/parentheses balance
                    if tokens == TOKEN_OPERATOR:
                        if data == "{":
                            balancing_stack.append("}")
                        elif data == "(":
                            balancing_stack.append(")")
                        elif data == "[":
                            balancing_stack.append("]")
                        elif data in ("}", ")", "]"):
                            if not balancing_stack:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}'", lineno, name, filename
                                )

                            expected_op = balancing_stack.pop()

                            if expected_op != data:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}', expected '{expected_op}'",
                                    lineno,
                                    name,
                                    filename,
                                )

                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data

                    lineno += data.count("\n")

                # The [-1:] slice (rather than [-1]) is safe for an empty match.
                line_starting = m.group()[-1:] == "\n"
                # fetch new position into new variable so that we can check
                # if there is a internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == "#pop":
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == "#bygroup":
                        for key, value in m.groupdict().items():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError(
                                f"{regex!r} wanted to resolve the new state dynamically"
                                f" but no group matched"
                            )
                    # direct state name given
                    else:
                        stack.append(new_state)

                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError(
                        f"{regex!r} yielded empty string without stack change"
                    )

                # publish new position and start again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return

                # something went wrong
                raise TemplateSyntaxError(
                    f"unexpected char {source[pos]!r} at {pos}", lineno, name, filename
                )