just-bash 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- just_bash/__init__.py +55 -0
- just_bash/ast/__init__.py +213 -0
- just_bash/ast/factory.py +320 -0
- just_bash/ast/types.py +953 -0
- just_bash/bash.py +220 -0
- just_bash/commands/__init__.py +23 -0
- just_bash/commands/argv/__init__.py +5 -0
- just_bash/commands/argv/argv.py +21 -0
- just_bash/commands/awk/__init__.py +5 -0
- just_bash/commands/awk/awk.py +1168 -0
- just_bash/commands/base64/__init__.py +5 -0
- just_bash/commands/base64/base64.py +138 -0
- just_bash/commands/basename/__init__.py +5 -0
- just_bash/commands/basename/basename.py +72 -0
- just_bash/commands/bash/__init__.py +5 -0
- just_bash/commands/bash/bash.py +188 -0
- just_bash/commands/cat/__init__.py +5 -0
- just_bash/commands/cat/cat.py +173 -0
- just_bash/commands/checksum/__init__.py +5 -0
- just_bash/commands/checksum/checksum.py +179 -0
- just_bash/commands/chmod/__init__.py +5 -0
- just_bash/commands/chmod/chmod.py +216 -0
- just_bash/commands/column/__init__.py +5 -0
- just_bash/commands/column/column.py +180 -0
- just_bash/commands/comm/__init__.py +5 -0
- just_bash/commands/comm/comm.py +150 -0
- just_bash/commands/compression/__init__.py +5 -0
- just_bash/commands/compression/compression.py +298 -0
- just_bash/commands/cp/__init__.py +5 -0
- just_bash/commands/cp/cp.py +149 -0
- just_bash/commands/curl/__init__.py +5 -0
- just_bash/commands/curl/curl.py +801 -0
- just_bash/commands/cut/__init__.py +5 -0
- just_bash/commands/cut/cut.py +327 -0
- just_bash/commands/date/__init__.py +5 -0
- just_bash/commands/date/date.py +258 -0
- just_bash/commands/diff/__init__.py +5 -0
- just_bash/commands/diff/diff.py +118 -0
- just_bash/commands/dirname/__init__.py +5 -0
- just_bash/commands/dirname/dirname.py +56 -0
- just_bash/commands/du/__init__.py +5 -0
- just_bash/commands/du/du.py +150 -0
- just_bash/commands/echo/__init__.py +5 -0
- just_bash/commands/echo/echo.py +125 -0
- just_bash/commands/env/__init__.py +5 -0
- just_bash/commands/env/env.py +163 -0
- just_bash/commands/expand/__init__.py +5 -0
- just_bash/commands/expand/expand.py +299 -0
- just_bash/commands/expr/__init__.py +5 -0
- just_bash/commands/expr/expr.py +273 -0
- just_bash/commands/file/__init__.py +5 -0
- just_bash/commands/file/file.py +274 -0
- just_bash/commands/find/__init__.py +5 -0
- just_bash/commands/find/find.py +623 -0
- just_bash/commands/fold/__init__.py +5 -0
- just_bash/commands/fold/fold.py +160 -0
- just_bash/commands/grep/__init__.py +5 -0
- just_bash/commands/grep/grep.py +418 -0
- just_bash/commands/head/__init__.py +5 -0
- just_bash/commands/head/head.py +167 -0
- just_bash/commands/help/__init__.py +5 -0
- just_bash/commands/help/help.py +67 -0
- just_bash/commands/hostname/__init__.py +5 -0
- just_bash/commands/hostname/hostname.py +21 -0
- just_bash/commands/html_to_markdown/__init__.py +5 -0
- just_bash/commands/html_to_markdown/html_to_markdown.py +191 -0
- just_bash/commands/join/__init__.py +5 -0
- just_bash/commands/join/join.py +252 -0
- just_bash/commands/jq/__init__.py +5 -0
- just_bash/commands/jq/jq.py +280 -0
- just_bash/commands/ln/__init__.py +5 -0
- just_bash/commands/ln/ln.py +127 -0
- just_bash/commands/ls/__init__.py +5 -0
- just_bash/commands/ls/ls.py +280 -0
- just_bash/commands/mkdir/__init__.py +5 -0
- just_bash/commands/mkdir/mkdir.py +92 -0
- just_bash/commands/mv/__init__.py +5 -0
- just_bash/commands/mv/mv.py +142 -0
- just_bash/commands/nl/__init__.py +5 -0
- just_bash/commands/nl/nl.py +180 -0
- just_bash/commands/od/__init__.py +5 -0
- just_bash/commands/od/od.py +157 -0
- just_bash/commands/paste/__init__.py +5 -0
- just_bash/commands/paste/paste.py +100 -0
- just_bash/commands/printf/__init__.py +5 -0
- just_bash/commands/printf/printf.py +157 -0
- just_bash/commands/pwd/__init__.py +5 -0
- just_bash/commands/pwd/pwd.py +23 -0
- just_bash/commands/read/__init__.py +5 -0
- just_bash/commands/read/read.py +185 -0
- just_bash/commands/readlink/__init__.py +5 -0
- just_bash/commands/readlink/readlink.py +86 -0
- just_bash/commands/registry.py +844 -0
- just_bash/commands/rev/__init__.py +5 -0
- just_bash/commands/rev/rev.py +74 -0
- just_bash/commands/rg/__init__.py +5 -0
- just_bash/commands/rg/rg.py +1048 -0
- just_bash/commands/rm/__init__.py +5 -0
- just_bash/commands/rm/rm.py +106 -0
- just_bash/commands/search_engine/__init__.py +13 -0
- just_bash/commands/search_engine/matcher.py +170 -0
- just_bash/commands/search_engine/regex.py +159 -0
- just_bash/commands/sed/__init__.py +5 -0
- just_bash/commands/sed/sed.py +863 -0
- just_bash/commands/seq/__init__.py +5 -0
- just_bash/commands/seq/seq.py +190 -0
- just_bash/commands/shell/__init__.py +5 -0
- just_bash/commands/shell/shell.py +206 -0
- just_bash/commands/sleep/__init__.py +5 -0
- just_bash/commands/sleep/sleep.py +62 -0
- just_bash/commands/sort/__init__.py +5 -0
- just_bash/commands/sort/sort.py +411 -0
- just_bash/commands/split/__init__.py +5 -0
- just_bash/commands/split/split.py +237 -0
- just_bash/commands/sqlite3/__init__.py +5 -0
- just_bash/commands/sqlite3/sqlite3_cmd.py +505 -0
- just_bash/commands/stat/__init__.py +5 -0
- just_bash/commands/stat/stat.py +150 -0
- just_bash/commands/strings/__init__.py +5 -0
- just_bash/commands/strings/strings.py +150 -0
- just_bash/commands/tac/__init__.py +5 -0
- just_bash/commands/tac/tac.py +158 -0
- just_bash/commands/tail/__init__.py +5 -0
- just_bash/commands/tail/tail.py +180 -0
- just_bash/commands/tar/__init__.py +5 -0
- just_bash/commands/tar/tar.py +1067 -0
- just_bash/commands/tee/__init__.py +5 -0
- just_bash/commands/tee/tee.py +63 -0
- just_bash/commands/timeout/__init__.py +5 -0
- just_bash/commands/timeout/timeout.py +188 -0
- just_bash/commands/touch/__init__.py +5 -0
- just_bash/commands/touch/touch.py +91 -0
- just_bash/commands/tr/__init__.py +5 -0
- just_bash/commands/tr/tr.py +297 -0
- just_bash/commands/tree/__init__.py +5 -0
- just_bash/commands/tree/tree.py +139 -0
- just_bash/commands/true/__init__.py +5 -0
- just_bash/commands/true/true.py +32 -0
- just_bash/commands/uniq/__init__.py +5 -0
- just_bash/commands/uniq/uniq.py +323 -0
- just_bash/commands/wc/__init__.py +5 -0
- just_bash/commands/wc/wc.py +169 -0
- just_bash/commands/which/__init__.py +5 -0
- just_bash/commands/which/which.py +52 -0
- just_bash/commands/xan/__init__.py +5 -0
- just_bash/commands/xan/xan.py +1663 -0
- just_bash/commands/xargs/__init__.py +5 -0
- just_bash/commands/xargs/xargs.py +136 -0
- just_bash/commands/yq/__init__.py +5 -0
- just_bash/commands/yq/yq.py +848 -0
- just_bash/fs/__init__.py +29 -0
- just_bash/fs/in_memory_fs.py +621 -0
- just_bash/fs/mountable_fs.py +504 -0
- just_bash/fs/overlay_fs.py +894 -0
- just_bash/fs/read_write_fs.py +455 -0
- just_bash/interpreter/__init__.py +37 -0
- just_bash/interpreter/builtins/__init__.py +92 -0
- just_bash/interpreter/builtins/alias.py +154 -0
- just_bash/interpreter/builtins/cd.py +76 -0
- just_bash/interpreter/builtins/control.py +127 -0
- just_bash/interpreter/builtins/declare.py +336 -0
- just_bash/interpreter/builtins/export.py +56 -0
- just_bash/interpreter/builtins/let.py +44 -0
- just_bash/interpreter/builtins/local.py +57 -0
- just_bash/interpreter/builtins/mapfile.py +152 -0
- just_bash/interpreter/builtins/misc.py +378 -0
- just_bash/interpreter/builtins/readonly.py +80 -0
- just_bash/interpreter/builtins/set.py +234 -0
- just_bash/interpreter/builtins/shopt.py +201 -0
- just_bash/interpreter/builtins/source.py +136 -0
- just_bash/interpreter/builtins/test.py +290 -0
- just_bash/interpreter/builtins/unset.py +53 -0
- just_bash/interpreter/conditionals.py +387 -0
- just_bash/interpreter/control_flow.py +381 -0
- just_bash/interpreter/errors.py +116 -0
- just_bash/interpreter/expansion.py +1156 -0
- just_bash/interpreter/interpreter.py +813 -0
- just_bash/interpreter/types.py +134 -0
- just_bash/network/__init__.py +1 -0
- just_bash/parser/__init__.py +39 -0
- just_bash/parser/lexer.py +948 -0
- just_bash/parser/parser.py +2162 -0
- just_bash/py.typed +0 -0
- just_bash/query_engine/__init__.py +83 -0
- just_bash/query_engine/builtins/__init__.py +1283 -0
- just_bash/query_engine/evaluator.py +578 -0
- just_bash/query_engine/parser.py +525 -0
- just_bash/query_engine/tokenizer.py +329 -0
- just_bash/query_engine/types.py +373 -0
- just_bash/types.py +180 -0
- just_bash-0.1.5.dist-info/METADATA +410 -0
- just_bash-0.1.5.dist-info/RECORD +193 -0
- just_bash-0.1.5.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
"""Tokenizer for jq expressions.
|
|
2
|
+
|
|
3
|
+
Converts a jq expression string into a sequence of tokens.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .types import Token, TokenType
|
|
7
|
+
|
|
8
|
+
# Keywords mapping
|
|
9
|
+
# Reserved words recognised by the tokenizer.  Every keyword maps to the
# TokenType member whose name is the keyword in upper case, so the table is
# derived from the word list instead of being spelled out pair by pair.
KEYWORDS: dict[str, TokenType] = {
    word: TokenType[word.upper()]
    for word in (
        "and",
        "or",
        "not",
        "if",
        "then",
        "elif",
        "else",
        "end",
        "as",
        "try",
        "catch",
        "true",
        "false",
        "null",
        "reduce",
        "foreach",
    )
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def tokenize(input_str: str) -> list[Token]:
    """Tokenize a jq expression string into a list of tokens.

    Whitespace and ``#`` comments (up to, not including, the newline) are
    skipped.  Operators are matched longest-first, so ``//=`` wins over ``//``
    and ``/``.  A ``-`` directly followed by a digit starts a negative number
    literal only when the previous token cannot end a value; after a value
    (``1-2``, ``.a-1``, ``)-1``) it is emitted as a ``MINUS`` operator so that
    subtraction without surrounding spaces is lexed correctly.

    String literals are handled leniently: a missing closing quote or a
    dangling backslash at end of input ends the string instead of raising.
    The ``\\(`` escape is kept verbatim in the token value so that string
    interpolation can be recognised later.

    Args:
        input_str: The jq expression to tokenize

    Returns:
        A list of Token objects, always terminated by an ``EOF`` token whose
        position is the length of the input.

    Raises:
        ValueError: If an unexpected character or a malformed numeric literal
            is encountered
    """
    # Operator lexemes of every length in one table; lookups below try the
    # three-character slice first, then two, then one.
    operators: dict[str, TokenType] = {
        "//=": TokenType.UPDATE_ALT,
        "..": TokenType.DOTDOT,
        "==": TokenType.EQ,
        "!=": TokenType.NE,
        "<=": TokenType.LE,
        ">=": TokenType.GE,
        "//": TokenType.ALT,
        "+=": TokenType.UPDATE_ADD,
        "-=": TokenType.UPDATE_SUB,
        "*=": TokenType.UPDATE_MUL,
        "/=": TokenType.UPDATE_DIV,
        "%=": TokenType.UPDATE_MOD,
        "|=": TokenType.UPDATE_PIPE,
        "=": TokenType.ASSIGN,
        ".": TokenType.DOT,
        "|": TokenType.PIPE,
        ",": TokenType.COMMA,
        ":": TokenType.COLON,
        ";": TokenType.SEMICOLON,
        "(": TokenType.LPAREN,
        ")": TokenType.RPAREN,
        "[": TokenType.LBRACKET,
        "]": TokenType.RBRACKET,
        "{": TokenType.LBRACE,
        "}": TokenType.RBRACE,
        "?": TokenType.QUESTION,
        "+": TokenType.PLUS,
        "-": TokenType.MINUS,
        "*": TokenType.STAR,
        "/": TokenType.SLASH,
        "%": TokenType.PERCENT,
        "<": TokenType.LT,
        ">": TokenType.GT,
    }
    # Token types that can end an operand.  A '-' right after one of these is
    # the binary minus operator, never the sign of a number literal.
    ends_value = frozenset(
        {
            TokenType.NUMBER,
            TokenType.STRING,
            TokenType.IDENT,
            TokenType.RPAREN,
            TokenType.RBRACKET,
            TokenType.RBRACE,
            TokenType.DOT,
            TokenType.DOTDOT,
            TokenType.QUESTION,
            TokenType.TRUE,
            TokenType.FALSE,
            TokenType.NULL,
            TokenType.END,
        }
    )

    tokens: list[Token] = []
    length = len(input_str)
    pos = 0

    while pos < length:
        start = pos
        c = input_str[pos]

        # Whitespace - skip
        if c in " \t\n\r":
            pos += 1
            continue

        # Comments - skip to end of line (the newline itself is whitespace)
        if c == "#":
            newline = input_str.find("\n", pos)
            pos = length if newline == -1 else newline
            continue

        # Numbers, including a leading '-' when it cannot be a subtraction
        next_c = input_str[pos + 1] if pos + 1 < length else ""
        is_negative_literal = (
            c == "-"
            and _is_digit(next_c)
            and not (tokens and tokens[-1].type in ends_value)
        )
        if _is_digit(c) or is_negative_literal:
            pos, number = _scan_number(input_str, start)
            tokens.append(Token(TokenType.NUMBER, number, start))
            continue

        # Strings
        if c == '"':
            pos, text = _scan_string(input_str, pos + 1)
            tokens.append(Token(TokenType.STRING, text, start))
            continue

        # Identifiers and keywords (including $variables and @formats)
        if _is_ident_start(c) or c == "$" or c == "@":
            pos += 1
            while pos < length and (
                _is_ident_start(input_str[pos]) or _is_digit(input_str[pos])
            ):
                pos += 1
            ident = input_str[start:pos]
            keyword_type = KEYWORDS.get(ident)
            if keyword_type is not None:
                tokens.append(Token(keyword_type, None, start))
            else:
                tokens.append(Token(TokenType.IDENT, ident, start))
            continue

        # Operators and punctuation, longest lexeme first
        for width in (3, 2, 1):
            op_type = operators.get(input_str[pos : pos + width])
            if op_type is not None:
                tokens.append(Token(op_type, None, start))
                pos += width
                break
        else:
            raise ValueError(f"Unexpected character '{c}' at position {start}")

    tokens.append(Token(TokenType.EOF, None, pos))
    return tokens


# Escape letter -> replacement text inside a string literal.  "\(" is kept
# as-is so the interpolation marker survives into the token value.
# NOTE(review): an escaped backslash followed by "(" therefore yields the same
# two characters as an interpolation marker - confirm how the consumer of
# STRING tokens tells them apart.
_STRING_ESCAPES: dict[str, str] = {
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "\\": "\\",
    '"': '"',
    "(": "\\(",
}


def _is_digit(c: str) -> bool:
    """Return True if *c* is a single ASCII digit ("" is not a digit)."""
    return "0" <= c <= "9"


def _is_ident_start(c: str) -> bool:
    """Return True if *c* is an ASCII letter or an underscore."""
    return "a" <= c <= "z" or "A" <= c <= "Z" or c == "_"


def _scan_number(text: str, start: int) -> tuple[int, int | float]:
    """Scan the numeric literal at *start*; return (end position, value).

    The literal may begin with '-'.  An exponent is consumed only when it is
    well-formed ('e'/'E', optional sign, at least one digit), so input such as
    ``1else`` lexes as the number ``1`` followed by ``else``.

    Raises:
        ValueError: If the scanned text is not a valid number (e.g. ``1.2.3``)
    """
    length = len(text)
    pos = start
    if text[pos] == "-":
        pos += 1
    # Mantissa: digits and dots; int()/float() below rejects malformed runs.
    while pos < length and (_is_digit(text[pos]) or text[pos] == "."):
        pos += 1
    if pos < length and text[pos] in "eE":
        exp_end = pos + 1
        if exp_end < length and text[exp_end] in "+-":
            exp_end += 1
        if exp_end < length and _is_digit(text[exp_end]):
            while exp_end < length and _is_digit(text[exp_end]):
                exp_end += 1
            pos = exp_end
    literal = text[start:pos]
    is_float = "." in literal or "e" in literal or "E" in literal
    try:
        value: int | float = float(literal) if is_float else int(literal)
    except ValueError:
        raise ValueError(
            f"Invalid numeric literal '{literal}' at position {start}"
        ) from None
    return pos, value


def _scan_string(text: str, pos: int) -> tuple[int, str]:
    """Scan a string body starting just after its opening quote.

    Returns (position after the closing quote, decoded contents).  Unknown
    escapes decode to the escaped character itself.  Reaching end of input
    before the closing quote, or right after a backslash, ends the string
    without raising.
    """
    length = len(text)
    parts: list[str] = []
    while pos < length and text[pos] != '"':
        ch = text[pos]
        pos += 1
        if ch != "\\":
            parts.append(ch)
            continue
        if pos >= length:
            break  # dangling backslash at end of input is dropped
        escaped = text[pos]
        pos += 1
        parts.append(_STRING_ESCAPES.get(escaped, escaped))
    if pos < length:
        pos += 1  # consume closing quote
    return pos, "".join(parts)
|
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
"""Type definitions for the query engine.
|
|
2
|
+
|
|
3
|
+
Contains TokenType enum, Token dataclass, and all AST node dataclasses.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from enum import Enum, auto
|
|
8
|
+
from typing import Any, Union
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TokenType(Enum):
    """Kinds of token produced when lexing a jq expression.

    Member values come from ``auto()``, so they follow declaration order;
    do not reorder members if the numeric values matter to a caller.
    """

    # --- Structural punctuation ---
    DOT = auto()  # .
    PIPE = auto()  # |
    COMMA = auto()  # ,
    COLON = auto()  # :
    SEMICOLON = auto()  # ;
    LPAREN = auto()  # (
    RPAREN = auto()  # )
    LBRACKET = auto()  # [
    RBRACKET = auto()  # ]
    LBRACE = auto()  # {
    RBRACE = auto()  # }
    QUESTION = auto()  # ?

    # --- Arithmetic operators ---
    PLUS = auto()  # +
    MINUS = auto()  # -
    STAR = auto()  # *
    SLASH = auto()  # /
    PERCENT = auto()  # %

    # --- Comparison operators ---
    EQ = auto()  # ==
    NE = auto()  # !=
    LT = auto()  # <
    LE = auto()  # <=
    GT = auto()  # >
    GE = auto()  # >=

    # --- Logical keywords ---
    AND = auto()  # and
    OR = auto()  # or
    NOT = auto()  # not

    # --- Alternative operator ---
    ALT = auto()  # //

    # --- Plain assignment ---
    ASSIGN = auto()  # =

    # --- Update-assignment operators ---
    UPDATE_ADD = auto()  # +=
    UPDATE_SUB = auto()  # -=
    UPDATE_MUL = auto()  # *=
    UPDATE_DIV = auto()  # /=
    UPDATE_MOD = auto()  # %=
    UPDATE_ALT = auto()  # //=
    UPDATE_PIPE = auto()  # |=

    # --- Tokens that carry a value ---
    IDENT = auto()
    NUMBER = auto()
    STRING = auto()

    # --- Reserved words ---
    IF = auto()  # if
    THEN = auto()  # then
    ELIF = auto()  # elif
    ELSE = auto()  # else
    END = auto()  # end
    AS = auto()  # as
    TRY = auto()  # try
    CATCH = auto()  # catch
    TRUE = auto()  # true
    FALSE = auto()  # false
    NULL = auto()  # null
    REDUCE = auto()  # reduce
    FOREACH = auto()  # foreach

    # --- Special ---
    DOTDOT = auto()  # ..
    EOF = auto()  # end-of-input marker
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass
class Token:
    """A single token produced by the lexer.

    Fields are positional in the order ``type``, ``value``, ``pos``.
    """

    # Kind of token.
    type: TokenType
    # Payload for tokens that carry one (text or number); None otherwise.
    value: str | int | float | None = field(default=None)
    # Position associated with the token in the source text; defaults to 0.
    pos: int = field(default=0)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass
class QueryExecutionLimits:
    """Limits intended to stop a query from running away."""

    # Maximum number of iterations allowed; defaults to ten thousand.
    max_iterations: int = field(default=10_000)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass
class EvalContext:
    """State carried through evaluation: variables, limits and environment.

    Every container field gets its own fresh object per instance.
    """

    # Variable bindings by name (presumably jq ``$name`` variables - confirm
    # against the evaluator).
    vars: dict[str, Any] = field(default_factory=dict)
    # Execution limits; a default QueryExecutionLimits when not supplied.
    limits: QueryExecutionLimits = field(default_factory=QueryExecutionLimits)
    # String-to-string environment mapping.
    env: dict[str, str] = field(default_factory=dict)
    # Root value; None by default (presumably the top-level input - confirm).
    root: Any = field(default=None)
    # Path components (keys or indices); starts empty.
    current_path: list[str | int] = field(default_factory=list)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# AST Node Types
|
|
116
|
+
@dataclass
class IdentityNode:
    """AST node for the identity filter ``.``."""

    # Node tag, fixed to "Identity"; not accepted by the constructor.
    type: str = field(init=False, default="Identity")
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@dataclass
class FieldNode:
    """AST node for field access, written ``.name`` in jq."""

    # Name of the field being accessed.
    name: str
    # Optional base node (presumably the expression the access is applied
    # to - confirm against the parser); None by default.
    base: "AstNode | None" = field(default=None)
    # Node tag, fixed to "Field"; not accepted by the constructor.
    type: str = field(init=False, default="Field")
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@dataclass
class IndexNode:
    """AST node for array/object indexing: ``.[n]`` or ``.["key"]``."""

    # Expression node that produces the index or key.
    index: "AstNode"
    # Optional base node (presumably the expression being indexed - confirm
    # against the parser); None by default.
    base: "AstNode | None" = field(default=None)
    # Node tag, fixed to "Index"; not accepted by the constructor.
    type: str = field(init=False, default="Index")
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@dataclass
class SliceNode:
    """AST node for an array slice, written ``.[start:end]``."""

    # Lower bound expression; None when not given.
    start: "AstNode | None" = field(default=None)
    # Upper bound expression; None when not given.
    end: "AstNode | None" = field(default=None)
    # Optional base node (presumably the expression being sliced - confirm
    # against the parser); None by default.
    base: "AstNode | None" = field(default=None)
    # Node tag, fixed to "Slice"; not accepted by the constructor.
    type: str = field(init=False, default="Slice")
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@dataclass
class IterateNode:
    """AST node for the iterator ``.[]``."""

    # Optional base node (presumably the expression being iterated - confirm
    # against the parser); None by default.
    base: "AstNode | None" = field(default=None)
    # Node tag, fixed to "Iterate"; not accepted by the constructor.
    type: str = field(init=False, default="Iterate")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@dataclass
class PipeNode:
    """AST node for the pipe operator ``|``."""

    # Expression on the left-hand side of the pipe.
    left: "AstNode"
    # Expression on the right-hand side of the pipe.
    right: "AstNode"
    # Node tag, fixed to "Pipe"; not accepted by the constructor.
    type: str = field(init=False, default="Pipe")
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
@dataclass
class CommaNode:
    """AST node for the comma operator, which yields multiple outputs."""

    # Expression on the left-hand side of the comma.
    left: "AstNode"
    # Expression on the right-hand side of the comma.
    right: "AstNode"
    # Node tag, fixed to "Comma"; not accepted by the constructor.
    type: str = field(init=False, default="Comma")
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@dataclass
class LiteralNode:
    """AST node for a literal: number, string, true, false or null."""

    # The literal's value.
    value: Any
    # Node tag, fixed to "Literal"; not accepted by the constructor.
    type: str = field(init=False, default="Literal")
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
@dataclass
class ArrayNode:
    """AST node for array construction, written ``[...]``."""

    # Single expression node for the array contents; None by default
    # (presumably an empty ``[]`` - confirm against the parser).
    elements: "AstNode | None" = field(default=None)
    # Node tag, fixed to "Array"; not accepted by the constructor.
    type: str = field(init=False, default="Array")
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
@dataclass
class ObjectEntry:
    """One key/value pair inside an object construction."""

    # Key: either a plain string or an expression node.
    key: "AstNode | str"
    # Expression node producing the entry's value.
    value: "AstNode"
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
@dataclass
class ObjectNode:
    """AST node for object construction, written ``{...}``."""

    # Entries of the object; each instance starts with its own empty list.
    entries: list[ObjectEntry] = field(default_factory=list)
    # Node tag, fixed to "Object"; not accepted by the constructor.
    type: str = field(init=False, default="Object")
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
@dataclass
class ParenNode:
    """AST node for a parenthesized expression."""

    # The expression inside the parentheses.
    expr: "AstNode"
    # Node tag, fixed to "Paren"; not accepted by the constructor.
    type: str = field(init=False, default="Paren")
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
@dataclass
class BinaryOpNode:
    """AST node for a binary operation.

    Covers arithmetic (+, -, *, /, %), comparison (==, !=, <, <=, >, >=),
    logical (and, or) and the alternative operator (//).
    """

    # Operator spelling: one of "+", "-", "*", "/", "%", "==", "!=", "<",
    # "<=", ">", ">=", "and", "or", "//".
    op: str
    # Left operand.
    left: "AstNode"
    # Right operand.
    right: "AstNode"
    # Node tag, fixed to "BinaryOp"; not accepted by the constructor.
    type: str = field(init=False, default="BinaryOp")
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
@dataclass
class UnaryOpNode:
    """AST node for a unary operation: negation (``-``) or ``not``."""

    # Operator spelling: "-" or "not".
    op: str
    # The operand expression.
    operand: "AstNode"
    # Node tag, fixed to "UnaryOp"; not accepted by the constructor.
    type: str = field(init=False, default="UnaryOp")
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
@dataclass
class ElifBranch:
    """A single ``elif`` arm of a conditional."""

    # Condition expression of this arm.
    cond: "AstNode"
    # Expression for the arm's ``then`` part.
    then: "AstNode"
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@dataclass
class CondNode:
    """AST node for a conditional: ``if``-``then``-``elif``-``else``-``end``."""

    # Condition expression of the ``if``.
    cond: "AstNode"
    # Expression for the ``then`` part.
    then: "AstNode"
    # ``elif`` arms in order; each instance starts with its own empty list.
    elifs: list[ElifBranch] = field(default_factory=list)
    # Expression for the ``else`` part; None when absent.  The trailing
    # underscore avoids the Python keyword.
    else_: "AstNode | None" = field(default=None)
    # Node tag, fixed to "Cond"; not accepted by the constructor.
    type: str = field(init=False, default="Cond")
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
@dataclass
class TryNode:
    """AST node for a ``try`` expression with an optional ``catch``."""

    # Expression being tried.
    body: "AstNode"
    # Handler expression; None when there is no ``catch``.
    catch: "AstNode | None" = field(default=None)
    # Node tag, fixed to "Try"; not accepted by the constructor.
    type: str = field(init=False, default="Try")
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
@dataclass
class CallNode:
    """AST node for a function call."""

    # Name of the function being called.
    name: str
    # Argument expressions; each instance starts with its own empty list.
    args: list["AstNode"] = field(default_factory=list)
    # Node tag, fixed to "Call"; not accepted by the constructor.
    type: str = field(init=False, default="Call")
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
@dataclass
class VarBindNode:
    """AST node for a variable binding: ``expr as $var | body``."""

    # Name of the variable being bound.
    name: str
    # Expression whose result is bound to the variable.
    value: "AstNode"
    # Expression evaluated with the binding in place.
    body: "AstNode"
    # Node tag, fixed to "VarBind"; not accepted by the constructor.
    type: str = field(init=False, default="VarBind")
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
@dataclass
class VarRefNode:
    """AST node for a variable reference, written ``$var``."""

    # Name of the referenced variable.
    name: str
    # Node tag, fixed to "VarRef"; not accepted by the constructor.
    type: str = field(init=False, default="VarRef")
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
@dataclass
class RecurseNode:
    """AST node for recursive descent, written ``..``."""

    # Node tag, fixed to "Recurse"; not accepted by the constructor.
    type: str = field(init=False, default="Recurse")
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
@dataclass
class OptionalNode:
    """AST node for the optional operator ``?``."""

    # Expression the ``?`` is applied to.
    expr: "AstNode"
    # Node tag, fixed to "Optional"; not accepted by the constructor.
    type: str = field(init=False, default="Optional")
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
@dataclass
class StringInterpNode:
    """AST node for a string that contains interpolation."""

    # Pieces of the string in order: literal text (str) or expression nodes.
    # Each instance starts with its own empty list.
    parts: list["str | AstNode"] = field(default_factory=list)
    # Node tag, fixed to "StringInterp"; not accepted by the constructor.
    type: str = field(init=False, default="StringInterp")
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
@dataclass
class UpdateOpNode:
    """AST node for an assignment/update: =, |=, +=, -=, *=, /=, %=, //=."""

    # Operator spelling: one of "=", "|=", "+=", "-=", "*=", "/=", "%=", "//=".
    op: str
    # Expression on the left of the operator (the path being updated).
    path: "AstNode"
    # Expression on the right of the operator.
    value: "AstNode"
    # Node tag, fixed to "UpdateOp"; not accepted by the constructor.
    type: str = field(init=False, default="UpdateOp")
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
@dataclass
class ReduceNode:
    """AST node for a ``reduce`` expression."""

    # Source expression of the reduction.
    expr: "AstNode"
    # Name of the variable bound for each item.
    var_name: str
    # Expression giving the initial value.
    init: "AstNode"
    # Expression giving the update step.
    update: "AstNode"
    # Node tag, fixed to "Reduce"; not accepted by the constructor.
    type: str = field(init=False, default="Reduce")
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
@dataclass
class ForeachNode:
    """AST node for a ``foreach`` expression."""

    # Source expression of the iteration.
    expr: "AstNode"
    # Name of the variable bound for each item.
    var_name: str
    # Expression giving the initial value.
    init: "AstNode"
    # Expression giving the update step.
    update: "AstNode"
    # Optional extraction expression; None when omitted.
    extract: "AstNode | None" = field(default=None)
    # Node tag, fixed to "Foreach"; not accepted by the constructor.
    type: str = field(init=False, default="Foreach")
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
# Union type for all AST nodes
|
|
348
|
+
AstNode = Union[
    # Paths and access
    IdentityNode, FieldNode, IndexNode, SliceNode, IterateNode,
    # Combinators
    PipeNode, CommaNode,
    # Values and construction
    LiteralNode, ArrayNode, ObjectNode, ParenNode,
    # Operators
    BinaryOpNode, UnaryOpNode,
    # Control flow and calls
    CondNode, TryNode, CallNode,
    # Variables
    VarBindNode, VarRefNode,
    # Remaining expression forms
    RecurseNode, OptionalNode, StringInterpNode, UpdateOpNode,
    ReduceNode, ForeachNode,
]
|