just-bash 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- just_bash/__init__.py +55 -0
- just_bash/ast/__init__.py +213 -0
- just_bash/ast/factory.py +320 -0
- just_bash/ast/types.py +953 -0
- just_bash/bash.py +220 -0
- just_bash/commands/__init__.py +23 -0
- just_bash/commands/argv/__init__.py +5 -0
- just_bash/commands/argv/argv.py +21 -0
- just_bash/commands/awk/__init__.py +5 -0
- just_bash/commands/awk/awk.py +1168 -0
- just_bash/commands/base64/__init__.py +5 -0
- just_bash/commands/base64/base64.py +138 -0
- just_bash/commands/basename/__init__.py +5 -0
- just_bash/commands/basename/basename.py +72 -0
- just_bash/commands/bash/__init__.py +5 -0
- just_bash/commands/bash/bash.py +188 -0
- just_bash/commands/cat/__init__.py +5 -0
- just_bash/commands/cat/cat.py +173 -0
- just_bash/commands/checksum/__init__.py +5 -0
- just_bash/commands/checksum/checksum.py +179 -0
- just_bash/commands/chmod/__init__.py +5 -0
- just_bash/commands/chmod/chmod.py +216 -0
- just_bash/commands/column/__init__.py +5 -0
- just_bash/commands/column/column.py +180 -0
- just_bash/commands/comm/__init__.py +5 -0
- just_bash/commands/comm/comm.py +150 -0
- just_bash/commands/compression/__init__.py +5 -0
- just_bash/commands/compression/compression.py +298 -0
- just_bash/commands/cp/__init__.py +5 -0
- just_bash/commands/cp/cp.py +149 -0
- just_bash/commands/curl/__init__.py +5 -0
- just_bash/commands/curl/curl.py +801 -0
- just_bash/commands/cut/__init__.py +5 -0
- just_bash/commands/cut/cut.py +327 -0
- just_bash/commands/date/__init__.py +5 -0
- just_bash/commands/date/date.py +258 -0
- just_bash/commands/diff/__init__.py +5 -0
- just_bash/commands/diff/diff.py +118 -0
- just_bash/commands/dirname/__init__.py +5 -0
- just_bash/commands/dirname/dirname.py +56 -0
- just_bash/commands/du/__init__.py +5 -0
- just_bash/commands/du/du.py +150 -0
- just_bash/commands/echo/__init__.py +5 -0
- just_bash/commands/echo/echo.py +125 -0
- just_bash/commands/env/__init__.py +5 -0
- just_bash/commands/env/env.py +163 -0
- just_bash/commands/expand/__init__.py +5 -0
- just_bash/commands/expand/expand.py +299 -0
- just_bash/commands/expr/__init__.py +5 -0
- just_bash/commands/expr/expr.py +273 -0
- just_bash/commands/file/__init__.py +5 -0
- just_bash/commands/file/file.py +274 -0
- just_bash/commands/find/__init__.py +5 -0
- just_bash/commands/find/find.py +623 -0
- just_bash/commands/fold/__init__.py +5 -0
- just_bash/commands/fold/fold.py +160 -0
- just_bash/commands/grep/__init__.py +5 -0
- just_bash/commands/grep/grep.py +418 -0
- just_bash/commands/head/__init__.py +5 -0
- just_bash/commands/head/head.py +167 -0
- just_bash/commands/help/__init__.py +5 -0
- just_bash/commands/help/help.py +67 -0
- just_bash/commands/hostname/__init__.py +5 -0
- just_bash/commands/hostname/hostname.py +21 -0
- just_bash/commands/html_to_markdown/__init__.py +5 -0
- just_bash/commands/html_to_markdown/html_to_markdown.py +191 -0
- just_bash/commands/join/__init__.py +5 -0
- just_bash/commands/join/join.py +252 -0
- just_bash/commands/jq/__init__.py +5 -0
- just_bash/commands/jq/jq.py +280 -0
- just_bash/commands/ln/__init__.py +5 -0
- just_bash/commands/ln/ln.py +127 -0
- just_bash/commands/ls/__init__.py +5 -0
- just_bash/commands/ls/ls.py +280 -0
- just_bash/commands/mkdir/__init__.py +5 -0
- just_bash/commands/mkdir/mkdir.py +92 -0
- just_bash/commands/mv/__init__.py +5 -0
- just_bash/commands/mv/mv.py +142 -0
- just_bash/commands/nl/__init__.py +5 -0
- just_bash/commands/nl/nl.py +180 -0
- just_bash/commands/od/__init__.py +5 -0
- just_bash/commands/od/od.py +157 -0
- just_bash/commands/paste/__init__.py +5 -0
- just_bash/commands/paste/paste.py +100 -0
- just_bash/commands/printf/__init__.py +5 -0
- just_bash/commands/printf/printf.py +157 -0
- just_bash/commands/pwd/__init__.py +5 -0
- just_bash/commands/pwd/pwd.py +23 -0
- just_bash/commands/read/__init__.py +5 -0
- just_bash/commands/read/read.py +185 -0
- just_bash/commands/readlink/__init__.py +5 -0
- just_bash/commands/readlink/readlink.py +86 -0
- just_bash/commands/registry.py +844 -0
- just_bash/commands/rev/__init__.py +5 -0
- just_bash/commands/rev/rev.py +74 -0
- just_bash/commands/rg/__init__.py +5 -0
- just_bash/commands/rg/rg.py +1048 -0
- just_bash/commands/rm/__init__.py +5 -0
- just_bash/commands/rm/rm.py +106 -0
- just_bash/commands/search_engine/__init__.py +13 -0
- just_bash/commands/search_engine/matcher.py +170 -0
- just_bash/commands/search_engine/regex.py +159 -0
- just_bash/commands/sed/__init__.py +5 -0
- just_bash/commands/sed/sed.py +863 -0
- just_bash/commands/seq/__init__.py +5 -0
- just_bash/commands/seq/seq.py +190 -0
- just_bash/commands/shell/__init__.py +5 -0
- just_bash/commands/shell/shell.py +206 -0
- just_bash/commands/sleep/__init__.py +5 -0
- just_bash/commands/sleep/sleep.py +62 -0
- just_bash/commands/sort/__init__.py +5 -0
- just_bash/commands/sort/sort.py +411 -0
- just_bash/commands/split/__init__.py +5 -0
- just_bash/commands/split/split.py +237 -0
- just_bash/commands/sqlite3/__init__.py +5 -0
- just_bash/commands/sqlite3/sqlite3_cmd.py +505 -0
- just_bash/commands/stat/__init__.py +5 -0
- just_bash/commands/stat/stat.py +150 -0
- just_bash/commands/strings/__init__.py +5 -0
- just_bash/commands/strings/strings.py +150 -0
- just_bash/commands/tac/__init__.py +5 -0
- just_bash/commands/tac/tac.py +158 -0
- just_bash/commands/tail/__init__.py +5 -0
- just_bash/commands/tail/tail.py +180 -0
- just_bash/commands/tar/__init__.py +5 -0
- just_bash/commands/tar/tar.py +1067 -0
- just_bash/commands/tee/__init__.py +5 -0
- just_bash/commands/tee/tee.py +63 -0
- just_bash/commands/timeout/__init__.py +5 -0
- just_bash/commands/timeout/timeout.py +188 -0
- just_bash/commands/touch/__init__.py +5 -0
- just_bash/commands/touch/touch.py +91 -0
- just_bash/commands/tr/__init__.py +5 -0
- just_bash/commands/tr/tr.py +297 -0
- just_bash/commands/tree/__init__.py +5 -0
- just_bash/commands/tree/tree.py +139 -0
- just_bash/commands/true/__init__.py +5 -0
- just_bash/commands/true/true.py +32 -0
- just_bash/commands/uniq/__init__.py +5 -0
- just_bash/commands/uniq/uniq.py +323 -0
- just_bash/commands/wc/__init__.py +5 -0
- just_bash/commands/wc/wc.py +169 -0
- just_bash/commands/which/__init__.py +5 -0
- just_bash/commands/which/which.py +52 -0
- just_bash/commands/xan/__init__.py +5 -0
- just_bash/commands/xan/xan.py +1663 -0
- just_bash/commands/xargs/__init__.py +5 -0
- just_bash/commands/xargs/xargs.py +136 -0
- just_bash/commands/yq/__init__.py +5 -0
- just_bash/commands/yq/yq.py +848 -0
- just_bash/fs/__init__.py +29 -0
- just_bash/fs/in_memory_fs.py +621 -0
- just_bash/fs/mountable_fs.py +504 -0
- just_bash/fs/overlay_fs.py +894 -0
- just_bash/fs/read_write_fs.py +455 -0
- just_bash/interpreter/__init__.py +37 -0
- just_bash/interpreter/builtins/__init__.py +92 -0
- just_bash/interpreter/builtins/alias.py +154 -0
- just_bash/interpreter/builtins/cd.py +76 -0
- just_bash/interpreter/builtins/control.py +127 -0
- just_bash/interpreter/builtins/declare.py +336 -0
- just_bash/interpreter/builtins/export.py +56 -0
- just_bash/interpreter/builtins/let.py +44 -0
- just_bash/interpreter/builtins/local.py +57 -0
- just_bash/interpreter/builtins/mapfile.py +152 -0
- just_bash/interpreter/builtins/misc.py +378 -0
- just_bash/interpreter/builtins/readonly.py +80 -0
- just_bash/interpreter/builtins/set.py +234 -0
- just_bash/interpreter/builtins/shopt.py +201 -0
- just_bash/interpreter/builtins/source.py +136 -0
- just_bash/interpreter/builtins/test.py +290 -0
- just_bash/interpreter/builtins/unset.py +53 -0
- just_bash/interpreter/conditionals.py +387 -0
- just_bash/interpreter/control_flow.py +381 -0
- just_bash/interpreter/errors.py +116 -0
- just_bash/interpreter/expansion.py +1156 -0
- just_bash/interpreter/interpreter.py +813 -0
- just_bash/interpreter/types.py +134 -0
- just_bash/network/__init__.py +1 -0
- just_bash/parser/__init__.py +39 -0
- just_bash/parser/lexer.py +948 -0
- just_bash/parser/parser.py +2162 -0
- just_bash/py.typed +0 -0
- just_bash/query_engine/__init__.py +83 -0
- just_bash/query_engine/builtins/__init__.py +1283 -0
- just_bash/query_engine/evaluator.py +578 -0
- just_bash/query_engine/parser.py +525 -0
- just_bash/query_engine/tokenizer.py +329 -0
- just_bash/query_engine/types.py +373 -0
- just_bash/types.py +180 -0
- just_bash-0.1.5.dist-info/METADATA +410 -0
- just_bash-0.1.5.dist-info/RECORD +193 -0
- just_bash-0.1.5.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,1168 @@
|
|
|
1
|
+
"""Awk command implementation.
|
|
2
|
+
|
|
3
|
+
Usage: awk [OPTIONS] 'program' [file ...]
|
|
4
|
+
|
|
5
|
+
Pattern scanning and processing language.
|
|
6
|
+
|
|
7
|
+
Options:
|
|
8
|
+
-F fs field separator (default: whitespace)
|
|
9
|
+
-v var=value assign variable before execution
|
|
10
|
+
-f progfile read program from file
|
|
11
|
+
|
|
12
|
+
Program structure:
|
|
13
|
+
pattern { action }
|
|
14
|
+
BEGIN { action }
|
|
15
|
+
END { action }
|
|
16
|
+
|
|
17
|
+
Variables:
|
|
18
|
+
$0 entire line
|
|
19
|
+
$1, $2, ... fields
|
|
20
|
+
NF number of fields
|
|
21
|
+
NR record number
|
|
22
|
+
FS field separator
|
|
23
|
+
OFS output field separator
|
|
24
|
+
ORS output record separator
|
|
25
|
+
|
|
26
|
+
Built-in functions:
|
|
27
|
+
length(s) string length
|
|
28
|
+
substr(s,i,n) substring
|
|
29
|
+
index(s,t) position of t in s
|
|
30
|
+
split(s,a,fs) split s into array a
|
|
31
|
+
sub(r,s) substitute first match
|
|
32
|
+
gsub(r,s) substitute all matches
|
|
33
|
+
tolower(s) convert to lowercase
|
|
34
|
+
toupper(s) convert to uppercase
|
|
35
|
+
printf(fmt,args...) formatted print
|
|
36
|
+
print print current line
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
import re
|
|
40
|
+
from dataclasses import dataclass, field
|
|
41
|
+
from typing import Any
|
|
42
|
+
from ...types import CommandContext, ExecResult
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
|
|
46
|
+
class AwkRule:
|
|
47
|
+
"""An awk pattern-action rule."""
|
|
48
|
+
|
|
49
|
+
pattern: str | None # None means always match, "BEGIN", "END", or pattern
|
|
50
|
+
action: str
|
|
51
|
+
is_regex: bool = False
|
|
52
|
+
regex: re.Pattern | None = None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
class AwkState:
    """Mutable state shared by all rules while one awk program runs."""

    # Variable name -> current value; each instance gets its own dict.
    variables: dict[str, Any] = field(default_factory=dict)
    # Text accumulated by print statements.
    output: str = ""
    # Set by ``next``: stop processing the current record.
    next_record: bool = False
    # Set by ``exit``: stop processing input.
    exit_program: bool = False
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class AwkCommand:
|
|
66
|
+
"""The awk command."""
|
|
67
|
+
|
|
68
|
+
name = "awk"
|
|
69
|
+
|
|
70
|
+
async def execute(self, args: list[str], ctx: CommandContext) -> ExecResult:
    """Execute the awk command.

    Parses the ``-F``, ``-v`` and ``-f`` options, parses the program text,
    runs BEGIN rules, feeds every record of every input through the
    remaining rules, then runs END rules.

    Args:
        args: Arguments following the command name.
        ctx: Execution context; ``ctx.fs``, ``ctx.cwd`` and ``ctx.stdin``
            are read here.

    Returns:
        An ExecResult carrying the accumulated output. ``exit_code`` is 1
        for option/program errors or when any input file could not be
        opened, otherwise 0.
    """

    def fail(message: str) -> ExecResult:
        # All usage/parse errors share the same "awk: ...\n" shape.
        return ExecResult(stdout="", stderr=f"awk: {message}\n", exit_code=1)

    field_sep = None
    program = None
    pre_vars: list[tuple[str, str]] = []
    files: list[str] = []

    # Parse arguments
    i = 0
    while i < len(args):
        arg = args[i]
        if arg == "-F":
            if i + 1 >= len(args):
                return fail("option requires an argument -- 'F'")
            i += 1
            field_sep = self._unescape_field_sep(args[i])
        elif arg.startswith("-F"):
            field_sep = self._unescape_field_sep(arg[2:])
        elif arg == "-v":
            if i + 1 >= len(args):
                return fail("option requires an argument -- 'v'")
            i += 1
            var_assign = args[i]
            # An operand without "=" is ignored, as before.
            if "=" in var_assign:
                name, val = var_assign.split("=", 1)
                pre_vars.append((name, val))
        elif arg == "-f":
            if i + 1 >= len(args):
                return fail("option requires an argument -- 'f'")
            i += 1
            try:
                path = ctx.fs.resolve_path(ctx.cwd, args[i])
                program = await ctx.fs.read_file(path)
            except FileNotFoundError:
                return fail(f"can't open file {args[i]}")
        elif arg.startswith("-") and len(arg) > 1:
            return fail(f"unknown option '{arg}'")
        elif program is None:
            program = arg
        else:
            files.append(arg)
        i += 1

    if program is None:
        return fail("no program given")

    # Parse program
    try:
        rules = self._parse_program(program)
    except ValueError as e:
        return fail(str(e))

    # Initialize state
    state = AwkState(
        variables={
            "FS": field_sep or " ",
            "OFS": " ",
            "ORS": "\n",
            "NR": 0,
            "NF": 0,
            "FILENAME": "",
        }
    )

    # Set pre-assigned variables
    for name, val in pre_vars:
        state.variables[name] = self._parse_value(val)

    # Execute BEGIN rules
    for rule in rules:
        if rule.pattern == "BEGIN":
            self._execute_action(rule.action, state, [])

    # Default to stdin
    if not files:
        files = ["-"]

    stderr = ""

    # Process files
    for fname in files:
        if state.exit_program:
            break

        if fname == "-":
            content = ctx.stdin
            state.variables["FILENAME"] = ""
        else:
            # Only the read can raise FileNotFoundError, so only it is guarded.
            try:
                path = ctx.fs.resolve_path(ctx.cwd, fname)
                content = await ctx.fs.read_file(path)
            except FileNotFoundError:
                stderr += f"awk: can't open file {fname}\n"
                continue
            state.variables["FILENAME"] = fname

        lines = content.split("\n")
        # A trailing newline terminates the last record; it is not an
        # extra empty record.
        if lines and lines[-1] == "":
            lines = lines[:-1]

        for line in lines:
            if state.exit_program:
                break

            state.variables["NR"] = state.variables.get("NR", 0) + 1
            state.next_record = False
            # sub/gsub store the rewritten $0 under "__line__"; drop it so
            # a bare `print` on this record does not emit an earlier
            # record's modified text.
            state.variables.pop("__line__", None)

            # Split into fields
            fs = state.variables.get("FS", " ")
            fields = line.split() if fs == " " else line.split(fs)
            state.variables["NF"] = len(fields)

            # Execute matching rules
            for rule in rules:
                if state.next_record or state.exit_program:
                    break
                if rule.pattern in ("BEGIN", "END"):
                    continue
                if self._pattern_matches(rule, line, state):
                    self._execute_action(rule.action, state, fields, line)

    # `next` only skips the rest of one record. If the final record ended
    # with `next`, the flag would otherwise make _execute_action abort
    # every END action immediately.
    state.next_record = False

    # Execute END rules
    for rule in rules:
        if rule.pattern == "END":
            self._execute_action(rule.action, state, [])

    if stderr:
        return ExecResult(stdout=state.output, stderr=stderr, exit_code=1)

    return ExecResult(stdout=state.output, stderr="", exit_code=0)
|
|
237
|
+
|
|
238
|
+
def _parse_program(self, program: str) -> list[AwkRule]:
    """Parse an awk program into a list of rules.

    The parser is deliberately simple: it repeatedly reads an optional
    pattern (``BEGIN``, ``END``, a ``/regex/`` literal, or expression text
    up to the next ``{`` or newline) followed by an optional
    brace-delimited action. A pattern with no action gets the default
    action ``print``.

    Args:
        program: The awk program text.

    Returns:
        The rules in source order.

    Raises:
        ValueError: If a ``/regex/`` pattern is unterminated or does not
            compile.
    """
    rules: list[AwkRule] = []

    program = program.strip()
    length = len(program)
    pos = 0

    def at_keyword(word: str, at: int) -> bool:
        # Match BEGIN/END only as whole words so identifiers such as
        # ENDED or BEGINNING are parsed as expression patterns.
        if not program.startswith(word, at):
            return False
        nxt = program[at + len(word):at + len(word) + 1]
        return not (nxt.isalnum() or nxt == "_")

    while pos < length:
        # Skip whitespace
        while pos < length and program[pos] in " \t\n":
            pos += 1

        if pos >= length:
            break

        # Skip a comment up to (not including) the newline
        if program[pos] == "#":
            while pos < length and program[pos] != "\n":
                pos += 1
            continue

        # Parse pattern
        pattern = None
        is_regex = False
        regex = None

        if at_keyword("BEGIN", pos):
            pattern = "BEGIN"
            pos += 5
        elif at_keyword("END", pos):
            pattern = "END"
            pos += 3
        elif program[pos] == "/":
            end = self._find_regex_end(program, pos + 1)
            if end == -1:
                # Without this, pos never advances and the loop would
                # append default rules forever.
                raise ValueError("unterminated regex")
            pattern = program[pos + 1:end]
            is_regex = True
            try:
                regex = re.compile(pattern)
            except re.error as e:
                raise ValueError(f"invalid regex: {e}") from e
            pos = end + 1
        elif program[pos] == "{":
            # No pattern, always match
            pass
        else:
            # Expression pattern: raw text up to the next "{" or newline
            expr_end = pos
            while expr_end < length and program[expr_end] not in "{\n":
                expr_end += 1
            pattern = program[pos:expr_end].strip()
            if pattern:
                pos = expr_end
            else:
                pattern = None

        # Skip whitespace
        while pos < length and program[pos] in " \t\n":
            pos += 1

        if pos >= length:
            # A trailing pattern with no action (e.g. `/foo/` or `NR==1`)
            # still gets the default print action. A bare BEGIN/END is
            # dropped, as before.
            if pattern is not None and pattern not in ("BEGIN", "END"):
                rules.append(
                    AwkRule(pattern=pattern, action="print", is_regex=is_regex, regex=regex)
                )
            break

        # Parse action
        if program[pos] == "{":
            brace_count = 1
            start = pos + 1
            pos += 1
            while pos < length and brace_count > 0:
                ch = program[pos]
                if ch == "{":
                    brace_count += 1
                elif ch == "}":
                    brace_count -= 1
                elif ch == '"':
                    # Skip a double-quoted string so braces inside it are
                    # not counted; a backslash escapes the next character.
                    pos += 1
                    while pos < length and program[pos] != '"':
                        if program[pos] == "\\":
                            pos += 1
                        pos += 1
                elif ch == "'":
                    pos += 1
                    while pos < length and program[pos] != "'":
                        pos += 1
                pos += 1

            if brace_count == 0:
                # pos is one past the closing brace
                action = program[start:pos - 1].strip()
            else:
                # Missing closing brace: keep the whole remainder instead
                # of cutting off its last character.
                action = program[start:].strip()
            rules.append(AwkRule(pattern=pattern, action=action, is_regex=is_regex, regex=regex))
        else:
            # Default action is print $0
            rules.append(AwkRule(pattern=pattern, action="print", is_regex=is_regex, regex=regex))

    return rules
|
|
333
|
+
|
|
334
|
+
def _find_regex_end(self, s: str, start: int) -> int:
|
|
335
|
+
"""Find the end of a regex pattern."""
|
|
336
|
+
pos = start
|
|
337
|
+
while pos < len(s):
|
|
338
|
+
if s[pos] == "\\":
|
|
339
|
+
pos += 2
|
|
340
|
+
elif s[pos] == "/":
|
|
341
|
+
return pos
|
|
342
|
+
else:
|
|
343
|
+
pos += 1
|
|
344
|
+
return -1
|
|
345
|
+
|
|
346
|
+
def _pattern_matches(self, rule: AwkRule, line: str, state: AwkState) -> bool:
    """Check whether a rule's pattern matches the current record.

    A rule with no pattern always matches, and a ``/regex/`` rule is
    searched against ``line``. Any other pattern is treated as a simple
    expression holding at most one operator, tried in this order: ``~`` /
    ``!~``, ``==``, ``!=``, ``>=``, ``<=``, ``>``, ``<``. A pattern with
    no operator matches when it evaluates to a non-empty string or a
    truthy value.

    Args:
        rule: The rule whose pattern is tested.
        line: The current record text.
        state: Execution state; ``FS`` is read to split ``line`` into
            fields for field references in the pattern.

    Returns:
        True when the pattern matches.
    """
    if rule.pattern is None:
        return True

    if rule.is_regex and rule.regex:
        return bool(rule.regex.search(line))

    pattern = rule.pattern

    # Field references ($1, $NF, ...) need the record's fields; without
    # them every field evaluates to "". Split the same way execute does.
    fs = state.variables.get("FS", " ")
    fields = line.split() if fs == " " else line.split(fs)

    def evaluate(text: str) -> Any:
        return self._eval_expr(text.strip(), state, line, fields)

    def is_string_literal(text: str) -> bool:
        return text.strip().startswith('"')

    def values_equal(left_text: str, right_text: str) -> bool:
        left = evaluate(left_text)
        right = evaluate(right_text)
        if left == right:
            return True
        # A quoted operand forces string comparison, which already failed.
        if is_string_literal(left_text) or is_string_literal(right_text):
            return False
        # Fields are strings, so `$1 == 5` must compare numerically.
        try:
            return float(left) == float(right)
        except (TypeError, ValueError):
            return False

    # Regex match / non-match
    tilde = pattern.find("~")
    if tilde != -1:
        negated = tilde > 0 and pattern[tilde - 1] == "!"
        left = evaluate(pattern[:tilde - 1] if negated else pattern[:tilde])
        right = pattern[tilde + 1:].strip().strip("/")
        try:
            matched = bool(re.search(right, str(left)))
        except re.error:
            return False
        return not matched if negated else matched

    if "==" in pattern:
        left_text, right_text = pattern.split("==", 1)
        return values_equal(left_text, right_text)

    if "!=" in pattern:
        left_text, right_text = pattern.split("!=", 1)
        return not values_equal(left_text, right_text)

    # Two-character operators come first so ">=" is not split on ">".
    orderings = (
        (">=", lambda a, b: a >= b),
        ("<=", lambda a, b: a <= b),
        (">", lambda a, b: a > b),
        ("<", lambda a, b: a < b),
    )
    for symbol, holds in orderings:
        if symbol in pattern:
            left_text, right_text = pattern.split(symbol, 1)
            left = evaluate(left_text)
            right = evaluate(right_text)
            try:
                return holds(float(left), float(right))
            except (TypeError, ValueError):
                # Not both numeric: fall back to string ordering.
                return holds(str(left), str(right))

    # Just evaluate as truthy
    result = evaluate(pattern)
    if isinstance(result, str):
        return len(result) > 0
    return bool(result)
|
|
403
|
+
|
|
404
|
+
def _execute_action(
    self, action: str, state: AwkState, fields: list[str], line: str = ""
) -> None:
    """Run the statements of an action block in order.

    Execution stops as soon as ``state.next_record`` or
    ``state.exit_program`` is set. Blank statements are skipped.
    """
    for raw in self._split_statements(action):
        if state.next_record or state.exit_program:
            return

        statement = raw.strip()
        if statement:
            self._execute_statement(statement, state, fields, line)
|
|
420
|
+
|
|
421
|
+
def _split_statements(self, action: str) -> list[str]:
|
|
422
|
+
"""Split action into individual statements."""
|
|
423
|
+
statements = []
|
|
424
|
+
current = ""
|
|
425
|
+
brace_depth = 0
|
|
426
|
+
paren_depth = 0
|
|
427
|
+
in_string = False
|
|
428
|
+
escape = False
|
|
429
|
+
|
|
430
|
+
for char in action:
|
|
431
|
+
if escape:
|
|
432
|
+
current += char
|
|
433
|
+
escape = False
|
|
434
|
+
continue
|
|
435
|
+
|
|
436
|
+
if char == "\\":
|
|
437
|
+
escape = True
|
|
438
|
+
current += char
|
|
439
|
+
continue
|
|
440
|
+
|
|
441
|
+
if char == '"' and not in_string:
|
|
442
|
+
in_string = True
|
|
443
|
+
current += char
|
|
444
|
+
elif char == '"' and in_string:
|
|
445
|
+
in_string = False
|
|
446
|
+
current += char
|
|
447
|
+
elif in_string:
|
|
448
|
+
current += char
|
|
449
|
+
elif char == "{":
|
|
450
|
+
brace_depth += 1
|
|
451
|
+
current += char
|
|
452
|
+
elif char == "}":
|
|
453
|
+
brace_depth -= 1
|
|
454
|
+
current += char
|
|
455
|
+
elif char == "(":
|
|
456
|
+
paren_depth += 1
|
|
457
|
+
current += char
|
|
458
|
+
elif char == ")":
|
|
459
|
+
paren_depth -= 1
|
|
460
|
+
current += char
|
|
461
|
+
elif char in ";\n" and brace_depth == 0 and paren_depth == 0:
|
|
462
|
+
if current.strip():
|
|
463
|
+
statements.append(current.strip())
|
|
464
|
+
current = ""
|
|
465
|
+
else:
|
|
466
|
+
current += char
|
|
467
|
+
|
|
468
|
+
if current.strip():
|
|
469
|
+
statements.append(current.strip())
|
|
470
|
+
|
|
471
|
+
return statements
|
|
472
|
+
|
|
473
|
+
def _execute_statement(
    self, stmt: str, state: AwkState, fields: list[str], line: str
) -> None:
    """Execute a single awk statement.

    Handles, in order: ``next``, ``exit``, ``print``, ``printf``, ``if``,
    ``for``, ``while``, ``gsub(...)``, ``sub(...)``, compound and simple
    assignment, field assignment (``$N = value``), and postfix ``++`` /
    ``--``. Statements matching none of these are ignored.

    Args:
        stmt: The statement text.
        state: Execution state; variables, output and control flags are
            updated in place.
        fields: Fields of the current record; mutated in place by field
            assignment and by sub/gsub on ``$0``.
        line: The current record text.
    """
    stmt = stmt.strip()

    def starts_with_keyword(word: str) -> bool:
        # Require a token boundary after the keyword so identifiers such
        # as `format`, `exitcode` or `iffy` are not taken for statements.
        if not stmt.startswith(word):
            return False
        nxt = stmt[len(word):len(word) + 1]
        return not (nxt.isalnum() or nxt == "_")

    def to_number(value: Any) -> float:
        # Non-numeric values count as 0, independently per operand.
        try:
            return float(value)
        except (ValueError, TypeError):
            return 0

    def split_fields(text: str) -> list[str]:
        fs = state.variables.get("FS", " ")
        return text.split() if fs == " " else text.split(fs)

    def substitute(call_args: str, count: int) -> None:
        # Shared body of sub (count=1) and gsub (count=0, i.e. all).
        sub_args = self._split_args(call_args)
        if len(sub_args) < 2:
            return
        pattern = sub_args[0].strip()
        if pattern.startswith("/") and pattern.endswith("/"):
            pattern = pattern[1:-1]
        replacement = str(self._eval_expr(sub_args[1], state, line, fields))
        # Default target is $0 (the line)
        target_var = sub_args[2].strip() if len(sub_args) >= 3 else None
        try:
            if target_var:
                original = str(state.variables.get(target_var, ""))
                state.variables[target_var] = re.sub(
                    pattern, replacement, original, count=count
                )
            else:
                # Modifying $0: re-split the fields in place so later
                # statements on this record see the new values.
                new_line = re.sub(pattern, replacement, line, count=count)
                new_fields = split_fields(new_line)
                fields.clear()
                fields.extend(new_fields)
                state.variables["NF"] = len(new_fields)
                # Store the modified line for a later bare `print`
                state.variables["__line__"] = new_line
        except re.error:
            # An invalid pattern or replacement leaves the target as is.
            pass

    # Handle next
    if stmt == "next":
        state.next_record = True
        return

    # Handle exit
    if starts_with_keyword("exit"):
        state.exit_program = True
        return

    # Handle print
    if stmt == "print" or stmt == "print $0":
        # Use modified line if gsub/sub was called
        current_line = state.variables.get("__line__", line)
        state.output += current_line + state.variables.get("ORS", "\n")
        return

    if stmt.startswith("print "):
        print_args = stmt[6:].strip()
        values = self._parse_print_args(print_args, state, fields, line)
        ofs = state.variables.get("OFS", " ")
        ors = state.variables.get("ORS", "\n")
        state.output += ofs.join(self._format_number(v) for v in values) + ors
        return

    # Handle printf
    if starts_with_keyword("printf"):
        match = re.match(r"printf\s*\(?\s*(.+?)\s*\)?$", stmt)
        if match:
            self._execute_printf(match.group(1), state, fields, line)
        return

    # Handle if statement
    if starts_with_keyword("if"):
        self._execute_if(stmt, state, fields, line)
        return

    # Handle for statement
    if starts_with_keyword("for"):
        self._execute_for(stmt, state, fields, line)
        return

    # Handle while statement
    if starts_with_keyword("while"):
        self._execute_while(stmt, state, fields, line)
        return

    # Handle gsub (global substitution) and sub (single substitution)
    for func_name, count in (("gsub", 0), ("sub", 1)):
        if stmt.startswith(func_name + "("):
            match = re.match(func_name + r"\s*\((.+)\)", stmt)
            if match:
                substitute(match.group(1), count)
            return

    # Handle assignment
    if "=" in stmt and "==" not in stmt and "!=" not in stmt:
        # Handle += -= *= /=
        for op in ("+=", "-=", "*=", "/="):
            if op in stmt:
                target, _, expr = stmt.partition(op)
                var = target.strip()
                val = to_number(self._eval_expr(expr.strip(), state, line, fields))
                current = to_number(state.variables.get(var, 0))
                if op == "+=":
                    state.variables[var] = current + val
                elif op == "-=":
                    state.variables[var] = current - val
                elif op == "*=":
                    state.variables[var] = current * val
                else:
                    # Division by zero yields 0 rather than raising.
                    state.variables[var] = current / val if val != 0 else 0
                return

        # Simple assignment
        match = re.match(r"(\w+)\s*=\s*(.+)", stmt)
        if match:
            var = match.group(1)
            state.variables[var] = self._eval_expr(
                match.group(2).strip(), state, line, fields
            )
            return

        # Field assignment ($N = val)
        match = re.match(r"\$(\d+)\s*=\s*(.+)", stmt)
        if match:
            field_num = int(match.group(1))
            val = self._eval_expr(match.group(2).strip(), state, line, fields)
            # Extend fields if necessary
            while len(fields) < field_num:
                fields.append("")
            if field_num > 0:
                fields[field_num - 1] = str(val)
            # Assigning past the last field grows the record.
            state.variables["NF"] = len(fields)
            return

    # Handle increment/decrement
    if stmt.endswith("++"):
        var = stmt[:-2].strip()
        current = state.variables.get(var, 0)
        try:
            state.variables[var] = float(current) + 1
        except (ValueError, TypeError):
            state.variables[var] = 1
        return

    if stmt.endswith("--"):
        var = stmt[:-2].strip()
        current = state.variables.get(var, 0)
        try:
            state.variables[var] = float(current) - 1
        except (ValueError, TypeError):
            state.variables[var] = -1
        return
|
|
662
|
+
|
|
663
|
+
def _eval_expr(
|
|
664
|
+
self, expr: str, state: AwkState, line: str, fields: list[str] | None = None
|
|
665
|
+
) -> Any:
|
|
666
|
+
"""Evaluate an awk expression."""
|
|
667
|
+
expr = expr.strip()
|
|
668
|
+
|
|
669
|
+
if fields is None:
|
|
670
|
+
fields = []
|
|
671
|
+
|
|
672
|
+
# String literal
|
|
673
|
+
if expr.startswith('"') and expr.endswith('"'):
|
|
674
|
+
return self._unescape_string(expr[1:-1])
|
|
675
|
+
|
|
676
|
+
# Number
|
|
677
|
+
try:
|
|
678
|
+
if "." in expr:
|
|
679
|
+
return float(expr)
|
|
680
|
+
return int(expr)
|
|
681
|
+
except ValueError:
|
|
682
|
+
pass
|
|
683
|
+
|
|
684
|
+
# Field reference
|
|
685
|
+
if expr.startswith("$"):
|
|
686
|
+
rest = expr[1:]
|
|
687
|
+
if rest == "0":
|
|
688
|
+
return line
|
|
689
|
+
try:
|
|
690
|
+
idx = int(rest)
|
|
691
|
+
if 0 < idx <= len(fields):
|
|
692
|
+
return fields[idx - 1]
|
|
693
|
+
return ""
|
|
694
|
+
except ValueError:
|
|
695
|
+
# Could be $NF or $(expr)
|
|
696
|
+
if rest == "NF":
|
|
697
|
+
nf = len(fields)
|
|
698
|
+
if nf > 0:
|
|
699
|
+
return fields[nf - 1]
|
|
700
|
+
return ""
|
|
701
|
+
|
|
702
|
+
# Built-in variables
|
|
703
|
+
if expr in ("NR", "NF", "FS", "OFS", "ORS", "FILENAME"):
|
|
704
|
+
return state.variables.get(expr, "")
|
|
705
|
+
|
|
706
|
+
# Array element access (arr[idx])
|
|
707
|
+
array_match = re.match(r"^([a-zA-Z_]\w*)\[(.+)\]$", expr)
|
|
708
|
+
if array_match:
|
|
709
|
+
arr_name = array_match.group(1)
|
|
710
|
+
idx = self._eval_expr(array_match.group(2), state, line, fields)
|
|
711
|
+
key = f"{arr_name}[{idx}]"
|
|
712
|
+
return state.variables.get(key, "")
|
|
713
|
+
|
|
714
|
+
# User variables
|
|
715
|
+
if re.match(r"^[a-zA-Z_]\w*$", expr):
|
|
716
|
+
return state.variables.get(expr, "")
|
|
717
|
+
|
|
718
|
+
# Built-in functions
|
|
719
|
+
if expr.startswith("length("):
|
|
720
|
+
match = re.match(r"length\((.+)\)", expr)
|
|
721
|
+
if match:
|
|
722
|
+
arg = self._eval_expr(match.group(1), state, line, fields)
|
|
723
|
+
return len(str(arg))
|
|
724
|
+
|
|
725
|
+
if expr.startswith("substr("):
|
|
726
|
+
match = re.match(r"substr\((.+)\)", expr)
|
|
727
|
+
if match:
|
|
728
|
+
args = self._split_args(match.group(1))
|
|
729
|
+
if len(args) >= 2:
|
|
730
|
+
s = str(self._eval_expr(args[0], state, line, fields))
|
|
731
|
+
start = int(self._eval_expr(args[1], state, line, fields)) - 1
|
|
732
|
+
if len(args) >= 3:
|
|
733
|
+
length = int(self._eval_expr(args[2], state, line, fields))
|
|
734
|
+
return s[start:start + length]
|
|
735
|
+
return s[start:]
|
|
736
|
+
return ""
|
|
737
|
+
|
|
738
|
+
if expr.startswith("index("):
|
|
739
|
+
match = re.match(r"index\((.+)\)", expr)
|
|
740
|
+
if match:
|
|
741
|
+
args = self._split_args(match.group(1))
|
|
742
|
+
if len(args) >= 2:
|
|
743
|
+
s = str(self._eval_expr(args[0], state, line, fields))
|
|
744
|
+
t = str(self._eval_expr(args[1], state, line, fields))
|
|
745
|
+
idx = s.find(t)
|
|
746
|
+
return idx + 1 if idx >= 0 else 0
|
|
747
|
+
return 0
|
|
748
|
+
|
|
749
|
+
if expr.startswith("tolower("):
|
|
750
|
+
match = re.match(r"tolower\((.+)\)", expr)
|
|
751
|
+
if match:
|
|
752
|
+
arg = self._eval_expr(match.group(1), state, line, fields)
|
|
753
|
+
return str(arg).lower()
|
|
754
|
+
return ""
|
|
755
|
+
|
|
756
|
+
if expr.startswith("toupper("):
|
|
757
|
+
match = re.match(r"toupper\((.+)\)", expr)
|
|
758
|
+
if match:
|
|
759
|
+
arg = self._eval_expr(match.group(1), state, line, fields)
|
|
760
|
+
return str(arg).upper()
|
|
761
|
+
return ""
|
|
762
|
+
|
|
763
|
+
if expr.startswith("int("):
|
|
764
|
+
match = re.match(r"int\((.+)\)", expr)
|
|
765
|
+
if match:
|
|
766
|
+
arg = self._eval_expr(match.group(1), state, line, fields)
|
|
767
|
+
try:
|
|
768
|
+
return int(float(arg))
|
|
769
|
+
except (ValueError, TypeError):
|
|
770
|
+
return 0
|
|
771
|
+
return 0
|
|
772
|
+
|
|
773
|
+
if expr.startswith("sqrt("):
|
|
774
|
+
match = re.match(r"sqrt\((.+)\)", expr)
|
|
775
|
+
if match:
|
|
776
|
+
import math
|
|
777
|
+
arg = self._eval_expr(match.group(1), state, line, fields)
|
|
778
|
+
try:
|
|
779
|
+
return math.sqrt(float(arg))
|
|
780
|
+
except (ValueError, TypeError):
|
|
781
|
+
return 0
|
|
782
|
+
return 0
|
|
783
|
+
|
|
784
|
+
if expr.startswith("sin("):
|
|
785
|
+
match = re.match(r"sin\((.+)\)", expr)
|
|
786
|
+
if match:
|
|
787
|
+
import math
|
|
788
|
+
arg = self._eval_expr(match.group(1), state, line, fields)
|
|
789
|
+
try:
|
|
790
|
+
return math.sin(float(arg))
|
|
791
|
+
except (ValueError, TypeError):
|
|
792
|
+
return 0
|
|
793
|
+
return 0
|
|
794
|
+
|
|
795
|
+
if expr.startswith("cos("):
|
|
796
|
+
match = re.match(r"cos\((.+)\)", expr)
|
|
797
|
+
if match:
|
|
798
|
+
import math
|
|
799
|
+
arg = self._eval_expr(match.group(1), state, line, fields)
|
|
800
|
+
try:
|
|
801
|
+
return math.cos(float(arg))
|
|
802
|
+
except (ValueError, TypeError):
|
|
803
|
+
return 0
|
|
804
|
+
return 0
|
|
805
|
+
|
|
806
|
+
if expr.startswith("log("):
|
|
807
|
+
match = re.match(r"log\((.+)\)", expr)
|
|
808
|
+
if match:
|
|
809
|
+
import math
|
|
810
|
+
arg = self._eval_expr(match.group(1), state, line, fields)
|
|
811
|
+
try:
|
|
812
|
+
return math.log(float(arg))
|
|
813
|
+
except (ValueError, TypeError):
|
|
814
|
+
return 0
|
|
815
|
+
return 0
|
|
816
|
+
|
|
817
|
+
if expr.startswith("exp("):
|
|
818
|
+
match = re.match(r"exp\((.+)\)", expr)
|
|
819
|
+
if match:
|
|
820
|
+
import math
|
|
821
|
+
arg = self._eval_expr(match.group(1), state, line, fields)
|
|
822
|
+
try:
|
|
823
|
+
return math.exp(float(arg))
|
|
824
|
+
except (ValueError, TypeError):
|
|
825
|
+
return 0
|
|
826
|
+
return 0
|
|
827
|
+
|
|
828
|
+
if expr.startswith("split("):
|
|
829
|
+
match = re.match(r"split\((.+)\)", expr)
|
|
830
|
+
if match:
|
|
831
|
+
args = self._split_args(match.group(1))
|
|
832
|
+
if len(args) >= 2:
|
|
833
|
+
s = str(self._eval_expr(args[0], state, line, fields))
|
|
834
|
+
arr_name = args[1].strip()
|
|
835
|
+
sep = state.variables.get("FS", " ")
|
|
836
|
+
if len(args) >= 3:
|
|
837
|
+
sep = str(self._eval_expr(args[2], state, line, fields))
|
|
838
|
+
if sep == " ":
|
|
839
|
+
parts = s.split()
|
|
840
|
+
else:
|
|
841
|
+
parts = s.split(sep)
|
|
842
|
+
# Store array elements
|
|
843
|
+
for i, part in enumerate(parts):
|
|
844
|
+
state.variables[f"{arr_name}[{i+1}]"] = part
|
|
845
|
+
return len(parts)
|
|
846
|
+
return 0
|
|
847
|
+
|
|
848
|
+
# Arithmetic - check for operators (including with spaces like "2 + 3")
|
|
849
|
+
for op in ["+", "-", "*", "/", "%"]:
|
|
850
|
+
if op in expr:
|
|
851
|
+
# Find the operator not in a function call
|
|
852
|
+
depth = 0
|
|
853
|
+
in_str = False
|
|
854
|
+
for i, c in enumerate(expr):
|
|
855
|
+
if c == '"' and (i == 0 or expr[i-1] != '\\'):
|
|
856
|
+
in_str = not in_str
|
|
857
|
+
elif not in_str:
|
|
858
|
+
if c == "(":
|
|
859
|
+
depth += 1
|
|
860
|
+
elif c == ")":
|
|
861
|
+
depth -= 1
|
|
862
|
+
elif c == op and depth == 0 and i > 0:
|
|
863
|
+
left = self._eval_expr(expr[:i].strip(), state, line, fields)
|
|
864
|
+
right = self._eval_expr(expr[i + 1:].strip(), state, line, fields)
|
|
865
|
+
try:
|
|
866
|
+
left = float(left)
|
|
867
|
+
right = float(right)
|
|
868
|
+
if op == "+":
|
|
869
|
+
return left + right
|
|
870
|
+
elif op == "-":
|
|
871
|
+
return left - right
|
|
872
|
+
elif op == "*":
|
|
873
|
+
return left * right
|
|
874
|
+
elif op == "/":
|
|
875
|
+
return left / right if right != 0 else 0
|
|
876
|
+
elif op == "%":
|
|
877
|
+
return left % right if right != 0 else 0
|
|
878
|
+
except (ValueError, TypeError):
|
|
879
|
+
return 0
|
|
880
|
+
|
|
881
|
+
# String concatenation (spaces between expressions - no operators)
|
|
882
|
+
if " " in expr and not expr.startswith('"'):
|
|
883
|
+
parts = expr.split()
|
|
884
|
+
result = ""
|
|
885
|
+
for part in parts:
|
|
886
|
+
val = self._eval_expr(part, state, line, fields)
|
|
887
|
+
result += str(val)
|
|
888
|
+
return result
|
|
889
|
+
|
|
890
|
+
return expr
|
|
891
|
+
|
|
892
|
+
def _split_args(self, args_str: str) -> list[str]:
|
|
893
|
+
"""Split function arguments."""
|
|
894
|
+
args = []
|
|
895
|
+
current = ""
|
|
896
|
+
depth = 0
|
|
897
|
+
in_string = False
|
|
898
|
+
|
|
899
|
+
for char in args_str:
|
|
900
|
+
if char == '"' and not in_string:
|
|
901
|
+
in_string = True
|
|
902
|
+
current += char
|
|
903
|
+
elif char == '"' and in_string:
|
|
904
|
+
in_string = False
|
|
905
|
+
current += char
|
|
906
|
+
elif in_string:
|
|
907
|
+
current += char
|
|
908
|
+
elif char == "(":
|
|
909
|
+
depth += 1
|
|
910
|
+
current += char
|
|
911
|
+
elif char == ")":
|
|
912
|
+
depth -= 1
|
|
913
|
+
current += char
|
|
914
|
+
elif char == "," and depth == 0:
|
|
915
|
+
args.append(current.strip())
|
|
916
|
+
current = ""
|
|
917
|
+
else:
|
|
918
|
+
current += char
|
|
919
|
+
|
|
920
|
+
if current.strip():
|
|
921
|
+
args.append(current.strip())
|
|
922
|
+
|
|
923
|
+
return args
|
|
924
|
+
|
|
925
|
+
def _parse_print_args(
    self, args: str, state: AwkState, fields: list[str], line: str
) -> list[Any]:
    """Evaluate the comma-separated operands of a print statement.

    The argument text is cut into top-level pieces with ``_split_args``
    and every piece is evaluated with ``_eval_expr``; the values are
    returned in source order.
    """
    return [
        self._eval_expr(piece.strip(), state, line, fields)
        for piece in self._split_args(args)
    ]
|
|
938
|
+
|
|
939
|
+
def _execute_printf(
    self, args_str: str, state: AwkState, fields: list[str], line: str
) -> None:
    """Execute a printf statement and append the result to ``state.output``.

    The first argument is the format string and the remaining arguments
    are the values consumed, left to right, by its conversion
    specifications (optional flags ``-+0 #``, width, ``.precision`` and a
    conversion character).

    * ``%%`` emits a literal percent sign and consumes no argument.
    * ``d i o u x X`` and ``e E f F g G`` coerce the value to a number,
      with non-numeric input counting as 0.
    * ``s`` formats the value through ``_format_number`` so integral
      floats print without a decimal point.
    * ``c`` prints the character with the given code for a number, or
      the first character of a string.
    * A specification with no argument left is dropped.
    * Backslash escapes ``\\n``, ``\\t`` and ``\\\\`` still present in the
      format are decoded; any other escaped character is kept as is.

    Args:
        args_str: Raw text following the ``printf`` keyword.
        state: Interpreter state whose ``output`` string is extended.
        fields: Fields of the current record.
        line: Current record.
    """

    def as_float(value: Any) -> float:
        """Coerce *value* to float, treating non-numeric input as 0."""
        try:
            return float(value)
        except (ValueError, TypeError):
            return 0.0

    args = self._split_args(args_str)
    if not args:
        return

    fmt = str(self._eval_expr(args[0], state, line, fields))
    values = [self._eval_expr(a, state, line, fields) for a in args[1:]]

    pieces = []
    size = len(fmt)
    i = 0
    val_idx = 0

    while i < size:
        ch = fmt[i]

        if ch == "\\" and i + 1 < size:
            follower = fmt[i + 1]
            pieces.append({"n": "\n", "t": "\t"}.get(follower, follower))
            i += 2
            continue

        if ch != "%" or i + 1 >= size:
            pieces.append(ch)
            i += 1
            continue

        # Scan flags, width and precision to find the conversion character.
        j = i + 1
        while j < size and fmt[j] in "-+0 #":
            j += 1
        while j < size and fmt[j].isdigit():
            j += 1
        if j < size and fmt[j] == ".":
            j += 1
            while j < size and fmt[j].isdigit():
                j += 1
        if j >= size:
            # No conversion character: the '%' is ordinary text.
            pieces.append(ch)
            i += 1
            continue

        spec = fmt[i:j + 1]
        conv = fmt[j]
        i = j + 1

        if conv == "%":
            # Must be checked before an argument is taken.
            pieces.append("%")
            continue
        if val_idx >= len(values):
            continue

        val = values[val_idx]
        val_idx += 1
        try:
            if conv in "diouxX":
                piece = spec % int(as_float(val))
            elif conv in "eEfFgG":
                piece = spec % as_float(val)
            elif conv == "s":
                piece = spec % self._format_number(val)
            elif conv == "c":
                is_number = isinstance(val, (int, float))
                piece = spec % (chr(int(val)) if is_number else str(val)[:1])
            else:
                piece = spec % val
        except (ValueError, TypeError, OverflowError):
            # Unsupported conversion or unrepresentable value: print it raw.
            piece = str(val)
        pieces.append(piece)

    state.output += "".join(pieces)
|
|
1008
|
+
|
|
1009
|
+
def _execute_if(
    self, stmt: str, state: AwkState, fields: list[str], line: str
) -> None:
    """Execute an if statement.

    Recognised shape: ``if (condition) { action } [else { action }]``,
    where the else part may also be another ``if`` statement.  The
    condition and the bodies are located by balancing parentheses and
    braces outside double-quoted strings, so bodies may contain nested
    blocks.  When balancing fails (for instance because of an unpaired
    quote) the statement is parsed with the plain regular expression
    instead.  A statement matching neither is ignored.

    Args:
        stmt: Statement text starting with ``if``.
        state: Interpreter state passed through to the executed action.
        fields: Fields of the current record.
        line: Current record.
    """

    def closing_index(text: str, start: int) -> int:
        """Return the index of the delimiter balancing ``text[start]``, or -1."""
        opener = text[start]
        closer = ")" if opener == "(" else "}"
        depth = 0
        in_string = False
        pos = start
        while pos < len(text):
            ch = text[pos]
            if in_string:
                if ch == "\\":
                    pos += 1  # skip the escaped character
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == opener:
                depth += 1
            elif ch == closer:
                depth -= 1
                if depth == 0:
                    return pos
            pos += 1
        return -1

    condition = then_action = else_action = else_if = None

    head = re.match(r"if\s*\(", stmt)
    if head:
        cond_close = closing_index(stmt, head.end() - 1)
        body_open = None
        if cond_close >= 0:
            body_open = re.compile(r"\s*\{").match(stmt, cond_close + 1)
        body_close = closing_index(stmt, body_open.end() - 1) if body_open else -1
        if body_close >= 0:
            condition = stmt[head.end():cond_close]
            then_action = stmt[body_open.end():body_close]
            tail = stmt[body_close + 1:].lstrip()
            if tail.startswith("else"):
                tail = tail[len("else"):].lstrip()
                if tail.startswith("{"):
                    else_close = closing_index(tail, 0)
                    if else_close >= 0:
                        else_action = tail[1:else_close]
                elif re.match(r"if\s*\(", tail):
                    else_if = tail

    if condition is None:
        # Balanced parsing failed; fall back to the simple pattern.
        match = re.match(
            r"if\s*\((.+?)\)\s*\{(.+?)\}(?:\s*else\s*\{(.+?)\})?", stmt, re.DOTALL
        )
        if not match:
            return
        condition, then_action, else_action = match.groups()

    if self._eval_condition(condition, state, fields, line):
        self._execute_action(then_action, state, fields, line)
    elif else_action:
        self._execute_action(else_action, state, fields, line)
    elif else_if:
        self._execute_if(else_if, state, fields, line)
|
|
1024
|
+
|
|
1025
|
+
def _execute_for(
    self, stmt: str, state: AwkState, fields: list[str], line: str
) -> None:
    """Execute a C-style for statement.

    Recognised shape: ``for (init; condition; update) { action }``.  The
    header and the body are located by balancing parentheses and braces
    outside double-quoted strings, and the header is split on
    semicolons that are outside strings and nested parentheses, so the
    body may contain nested blocks.  When that parse fails the plain
    regular expression is used instead; a statement matching neither is
    ignored.

    An empty init or update clause is skipped.  The condition is handed
    to ``_eval_condition`` as written.  The loop stops after 10000
    iterations even if the condition is still true.

    Args:
        stmt: Statement text starting with ``for``.
        state: Interpreter state passed through to the executed code.
        fields: Fields of the current record.
        line: Current record.
    """

    def closing_index(text: str, start: int) -> int:
        """Return the index of the delimiter balancing ``text[start]``, or -1."""
        opener = text[start]
        closer = ")" if opener == "(" else "}"
        depth = 0
        in_string = False
        pos = start
        while pos < len(text):
            ch = text[pos]
            if in_string:
                if ch == "\\":
                    pos += 1  # skip the escaped character
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == opener:
                depth += 1
            elif ch == closer:
                depth -= 1
                if depth == 0:
                    return pos
            pos += 1
        return -1

    def split_clauses(header: str) -> list[str]:
        """Split the header text on top-level semicolons."""
        clauses = []
        start = 0
        depth = 0
        in_string = False
        pos = 0
        while pos < len(header):
            ch = header[pos]
            if in_string:
                if ch == "\\":
                    pos += 1
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == "(":
                depth += 1
            elif ch == ")":
                depth -= 1
            elif ch == ";" and depth == 0:
                clauses.append(header[start:pos].strip())
                start = pos + 1
            pos += 1
        clauses.append(header[start:].strip())
        return clauses

    parts = None
    head = re.match(r"for\s*\(", stmt)
    if head:
        header_close = closing_index(stmt, head.end() - 1)
        body_open = None
        if header_close >= 0:
            body_open = re.compile(r"\s*\{").match(stmt, header_close + 1)
        if body_open:
            body_close = closing_index(stmt, body_open.end() - 1)
            clauses = split_clauses(stmt[head.end():header_close])
            if body_close >= 0 and len(clauses) == 3:
                parts = (*clauses, stmt[body_open.end():body_close])

    if parts is None:
        # Balanced parsing failed; fall back to the simple pattern.
        match = re.match(
            r"for\s*\((.+?);(.+?);(.+?)\)\s*\{(.+?)\}", stmt, re.DOTALL
        )
        if not match:
            return
        parts = (
            match.group(1).strip(),
            match.group(2).strip(),
            match.group(3).strip(),
            match.group(4),
        )

    init, condition, update, action = parts

    if init:
        self._execute_statement(init, state, fields, line)

    # Hard cap so a condition that never turns false cannot hang the run.
    max_iter = 10000
    for _ in range(max_iter):
        if not self._eval_condition(condition, state, fields, line):
            break
        self._execute_action(action, state, fields, line)
        if update:
            self._execute_statement(update, state, fields, line)
|
|
1047
|
+
|
|
1048
|
+
def _execute_while(
    self, stmt: str, state: AwkState, fields: list[str], line: str
) -> None:
    """Execute a while statement.

    Recognised shape: ``while (condition) { action }``.  The condition
    and the body are located by balancing parentheses and braces outside
    double-quoted strings, so the body may contain nested blocks.  When
    that parse fails the plain regular expression is used instead; a
    statement matching neither is ignored.  The loop stops after 10000
    iterations even if the condition is still true.

    Args:
        stmt: Statement text starting with ``while``.
        state: Interpreter state passed through to the executed action.
        fields: Fields of the current record.
        line: Current record.
    """

    def closing_index(text: str, start: int) -> int:
        """Return the index of the delimiter balancing ``text[start]``, or -1."""
        opener = text[start]
        closer = ")" if opener == "(" else "}"
        depth = 0
        in_string = False
        pos = start
        while pos < len(text):
            ch = text[pos]
            if in_string:
                if ch == "\\":
                    pos += 1  # skip the escaped character
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == opener:
                depth += 1
            elif ch == closer:
                depth -= 1
                if depth == 0:
                    return pos
            pos += 1
        return -1

    condition = action = None
    head = re.match(r"while\s*\(", stmt)
    if head:
        cond_close = closing_index(stmt, head.end() - 1)
        body_open = None
        if cond_close >= 0:
            body_open = re.compile(r"\s*\{").match(stmt, cond_close + 1)
        body_close = closing_index(stmt, body_open.end() - 1) if body_open else -1
        if body_close >= 0:
            condition = stmt[head.end():cond_close]
            action = stmt[body_open.end():body_close]

    if condition is None:
        # Balanced parsing failed; fall back to the simple pattern.
        match = re.match(r"while\s*\((.+?)\)\s*\{(.+?)\}", stmt, re.DOTALL)
        if not match:
            return
        condition = match.group(1)
        action = match.group(2)

    # Hard cap so a condition that never turns false cannot hang the run.
    max_iter = 10000
    for _ in range(max_iter):
        if not self._eval_condition(condition, state, fields, line):
            break
        self._execute_action(action, state, fields, line)
|
|
1062
|
+
|
|
1063
|
+
def _eval_condition(
    self, condition: str, state: AwkState, fields: list[str], line: str
) -> bool:
    """Evaluate a condition and return its truth value.

    Handled, from loosest to tightest binding: ``||``, ``&&`` (both
    short-circuiting), parentheses wrapping the whole condition, one
    comparison (``== != >= <= > <``), a leading ``!``, and finally the
    truthiness of the expression itself.  Operators are recognised only
    outside double-quoted strings and brackets.

    A comparison is numeric when both operands convert to float.
    Otherwise both operands are compared as strings, with falsy values
    (``0``, ``""``, ``None``) rendered as the empty string.

    For a bare expression, a string is true when non-empty and any other
    value is true when it is non-zero.

    Args:
        condition: Condition text.
        state: Interpreter state used to evaluate operands.
        fields: Fields of the current record.
        line: Current record.
    """
    condition = condition.strip()

    def find_top_level(text: str, tokens: tuple) -> tuple:
        """Return ``(index, token)`` of the first top-level token, or ``(-1, "")``."""
        depth = 0
        in_string = False
        pos = 0
        while pos < len(text):
            ch = text[pos]
            if in_string:
                if ch == "\\":
                    pos += 1  # skip the escaped character
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch in "([":
                depth += 1
            elif ch in ")]":
                depth -= 1
            elif depth == 0:
                for token in tokens:
                    if text.startswith(token, pos):
                        return pos, token
            pos += 1
        return -1, ""

    def is_wrapped(text: str) -> bool:
        """Tell whether one pair of parentheses encloses all of *text*."""
        if not (text.startswith("(") and text.endswith(")")):
            return False
        depth = 0
        in_string = False
        pos = 0
        while pos < len(text):
            ch = text[pos]
            if in_string:
                if ch == "\\":
                    pos += 1
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == "(":
                depth += 1
            elif ch == ")":
                depth -= 1
                if depth == 0:
                    return pos == len(text) - 1
            pos += 1
        return False

    # Logical operators; || is split first because it binds loosest.
    for logical in ("||", "&&"):
        pos, _ = find_top_level(condition, (logical,))
        if pos < 0:
            continue
        left = self._eval_condition(condition[:pos], state, fields, line)
        if logical == "||":
            return left or self._eval_condition(
                condition[pos + 2:], state, fields, line
            )
        return left and self._eval_condition(
            condition[pos + 2:], state, fields, line
        )

    if is_wrapped(condition):
        return self._eval_condition(condition[1:-1], state, fields, line)

    # Comparison operators; two-character operators are tested first so
    # ">=" is not mistaken for ">".
    pos, op = find_top_level(condition, ("==", "!=", ">=", "<=", ">", "<"))
    if pos >= 0:
        left_raw = self._eval_expr(condition[:pos].strip(), state, line, fields)
        right_raw = self._eval_expr(
            condition[pos + len(op):].strip(), state, line, fields
        )

        try:
            left, right = float(left_raw), float(right_raw)
        except (ValueError, TypeError):
            # Convert from the evaluated values, not from a half-finished
            # float conversion, so 5 compares as "5" rather than "5.0".
            left = str(left_raw) if left_raw else ""
            right = str(right_raw) if right_raw else ""

        if op == "==":
            return left == right
        if op == "!=":
            return left != right
        if op == ">=":
            return left >= right
        if op == "<=":
            return left <= right
        if op == ">":
            return left > right
        return left < right

    # Logical negation of the rest of the condition.
    if condition.startswith("!") and len(condition) > 1:
        return not self._eval_condition(condition[1:], state, fields, line)

    # Just evaluate as truthy
    result = self._eval_expr(condition, state, line, fields)
    if isinstance(result, str):
        return len(result) > 0
    try:
        return float(result) != 0
    except (ValueError, TypeError):
        return bool(result)
|
|
1104
|
+
|
|
1105
|
+
def _unescape_string(self, s: str) -> str:
|
|
1106
|
+
"""Unescape a string literal."""
|
|
1107
|
+
result = ""
|
|
1108
|
+
i = 0
|
|
1109
|
+
while i < len(s):
|
|
1110
|
+
if s[i] == "\\" and i + 1 < len(s):
|
|
1111
|
+
c = s[i + 1]
|
|
1112
|
+
if c == "n":
|
|
1113
|
+
result += "\n"
|
|
1114
|
+
elif c == "t":
|
|
1115
|
+
result += "\t"
|
|
1116
|
+
elif c == "r":
|
|
1117
|
+
result += "\r"
|
|
1118
|
+
elif c == "\\":
|
|
1119
|
+
result += "\\"
|
|
1120
|
+
elif c == '"':
|
|
1121
|
+
result += '"'
|
|
1122
|
+
else:
|
|
1123
|
+
result += c
|
|
1124
|
+
i += 2
|
|
1125
|
+
else:
|
|
1126
|
+
result += s[i]
|
|
1127
|
+
i += 1
|
|
1128
|
+
return result
|
|
1129
|
+
|
|
1130
|
+
def _parse_value(self, s: str) -> Any:
|
|
1131
|
+
"""Parse a value (for -v option)."""
|
|
1132
|
+
try:
|
|
1133
|
+
if "." in s:
|
|
1134
|
+
return float(s)
|
|
1135
|
+
return int(s)
|
|
1136
|
+
except ValueError:
|
|
1137
|
+
return s
|
|
1138
|
+
|
|
1139
|
+
def _unescape_field_sep(self, s: str) -> str:
|
|
1140
|
+
"""Unescape field separator (handle \\t, \\n, etc.)."""
|
|
1141
|
+
result = ""
|
|
1142
|
+
i = 0
|
|
1143
|
+
while i < len(s):
|
|
1144
|
+
if s[i] == "\\" and i + 1 < len(s):
|
|
1145
|
+
c = s[i + 1]
|
|
1146
|
+
if c == "t":
|
|
1147
|
+
result += "\t"
|
|
1148
|
+
elif c == "n":
|
|
1149
|
+
result += "\n"
|
|
1150
|
+
elif c == "r":
|
|
1151
|
+
result += "\r"
|
|
1152
|
+
elif c == "\\":
|
|
1153
|
+
result += "\\"
|
|
1154
|
+
else:
|
|
1155
|
+
result += c
|
|
1156
|
+
i += 2
|
|
1157
|
+
else:
|
|
1158
|
+
result += s[i]
|
|
1159
|
+
i += 1
|
|
1160
|
+
return result
|
|
1161
|
+
|
|
1162
|
+
def _format_number(self, n: Any) -> str:
|
|
1163
|
+
"""Format a number for output - integers without decimal point."""
|
|
1164
|
+
if isinstance(n, float):
|
|
1165
|
+
if n == int(n):
|
|
1166
|
+
return str(int(n))
|
|
1167
|
+
return str(n)
|
|
1168
|
+
return str(n)
|