just_bash-0.1.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- just_bash/__init__.py +55 -0
- just_bash/ast/__init__.py +213 -0
- just_bash/ast/factory.py +320 -0
- just_bash/ast/types.py +953 -0
- just_bash/bash.py +220 -0
- just_bash/commands/__init__.py +23 -0
- just_bash/commands/argv/__init__.py +5 -0
- just_bash/commands/argv/argv.py +21 -0
- just_bash/commands/awk/__init__.py +5 -0
- just_bash/commands/awk/awk.py +1168 -0
- just_bash/commands/base64/__init__.py +5 -0
- just_bash/commands/base64/base64.py +138 -0
- just_bash/commands/basename/__init__.py +5 -0
- just_bash/commands/basename/basename.py +72 -0
- just_bash/commands/bash/__init__.py +5 -0
- just_bash/commands/bash/bash.py +188 -0
- just_bash/commands/cat/__init__.py +5 -0
- just_bash/commands/cat/cat.py +173 -0
- just_bash/commands/checksum/__init__.py +5 -0
- just_bash/commands/checksum/checksum.py +179 -0
- just_bash/commands/chmod/__init__.py +5 -0
- just_bash/commands/chmod/chmod.py +216 -0
- just_bash/commands/column/__init__.py +5 -0
- just_bash/commands/column/column.py +180 -0
- just_bash/commands/comm/__init__.py +5 -0
- just_bash/commands/comm/comm.py +150 -0
- just_bash/commands/compression/__init__.py +5 -0
- just_bash/commands/compression/compression.py +298 -0
- just_bash/commands/cp/__init__.py +5 -0
- just_bash/commands/cp/cp.py +149 -0
- just_bash/commands/curl/__init__.py +5 -0
- just_bash/commands/curl/curl.py +801 -0
- just_bash/commands/cut/__init__.py +5 -0
- just_bash/commands/cut/cut.py +327 -0
- just_bash/commands/date/__init__.py +5 -0
- just_bash/commands/date/date.py +258 -0
- just_bash/commands/diff/__init__.py +5 -0
- just_bash/commands/diff/diff.py +118 -0
- just_bash/commands/dirname/__init__.py +5 -0
- just_bash/commands/dirname/dirname.py +56 -0
- just_bash/commands/du/__init__.py +5 -0
- just_bash/commands/du/du.py +150 -0
- just_bash/commands/echo/__init__.py +5 -0
- just_bash/commands/echo/echo.py +125 -0
- just_bash/commands/env/__init__.py +5 -0
- just_bash/commands/env/env.py +163 -0
- just_bash/commands/expand/__init__.py +5 -0
- just_bash/commands/expand/expand.py +299 -0
- just_bash/commands/expr/__init__.py +5 -0
- just_bash/commands/expr/expr.py +273 -0
- just_bash/commands/file/__init__.py +5 -0
- just_bash/commands/file/file.py +274 -0
- just_bash/commands/find/__init__.py +5 -0
- just_bash/commands/find/find.py +623 -0
- just_bash/commands/fold/__init__.py +5 -0
- just_bash/commands/fold/fold.py +160 -0
- just_bash/commands/grep/__init__.py +5 -0
- just_bash/commands/grep/grep.py +418 -0
- just_bash/commands/head/__init__.py +5 -0
- just_bash/commands/head/head.py +167 -0
- just_bash/commands/help/__init__.py +5 -0
- just_bash/commands/help/help.py +67 -0
- just_bash/commands/hostname/__init__.py +5 -0
- just_bash/commands/hostname/hostname.py +21 -0
- just_bash/commands/html_to_markdown/__init__.py +5 -0
- just_bash/commands/html_to_markdown/html_to_markdown.py +191 -0
- just_bash/commands/join/__init__.py +5 -0
- just_bash/commands/join/join.py +252 -0
- just_bash/commands/jq/__init__.py +5 -0
- just_bash/commands/jq/jq.py +280 -0
- just_bash/commands/ln/__init__.py +5 -0
- just_bash/commands/ln/ln.py +127 -0
- just_bash/commands/ls/__init__.py +5 -0
- just_bash/commands/ls/ls.py +280 -0
- just_bash/commands/mkdir/__init__.py +5 -0
- just_bash/commands/mkdir/mkdir.py +92 -0
- just_bash/commands/mv/__init__.py +5 -0
- just_bash/commands/mv/mv.py +142 -0
- just_bash/commands/nl/__init__.py +5 -0
- just_bash/commands/nl/nl.py +180 -0
- just_bash/commands/od/__init__.py +5 -0
- just_bash/commands/od/od.py +157 -0
- just_bash/commands/paste/__init__.py +5 -0
- just_bash/commands/paste/paste.py +100 -0
- just_bash/commands/printf/__init__.py +5 -0
- just_bash/commands/printf/printf.py +157 -0
- just_bash/commands/pwd/__init__.py +5 -0
- just_bash/commands/pwd/pwd.py +23 -0
- just_bash/commands/read/__init__.py +5 -0
- just_bash/commands/read/read.py +185 -0
- just_bash/commands/readlink/__init__.py +5 -0
- just_bash/commands/readlink/readlink.py +86 -0
- just_bash/commands/registry.py +844 -0
- just_bash/commands/rev/__init__.py +5 -0
- just_bash/commands/rev/rev.py +74 -0
- just_bash/commands/rg/__init__.py +5 -0
- just_bash/commands/rg/rg.py +1048 -0
- just_bash/commands/rm/__init__.py +5 -0
- just_bash/commands/rm/rm.py +106 -0
- just_bash/commands/search_engine/__init__.py +13 -0
- just_bash/commands/search_engine/matcher.py +170 -0
- just_bash/commands/search_engine/regex.py +159 -0
- just_bash/commands/sed/__init__.py +5 -0
- just_bash/commands/sed/sed.py +863 -0
- just_bash/commands/seq/__init__.py +5 -0
- just_bash/commands/seq/seq.py +190 -0
- just_bash/commands/shell/__init__.py +5 -0
- just_bash/commands/shell/shell.py +206 -0
- just_bash/commands/sleep/__init__.py +5 -0
- just_bash/commands/sleep/sleep.py +62 -0
- just_bash/commands/sort/__init__.py +5 -0
- just_bash/commands/sort/sort.py +411 -0
- just_bash/commands/split/__init__.py +5 -0
- just_bash/commands/split/split.py +237 -0
- just_bash/commands/sqlite3/__init__.py +5 -0
- just_bash/commands/sqlite3/sqlite3_cmd.py +505 -0
- just_bash/commands/stat/__init__.py +5 -0
- just_bash/commands/stat/stat.py +150 -0
- just_bash/commands/strings/__init__.py +5 -0
- just_bash/commands/strings/strings.py +150 -0
- just_bash/commands/tac/__init__.py +5 -0
- just_bash/commands/tac/tac.py +158 -0
- just_bash/commands/tail/__init__.py +5 -0
- just_bash/commands/tail/tail.py +180 -0
- just_bash/commands/tar/__init__.py +5 -0
- just_bash/commands/tar/tar.py +1067 -0
- just_bash/commands/tee/__init__.py +5 -0
- just_bash/commands/tee/tee.py +63 -0
- just_bash/commands/timeout/__init__.py +5 -0
- just_bash/commands/timeout/timeout.py +188 -0
- just_bash/commands/touch/__init__.py +5 -0
- just_bash/commands/touch/touch.py +91 -0
- just_bash/commands/tr/__init__.py +5 -0
- just_bash/commands/tr/tr.py +297 -0
- just_bash/commands/tree/__init__.py +5 -0
- just_bash/commands/tree/tree.py +139 -0
- just_bash/commands/true/__init__.py +5 -0
- just_bash/commands/true/true.py +32 -0
- just_bash/commands/uniq/__init__.py +5 -0
- just_bash/commands/uniq/uniq.py +323 -0
- just_bash/commands/wc/__init__.py +5 -0
- just_bash/commands/wc/wc.py +169 -0
- just_bash/commands/which/__init__.py +5 -0
- just_bash/commands/which/which.py +52 -0
- just_bash/commands/xan/__init__.py +5 -0
- just_bash/commands/xan/xan.py +1663 -0
- just_bash/commands/xargs/__init__.py +5 -0
- just_bash/commands/xargs/xargs.py +136 -0
- just_bash/commands/yq/__init__.py +5 -0
- just_bash/commands/yq/yq.py +848 -0
- just_bash/fs/__init__.py +29 -0
- just_bash/fs/in_memory_fs.py +621 -0
- just_bash/fs/mountable_fs.py +504 -0
- just_bash/fs/overlay_fs.py +894 -0
- just_bash/fs/read_write_fs.py +455 -0
- just_bash/interpreter/__init__.py +37 -0
- just_bash/interpreter/builtins/__init__.py +92 -0
- just_bash/interpreter/builtins/alias.py +154 -0
- just_bash/interpreter/builtins/cd.py +76 -0
- just_bash/interpreter/builtins/control.py +127 -0
- just_bash/interpreter/builtins/declare.py +336 -0
- just_bash/interpreter/builtins/export.py +56 -0
- just_bash/interpreter/builtins/let.py +44 -0
- just_bash/interpreter/builtins/local.py +57 -0
- just_bash/interpreter/builtins/mapfile.py +152 -0
- just_bash/interpreter/builtins/misc.py +378 -0
- just_bash/interpreter/builtins/readonly.py +80 -0
- just_bash/interpreter/builtins/set.py +234 -0
- just_bash/interpreter/builtins/shopt.py +201 -0
- just_bash/interpreter/builtins/source.py +136 -0
- just_bash/interpreter/builtins/test.py +290 -0
- just_bash/interpreter/builtins/unset.py +53 -0
- just_bash/interpreter/conditionals.py +387 -0
- just_bash/interpreter/control_flow.py +381 -0
- just_bash/interpreter/errors.py +116 -0
- just_bash/interpreter/expansion.py +1156 -0
- just_bash/interpreter/interpreter.py +813 -0
- just_bash/interpreter/types.py +134 -0
- just_bash/network/__init__.py +1 -0
- just_bash/parser/__init__.py +39 -0
- just_bash/parser/lexer.py +948 -0
- just_bash/parser/parser.py +2162 -0
- just_bash/py.typed +0 -0
- just_bash/query_engine/__init__.py +83 -0
- just_bash/query_engine/builtins/__init__.py +1283 -0
- just_bash/query_engine/evaluator.py +578 -0
- just_bash/query_engine/parser.py +525 -0
- just_bash/query_engine/tokenizer.py +329 -0
- just_bash/query_engine/types.py +373 -0
- just_bash/types.py +180 -0
- just_bash-0.1.5.dist-info/METADATA +410 -0
- just_bash-0.1.5.dist-info/RECORD +193 -0
- just_bash-0.1.5.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,1663 @@
|
|
|
1
|
+
"""Xan command implementation - CSV toolkit.
|
|
2
|
+
|
|
3
|
+
A Python port of the xan CSV toolkit for data manipulation.
|
|
4
|
+
|
|
5
|
+
Usage: xan <COMMAND> [OPTIONS] [FILE]
|
|
6
|
+
|
|
7
|
+
Implemented Commands:
|
|
8
|
+
headers Show column names
|
|
9
|
+
count Count rows
|
|
10
|
+
head Show first N rows
|
|
11
|
+
tail Show last N rows
|
|
12
|
+
slice Extract row range
|
|
13
|
+
select Select columns
|
|
14
|
+
drop Drop columns (inverse of select)
|
|
15
|
+
rename Rename columns (old:new syntax)
|
|
16
|
+
filter Filter rows by expression
|
|
17
|
+
search Filter rows by regex
|
|
18
|
+
sort Sort rows
|
|
19
|
+
reverse Reverse row order
|
|
20
|
+
behead Output without header
|
|
21
|
+
enum Add index column
|
|
22
|
+
shuffle Randomly reorder rows
|
|
23
|
+
sample Random sample of N rows
|
|
24
|
+
dedup Remove duplicate rows
|
|
25
|
+
top Top N rows by column value
|
|
26
|
+
cat Concatenate CSV files
|
|
27
|
+
transpose Swap rows and columns
|
|
28
|
+
fixlengths Fix ragged CSV
|
|
29
|
+
flatten/f Display records vertically
|
|
30
|
+
explode Split column values to rows
|
|
31
|
+
implode Combine rows by grouping
|
|
32
|
+
split Split into multiple files
|
|
33
|
+
view Pretty print as table
|
|
34
|
+
stats Show column statistics
|
|
35
|
+
frequency Count value occurrences
|
|
36
|
+
to json Convert CSV to JSON
|
|
37
|
+
from json Convert JSON to CSV
|
|
38
|
+
|
|
39
|
+
Not Yet Implemented (require expression evaluation):
|
|
40
|
+
join Join two CSVs on a key column
|
|
41
|
+
agg Aggregate column values with expressions
|
|
42
|
+
groupby Group rows and aggregate
|
|
43
|
+
map Add computed columns via expressions
|
|
44
|
+
transform Transform column values via expressions
|
|
45
|
+
pivot Reshape data (pivot table)
|
|
46
|
+
|
|
47
|
+
These commands are stubbed and will return a "not implemented" error.
|
|
48
|
+
The TypeScript xan uses a custom expression language for these operations
|
|
49
|
+
that would need to be ported to Python.
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
import csv
|
|
53
|
+
import io
|
|
54
|
+
import json
|
|
55
|
+
import random
|
|
56
|
+
import re
|
|
57
|
+
from typing import Any
|
|
58
|
+
|
|
59
|
+
from ...types import CommandContext, ExecResult
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def parse_csv(content: str) -> tuple[list[str], list[dict[str, str]]]:
|
|
63
|
+
"""Parse CSV content into headers and data rows."""
|
|
64
|
+
if not content.strip():
|
|
65
|
+
return [], []
|
|
66
|
+
|
|
67
|
+
reader = csv.DictReader(io.StringIO(content))
|
|
68
|
+
headers = reader.fieldnames or []
|
|
69
|
+
data = list(reader)
|
|
70
|
+
return list(headers), data
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def format_csv(headers: list[str], data: list[dict[str, Any]]) -> str:
|
|
74
|
+
"""Format data as CSV."""
|
|
75
|
+
if not headers:
|
|
76
|
+
return ""
|
|
77
|
+
|
|
78
|
+
output = io.StringIO(newline="")
|
|
79
|
+
writer = csv.DictWriter(output, fieldnames=headers, lineterminator="\n")
|
|
80
|
+
writer.writeheader()
|
|
81
|
+
for row in data:
|
|
82
|
+
writer.writerow({h: row.get(h, "") for h in headers})
|
|
83
|
+
return output.getvalue()
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def format_value(v: Any) -> str:
|
|
87
|
+
"""Format a value for CSV output."""
|
|
88
|
+
if v is None:
|
|
89
|
+
return ""
|
|
90
|
+
s = str(v)
|
|
91
|
+
if "," in s or '"' in s or "\n" in s:
|
|
92
|
+
escaped = s.replace('"', '""')
|
|
93
|
+
return f'"{escaped}"'
|
|
94
|
+
return s
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
async def read_csv_input(
|
|
98
|
+
file_args: list[str], ctx: CommandContext
|
|
99
|
+
) -> tuple[list[str], list[dict[str, str]], ExecResult | None]:
|
|
100
|
+
"""Read CSV from file or stdin."""
|
|
101
|
+
if not file_args or file_args[0] == "-":
|
|
102
|
+
content = ctx.stdin
|
|
103
|
+
else:
|
|
104
|
+
try:
|
|
105
|
+
path = ctx.fs.resolve_path(ctx.cwd, file_args[0])
|
|
106
|
+
content = await ctx.fs.read_file(path)
|
|
107
|
+
except FileNotFoundError:
|
|
108
|
+
return [], [], ExecResult(
|
|
109
|
+
stdout="",
|
|
110
|
+
stderr=f"xan: {file_args[0]}: No such file or directory\n",
|
|
111
|
+
exit_code=2,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
headers, data = parse_csv(content)
|
|
115
|
+
return headers, data, None
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
async def cmd_headers(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
119
|
+
"""Show column names."""
|
|
120
|
+
just_names = "-j" in args or "--just-names" in args
|
|
121
|
+
file_args = [a for a in args if not a.startswith("-")]
|
|
122
|
+
|
|
123
|
+
headers, _, error = await read_csv_input(file_args, ctx)
|
|
124
|
+
if error:
|
|
125
|
+
return error
|
|
126
|
+
|
|
127
|
+
if just_names:
|
|
128
|
+
output = "\n".join(headers) + "\n" if headers else ""
|
|
129
|
+
else:
|
|
130
|
+
output = "\n".join(f"{i}\t{h}" for i, h in enumerate(headers)) + "\n" if headers else ""
|
|
131
|
+
|
|
132
|
+
return ExecResult(stdout=output, stderr="", exit_code=0)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
async def cmd_count(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
136
|
+
"""Count rows."""
|
|
137
|
+
file_args = [a for a in args if not a.startswith("-")]
|
|
138
|
+
|
|
139
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
140
|
+
if error:
|
|
141
|
+
return error
|
|
142
|
+
|
|
143
|
+
return ExecResult(stdout=f"{len(data)}\n", stderr="", exit_code=0)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
async def cmd_head(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
147
|
+
"""Show first N rows."""
|
|
148
|
+
n = 10
|
|
149
|
+
file_args = []
|
|
150
|
+
|
|
151
|
+
i = 0
|
|
152
|
+
while i < len(args):
|
|
153
|
+
arg = args[i]
|
|
154
|
+
if arg == "-n" and i + 1 < len(args):
|
|
155
|
+
try:
|
|
156
|
+
n = int(args[i + 1])
|
|
157
|
+
except ValueError:
|
|
158
|
+
pass
|
|
159
|
+
i += 2
|
|
160
|
+
continue
|
|
161
|
+
elif arg.startswith("-n"):
|
|
162
|
+
try:
|
|
163
|
+
n = int(arg[2:])
|
|
164
|
+
except ValueError:
|
|
165
|
+
pass
|
|
166
|
+
elif not arg.startswith("-"):
|
|
167
|
+
file_args.append(arg)
|
|
168
|
+
i += 1
|
|
169
|
+
|
|
170
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
171
|
+
if error:
|
|
172
|
+
return error
|
|
173
|
+
|
|
174
|
+
return ExecResult(stdout=format_csv(headers, data[:n]), stderr="", exit_code=0)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
async def cmd_tail(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
178
|
+
"""Show last N rows."""
|
|
179
|
+
n = 10
|
|
180
|
+
file_args = []
|
|
181
|
+
|
|
182
|
+
i = 0
|
|
183
|
+
while i < len(args):
|
|
184
|
+
arg = args[i]
|
|
185
|
+
if arg == "-n" and i + 1 < len(args):
|
|
186
|
+
try:
|
|
187
|
+
n = int(args[i + 1])
|
|
188
|
+
except ValueError:
|
|
189
|
+
pass
|
|
190
|
+
i += 2
|
|
191
|
+
continue
|
|
192
|
+
elif arg.startswith("-n"):
|
|
193
|
+
try:
|
|
194
|
+
n = int(arg[2:])
|
|
195
|
+
except ValueError:
|
|
196
|
+
pass
|
|
197
|
+
elif not arg.startswith("-"):
|
|
198
|
+
file_args.append(arg)
|
|
199
|
+
i += 1
|
|
200
|
+
|
|
201
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
202
|
+
if error:
|
|
203
|
+
return error
|
|
204
|
+
|
|
205
|
+
return ExecResult(stdout=format_csv(headers, data[-n:]), stderr="", exit_code=0)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
async def cmd_slice(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
209
|
+
"""Extract row range."""
|
|
210
|
+
start = 0
|
|
211
|
+
end = None
|
|
212
|
+
file_args = []
|
|
213
|
+
|
|
214
|
+
i = 0
|
|
215
|
+
while i < len(args):
|
|
216
|
+
arg = args[i]
|
|
217
|
+
if arg == "-s" or arg == "--start":
|
|
218
|
+
if i + 1 < len(args):
|
|
219
|
+
try:
|
|
220
|
+
start = int(args[i + 1])
|
|
221
|
+
except ValueError:
|
|
222
|
+
pass
|
|
223
|
+
i += 2
|
|
224
|
+
continue
|
|
225
|
+
elif arg == "-e" or arg == "--end":
|
|
226
|
+
if i + 1 < len(args):
|
|
227
|
+
try:
|
|
228
|
+
end = int(args[i + 1])
|
|
229
|
+
except ValueError:
|
|
230
|
+
pass
|
|
231
|
+
i += 2
|
|
232
|
+
continue
|
|
233
|
+
elif not arg.startswith("-"):
|
|
234
|
+
file_args.append(arg)
|
|
235
|
+
i += 1
|
|
236
|
+
|
|
237
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
238
|
+
if error:
|
|
239
|
+
return error
|
|
240
|
+
|
|
241
|
+
return ExecResult(stdout=format_csv(headers, data[start:end]), stderr="", exit_code=0)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
async def cmd_select(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
245
|
+
"""Select columns."""
|
|
246
|
+
cols_spec = ""
|
|
247
|
+
file_args = []
|
|
248
|
+
|
|
249
|
+
for arg in args:
|
|
250
|
+
if not arg.startswith("-"):
|
|
251
|
+
if not cols_spec:
|
|
252
|
+
cols_spec = arg
|
|
253
|
+
else:
|
|
254
|
+
file_args.append(arg)
|
|
255
|
+
|
|
256
|
+
if not cols_spec:
|
|
257
|
+
return ExecResult(
|
|
258
|
+
stdout="",
|
|
259
|
+
stderr="xan select: no columns specified\n",
|
|
260
|
+
exit_code=1,
|
|
261
|
+
)
|
|
262
|
+
|
|
263
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
264
|
+
if error:
|
|
265
|
+
return error
|
|
266
|
+
|
|
267
|
+
# Parse column specification (comma-separated names or indices)
|
|
268
|
+
selected_headers = []
|
|
269
|
+
for col in cols_spec.split(","):
|
|
270
|
+
col = col.strip()
|
|
271
|
+
if not col:
|
|
272
|
+
continue
|
|
273
|
+
|
|
274
|
+
# Check if it's an index
|
|
275
|
+
try:
|
|
276
|
+
idx = int(col)
|
|
277
|
+
if 0 <= idx < len(headers):
|
|
278
|
+
selected_headers.append(headers[idx])
|
|
279
|
+
continue
|
|
280
|
+
except ValueError:
|
|
281
|
+
pass
|
|
282
|
+
|
|
283
|
+
# Check for glob pattern
|
|
284
|
+
if "*" in col:
|
|
285
|
+
pattern = col.replace("*", ".*")
|
|
286
|
+
for h in headers:
|
|
287
|
+
if re.match(f"^{pattern}$", h) and h not in selected_headers:
|
|
288
|
+
selected_headers.append(h)
|
|
289
|
+
continue
|
|
290
|
+
|
|
291
|
+
# Direct column name
|
|
292
|
+
if col in headers:
|
|
293
|
+
selected_headers.append(col)
|
|
294
|
+
|
|
295
|
+
# Filter data to selected columns
|
|
296
|
+
selected_data = []
|
|
297
|
+
for row in data:
|
|
298
|
+
selected_data.append({h: row.get(h, "") for h in selected_headers})
|
|
299
|
+
|
|
300
|
+
return ExecResult(stdout=format_csv(selected_headers, selected_data), stderr="", exit_code=0)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
async def cmd_filter(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
304
|
+
"""Filter rows by expression."""
|
|
305
|
+
expr = ""
|
|
306
|
+
invert = False
|
|
307
|
+
file_args = []
|
|
308
|
+
|
|
309
|
+
i = 0
|
|
310
|
+
while i < len(args):
|
|
311
|
+
arg = args[i]
|
|
312
|
+
if arg in ("-v", "--invert"):
|
|
313
|
+
invert = True
|
|
314
|
+
elif not arg.startswith("-"):
|
|
315
|
+
if not expr:
|
|
316
|
+
expr = arg
|
|
317
|
+
else:
|
|
318
|
+
file_args.append(arg)
|
|
319
|
+
i += 1
|
|
320
|
+
|
|
321
|
+
if not expr:
|
|
322
|
+
return ExecResult(
|
|
323
|
+
stdout="",
|
|
324
|
+
stderr="xan filter: no expression specified\n",
|
|
325
|
+
exit_code=1,
|
|
326
|
+
)
|
|
327
|
+
|
|
328
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
329
|
+
if error:
|
|
330
|
+
return error
|
|
331
|
+
|
|
332
|
+
# Parse simple expressions: col op value
|
|
333
|
+
# Supported: ==, !=, <, <=, >, >=, contains, startswith, endswith
|
|
334
|
+
filtered = []
|
|
335
|
+
for row in data:
|
|
336
|
+
match = evaluate_filter_expr(expr, row)
|
|
337
|
+
if (match and not invert) or (not match and invert):
|
|
338
|
+
filtered.append(row)
|
|
339
|
+
|
|
340
|
+
return ExecResult(stdout=format_csv(headers, filtered), stderr="", exit_code=0)
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def evaluate_filter_expr(expr: str, row: dict[str, str]) -> bool:
|
|
344
|
+
"""Evaluate a filter expression against a row."""
|
|
345
|
+
expr = expr.strip()
|
|
346
|
+
|
|
347
|
+
# Try different operators
|
|
348
|
+
for op, op_func in [
|
|
349
|
+
("==", lambda a, b: str(a) == str(b)),
|
|
350
|
+
("!=", lambda a, b: str(a) != str(b)),
|
|
351
|
+
(">=", lambda a, b: try_compare(a, b, "ge")),
|
|
352
|
+
("<=", lambda a, b: try_compare(a, b, "le")),
|
|
353
|
+
(">", lambda a, b: try_compare(a, b, "gt")),
|
|
354
|
+
("<", lambda a, b: try_compare(a, b, "lt")),
|
|
355
|
+
]:
|
|
356
|
+
if f" {op} " in expr:
|
|
357
|
+
parts = expr.split(f" {op} ", 1)
|
|
358
|
+
col = parts[0].strip()
|
|
359
|
+
val = parts[1].strip().strip('"').strip("'")
|
|
360
|
+
if col in row:
|
|
361
|
+
return op_func(row[col], val)
|
|
362
|
+
return False
|
|
363
|
+
|
|
364
|
+
# Check for function-style expressions
|
|
365
|
+
if "contains(" in expr.lower():
|
|
366
|
+
match = re.match(r"(\w+)\s+contains\s*\(([^)]+)\)", expr, re.IGNORECASE)
|
|
367
|
+
if match:
|
|
368
|
+
col, val = match.groups()
|
|
369
|
+
val = val.strip('"').strip("'")
|
|
370
|
+
if col in row:
|
|
371
|
+
return val in str(row[col])
|
|
372
|
+
return False
|
|
373
|
+
|
|
374
|
+
if "startswith(" in expr.lower():
|
|
375
|
+
match = re.match(r"(\w+)\s+startswith\s*\(([^)]+)\)", expr, re.IGNORECASE)
|
|
376
|
+
if match:
|
|
377
|
+
col, val = match.groups()
|
|
378
|
+
val = val.strip('"').strip("'")
|
|
379
|
+
if col in row:
|
|
380
|
+
return str(row[col]).startswith(val)
|
|
381
|
+
return False
|
|
382
|
+
|
|
383
|
+
if "endswith(" in expr.lower():
|
|
384
|
+
match = re.match(r"(\w+)\s+endswith\s*\(([^)]+)\)", expr, re.IGNORECASE)
|
|
385
|
+
if match:
|
|
386
|
+
col, val = match.groups()
|
|
387
|
+
val = val.strip('"').strip("'")
|
|
388
|
+
if col in row:
|
|
389
|
+
return str(row[col]).endswith(val)
|
|
390
|
+
return False
|
|
391
|
+
|
|
392
|
+
return False
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def try_compare(a: str, b: str, op: str) -> bool:
|
|
396
|
+
"""Try to compare values, first as numbers, then as strings."""
|
|
397
|
+
try:
|
|
398
|
+
a_num = float(a) if a else 0
|
|
399
|
+
b_num = float(b) if b else 0
|
|
400
|
+
if op == "gt":
|
|
401
|
+
return a_num > b_num
|
|
402
|
+
elif op == "ge":
|
|
403
|
+
return a_num >= b_num
|
|
404
|
+
elif op == "lt":
|
|
405
|
+
return a_num < b_num
|
|
406
|
+
elif op == "le":
|
|
407
|
+
return a_num <= b_num
|
|
408
|
+
except ValueError:
|
|
409
|
+
pass
|
|
410
|
+
|
|
411
|
+
if op == "gt":
|
|
412
|
+
return str(a) > str(b)
|
|
413
|
+
elif op == "ge":
|
|
414
|
+
return str(a) >= str(b)
|
|
415
|
+
elif op == "lt":
|
|
416
|
+
return str(a) < str(b)
|
|
417
|
+
elif op == "le":
|
|
418
|
+
return str(a) <= str(b)
|
|
419
|
+
return False
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
async def cmd_search(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
423
|
+
"""Filter rows by regex."""
|
|
424
|
+
pattern = ""
|
|
425
|
+
select_cols: list[str] = []
|
|
426
|
+
invert = False
|
|
427
|
+
ignore_case = False
|
|
428
|
+
file_args = []
|
|
429
|
+
|
|
430
|
+
i = 0
|
|
431
|
+
while i < len(args):
|
|
432
|
+
arg = args[i]
|
|
433
|
+
if arg in ("-s", "--select") and i + 1 < len(args):
|
|
434
|
+
select_cols = args[i + 1].split(",")
|
|
435
|
+
i += 2
|
|
436
|
+
continue
|
|
437
|
+
elif arg in ("-v", "--invert"):
|
|
438
|
+
invert = True
|
|
439
|
+
elif arg in ("-i", "--ignore-case"):
|
|
440
|
+
ignore_case = True
|
|
441
|
+
elif not arg.startswith("-"):
|
|
442
|
+
if not pattern:
|
|
443
|
+
pattern = arg
|
|
444
|
+
else:
|
|
445
|
+
file_args.append(arg)
|
|
446
|
+
i += 1
|
|
447
|
+
|
|
448
|
+
if not pattern:
|
|
449
|
+
return ExecResult(
|
|
450
|
+
stdout="",
|
|
451
|
+
stderr="xan search: no pattern specified\n",
|
|
452
|
+
exit_code=1,
|
|
453
|
+
)
|
|
454
|
+
|
|
455
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
456
|
+
if error:
|
|
457
|
+
return error
|
|
458
|
+
|
|
459
|
+
search_cols = select_cols if select_cols else headers
|
|
460
|
+
|
|
461
|
+
try:
|
|
462
|
+
regex = re.compile(pattern, re.IGNORECASE if ignore_case else 0)
|
|
463
|
+
except re.error:
|
|
464
|
+
return ExecResult(
|
|
465
|
+
stdout="",
|
|
466
|
+
stderr=f"xan search: invalid regex pattern '{pattern}'\n",
|
|
467
|
+
exit_code=1,
|
|
468
|
+
)
|
|
469
|
+
|
|
470
|
+
filtered = []
|
|
471
|
+
for row in data:
|
|
472
|
+
matches = any(
|
|
473
|
+
regex.search(str(row.get(col, "")))
|
|
474
|
+
for col in search_cols
|
|
475
|
+
if col in row
|
|
476
|
+
)
|
|
477
|
+
if (matches and not invert) or (not matches and invert):
|
|
478
|
+
filtered.append(row)
|
|
479
|
+
|
|
480
|
+
return ExecResult(stdout=format_csv(headers, filtered), stderr="", exit_code=0)
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
async def cmd_sort(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
484
|
+
"""Sort rows."""
|
|
485
|
+
sort_col = ""
|
|
486
|
+
numeric = False
|
|
487
|
+
reverse = False
|
|
488
|
+
file_args = []
|
|
489
|
+
|
|
490
|
+
i = 0
|
|
491
|
+
while i < len(args):
|
|
492
|
+
arg = args[i]
|
|
493
|
+
if arg in ("-N", "--numeric"):
|
|
494
|
+
numeric = True
|
|
495
|
+
elif arg in ("-r", "--reverse", "-R"):
|
|
496
|
+
reverse = True
|
|
497
|
+
elif arg in ("-s", "--select") and i + 1 < len(args):
|
|
498
|
+
sort_col = args[i + 1]
|
|
499
|
+
i += 2
|
|
500
|
+
continue
|
|
501
|
+
elif not arg.startswith("-"):
|
|
502
|
+
if not sort_col:
|
|
503
|
+
sort_col = arg
|
|
504
|
+
else:
|
|
505
|
+
file_args.append(arg)
|
|
506
|
+
i += 1
|
|
507
|
+
|
|
508
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
509
|
+
if error:
|
|
510
|
+
return error
|
|
511
|
+
|
|
512
|
+
if not sort_col:
|
|
513
|
+
return ExecResult(
|
|
514
|
+
stdout="",
|
|
515
|
+
stderr="xan sort: no sort column specified\n",
|
|
516
|
+
exit_code=1,
|
|
517
|
+
)
|
|
518
|
+
|
|
519
|
+
if sort_col not in headers:
|
|
520
|
+
return ExecResult(
|
|
521
|
+
stdout="",
|
|
522
|
+
stderr=f"xan sort: column '{sort_col}' not found\n",
|
|
523
|
+
exit_code=1,
|
|
524
|
+
)
|
|
525
|
+
|
|
526
|
+
def sort_key(row: dict) -> Any:
|
|
527
|
+
val = row.get(sort_col, "")
|
|
528
|
+
if numeric:
|
|
529
|
+
try:
|
|
530
|
+
return float(val) if val else 0
|
|
531
|
+
except ValueError:
|
|
532
|
+
return 0
|
|
533
|
+
return str(val)
|
|
534
|
+
|
|
535
|
+
sorted_data = sorted(data, key=sort_key, reverse=reverse)
|
|
536
|
+
return ExecResult(stdout=format_csv(headers, sorted_data), stderr="", exit_code=0)
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
async def cmd_view(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
540
|
+
"""Pretty print as table."""
|
|
541
|
+
file_args = [a for a in args if not a.startswith("-")]
|
|
542
|
+
|
|
543
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
544
|
+
if error:
|
|
545
|
+
return error
|
|
546
|
+
|
|
547
|
+
if not headers:
|
|
548
|
+
return ExecResult(stdout="", stderr="", exit_code=0)
|
|
549
|
+
|
|
550
|
+
# Calculate column widths
|
|
551
|
+
widths = {h: len(h) for h in headers}
|
|
552
|
+
for row in data:
|
|
553
|
+
for h in headers:
|
|
554
|
+
widths[h] = max(widths[h], len(str(row.get(h, ""))))
|
|
555
|
+
|
|
556
|
+
# Build table
|
|
557
|
+
lines = []
|
|
558
|
+
|
|
559
|
+
# Header
|
|
560
|
+
header_line = " | ".join(h.ljust(widths[h]) for h in headers)
|
|
561
|
+
lines.append(header_line)
|
|
562
|
+
|
|
563
|
+
# Separator
|
|
564
|
+
sep_line = "-+-".join("-" * widths[h] for h in headers)
|
|
565
|
+
lines.append(sep_line)
|
|
566
|
+
|
|
567
|
+
# Data rows
|
|
568
|
+
for row in data:
|
|
569
|
+
row_line = " | ".join(str(row.get(h, "")).ljust(widths[h]) for h in headers)
|
|
570
|
+
lines.append(row_line)
|
|
571
|
+
|
|
572
|
+
return ExecResult(stdout="\n".join(lines) + "\n", stderr="", exit_code=0)
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
async def cmd_stats(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
576
|
+
"""Show column statistics."""
|
|
577
|
+
file_args = [a for a in args if not a.startswith("-")]
|
|
578
|
+
|
|
579
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
580
|
+
if error:
|
|
581
|
+
return error
|
|
582
|
+
|
|
583
|
+
if not headers:
|
|
584
|
+
return ExecResult(stdout="", stderr="", exit_code=0)
|
|
585
|
+
|
|
586
|
+
lines = []
|
|
587
|
+
for col in headers:
|
|
588
|
+
values = [row.get(col, "") for row in data]
|
|
589
|
+
non_empty = [v for v in values if v]
|
|
590
|
+
|
|
591
|
+
# Try to parse as numbers
|
|
592
|
+
nums = []
|
|
593
|
+
for v in non_empty:
|
|
594
|
+
try:
|
|
595
|
+
nums.append(float(v))
|
|
596
|
+
except ValueError:
|
|
597
|
+
pass
|
|
598
|
+
|
|
599
|
+
lines.append(f"Column: {col}")
|
|
600
|
+
lines.append(f" Count: {len(values)}")
|
|
601
|
+
lines.append(f" Non-empty: {len(non_empty)}")
|
|
602
|
+
lines.append(f" Unique: {len(set(non_empty))}")
|
|
603
|
+
|
|
604
|
+
if nums:
|
|
605
|
+
lines.append(f" Min: {min(nums)}")
|
|
606
|
+
lines.append(f" Max: {max(nums)}")
|
|
607
|
+
lines.append(f" Sum: {sum(nums)}")
|
|
608
|
+
lines.append(f" Mean: {sum(nums) / len(nums):.2f}")
|
|
609
|
+
|
|
610
|
+
lines.append("")
|
|
611
|
+
|
|
612
|
+
return ExecResult(stdout="\n".join(lines), stderr="", exit_code=0)
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
async def cmd_frequency(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
616
|
+
"""Count value occurrences."""
|
|
617
|
+
col = ""
|
|
618
|
+
file_args = []
|
|
619
|
+
|
|
620
|
+
for arg in args:
|
|
621
|
+
if not arg.startswith("-"):
|
|
622
|
+
if not col:
|
|
623
|
+
col = arg
|
|
624
|
+
else:
|
|
625
|
+
file_args.append(arg)
|
|
626
|
+
|
|
627
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
628
|
+
if error:
|
|
629
|
+
return error
|
|
630
|
+
|
|
631
|
+
if not col:
|
|
632
|
+
# Default to first column
|
|
633
|
+
col = headers[0] if headers else ""
|
|
634
|
+
|
|
635
|
+
if col not in headers:
|
|
636
|
+
return ExecResult(
|
|
637
|
+
stdout="",
|
|
638
|
+
stderr=f"xan frequency: column '{col}' not found\n",
|
|
639
|
+
exit_code=1,
|
|
640
|
+
)
|
|
641
|
+
|
|
642
|
+
# Count occurrences
|
|
643
|
+
counts: dict[str, int] = {}
|
|
644
|
+
for row in data:
|
|
645
|
+
val = str(row.get(col, ""))
|
|
646
|
+
counts[val] = counts.get(val, 0) + 1
|
|
647
|
+
|
|
648
|
+
# Sort by count descending
|
|
649
|
+
sorted_counts = sorted(counts.items(), key=lambda x: -x[1])
|
|
650
|
+
|
|
651
|
+
# Output as CSV
|
|
652
|
+
output = "value,count\n"
|
|
653
|
+
for val, count in sorted_counts:
|
|
654
|
+
output += f"{format_value(val)},{count}\n"
|
|
655
|
+
|
|
656
|
+
return ExecResult(stdout=output, stderr="", exit_code=0)
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
async def cmd_reverse(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
660
|
+
"""Reverse row order."""
|
|
661
|
+
file_args = [a for a in args if not a.startswith("-")]
|
|
662
|
+
|
|
663
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
664
|
+
if error:
|
|
665
|
+
return error
|
|
666
|
+
|
|
667
|
+
return ExecResult(stdout=format_csv(headers, data[::-1]), stderr="", exit_code=0)
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
async def cmd_behead(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
671
|
+
"""Output data without header row."""
|
|
672
|
+
file_args = [a for a in args if not a.startswith("-")]
|
|
673
|
+
|
|
674
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
675
|
+
if error:
|
|
676
|
+
return error
|
|
677
|
+
|
|
678
|
+
if not headers or not data:
|
|
679
|
+
return ExecResult(stdout="", stderr="", exit_code=0)
|
|
680
|
+
|
|
681
|
+
# Output data rows without header
|
|
682
|
+
output = io.StringIO(newline="")
|
|
683
|
+
writer = csv.writer(output, lineterminator="\n")
|
|
684
|
+
for row in data:
|
|
685
|
+
writer.writerow([row.get(h, "") for h in headers])
|
|
686
|
+
return ExecResult(stdout=output.getvalue(), stderr="", exit_code=0)
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
async def cmd_enum(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
690
|
+
"""Add index column to CSV."""
|
|
691
|
+
col_name = "index"
|
|
692
|
+
start = 0
|
|
693
|
+
file_args = []
|
|
694
|
+
|
|
695
|
+
i = 0
|
|
696
|
+
while i < len(args):
|
|
697
|
+
arg = args[i]
|
|
698
|
+
if arg in ("-c", "--column") and i + 1 < len(args):
|
|
699
|
+
col_name = args[i + 1]
|
|
700
|
+
i += 2
|
|
701
|
+
continue
|
|
702
|
+
elif arg == "--start" and i + 1 < len(args):
|
|
703
|
+
try:
|
|
704
|
+
start = int(args[i + 1])
|
|
705
|
+
except ValueError:
|
|
706
|
+
pass
|
|
707
|
+
i += 2
|
|
708
|
+
continue
|
|
709
|
+
elif not arg.startswith("-"):
|
|
710
|
+
file_args.append(arg)
|
|
711
|
+
i += 1
|
|
712
|
+
|
|
713
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
714
|
+
if error:
|
|
715
|
+
return error
|
|
716
|
+
|
|
717
|
+
# Create new headers with index column first
|
|
718
|
+
new_headers = [col_name] + headers
|
|
719
|
+
|
|
720
|
+
# Add index to each row
|
|
721
|
+
new_data = []
|
|
722
|
+
for idx, row in enumerate(data, start=start):
|
|
723
|
+
new_row = {col_name: str(idx)}
|
|
724
|
+
new_row.update(row)
|
|
725
|
+
new_data.append(new_row)
|
|
726
|
+
|
|
727
|
+
return ExecResult(stdout=format_csv(new_headers, new_data), stderr="", exit_code=0)
|
|
728
|
+
|
|
729
|
+
|
|
730
|
+
def _looks_like_file_path(arg: str) -> bool:
|
|
731
|
+
"""Check if argument looks like a file path rather than a column spec."""
|
|
732
|
+
return arg.startswith("/") or arg.startswith("./") or arg.endswith(".csv")
|
|
733
|
+
|
|
734
|
+
|
|
735
|
+
async def cmd_drop(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
736
|
+
"""Drop columns (inverse of select)."""
|
|
737
|
+
cols_spec = ""
|
|
738
|
+
file_args = []
|
|
739
|
+
|
|
740
|
+
for arg in args:
|
|
741
|
+
if not arg.startswith("-"):
|
|
742
|
+
if not cols_spec:
|
|
743
|
+
# Check if this looks like a file path
|
|
744
|
+
if _looks_like_file_path(arg):
|
|
745
|
+
# This is likely a file path, not a column spec
|
|
746
|
+
file_args.append(arg)
|
|
747
|
+
else:
|
|
748
|
+
cols_spec = arg
|
|
749
|
+
else:
|
|
750
|
+
file_args.append(arg)
|
|
751
|
+
|
|
752
|
+
if not cols_spec:
|
|
753
|
+
return ExecResult(
|
|
754
|
+
stdout="",
|
|
755
|
+
stderr="xan drop: no columns specified\n",
|
|
756
|
+
exit_code=1,
|
|
757
|
+
)
|
|
758
|
+
|
|
759
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
760
|
+
if error:
|
|
761
|
+
return error
|
|
762
|
+
|
|
763
|
+
# Parse columns to drop
|
|
764
|
+
drop_cols: set[str] = set()
|
|
765
|
+
for col in cols_spec.split(","):
|
|
766
|
+
col = col.strip()
|
|
767
|
+
if not col:
|
|
768
|
+
continue
|
|
769
|
+
|
|
770
|
+
# Check if it's an index
|
|
771
|
+
try:
|
|
772
|
+
idx = int(col)
|
|
773
|
+
if 0 <= idx < len(headers):
|
|
774
|
+
drop_cols.add(headers[idx])
|
|
775
|
+
continue
|
|
776
|
+
except ValueError:
|
|
777
|
+
pass
|
|
778
|
+
|
|
779
|
+
# Direct column name
|
|
780
|
+
if col in headers:
|
|
781
|
+
drop_cols.add(col)
|
|
782
|
+
|
|
783
|
+
# Keep columns not in drop list
|
|
784
|
+
remaining_headers = [h for h in headers if h not in drop_cols]
|
|
785
|
+
|
|
786
|
+
# Filter data to remaining columns
|
|
787
|
+
new_data = []
|
|
788
|
+
for row in data:
|
|
789
|
+
new_data.append({h: row.get(h, "") for h in remaining_headers})
|
|
790
|
+
|
|
791
|
+
return ExecResult(stdout=format_csv(remaining_headers, new_data), stderr="", exit_code=0)
|
|
792
|
+
|
|
793
|
+
|
|
794
|
+
async def cmd_shuffle(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
795
|
+
"""Randomly reorder rows."""
|
|
796
|
+
seed: int | None = None
|
|
797
|
+
file_args = []
|
|
798
|
+
|
|
799
|
+
i = 0
|
|
800
|
+
while i < len(args):
|
|
801
|
+
arg = args[i]
|
|
802
|
+
if arg == "--seed" and i + 1 < len(args):
|
|
803
|
+
try:
|
|
804
|
+
seed = int(args[i + 1])
|
|
805
|
+
except ValueError:
|
|
806
|
+
pass
|
|
807
|
+
i += 2
|
|
808
|
+
continue
|
|
809
|
+
elif not arg.startswith("-"):
|
|
810
|
+
file_args.append(arg)
|
|
811
|
+
i += 1
|
|
812
|
+
|
|
813
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
814
|
+
if error:
|
|
815
|
+
return error
|
|
816
|
+
|
|
817
|
+
# Shuffle with optional seed
|
|
818
|
+
if seed is not None:
|
|
819
|
+
rng = random.Random(seed)
|
|
820
|
+
rng.shuffle(data)
|
|
821
|
+
else:
|
|
822
|
+
random.shuffle(data)
|
|
823
|
+
|
|
824
|
+
return ExecResult(stdout=format_csv(headers, data), stderr="", exit_code=0)
|
|
825
|
+
|
|
826
|
+
|
|
827
|
+
async def cmd_cat(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
828
|
+
"""Concatenate CSV files."""
|
|
829
|
+
file_args = [a for a in args if not a.startswith("-")]
|
|
830
|
+
|
|
831
|
+
if not file_args:
|
|
832
|
+
return ExecResult(
|
|
833
|
+
stdout="",
|
|
834
|
+
stderr="xan cat: no files specified\n",
|
|
835
|
+
exit_code=1,
|
|
836
|
+
)
|
|
837
|
+
|
|
838
|
+
all_headers: list[str] | None = None
|
|
839
|
+
all_data: list[dict[str, str]] = []
|
|
840
|
+
|
|
841
|
+
for file_path in file_args:
|
|
842
|
+
try:
|
|
843
|
+
path = ctx.fs.resolve_path(ctx.cwd, file_path)
|
|
844
|
+
content = await ctx.fs.read_file(path)
|
|
845
|
+
except FileNotFoundError:
|
|
846
|
+
return ExecResult(
|
|
847
|
+
stdout="",
|
|
848
|
+
stderr=f"xan: {file_path}: No such file or directory\n",
|
|
849
|
+
exit_code=2,
|
|
850
|
+
)
|
|
851
|
+
|
|
852
|
+
headers, data = parse_csv(content)
|
|
853
|
+
|
|
854
|
+
if all_headers is None:
|
|
855
|
+
all_headers = headers
|
|
856
|
+
elif headers != all_headers:
|
|
857
|
+
return ExecResult(
|
|
858
|
+
stdout="",
|
|
859
|
+
stderr=f"xan cat: headers in '{file_path}' do not match\n",
|
|
860
|
+
exit_code=1,
|
|
861
|
+
)
|
|
862
|
+
|
|
863
|
+
all_data.extend(data)
|
|
864
|
+
|
|
865
|
+
if all_headers is None:
|
|
866
|
+
return ExecResult(stdout="", stderr="", exit_code=0)
|
|
867
|
+
|
|
868
|
+
return ExecResult(stdout=format_csv(all_headers, all_data), stderr="", exit_code=0)
|
|
869
|
+
|
|
870
|
+
|
|
871
|
+
async def cmd_to(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
872
|
+
"""Convert CSV to other formats."""
|
|
873
|
+
if not args:
|
|
874
|
+
return ExecResult(
|
|
875
|
+
stdout="",
|
|
876
|
+
stderr="xan to: no format specified\n",
|
|
877
|
+
exit_code=1,
|
|
878
|
+
)
|
|
879
|
+
|
|
880
|
+
fmt = args[0]
|
|
881
|
+
sub_args = args[1:]
|
|
882
|
+
|
|
883
|
+
if fmt == "json":
|
|
884
|
+
return await cmd_to_json(sub_args, ctx)
|
|
885
|
+
else:
|
|
886
|
+
return ExecResult(
|
|
887
|
+
stdout="",
|
|
888
|
+
stderr=f"xan to: unknown format '{fmt}'\n",
|
|
889
|
+
exit_code=1,
|
|
890
|
+
)
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
async def cmd_to_json(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
894
|
+
"""Convert CSV to JSON."""
|
|
895
|
+
file_args = [a for a in args if not a.startswith("-")]
|
|
896
|
+
|
|
897
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
898
|
+
if error:
|
|
899
|
+
return error
|
|
900
|
+
|
|
901
|
+
# Convert to list of dicts
|
|
902
|
+
result = [dict(row) for row in data]
|
|
903
|
+
output = json.dumps(result, ensure_ascii=False)
|
|
904
|
+
return ExecResult(stdout=output + "\n", stderr="", exit_code=0)
|
|
905
|
+
|
|
906
|
+
|
|
907
|
+
async def cmd_from(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
908
|
+
"""Convert other formats to CSV."""
|
|
909
|
+
if not args:
|
|
910
|
+
return ExecResult(
|
|
911
|
+
stdout="",
|
|
912
|
+
stderr="xan from: no format specified\n",
|
|
913
|
+
exit_code=1,
|
|
914
|
+
)
|
|
915
|
+
|
|
916
|
+
fmt = args[0]
|
|
917
|
+
sub_args = args[1:]
|
|
918
|
+
|
|
919
|
+
if fmt == "json":
|
|
920
|
+
return await cmd_from_json(sub_args, ctx)
|
|
921
|
+
else:
|
|
922
|
+
return ExecResult(
|
|
923
|
+
stdout="",
|
|
924
|
+
stderr=f"xan from: unknown format '{fmt}'\n",
|
|
925
|
+
exit_code=1,
|
|
926
|
+
)
|
|
927
|
+
|
|
928
|
+
|
|
929
|
+
async def cmd_from_json(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
930
|
+
"""Convert JSON to CSV."""
|
|
931
|
+
file_args = [a for a in args if not a.startswith("-")]
|
|
932
|
+
|
|
933
|
+
# Read input
|
|
934
|
+
if not file_args or file_args[0] == "-":
|
|
935
|
+
content = ctx.stdin
|
|
936
|
+
else:
|
|
937
|
+
try:
|
|
938
|
+
path = ctx.fs.resolve_path(ctx.cwd, file_args[0])
|
|
939
|
+
content = await ctx.fs.read_file(path)
|
|
940
|
+
except FileNotFoundError:
|
|
941
|
+
return ExecResult(
|
|
942
|
+
stdout="",
|
|
943
|
+
stderr=f"xan: {file_args[0]}: No such file or directory\n",
|
|
944
|
+
exit_code=2,
|
|
945
|
+
)
|
|
946
|
+
|
|
947
|
+
# Parse JSON
|
|
948
|
+
try:
|
|
949
|
+
data = json.loads(content)
|
|
950
|
+
except json.JSONDecodeError:
|
|
951
|
+
return ExecResult(
|
|
952
|
+
stdout="",
|
|
953
|
+
stderr="xan from json: invalid JSON\n",
|
|
954
|
+
exit_code=1,
|
|
955
|
+
)
|
|
956
|
+
|
|
957
|
+
if not isinstance(data, list):
|
|
958
|
+
return ExecResult(
|
|
959
|
+
stdout="",
|
|
960
|
+
stderr="xan from json: expected JSON array\n",
|
|
961
|
+
exit_code=1,
|
|
962
|
+
)
|
|
963
|
+
|
|
964
|
+
if not data:
|
|
965
|
+
return ExecResult(stdout="", stderr="", exit_code=0)
|
|
966
|
+
|
|
967
|
+
# Collect all keys from all objects
|
|
968
|
+
all_keys: list[str] = []
|
|
969
|
+
for item in data:
|
|
970
|
+
if isinstance(item, dict):
|
|
971
|
+
for key in item.keys():
|
|
972
|
+
if key not in all_keys:
|
|
973
|
+
all_keys.append(key)
|
|
974
|
+
|
|
975
|
+
# Create CSV output
|
|
976
|
+
output = io.StringIO(newline="")
|
|
977
|
+
writer = csv.DictWriter(output, fieldnames=all_keys, lineterminator="\n")
|
|
978
|
+
writer.writeheader()
|
|
979
|
+
for item in data:
|
|
980
|
+
if isinstance(item, dict):
|
|
981
|
+
writer.writerow({k: str(item.get(k, "")) for k in all_keys})
|
|
982
|
+
|
|
983
|
+
return ExecResult(stdout=output.getvalue(), stderr="", exit_code=0)
|
|
984
|
+
|
|
985
|
+
|
|
986
|
+
async def cmd_rename(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
987
|
+
"""Rename columns."""
|
|
988
|
+
rename_spec = ""
|
|
989
|
+
file_args = []
|
|
990
|
+
|
|
991
|
+
for arg in args:
|
|
992
|
+
if not arg.startswith("-"):
|
|
993
|
+
if not rename_spec:
|
|
994
|
+
if _looks_like_file_path(arg):
|
|
995
|
+
file_args.append(arg)
|
|
996
|
+
else:
|
|
997
|
+
rename_spec = arg
|
|
998
|
+
else:
|
|
999
|
+
file_args.append(arg)
|
|
1000
|
+
|
|
1001
|
+
if not rename_spec:
|
|
1002
|
+
return ExecResult(
|
|
1003
|
+
stdout="",
|
|
1004
|
+
stderr="xan rename: no rename specification provided\n",
|
|
1005
|
+
exit_code=1,
|
|
1006
|
+
)
|
|
1007
|
+
|
|
1008
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
1009
|
+
if error:
|
|
1010
|
+
return error
|
|
1011
|
+
|
|
1012
|
+
# Parse rename specification: old:new,old2:new2
|
|
1013
|
+
renames: dict[str, str] = {}
|
|
1014
|
+
for pair in rename_spec.split(","):
|
|
1015
|
+
if ":" in pair:
|
|
1016
|
+
old, new = pair.split(":", 1)
|
|
1017
|
+
renames[old.strip()] = new.strip()
|
|
1018
|
+
|
|
1019
|
+
# Apply renames to headers
|
|
1020
|
+
new_headers = [renames.get(h, h) for h in headers]
|
|
1021
|
+
|
|
1022
|
+
# Update data keys
|
|
1023
|
+
new_data = []
|
|
1024
|
+
for row in data:
|
|
1025
|
+
new_row = {}
|
|
1026
|
+
for h in headers:
|
|
1027
|
+
new_key = renames.get(h, h)
|
|
1028
|
+
new_row[new_key] = row.get(h, "")
|
|
1029
|
+
new_data.append(new_row)
|
|
1030
|
+
|
|
1031
|
+
return ExecResult(stdout=format_csv(new_headers, new_data), stderr="", exit_code=0)
|
|
1032
|
+
|
|
1033
|
+
|
|
1034
|
+
async def cmd_sample(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
1035
|
+
"""Random sample of N rows."""
|
|
1036
|
+
n = 10
|
|
1037
|
+
seed: int | None = None
|
|
1038
|
+
file_args = []
|
|
1039
|
+
|
|
1040
|
+
i = 0
|
|
1041
|
+
while i < len(args):
|
|
1042
|
+
arg = args[i]
|
|
1043
|
+
if arg == "--seed" and i + 1 < len(args):
|
|
1044
|
+
try:
|
|
1045
|
+
seed = int(args[i + 1])
|
|
1046
|
+
except ValueError:
|
|
1047
|
+
pass
|
|
1048
|
+
i += 2
|
|
1049
|
+
continue
|
|
1050
|
+
elif not arg.startswith("-"):
|
|
1051
|
+
# First non-flag arg is the count
|
|
1052
|
+
try:
|
|
1053
|
+
n = int(arg)
|
|
1054
|
+
except ValueError:
|
|
1055
|
+
file_args.append(arg)
|
|
1056
|
+
i += 1
|
|
1057
|
+
|
|
1058
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
1059
|
+
if error:
|
|
1060
|
+
return error
|
|
1061
|
+
|
|
1062
|
+
# Sample with optional seed
|
|
1063
|
+
if seed is not None:
|
|
1064
|
+
rng = random.Random(seed)
|
|
1065
|
+
sampled = rng.sample(data, min(n, len(data)))
|
|
1066
|
+
else:
|
|
1067
|
+
sampled = random.sample(data, min(n, len(data)))
|
|
1068
|
+
|
|
1069
|
+
return ExecResult(stdout=format_csv(headers, sampled), stderr="", exit_code=0)
|
|
1070
|
+
|
|
1071
|
+
|
|
1072
|
+
async def cmd_dedup(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
1073
|
+
"""Remove duplicate rows."""
|
|
1074
|
+
select_col = ""
|
|
1075
|
+
file_args = []
|
|
1076
|
+
|
|
1077
|
+
i = 0
|
|
1078
|
+
while i < len(args):
|
|
1079
|
+
arg = args[i]
|
|
1080
|
+
if arg in ("-s", "--select") and i + 1 < len(args):
|
|
1081
|
+
select_col = args[i + 1]
|
|
1082
|
+
i += 2
|
|
1083
|
+
continue
|
|
1084
|
+
elif not arg.startswith("-"):
|
|
1085
|
+
file_args.append(arg)
|
|
1086
|
+
i += 1
|
|
1087
|
+
|
|
1088
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
1089
|
+
if error:
|
|
1090
|
+
return error
|
|
1091
|
+
|
|
1092
|
+
seen: set[str] = set()
|
|
1093
|
+
unique_data = []
|
|
1094
|
+
|
|
1095
|
+
for row in data:
|
|
1096
|
+
if select_col:
|
|
1097
|
+
key = str(row.get(select_col, ""))
|
|
1098
|
+
else:
|
|
1099
|
+
key = tuple(row.get(h, "") for h in headers).__str__()
|
|
1100
|
+
|
|
1101
|
+
if key not in seen:
|
|
1102
|
+
seen.add(key)
|
|
1103
|
+
unique_data.append(row)
|
|
1104
|
+
|
|
1105
|
+
return ExecResult(stdout=format_csv(headers, unique_data), stderr="", exit_code=0)
|
|
1106
|
+
|
|
1107
|
+
|
|
1108
|
+
async def cmd_top(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
1109
|
+
"""Get top N rows by column value."""
|
|
1110
|
+
n = 10
|
|
1111
|
+
reverse = False
|
|
1112
|
+
sort_col = ""
|
|
1113
|
+
file_args = []
|
|
1114
|
+
|
|
1115
|
+
i = 0
|
|
1116
|
+
while i < len(args):
|
|
1117
|
+
arg = args[i]
|
|
1118
|
+
if arg in ("-r", "--reverse"):
|
|
1119
|
+
reverse = True
|
|
1120
|
+
elif not arg.startswith("-"):
|
|
1121
|
+
# First non-flag is n, second is column, third is file
|
|
1122
|
+
try:
|
|
1123
|
+
n = int(arg)
|
|
1124
|
+
except ValueError:
|
|
1125
|
+
if not sort_col:
|
|
1126
|
+
sort_col = arg
|
|
1127
|
+
else:
|
|
1128
|
+
file_args.append(arg)
|
|
1129
|
+
i += 1
|
|
1130
|
+
|
|
1131
|
+
if not sort_col:
|
|
1132
|
+
return ExecResult(
|
|
1133
|
+
stdout="",
|
|
1134
|
+
stderr="xan top: no column specified\n",
|
|
1135
|
+
exit_code=1,
|
|
1136
|
+
)
|
|
1137
|
+
|
|
1138
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
1139
|
+
if error:
|
|
1140
|
+
return error
|
|
1141
|
+
|
|
1142
|
+
if sort_col not in headers:
|
|
1143
|
+
return ExecResult(
|
|
1144
|
+
stdout="",
|
|
1145
|
+
stderr=f"xan top: column '{sort_col}' not found\n",
|
|
1146
|
+
exit_code=1,
|
|
1147
|
+
)
|
|
1148
|
+
|
|
1149
|
+
def sort_key(row: dict) -> float:
|
|
1150
|
+
val = row.get(sort_col, "")
|
|
1151
|
+
try:
|
|
1152
|
+
return float(val) if val else 0
|
|
1153
|
+
except ValueError:
|
|
1154
|
+
return 0
|
|
1155
|
+
|
|
1156
|
+
# Sort descending by default (top = highest), ascending if reverse
|
|
1157
|
+
sorted_data = sorted(data, key=sort_key, reverse=not reverse)
|
|
1158
|
+
return ExecResult(stdout=format_csv(headers, sorted_data[:n]), stderr="", exit_code=0)
|
|
1159
|
+
|
|
1160
|
+
|
|
1161
|
+
async def cmd_transpose(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
1162
|
+
"""Transpose rows and columns."""
|
|
1163
|
+
file_args = [a for a in args if not a.startswith("-")]
|
|
1164
|
+
|
|
1165
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
1166
|
+
if error:
|
|
1167
|
+
return error
|
|
1168
|
+
|
|
1169
|
+
if not headers:
|
|
1170
|
+
return ExecResult(stdout="", stderr="", exit_code=0)
|
|
1171
|
+
|
|
1172
|
+
# Build transposed data
|
|
1173
|
+
# Original headers become first column
|
|
1174
|
+
# Each data row becomes a new column
|
|
1175
|
+
new_headers = ["field"] + [str(i) for i in range(len(data))]
|
|
1176
|
+
new_data = []
|
|
1177
|
+
|
|
1178
|
+
for h in headers:
|
|
1179
|
+
row = {"field": h}
|
|
1180
|
+
for i, d in enumerate(data):
|
|
1181
|
+
row[str(i)] = d.get(h, "")
|
|
1182
|
+
new_data.append(row)
|
|
1183
|
+
|
|
1184
|
+
return ExecResult(stdout=format_csv(new_headers, new_data), stderr="", exit_code=0)
|
|
1185
|
+
|
|
1186
|
+
|
|
1187
|
+
async def cmd_fixlengths(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
1188
|
+
"""Fix ragged CSV by padding/truncating rows."""
|
|
1189
|
+
file_args = [a for a in args if not a.startswith("-")]
|
|
1190
|
+
|
|
1191
|
+
# Read raw CSV to handle ragged data
|
|
1192
|
+
if not file_args or file_args[0] == "-":
|
|
1193
|
+
content = ctx.stdin
|
|
1194
|
+
else:
|
|
1195
|
+
try:
|
|
1196
|
+
path = ctx.fs.resolve_path(ctx.cwd, file_args[0])
|
|
1197
|
+
content = await ctx.fs.read_file(path)
|
|
1198
|
+
except FileNotFoundError:
|
|
1199
|
+
return ExecResult(
|
|
1200
|
+
stdout="",
|
|
1201
|
+
stderr=f"xan: {file_args[0]}: No such file or directory\n",
|
|
1202
|
+
exit_code=2,
|
|
1203
|
+
)
|
|
1204
|
+
|
|
1205
|
+
if not content.strip():
|
|
1206
|
+
return ExecResult(stdout="", stderr="", exit_code=0)
|
|
1207
|
+
|
|
1208
|
+
# Parse manually to handle ragged rows
|
|
1209
|
+
lines = content.strip().split("\n")
|
|
1210
|
+
reader = csv.reader(lines)
|
|
1211
|
+
rows = list(reader)
|
|
1212
|
+
|
|
1213
|
+
if not rows:
|
|
1214
|
+
return ExecResult(stdout="", stderr="", exit_code=0)
|
|
1215
|
+
|
|
1216
|
+
# Use header row length as the target
|
|
1217
|
+
target_len = len(rows[0])
|
|
1218
|
+
|
|
1219
|
+
# Fix each row
|
|
1220
|
+
fixed_rows = []
|
|
1221
|
+
for row in rows:
|
|
1222
|
+
if len(row) < target_len:
|
|
1223
|
+
row = row + [""] * (target_len - len(row))
|
|
1224
|
+
elif len(row) > target_len:
|
|
1225
|
+
row = row[:target_len]
|
|
1226
|
+
fixed_rows.append(row)
|
|
1227
|
+
|
|
1228
|
+
# Output
|
|
1229
|
+
output = io.StringIO(newline="")
|
|
1230
|
+
writer = csv.writer(output, lineterminator="\n")
|
|
1231
|
+
for row in fixed_rows:
|
|
1232
|
+
writer.writerow(row)
|
|
1233
|
+
|
|
1234
|
+
return ExecResult(stdout=output.getvalue(), stderr="", exit_code=0)
|
|
1235
|
+
|
|
1236
|
+
|
|
1237
|
+
async def cmd_flatten(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
1238
|
+
"""Display records vertically (one field per line)."""
|
|
1239
|
+
file_args = [a for a in args if not a.startswith("-")]
|
|
1240
|
+
|
|
1241
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
1242
|
+
if error:
|
|
1243
|
+
return error
|
|
1244
|
+
|
|
1245
|
+
if not headers or not data:
|
|
1246
|
+
return ExecResult(stdout="", stderr="", exit_code=0)
|
|
1247
|
+
|
|
1248
|
+
lines = []
|
|
1249
|
+
for i, row in enumerate(data):
|
|
1250
|
+
if i > 0:
|
|
1251
|
+
lines.append("") # Blank line between records
|
|
1252
|
+
for h in headers:
|
|
1253
|
+
lines.append(f"{h}: {row.get(h, '')}")
|
|
1254
|
+
|
|
1255
|
+
return ExecResult(stdout="\n".join(lines) + "\n", stderr="", exit_code=0)
|
|
1256
|
+
|
|
1257
|
+
|
|
1258
|
+
async def cmd_explode(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
1259
|
+
"""Split column values into multiple rows."""
|
|
1260
|
+
col = ""
|
|
1261
|
+
delimiter = ","
|
|
1262
|
+
file_args = []
|
|
1263
|
+
|
|
1264
|
+
i = 0
|
|
1265
|
+
while i < len(args):
|
|
1266
|
+
arg = args[i]
|
|
1267
|
+
if arg in ("-d", "--delimiter") and i + 1 < len(args):
|
|
1268
|
+
delimiter = args[i + 1]
|
|
1269
|
+
i += 2
|
|
1270
|
+
continue
|
|
1271
|
+
elif not arg.startswith("-"):
|
|
1272
|
+
if not col:
|
|
1273
|
+
if _looks_like_file_path(arg):
|
|
1274
|
+
file_args.append(arg)
|
|
1275
|
+
else:
|
|
1276
|
+
col = arg
|
|
1277
|
+
else:
|
|
1278
|
+
file_args.append(arg)
|
|
1279
|
+
i += 1
|
|
1280
|
+
|
|
1281
|
+
if not col:
|
|
1282
|
+
return ExecResult(
|
|
1283
|
+
stdout="",
|
|
1284
|
+
stderr="xan explode: no column specified\n",
|
|
1285
|
+
exit_code=1,
|
|
1286
|
+
)
|
|
1287
|
+
|
|
1288
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
1289
|
+
if error:
|
|
1290
|
+
return error
|
|
1291
|
+
|
|
1292
|
+
if col not in headers:
|
|
1293
|
+
return ExecResult(
|
|
1294
|
+
stdout="",
|
|
1295
|
+
stderr=f"xan explode: column '{col}' not found\n",
|
|
1296
|
+
exit_code=1,
|
|
1297
|
+
)
|
|
1298
|
+
|
|
1299
|
+
# Explode rows
|
|
1300
|
+
new_data = []
|
|
1301
|
+
for row in data:
|
|
1302
|
+
val = row.get(col, "")
|
|
1303
|
+
parts = val.split(delimiter) if val else [""]
|
|
1304
|
+
for part in parts:
|
|
1305
|
+
new_row = dict(row)
|
|
1306
|
+
new_row[col] = part.strip()
|
|
1307
|
+
new_data.append(new_row)
|
|
1308
|
+
|
|
1309
|
+
return ExecResult(stdout=format_csv(headers, new_data), stderr="", exit_code=0)
|
|
1310
|
+
|
|
1311
|
+
|
|
1312
|
+
async def cmd_implode(args: list[str], ctx: CommandContext) -> ExecResult:
|
|
1313
|
+
"""Combine rows by grouping, joining values."""
|
|
1314
|
+
col = ""
|
|
1315
|
+
group_col = ""
|
|
1316
|
+
delimiter = ","
|
|
1317
|
+
file_args = []
|
|
1318
|
+
|
|
1319
|
+
i = 0
|
|
1320
|
+
while i < len(args):
|
|
1321
|
+
arg = args[i]
|
|
1322
|
+
if arg in ("-g", "--group") and i + 1 < len(args):
|
|
1323
|
+
group_col = args[i + 1]
|
|
1324
|
+
i += 2
|
|
1325
|
+
continue
|
|
1326
|
+
elif arg in ("-d", "--delimiter") and i + 1 < len(args):
|
|
1327
|
+
delimiter = args[i + 1]
|
|
1328
|
+
i += 2
|
|
1329
|
+
continue
|
|
1330
|
+
elif not arg.startswith("-"):
|
|
1331
|
+
if not col:
|
|
1332
|
+
if _looks_like_file_path(arg):
|
|
1333
|
+
file_args.append(arg)
|
|
1334
|
+
else:
|
|
1335
|
+
col = arg
|
|
1336
|
+
else:
|
|
1337
|
+
file_args.append(arg)
|
|
1338
|
+
i += 1
|
|
1339
|
+
|
|
1340
|
+
if not col:
|
|
1341
|
+
return ExecResult(
|
|
1342
|
+
stdout="",
|
|
1343
|
+
stderr="xan implode: no column specified\n",
|
|
1344
|
+
exit_code=1,
|
|
1345
|
+
)
|
|
1346
|
+
|
|
1347
|
+
if not group_col:
|
|
1348
|
+
return ExecResult(
|
|
1349
|
+
stdout="",
|
|
1350
|
+
stderr="xan implode: no group column specified (use -g)\n",
|
|
1351
|
+
exit_code=1,
|
|
1352
|
+
)
|
|
1353
|
+
|
|
1354
|
+
headers, data, error = await read_csv_input(file_args, ctx)
|
|
1355
|
+
if error:
|
|
1356
|
+
return error
|
|
1357
|
+
|
|
1358
|
+
# Group rows
|
|
1359
|
+
groups: dict[str, list[dict[str, str]]] = {}
|
|
1360
|
+
for row in data:
|
|
1361
|
+
key = row.get(group_col, "")
|
|
1362
|
+
if key not in groups:
|
|
1363
|
+
groups[key] = []
|
|
1364
|
+
groups[key].append(row)
|
|
1365
|
+
|
|
1366
|
+
# Implode
|
|
1367
|
+
new_data = []
|
|
1368
|
+
for key, rows in groups.items():
|
|
1369
|
+
# Take first row as base, combine the implode column
|
|
1370
|
+
base = dict(rows[0])
|
|
1371
|
+
values = [r.get(col, "") for r in rows]
|
|
1372
|
+
base[col] = delimiter.join(values)
|
|
1373
|
+
new_data.append(base)
|
|
1374
|
+
|
|
1375
|
+
return ExecResult(stdout=format_csv(headers, new_data), stderr="", exit_code=0)
|
|
1376
|
+
|
|
1377
|
+
|
|
1378
|
+async def cmd_split(args: list[str], ctx: CommandContext) -> ExecResult:
+    """Split CSV into multiple files."""
+    chunk_size = 0
+    output_dir = "/tmp/xan_split"
+    file_args = []
+
+    i = 0
+    while i < len(args):
+        arg = args[i]
+        if arg in ("-o", "--output") and i + 1 < len(args):
+            output_dir = args[i + 1]
+            i += 2
+            continue
+        elif not arg.startswith("-"):
+            try:
+                chunk_size = int(arg)
+            except ValueError:
+                file_args.append(arg)
+        i += 1
+
+    if chunk_size <= 0:
+        return ExecResult(
+            stdout="",
+            stderr="xan split: no chunk size specified\n",
+            exit_code=1,
+        )
+
+    headers, data, error = await read_csv_input(file_args, ctx)
+    if error:
+        return error
+
+    # Create output directory
+    output_path = ctx.fs.resolve_path(ctx.cwd, output_dir)
+    try:
+        await ctx.fs.mkdir(output_path, recursive=True)
+    except FileExistsError:
+        pass
+
+    # Split data into chunks
+    file_num = 0
+    for i in range(0, len(data), chunk_size):
+        chunk = data[i : i + chunk_size]
+        chunk_content = format_csv(headers, chunk)
+        chunk_path = f"{output_path}/{file_num}.csv"
+        await ctx.fs.write_file(chunk_path, chunk_content)
+        file_num += 1
+
+    return ExecResult(stdout=f"Split into {file_num} files in {output_dir}\n", stderr="", exit_code=0)
+
+
+# =============================================================================
+# Phase 3 Commands - Stubs (Not Yet Implemented)
+# These commands require expression evaluation which is not yet ported.
+# =============================================================================
+
+
+def _not_implemented(cmd: str) -> ExecResult:
+    """Return a not-implemented error for stubbed commands."""
+    return ExecResult(
+        stdout="",
+        stderr=(
+            f"xan {cmd}: not yet implemented\n"
+            f"This command requires expression evaluation which is not yet ported from TypeScript.\n"
+        ),
+        exit_code=1,
+    )
+
+
+async def cmd_join(args: list[str], ctx: CommandContext) -> ExecResult:
+    """Join two CSVs on a key column.
+
+    Not yet implemented. Requires:
+    - Multiple input file handling
+    - Key column matching
+    - Join types (inner, left, right, full outer)
+
+    Usage would be: xan join <LEFT_COL> <LEFT_FILE> <RIGHT_COL> <RIGHT_FILE>
+    """
+    return _not_implemented("join")
+
+
+async def cmd_agg(args: list[str], ctx: CommandContext) -> ExecResult:
+    """Aggregate column values.
+
+    Not yet implemented. Requires:
+    - Expression parser for aggregation functions (sum, count, avg, min, max, etc.)
+    - Column selection
+
+    Usage would be: xan agg 'sum(price), count()' data.csv
+    """
+    return _not_implemented("agg")
+
+
+async def cmd_groupby(args: list[str], ctx: CommandContext) -> ExecResult:
+    """Group rows and aggregate.
+
+    Not yet implemented. Requires:
+    - Expression parser for aggregation functions
+    - Group key handling
+
+    Usage would be: xan groupby category 'sum(price), count()' data.csv
+    """
+    return _not_implemented("groupby")
+
+
+async def cmd_map(args: list[str], ctx: CommandContext) -> ExecResult:
+    """Add computed columns via expressions.
+
+    Not yet implemented. Requires:
+    - Expression parser for column computations
+    - Support for arithmetic, string ops, conditionals
+
+    Usage would be: xan map 'total = price * quantity' data.csv
+    """
+    return _not_implemented("map")
+
+
+async def cmd_transform(args: list[str], ctx: CommandContext) -> ExecResult:
+    """Transform column values via expressions.
+
+    Not yet implemented. Requires:
+    - Expression parser for transformations
+    - In-place column modification
+
+    Usage would be: xan transform 'price = price * 1.1' data.csv
+    """
+    return _not_implemented("transform")
+
+
+async def cmd_pivot(args: list[str], ctx: CommandContext) -> ExecResult:
+    """Reshape data (pivot table).
+
+    Not yet implemented. Requires:
+    - Row key, column key, and value columns
+    - Aggregation for duplicate keys
+
+    Usage would be: xan pivot <ROW_COL> <COL_COL> <VAL_COL> data.csv
+    """
+    return _not_implemented("pivot")
+
+
+class XanCommand:
+    """The xan command - CSV toolkit."""
+
+    name = "xan"
+
+    async def execute(self, args: list[str], ctx: CommandContext) -> ExecResult:
+        """Execute the xan command."""
+        if not args or "--help" in args or "-h" in args:
+            return ExecResult(
+                stdout=(
+                    "Usage: xan <COMMAND> [OPTIONS] [FILE]\n"
+                    "CSV toolkit for data manipulation.\n\n"
+                    "Commands:\n"
+                    "  headers     Show column names\n"
+                    "  count       Count rows\n"
+                    "  head        Show first N rows\n"
+                    "  tail        Show last N rows\n"
+                    "  slice       Extract row range\n"
+                    "  select      Select columns\n"
+                    "  drop        Drop columns (inverse of select)\n"
+                    "  rename      Rename columns (old:new)\n"
+                    "  filter      Filter rows by expression\n"
+                    "  search      Filter rows by regex\n"
+                    "  sort        Sort rows\n"
+                    "  reverse     Reverse row order\n"
+                    "  behead      Output without header\n"
+                    "  enum        Add index column\n"
+                    "  shuffle     Randomly reorder rows\n"
+                    "  sample      Random sample of N rows\n"
+                    "  dedup       Remove duplicate rows\n"
+                    "  top         Top N rows by column value\n"
+                    "  cat         Concatenate CSV files\n"
+                    "  transpose   Swap rows and columns\n"
+                    "  fixlengths  Fix ragged CSV\n"
+                    "  flatten     Display records vertically\n"
+                    "  explode     Split column to rows\n"
+                    "  implode     Combine rows by grouping\n"
+                    "  split       Split into multiple files\n"
+                    "  view        Pretty print as table\n"
+                    "  stats       Show column statistics\n"
+                    "  frequency   Count value occurrences\n"
+                    "  to          Convert to other formats (json)\n"
+                    "  from        Convert from other formats (json)\n\n"
+                    "Not Yet Implemented (require expression evaluation):\n"
+                    "  join        Join two CSVs on key\n"
+                    "  agg         Aggregate values\n"
+                    "  groupby     Group and aggregate\n"
+                    "  map         Add computed columns\n"
+                    "  transform   Transform column values\n"
+                    "  pivot       Reshape data (pivot table)\n\n"
+                    "Examples:\n"
+                    "  xan headers data.csv\n"
+                    "  xan count data.csv\n"
+                    "  xan head -n 5 data.csv\n"
+                    "  xan select name,email data.csv\n"
+                    "  xan filter 'age > 30' data.csv\n"
+                    "  xan sort -N price data.csv\n"
+                    "  xan to json data.csv\n"
+                ),
+                stderr="",
+                exit_code=0,
+            )
+
+        subcommand = args[0]
+        sub_args = args[1:]
+
+        if subcommand == "headers":
+            return await cmd_headers(sub_args, ctx)
+        elif subcommand == "count":
+            return await cmd_count(sub_args, ctx)
+        elif subcommand == "head":
+            return await cmd_head(sub_args, ctx)
+        elif subcommand == "tail":
+            return await cmd_tail(sub_args, ctx)
+        elif subcommand == "slice":
+            return await cmd_slice(sub_args, ctx)
+        elif subcommand == "select":
+            return await cmd_select(sub_args, ctx)
+        elif subcommand == "drop":
+            return await cmd_drop(sub_args, ctx)
+        elif subcommand == "rename":
+            return await cmd_rename(sub_args, ctx)
+        elif subcommand == "filter":
+            return await cmd_filter(sub_args, ctx)
+        elif subcommand == "search":
+            return await cmd_search(sub_args, ctx)
+        elif subcommand == "sort":
+            return await cmd_sort(sub_args, ctx)
+        elif subcommand == "reverse":
+            return await cmd_reverse(sub_args, ctx)
+        elif subcommand == "behead":
+            return await cmd_behead(sub_args, ctx)
+        elif subcommand == "enum":
+            return await cmd_enum(sub_args, ctx)
+        elif subcommand == "shuffle":
+            return await cmd_shuffle(sub_args, ctx)
+        elif subcommand == "sample":
+            return await cmd_sample(sub_args, ctx)
+        elif subcommand == "dedup":
+            return await cmd_dedup(sub_args, ctx)
+        elif subcommand == "top":
+            return await cmd_top(sub_args, ctx)
+        elif subcommand == "cat":
+            return await cmd_cat(sub_args, ctx)
+        elif subcommand == "transpose":
+            return await cmd_transpose(sub_args, ctx)
+        elif subcommand == "fixlengths":
+            return await cmd_fixlengths(sub_args, ctx)
+        elif subcommand in ("flatten", "f"):
+            return await cmd_flatten(sub_args, ctx)
+        elif subcommand == "explode":
+            return await cmd_explode(sub_args, ctx)
+        elif subcommand == "implode":
+            return await cmd_implode(sub_args, ctx)
+        elif subcommand == "split":
+            return await cmd_split(sub_args, ctx)
+        elif subcommand == "view":
+            return await cmd_view(sub_args, ctx)
+        elif subcommand == "stats":
+            return await cmd_stats(sub_args, ctx)
+        elif subcommand in ("frequency", "freq"):
+            return await cmd_frequency(sub_args, ctx)
+        elif subcommand == "to":
+            return await cmd_to(sub_args, ctx)
+        elif subcommand == "from":
+            return await cmd_from(sub_args, ctx)
+        # Stubbed commands (not yet implemented)
+        elif subcommand == "join":
+            return await cmd_join(sub_args, ctx)
+        elif subcommand == "agg":
+            return await cmd_agg(sub_args, ctx)
+        elif subcommand == "groupby":
+            return await cmd_groupby(sub_args, ctx)
+        elif subcommand == "map":
+            return await cmd_map(sub_args, ctx)
+        elif subcommand == "transform":
+            return await cmd_transform(sub_args, ctx)
+        elif subcommand == "pivot":
+            return await cmd_pivot(sub_args, ctx)
+        else:
+            return ExecResult(
+                stdout="",
+                stderr=f"xan: unknown command '{subcommand}'\nRun 'xan --help' for usage.\n",
+                exit_code=1,
+            )