just-bash 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. just_bash/__init__.py +55 -0
  2. just_bash/ast/__init__.py +213 -0
  3. just_bash/ast/factory.py +320 -0
  4. just_bash/ast/types.py +953 -0
  5. just_bash/bash.py +220 -0
  6. just_bash/commands/__init__.py +23 -0
  7. just_bash/commands/argv/__init__.py +5 -0
  8. just_bash/commands/argv/argv.py +21 -0
  9. just_bash/commands/awk/__init__.py +5 -0
  10. just_bash/commands/awk/awk.py +1168 -0
  11. just_bash/commands/base64/__init__.py +5 -0
  12. just_bash/commands/base64/base64.py +138 -0
  13. just_bash/commands/basename/__init__.py +5 -0
  14. just_bash/commands/basename/basename.py +72 -0
  15. just_bash/commands/bash/__init__.py +5 -0
  16. just_bash/commands/bash/bash.py +188 -0
  17. just_bash/commands/cat/__init__.py +5 -0
  18. just_bash/commands/cat/cat.py +173 -0
  19. just_bash/commands/checksum/__init__.py +5 -0
  20. just_bash/commands/checksum/checksum.py +179 -0
  21. just_bash/commands/chmod/__init__.py +5 -0
  22. just_bash/commands/chmod/chmod.py +216 -0
  23. just_bash/commands/column/__init__.py +5 -0
  24. just_bash/commands/column/column.py +180 -0
  25. just_bash/commands/comm/__init__.py +5 -0
  26. just_bash/commands/comm/comm.py +150 -0
  27. just_bash/commands/compression/__init__.py +5 -0
  28. just_bash/commands/compression/compression.py +298 -0
  29. just_bash/commands/cp/__init__.py +5 -0
  30. just_bash/commands/cp/cp.py +149 -0
  31. just_bash/commands/curl/__init__.py +5 -0
  32. just_bash/commands/curl/curl.py +801 -0
  33. just_bash/commands/cut/__init__.py +5 -0
  34. just_bash/commands/cut/cut.py +327 -0
  35. just_bash/commands/date/__init__.py +5 -0
  36. just_bash/commands/date/date.py +258 -0
  37. just_bash/commands/diff/__init__.py +5 -0
  38. just_bash/commands/diff/diff.py +118 -0
  39. just_bash/commands/dirname/__init__.py +5 -0
  40. just_bash/commands/dirname/dirname.py +56 -0
  41. just_bash/commands/du/__init__.py +5 -0
  42. just_bash/commands/du/du.py +150 -0
  43. just_bash/commands/echo/__init__.py +5 -0
  44. just_bash/commands/echo/echo.py +125 -0
  45. just_bash/commands/env/__init__.py +5 -0
  46. just_bash/commands/env/env.py +163 -0
  47. just_bash/commands/expand/__init__.py +5 -0
  48. just_bash/commands/expand/expand.py +299 -0
  49. just_bash/commands/expr/__init__.py +5 -0
  50. just_bash/commands/expr/expr.py +273 -0
  51. just_bash/commands/file/__init__.py +5 -0
  52. just_bash/commands/file/file.py +274 -0
  53. just_bash/commands/find/__init__.py +5 -0
  54. just_bash/commands/find/find.py +623 -0
  55. just_bash/commands/fold/__init__.py +5 -0
  56. just_bash/commands/fold/fold.py +160 -0
  57. just_bash/commands/grep/__init__.py +5 -0
  58. just_bash/commands/grep/grep.py +418 -0
  59. just_bash/commands/head/__init__.py +5 -0
  60. just_bash/commands/head/head.py +167 -0
  61. just_bash/commands/help/__init__.py +5 -0
  62. just_bash/commands/help/help.py +67 -0
  63. just_bash/commands/hostname/__init__.py +5 -0
  64. just_bash/commands/hostname/hostname.py +21 -0
  65. just_bash/commands/html_to_markdown/__init__.py +5 -0
  66. just_bash/commands/html_to_markdown/html_to_markdown.py +191 -0
  67. just_bash/commands/join/__init__.py +5 -0
  68. just_bash/commands/join/join.py +252 -0
  69. just_bash/commands/jq/__init__.py +5 -0
  70. just_bash/commands/jq/jq.py +280 -0
  71. just_bash/commands/ln/__init__.py +5 -0
  72. just_bash/commands/ln/ln.py +127 -0
  73. just_bash/commands/ls/__init__.py +5 -0
  74. just_bash/commands/ls/ls.py +280 -0
  75. just_bash/commands/mkdir/__init__.py +5 -0
  76. just_bash/commands/mkdir/mkdir.py +92 -0
  77. just_bash/commands/mv/__init__.py +5 -0
  78. just_bash/commands/mv/mv.py +142 -0
  79. just_bash/commands/nl/__init__.py +5 -0
  80. just_bash/commands/nl/nl.py +180 -0
  81. just_bash/commands/od/__init__.py +5 -0
  82. just_bash/commands/od/od.py +157 -0
  83. just_bash/commands/paste/__init__.py +5 -0
  84. just_bash/commands/paste/paste.py +100 -0
  85. just_bash/commands/printf/__init__.py +5 -0
  86. just_bash/commands/printf/printf.py +157 -0
  87. just_bash/commands/pwd/__init__.py +5 -0
  88. just_bash/commands/pwd/pwd.py +23 -0
  89. just_bash/commands/read/__init__.py +5 -0
  90. just_bash/commands/read/read.py +185 -0
  91. just_bash/commands/readlink/__init__.py +5 -0
  92. just_bash/commands/readlink/readlink.py +86 -0
  93. just_bash/commands/registry.py +844 -0
  94. just_bash/commands/rev/__init__.py +5 -0
  95. just_bash/commands/rev/rev.py +74 -0
  96. just_bash/commands/rg/__init__.py +5 -0
  97. just_bash/commands/rg/rg.py +1048 -0
  98. just_bash/commands/rm/__init__.py +5 -0
  99. just_bash/commands/rm/rm.py +106 -0
  100. just_bash/commands/search_engine/__init__.py +13 -0
  101. just_bash/commands/search_engine/matcher.py +170 -0
  102. just_bash/commands/search_engine/regex.py +159 -0
  103. just_bash/commands/sed/__init__.py +5 -0
  104. just_bash/commands/sed/sed.py +863 -0
  105. just_bash/commands/seq/__init__.py +5 -0
  106. just_bash/commands/seq/seq.py +190 -0
  107. just_bash/commands/shell/__init__.py +5 -0
  108. just_bash/commands/shell/shell.py +206 -0
  109. just_bash/commands/sleep/__init__.py +5 -0
  110. just_bash/commands/sleep/sleep.py +62 -0
  111. just_bash/commands/sort/__init__.py +5 -0
  112. just_bash/commands/sort/sort.py +411 -0
  113. just_bash/commands/split/__init__.py +5 -0
  114. just_bash/commands/split/split.py +237 -0
  115. just_bash/commands/sqlite3/__init__.py +5 -0
  116. just_bash/commands/sqlite3/sqlite3_cmd.py +505 -0
  117. just_bash/commands/stat/__init__.py +5 -0
  118. just_bash/commands/stat/stat.py +150 -0
  119. just_bash/commands/strings/__init__.py +5 -0
  120. just_bash/commands/strings/strings.py +150 -0
  121. just_bash/commands/tac/__init__.py +5 -0
  122. just_bash/commands/tac/tac.py +158 -0
  123. just_bash/commands/tail/__init__.py +5 -0
  124. just_bash/commands/tail/tail.py +180 -0
  125. just_bash/commands/tar/__init__.py +5 -0
  126. just_bash/commands/tar/tar.py +1067 -0
  127. just_bash/commands/tee/__init__.py +5 -0
  128. just_bash/commands/tee/tee.py +63 -0
  129. just_bash/commands/timeout/__init__.py +5 -0
  130. just_bash/commands/timeout/timeout.py +188 -0
  131. just_bash/commands/touch/__init__.py +5 -0
  132. just_bash/commands/touch/touch.py +91 -0
  133. just_bash/commands/tr/__init__.py +5 -0
  134. just_bash/commands/tr/tr.py +297 -0
  135. just_bash/commands/tree/__init__.py +5 -0
  136. just_bash/commands/tree/tree.py +139 -0
  137. just_bash/commands/true/__init__.py +5 -0
  138. just_bash/commands/true/true.py +32 -0
  139. just_bash/commands/uniq/__init__.py +5 -0
  140. just_bash/commands/uniq/uniq.py +323 -0
  141. just_bash/commands/wc/__init__.py +5 -0
  142. just_bash/commands/wc/wc.py +169 -0
  143. just_bash/commands/which/__init__.py +5 -0
  144. just_bash/commands/which/which.py +52 -0
  145. just_bash/commands/xan/__init__.py +5 -0
  146. just_bash/commands/xan/xan.py +1663 -0
  147. just_bash/commands/xargs/__init__.py +5 -0
  148. just_bash/commands/xargs/xargs.py +136 -0
  149. just_bash/commands/yq/__init__.py +5 -0
  150. just_bash/commands/yq/yq.py +848 -0
  151. just_bash/fs/__init__.py +29 -0
  152. just_bash/fs/in_memory_fs.py +621 -0
  153. just_bash/fs/mountable_fs.py +504 -0
  154. just_bash/fs/overlay_fs.py +894 -0
  155. just_bash/fs/read_write_fs.py +455 -0
  156. just_bash/interpreter/__init__.py +37 -0
  157. just_bash/interpreter/builtins/__init__.py +92 -0
  158. just_bash/interpreter/builtins/alias.py +154 -0
  159. just_bash/interpreter/builtins/cd.py +76 -0
  160. just_bash/interpreter/builtins/control.py +127 -0
  161. just_bash/interpreter/builtins/declare.py +336 -0
  162. just_bash/interpreter/builtins/export.py +56 -0
  163. just_bash/interpreter/builtins/let.py +44 -0
  164. just_bash/interpreter/builtins/local.py +57 -0
  165. just_bash/interpreter/builtins/mapfile.py +152 -0
  166. just_bash/interpreter/builtins/misc.py +378 -0
  167. just_bash/interpreter/builtins/readonly.py +80 -0
  168. just_bash/interpreter/builtins/set.py +234 -0
  169. just_bash/interpreter/builtins/shopt.py +201 -0
  170. just_bash/interpreter/builtins/source.py +136 -0
  171. just_bash/interpreter/builtins/test.py +290 -0
  172. just_bash/interpreter/builtins/unset.py +53 -0
  173. just_bash/interpreter/conditionals.py +387 -0
  174. just_bash/interpreter/control_flow.py +381 -0
  175. just_bash/interpreter/errors.py +116 -0
  176. just_bash/interpreter/expansion.py +1156 -0
  177. just_bash/interpreter/interpreter.py +813 -0
  178. just_bash/interpreter/types.py +134 -0
  179. just_bash/network/__init__.py +1 -0
  180. just_bash/parser/__init__.py +39 -0
  181. just_bash/parser/lexer.py +948 -0
  182. just_bash/parser/parser.py +2162 -0
  183. just_bash/py.typed +0 -0
  184. just_bash/query_engine/__init__.py +83 -0
  185. just_bash/query_engine/builtins/__init__.py +1283 -0
  186. just_bash/query_engine/evaluator.py +578 -0
  187. just_bash/query_engine/parser.py +525 -0
  188. just_bash/query_engine/tokenizer.py +329 -0
  189. just_bash/query_engine/types.py +373 -0
  190. just_bash/types.py +180 -0
  191. just_bash-0.1.5.dist-info/METADATA +410 -0
  192. just_bash-0.1.5.dist-info/RECORD +193 -0
  193. just_bash-0.1.5.dist-info/WHEEL +4 -0
@@ -0,0 +1,1663 @@
1
+ """Xan command implementation - CSV toolkit.
2
+
3
+ A Python port of the xan CSV toolkit for data manipulation.
4
+
5
+ Usage: xan <COMMAND> [OPTIONS] [FILE]
6
+
7
+ Implemented Commands:
8
+ headers Show column names
9
+ count Count rows
10
+ head Show first N rows
11
+ tail Show last N rows
12
+ slice Extract row range
13
+ select Select columns
14
+ drop Drop columns (inverse of select)
15
+ rename Rename columns (old:new syntax)
16
+ filter Filter rows by expression
17
+ search Filter rows by regex
18
+ sort Sort rows
19
+ reverse Reverse row order
20
+ behead Output without header
21
+ enum Add index column
22
+ shuffle Randomly reorder rows
23
+ sample Random sample of N rows
24
+ dedup Remove duplicate rows
25
+ top Top N rows by column value
26
+ cat Concatenate CSV files
27
+ transpose Swap rows and columns
28
+ fixlengths Fix ragged CSV
29
+ flatten/f Display records vertically
30
+ explode Split column values to rows
31
+ implode Combine rows by grouping
32
+ split Split into multiple files
33
+ view Pretty print as table
34
+ stats Show column statistics
35
+ frequency Count value occurrences
36
+ to json Convert CSV to JSON
37
+ from json Convert JSON to CSV
38
+
39
+ Not Yet Implemented (require expression evaluation):
40
+ join Join two CSVs on a key column
41
+ agg Aggregate column values with expressions
42
+ groupby Group rows and aggregate
43
+ map Add computed columns via expressions
44
+ transform Transform column values via expressions
45
+ pivot Reshape data (pivot table)
46
+
47
+ These commands are stubbed and will return a "not implemented" error.
48
+ The TypeScript xan uses a custom expression language for these operations
49
+ that would need to be ported to Python.
50
+ """
51
+
52
+ import csv
53
+ import io
54
+ import json
55
+ import random
56
+ import re
57
+ from typing import Any
58
+
59
+ from ...types import CommandContext, ExecResult
60
+
61
+
62
def parse_csv(content: str) -> tuple[list[str], list[dict[str, str]]]:
    """Parse CSV text into header names and data rows.

    The first record is used as the header row; every following record
    becomes a dict keyed by header name.

    Args:
        content: Raw CSV text.

    Returns:
        A ``(headers, rows)`` tuple. Both lists are empty when ``content`` is
        empty or contains only whitespace.
    """
    if not content.strip():
        return [], []

    # restval="" pads records that are shorter than the header with empty
    # strings. The DictReader default pads with None, which contradicts the
    # declared dict[str, str] row type and shows up as the literal text
    # "None" wherever a caller wraps a cell in str().
    reader = csv.DictReader(io.StringIO(content), restval="")
    headers = list(reader.fieldnames or [])
    rows = list(reader)
    return headers, rows
71
+
72
+
73
def format_csv(headers: list[str], data: list[dict[str, Any]]) -> str:
    """Serialize rows to CSV text with a header line.

    Args:
        headers: Column names, in output order.
        data: Row dicts; a key missing from a row is written as an empty
            field, and keys not listed in ``headers`` are ignored.

    Returns:
        The CSV text using ``"\\n"`` line endings, or ``""`` when ``headers``
        is empty.
    """
    if not headers:
        return ""

    buffer = io.StringIO(newline="")
    writer = csv.writer(buffer, lineterminator="\n")
    writer.writerow(headers)
    writer.writerows(
        [record.get(name, "") for name in headers] for record in data
    )
    return buffer.getvalue()
84
+
85
+
86
def format_value(v: Any) -> str:
    """Format a single value as one CSV field.

    Args:
        v: The value to format; ``None`` is rendered as an empty field and
            anything else is converted with ``str()``.

    Returns:
        The field text. It is wrapped in double quotes, with embedded quotes
        doubled, when it contains a comma, a double quote, a line feed or a
        carriage return.
    """
    if v is None:
        return ""

    text = str(v)
    # A bare carriage return also acts as a record separator for CSV readers,
    # so it has to force quoting just like "\n" does.
    if any(special in text for special in (",", '"', "\n", "\r")):
        return '"' + text.replace('"', '""') + '"'
    return text
95
+
96
+
97
async def read_csv_input(
    file_args: list[str], ctx: CommandContext
) -> tuple[list[str], list[dict[str, str]], ExecResult | None]:
    """Load and parse the CSV input for a subcommand.

    ``ctx.stdin`` is used when ``file_args`` is empty or its first entry is
    ``"-"``; otherwise the first entry is resolved against ``ctx.cwd`` and
    read through ``ctx.fs``. Only the first entry is ever consulted.

    Returns:
        ``(headers, rows, None)`` on success, or ``([], [], error)`` where
        ``error`` is an exit-code-2 result when the read raises
        ``FileNotFoundError``.
    """
    reads_stdin = not file_args or file_args[0] == "-"
    if reads_stdin:
        content = ctx.stdin
    else:
        source = file_args[0]
        try:
            content = await ctx.fs.read_file(ctx.fs.resolve_path(ctx.cwd, source))
        except FileNotFoundError:
            failure = ExecResult(
                stdout="",
                stderr=f"xan: {source}: No such file or directory\n",
                exit_code=2,
            )
            return [], [], failure

    headers, data = parse_csv(content)
    return headers, data, None
116
+
117
+
118
async def cmd_headers(args: list[str], ctx: CommandContext) -> ExecResult:
    """List the column names of the input.

    With ``-j`` / ``--just-names`` each name is printed on its own line;
    otherwise each line is ``<zero-based index><TAB><name>``. Input without
    headers produces empty output.
    """
    names_only = any(flag in args for flag in ("-j", "--just-names"))
    positional = [a for a in args if not a.startswith("-")]

    headers, _, error = await read_csv_input(positional, ctx)
    if error:
        return error

    if not headers:
        text = ""
    elif names_only:
        text = "\n".join(headers) + "\n"
    else:
        numbered = [f"{i}\t{h}" for i, h in enumerate(headers)]
        text = "\n".join(numbered) + "\n"

    return ExecResult(stdout=text, stderr="", exit_code=0)
133
+
134
+
135
async def cmd_count(args: list[str], ctx: CommandContext) -> ExecResult:
    """Print the number of data rows (the header row is not counted)."""
    positional = [a for a in args if not a.startswith("-")]

    _, rows, error = await read_csv_input(positional, ctx)
    if error:
        return error

    total = len(rows)
    return ExecResult(stdout=f"{total}\n", stderr="", exit_code=0)
144
+
145
+
146
async def cmd_head(args: list[str], ctx: CommandContext) -> ExecResult:
    """Output the header plus the first N data rows (N defaults to 10).

    N is taken from ``-n N`` or the attached form ``-nN``; a value that is
    not an integer is ignored and the previous N is kept. Other dash-prefixed
    arguments are ignored, and the remaining arguments are file arguments.
    """
    limit = 10
    paths = []

    pos = 0
    total = len(args)
    while pos < total:
        token = args[pos]
        step = 1
        raw_count = None

        if token == "-n" and pos + 1 < total:
            # Separate form: the next argument is consumed as the count.
            raw_count = args[pos + 1]
            step = 2
        elif token.startswith("-n"):
            # Attached form; a trailing bare "-n" yields "" and is ignored.
            raw_count = token[2:]
        elif not token.startswith("-"):
            paths.append(token)

        if raw_count is not None:
            try:
                limit = int(raw_count)
            except ValueError:
                pass
        pos += step

    headers, data, error = await read_csv_input(paths, ctx)
    if error:
        return error

    return ExecResult(stdout=format_csv(headers, data[:limit]), stderr="", exit_code=0)
175
+
176
+
177
async def cmd_tail(args: list[str], ctx: CommandContext) -> ExecResult:
    """Output the header plus the last N data rows (N defaults to 10).

    N is taken from ``-n N`` or the attached form ``-nN``; a value that is
    not an integer is ignored and the previous N is kept. Other dash-prefixed
    arguments are ignored, and the remaining arguments are file arguments.
    ``-n 0`` outputs the header only.
    """
    n = 10
    file_args = []

    i = 0
    while i < len(args):
        arg = args[i]
        if arg == "-n" and i + 1 < len(args):
            try:
                n = int(args[i + 1])
            except ValueError:
                pass
            i += 2
            continue
        elif arg.startswith("-n"):
            try:
                n = int(arg[2:])
            except ValueError:
                pass
        elif not arg.startswith("-"):
            file_args.append(arg)
        i += 1

    headers, data, error = await read_csv_input(file_args, ctx)
    if error:
        return error

    # data[-0:] is the same slice as data[0:], i.e. every row, so a count of
    # zero must be special-cased to return no rows at all.
    last_rows = data[-n:] if n != 0 else []
    return ExecResult(stdout=format_csv(headers, last_rows), stderr="", exit_code=0)
206
+
207
+
208
async def cmd_slice(args: list[str], ctx: CommandContext) -> ExecResult:
    """Output the header plus the data rows in ``[start, end)``.

    ``-s`` / ``--start`` sets the first row index (default 0) and ``-e`` /
    ``--end`` sets the exclusive end index (default: through the last row).
    A value that is not an integer is ignored. The bounds are applied as a
    Python slice over the data rows.
    """
    bound_for_flag = {"-s": "start", "--start": "start", "-e": "end", "--end": "end"}
    bounds: dict[str, int | None] = {"start": 0, "end": None}
    paths = []

    pos = 0
    while pos < len(args):
        token = args[pos]
        bound = bound_for_flag.get(token)
        if bound is not None:
            if pos + 1 < len(args):
                try:
                    bounds[bound] = int(args[pos + 1])
                except ValueError:
                    pass
            # The flag always skips two positions, even without a value.
            pos += 2
            continue
        if not token.startswith("-"):
            paths.append(token)
        pos += 1

    headers, data, error = await read_csv_input(paths, ctx)
    if error:
        return error

    window = data[bounds["start"]:bounds["end"]]
    return ExecResult(stdout=format_csv(headers, window), stderr="", exit_code=0)
242
+
243
+
244
async def cmd_select(args: list[str], ctx: CommandContext) -> ExecResult:
    """Output only the selected columns, in the order they were requested.

    The first non-dash argument is a comma-separated column spec; later
    non-dash arguments are file arguments. Each spec entry is tried, in
    order, as:

    1. a zero-based column index that is in range,
    2. a glob in which ``*`` matches any run of characters,
    3. an exact column name.

    Entries that match nothing are silently skipped. Returns an exit-code-1
    result when no column spec is given.
    """
    cols_spec = ""
    file_args = []

    for arg in args:
        if arg.startswith("-"):
            continue
        if not cols_spec:
            cols_spec = arg
        else:
            file_args.append(arg)

    if not cols_spec:
        return ExecResult(
            stdout="",
            stderr="xan select: no columns specified\n",
            exit_code=1,
        )

    headers, data, error = await read_csv_input(file_args, ctx)
    if error:
        return error

    selected_headers = []
    for col in cols_spec.split(","):
        col = col.strip()
        if not col:
            continue

        # Numeric entries that are in range are positional; anything else
        # falls through so a header literally named e.g. "2024" still works.
        try:
            idx = int(col)
        except ValueError:
            idx = None
        if idx is not None and 0 <= idx < len(headers):
            selected_headers.append(headers[idx])
            continue

        if "*" in col:
            # Escape every literal piece so that regex metacharacters in the
            # spec ("." "(" "[" "+" ...) match themselves instead of being
            # interpreted or raising re.error; only "*" is a wildcard.
            glob = re.compile(".*".join(re.escape(piece) for piece in col.split("*")))
            for h in headers:
                if glob.fullmatch(h) and h not in selected_headers:
                    selected_headers.append(h)
            continue

        if col in headers:
            selected_headers.append(col)

    selected_data = [
        {h: row.get(h, "") for h in selected_headers} for row in data
    ]

    return ExecResult(stdout=format_csv(selected_headers, selected_data), stderr="", exit_code=0)
301
+
302
+
303
async def cmd_filter(args: list[str], ctx: CommandContext) -> ExecResult:
    """Keep the rows for which a filter expression holds.

    The first non-dash argument is the expression (evaluated per row by
    ``evaluate_filter_expr``); later non-dash arguments are file arguments.
    ``-v`` / ``--invert`` keeps the rows that do NOT match instead. Returns
    an exit-code-1 result when no expression is given.
    """
    expr = ""
    invert = False
    paths = []

    for token in args:
        if token in ("-v", "--invert"):
            invert = True
            continue
        if token.startswith("-"):
            continue
        if expr:
            paths.append(token)
        else:
            expr = token

    if not expr:
        return ExecResult(
            stdout="",
            stderr="xan filter: no expression specified\n",
            exit_code=1,
        )

    headers, data, error = await read_csv_input(paths, ctx)
    if error:
        return error

    # A row is kept when its match outcome differs from the invert flag.
    kept = [
        row for row in data if bool(evaluate_filter_expr(expr, row)) != invert
    ]

    return ExecResult(stdout=format_csv(headers, kept), stderr="", exit_code=0)
341
+
342
+
343
def evaluate_filter_expr(expr: str, row: dict[str, str]) -> bool:
    """Evaluate a simple filter expression against one row.

    Two expression shapes are understood:

    * ``<column> <op> <value>`` with ``op`` one of ``==``, ``!=``, ``>=``,
      ``<=``, ``>``, ``<`` surrounded by single spaces. ``==`` / ``!=``
      compare as strings; the ordering operators delegate to
      ``try_compare``.
    * ``<column> contains(<text>)``, ``<column> startswith(<text>)`` and
      ``<column> endswith(<text>)`` (function name is case-insensitive).

    Surrounding single or double quotes are stripped from the value.

    Returns:
        The outcome of the test; ``False`` when the column is not present in
        ``row`` or the expression is not recognised.
    """
    expr = expr.strip()

    comparisons = {
        "==": lambda a, b: str(a) == str(b),
        "!=": lambda a, b: str(a) != str(b),
        ">=": lambda a, b: try_compare(a, b, "ge"),
        "<=": lambda a, b: try_compare(a, b, "le"),
        ">": lambda a, b: try_compare(a, b, "gt"),
        "<": lambda a, b: try_compare(a, b, "lt"),
    }

    # Use the operator that appears FIRST in the expression. Trying the
    # operators in a fixed order would split `x != "a == b"` on the "=="
    # inside the value and then look up a nonsensical column name.
    occurrences = [
        (pos, op) for op in comparisons if (pos := expr.find(f" {op} ")) != -1
    ]
    if occurrences:
        _, op = min(occurrences, key=lambda found: found[0])
        col, val = expr.split(f" {op} ", 1)
        col = col.strip()
        val = val.strip().strip('"').strip("'")
        if col in row:
            return comparisons[op](row[col], val)
        return False

    # Function-style string tests, tried in this order.
    string_tests = (
        ("contains", lambda cell, text: text in cell),
        ("startswith", str.startswith),
        ("endswith", str.endswith),
    )
    lowered = expr.lower()
    for func_name, test in string_tests:
        if f"{func_name}(" not in lowered:
            continue
        match = re.match(
            rf"(\w+)\s+{func_name}\s*\(([^)]+)\)", expr, re.IGNORECASE
        )
        if match:
            col, val = match.groups()
            val = val.strip('"').strip("'")
            if col in row:
                return test(str(row[col]), val)
            return False

    return False
393
+
394
+
395
def try_compare(a: str, b: str, op: str) -> bool:
    """Order-compare two values, numerically when possible.

    Both operands are converted with ``float()`` (a falsy operand counts as
    0). If either conversion raises ``ValueError`` the operands are compared
    as strings instead.

    Args:
        a: Left operand.
        b: Right operand.
        op: One of ``"gt"``, ``"ge"``, ``"lt"``, ``"le"``.

    Returns:
        The comparison result, or ``False`` for an unrecognised ``op``.
    """
    checks = {
        "gt": lambda left, right: left > right,
        "ge": lambda left, right: left >= right,
        "lt": lambda left, right: left < right,
        "le": lambda left, right: left <= right,
    }

    try:
        lhs = float(a) if a else 0
        rhs = float(b) if b else 0
    except ValueError:
        # At least one side is not numeric: fall back to text ordering.
        lhs, rhs = str(a), str(b)

    check = checks.get(op)
    if check is None:
        return False
    return check(lhs, rhs)
420
+
421
+
422
async def cmd_search(args: list[str], ctx: CommandContext) -> ExecResult:
    """Keep the rows in which a regex matches at least one cell.

    The first non-dash argument is the pattern; later non-dash arguments are
    file arguments. Options:

    * ``-s`` / ``--select COLS`` - comma-separated columns to search
      (default: all columns),
    * ``-v`` / ``--invert`` - keep the rows that do not match,
    * ``-i`` / ``--ignore-case`` - case-insensitive matching.

    Returns an exit-code-1 result when the pattern is missing or is not a
    valid regular expression.
    """
    pattern = ""
    select_cols: list[str] = []
    invert = False
    ignore_case = False
    paths = []

    pos = 0
    while pos < len(args):
        token = args[pos]
        if token in ("-s", "--select") and pos + 1 < len(args):
            select_cols = args[pos + 1].split(",")
            pos += 2
            continue

        if token in ("-v", "--invert"):
            invert = True
        elif token in ("-i", "--ignore-case"):
            ignore_case = True
        elif not token.startswith("-"):
            if pattern:
                paths.append(token)
            else:
                pattern = token
        pos += 1

    if not pattern:
        return ExecResult(
            stdout="",
            stderr="xan search: no pattern specified\n",
            exit_code=1,
        )

    headers, data, error = await read_csv_input(paths, ctx)
    if error:
        return error

    search_cols = select_cols or headers

    flags = re.IGNORECASE if ignore_case else 0
    try:
        regex = re.compile(pattern, flags)
    except re.error:
        return ExecResult(
            stdout="",
            stderr=f"xan search: invalid regex pattern '{pattern}'\n",
            exit_code=1,
        )

    def row_matches(row: dict) -> bool:
        # Columns that are absent from the row are skipped, not searched.
        return any(
            regex.search(str(row[col])) for col in search_cols if col in row
        )

    kept = [row for row in data if row_matches(row) != invert]

    return ExecResult(stdout=format_csv(headers, kept), stderr="", exit_code=0)
481
+
482
+
483
async def cmd_sort(args: list[str], ctx: CommandContext) -> ExecResult:
    """Sort the data rows by one column.

    The sort column comes from ``-s`` / ``--select COL`` or from the first
    non-dash argument; later non-dash arguments are file arguments.
    ``-N`` / ``--numeric`` sorts by the cell's float value (empty or
    non-numeric cells count as 0); otherwise cells are sorted as strings.
    ``-r`` / ``--reverse`` / ``-R`` reverses the order.

    Returns an exit-code-1 result when no column is given or the column is
    not among the headers. The input is read before those checks.
    """
    sort_col = ""
    numeric = False
    reverse = False
    paths = []

    pos = 0
    while pos < len(args):
        token = args[pos]
        if token in ("-s", "--select") and pos + 1 < len(args):
            sort_col = args[pos + 1]
            pos += 2
            continue

        if token in ("-N", "--numeric"):
            numeric = True
        elif token in ("-r", "--reverse", "-R"):
            reverse = True
        elif not token.startswith("-"):
            if sort_col:
                paths.append(token)
            else:
                sort_col = token
        pos += 1

    headers, data, error = await read_csv_input(paths, ctx)
    if error:
        return error

    if not sort_col:
        return ExecResult(
            stdout="",
            stderr="xan sort: no sort column specified\n",
            exit_code=1,
        )

    if sort_col not in headers:
        return ExecResult(
            stdout="",
            stderr=f"xan sort: column '{sort_col}' not found\n",
            exit_code=1,
        )

    if numeric:
        def sort_key(row: dict) -> Any:
            cell = row.get(sort_col, "")
            try:
                return float(cell) if cell else 0
            except ValueError:
                return 0
    else:
        def sort_key(row: dict) -> Any:
            return str(row.get(sort_col, ""))

    ordered = list(data)
    ordered.sort(key=sort_key, reverse=reverse)
    return ExecResult(stdout=format_csv(headers, ordered), stderr="", exit_code=0)
537
+
538
+
539
async def cmd_view(args: list[str], ctx: CommandContext) -> ExecResult:
    """Render the CSV as an aligned plain-text table.

    Every column is left-justified to the width of its longest cell (or of
    its header, if longer). Cells are joined with `` | `` and a dashed
    separator line follows the header line. Input without headers produces
    empty output.
    """
    positional = [a for a in args if not a.startswith("-")]

    headers, data, error = await read_csv_input(positional, ctx)
    if error:
        return error

    if not headers:
        return ExecResult(stdout="", stderr="", exit_code=0)

    # Width of a column = longest of its header and all of its cells.
    widths = {
        h: max([len(h)] + [len(str(row.get(h, ""))) for row in data])
        for h in headers
    }

    def render(cells: list[str]) -> str:
        padded = [cell.ljust(widths[h]) for h, cell in zip(headers, cells)]
        return " | ".join(padded)

    lines = [
        render(list(headers)),
        "-+-".join("-" * widths[h] for h in headers),
    ]
    lines.extend(render([str(row.get(h, "")) for h in headers]) for row in data)

    return ExecResult(stdout="\n".join(lines) + "\n", stderr="", exit_code=0)
573
+
574
+
575
async def cmd_stats(args: list[str], ctx: CommandContext) -> ExecResult:
    """Print per-column summary statistics.

    For every column: the row count, the number of non-empty cells and the
    number of distinct non-empty values. When at least one non-empty cell
    parses as a float, Min / Max / Sum / Mean over those numeric cells are
    added. Each column's section is followed by a blank entry. Input without
    headers produces empty output.
    """
    positional = [a for a in args if not a.startswith("-")]

    headers, data, error = await read_csv_input(positional, ctx)
    if error:
        return error

    if not headers:
        return ExecResult(stdout="", stderr="", exit_code=0)

    report = []
    for col in headers:
        cells = [row.get(col, "") for row in data]
        filled = [cell for cell in cells if cell]

        # Cells that do not parse as float are simply left out of the
        # numeric summary.
        numbers = []
        for cell in filled:
            try:
                numbers.append(float(cell))
            except ValueError:
                continue

        report.extend(
            [
                f"Column: {col}",
                f" Count: {len(cells)}",
                f" Non-empty: {len(filled)}",
                f" Unique: {len(set(filled))}",
            ]
        )

        if numbers:
            report.extend(
                [
                    f" Min: {min(numbers)}",
                    f" Max: {max(numbers)}",
                    f" Sum: {sum(numbers)}",
                    f" Mean: {sum(numbers) / len(numbers):.2f}",
                ]
            )

        report.append("")

    return ExecResult(stdout="\n".join(report), stderr="", exit_code=0)
613
+
614
+
615
async def cmd_frequency(args: list[str], ctx: CommandContext) -> ExecResult:
    """Count how often each value occurs in one column.

    The first non-dash argument names the column (default: the first
    header); later non-dash arguments are file arguments. The output is a
    ``value,count`` CSV ordered by descending count, with ties kept in
    first-seen order. Returns an exit-code-1 result when the column is not
    among the headers.
    """
    col = ""
    paths = []

    for token in args:
        if token.startswith("-"):
            continue
        if col:
            paths.append(token)
        else:
            col = token

    headers, data, error = await read_csv_input(paths, ctx)
    if error:
        return error

    if not col:
        # No column requested: fall back to the first one, if there is one.
        col = headers[0] if headers else ""

    if col not in headers:
        return ExecResult(
            stdout="",
            stderr=f"xan frequency: column '{col}' not found\n",
            exit_code=1,
        )

    tally: dict[str, int] = {}
    for row in data:
        key = str(row.get(col, ""))
        tally[key] = tally.get(key, 0) + 1

    # A stable reverse sort keeps equal counts in insertion order.
    ranked = sorted(tally.items(), key=lambda item: item[1], reverse=True)

    body = "".join(f"{format_value(val)},{count}\n" for val, count in ranked)
    return ExecResult(stdout="value,count\n" + body, stderr="", exit_code=0)
657
+
658
+
659
async def cmd_reverse(args: list[str], ctx: CommandContext) -> ExecResult:
    """Output the data rows in reverse order; the header stays first."""
    positional = [a for a in args if not a.startswith("-")]

    headers, data, error = await read_csv_input(positional, ctx)
    if error:
        return error

    flipped = list(reversed(data))
    return ExecResult(stdout=format_csv(headers, flipped), stderr="", exit_code=0)
668
+
669
+
670
async def cmd_behead(args: list[str], ctx: CommandContext) -> ExecResult:
    """Output the data rows as CSV without the header line.

    Produces empty output when the input has no headers or no data rows.
    """
    positional = [a for a in args if not a.startswith("-")]

    headers, data, error = await read_csv_input(positional, ctx)
    if error:
        return error

    if not (headers and data):
        return ExecResult(stdout="", stderr="", exit_code=0)

    buffer = io.StringIO(newline="")
    csv.writer(buffer, lineterminator="\n").writerows(
        [row.get(h, "") for h in headers] for row in data
    )
    return ExecResult(stdout=buffer.getvalue(), stderr="", exit_code=0)
687
+
688
+
689
async def cmd_enum(args: list[str], ctx: CommandContext) -> ExecResult:
    """Prepend a running index column to the CSV.

    ``-c`` / ``--column NAME`` sets the new column's name (default
    ``index``) and ``--start N`` sets the first index (default 0; a value
    that is not an integer is ignored). Remaining non-dash arguments are
    file arguments.
    """
    col_name = "index"
    start = 0
    paths = []

    pos = 0
    while pos < len(args):
        token = args[pos]
        has_value = pos + 1 < len(args)

        if has_value and token in ("-c", "--column"):
            col_name = args[pos + 1]
            pos += 2
            continue
        if has_value and token == "--start":
            try:
                start = int(args[pos + 1])
            except ValueError:
                pass
            pos += 2
            continue

        if not token.startswith("-"):
            paths.append(token)
        pos += 1

    headers, data, error = await read_csv_input(paths, ctx)
    if error:
        return error

    new_headers = [col_name, *headers]

    # Row keys are merged after the index key, so an existing column with
    # the same name keeps its own value.
    new_data = [
        {col_name: str(idx), **row} for idx, row in enumerate(data, start=start)
    ]

    return ExecResult(stdout=format_csv(new_headers, new_data), stderr="", exit_code=0)
728
+
729
+
730
+ def _looks_like_file_path(arg: str) -> bool:
731
+ """Check if argument looks like a file path rather than a column spec."""
732
+ return arg.startswith("/") or arg.startswith("./") or arg.endswith(".csv")
733
+
734
+
735
async def cmd_drop(args: list[str], ctx: CommandContext) -> ExecResult:
    """Output the CSV without the listed columns (inverse of select).

    The first non-dash argument that does not look like a file path (see
    ``_looks_like_file_path``) is the comma-separated column spec; all other
    non-dash arguments are file arguments. Each spec entry is an in-range
    zero-based index or an exact column name; entries that match nothing are
    ignored. Returns an exit-code-1 result when no column spec is given.
    """
    cols_spec = ""
    paths = []

    for token in args:
        if token.startswith("-"):
            continue
        # Until a spec is found, path-like arguments are taken as files.
        if cols_spec or _looks_like_file_path(token):
            paths.append(token)
        else:
            cols_spec = token

    if not cols_spec:
        return ExecResult(
            stdout="",
            stderr="xan drop: no columns specified\n",
            exit_code=1,
        )

    headers, data, error = await read_csv_input(paths, ctx)
    if error:
        return error

    drop_cols: set[str] = set()
    for entry in (part.strip() for part in cols_spec.split(",")):
        if not entry:
            continue

        try:
            position = int(entry)
        except ValueError:
            position = None
        if position is not None and 0 <= position < len(headers):
            drop_cols.add(headers[position])
            continue

        if entry in headers:
            drop_cols.add(entry)

    remaining_headers = [h for h in headers if h not in drop_cols]
    new_data = [{h: row.get(h, "") for h in remaining_headers} for row in data]

    return ExecResult(stdout=format_csv(remaining_headers, new_data), stderr="", exit_code=0)
792
+
793
+
794
async def cmd_shuffle(args: list[str], ctx: CommandContext) -> ExecResult:
    """Emit the CSV rows in random order.

    ``--seed N`` selects a dedicated ``random.Random(N)`` generator; a
    non-integer seed value is consumed but otherwise ignored.  Positional
    arguments are passed on as input files.
    """
    seed: int | None = None
    inputs = []

    remaining = iter(args)
    for token in remaining:
        if token == "--seed":
            raw_seed = next(remaining, None)
            if raw_seed is None:
                # Trailing --seed without a value is skipped like any other flag.
                continue
            try:
                seed = int(raw_seed)
            except ValueError:
                pass
        elif not token.startswith("-"):
            inputs.append(token)

    headers, data, error = await read_csv_input(inputs, ctx)
    if error:
        return error

    # Seeded runs use their own generator; unseeded runs use the module-level one.
    shuffle = random.shuffle if seed is None else random.Random(seed).shuffle
    shuffle(data)

    return ExecResult(stdout=format_csv(headers, data), stderr="", exit_code=0)
825
+
826
+
827
async def cmd_cat(args: list[str], ctx: CommandContext) -> ExecResult:
    """Concatenate the rows of several CSV files.

    Every argument not starting with ``-`` is read as a file.  The header of
    the first file is the reference: a later file with a different header
    aborts with exit code 1, and a missing file aborts with exit code 2.
    """
    inputs = [name for name in args if not name.startswith("-")]
    if not inputs:
        return ExecResult(stdout="", stderr="xan cat: no files specified\n", exit_code=1)

    merged_headers: list[str] | None = None
    merged_rows: list[dict[str, str]] = []

    for name in inputs:
        try:
            resolved = ctx.fs.resolve_path(ctx.cwd, name)
            text = await ctx.fs.read_file(resolved)
        except FileNotFoundError:
            return ExecResult(
                stdout="",
                stderr=f"xan: {name}: No such file or directory\n",
                exit_code=2,
            )

        headers, rows = parse_csv(text)

        if merged_headers is None:
            merged_headers = headers
        elif headers != merged_headers:
            return ExecResult(
                stdout="",
                stderr=f"xan cat: headers in '{name}' do not match\n",
                exit_code=1,
            )

        merged_rows += rows

    if merged_headers is None:
        return ExecResult(stdout="", stderr="", exit_code=0)

    return ExecResult(stdout=format_csv(merged_headers, merged_rows), stderr="", exit_code=0)
869
+
870
+
871
async def cmd_to(args: list[str], ctx: CommandContext) -> ExecResult:
    """Dispatch ``xan to <format>`` to the matching exporter.

    Only ``json`` is recognised.  A missing or unknown format produces an
    error result with exit code 1.
    """
    if not args:
        return ExecResult(stdout="", stderr="xan to: no format specified\n", exit_code=1)

    fmt, *rest = args
    if fmt != "json":
        return ExecResult(stdout="", stderr=f"xan to: unknown format '{fmt}'\n", exit_code=1)

    return await cmd_to_json(rest, ctx)
891
+
892
+
893
async def cmd_to_json(args: list[str], ctx: CommandContext) -> ExecResult:
    """Serialise the CSV rows as a JSON array of objects on one line.

    Arguments starting with ``-`` are dropped; the rest are passed on as
    input files.  Non-ASCII characters are written unescaped.
    """
    inputs = [name for name in args if not name.startswith("-")]

    _headers, rows, error = await read_csv_input(inputs, ctx)
    if error:
        return error

    records = [dict(row) for row in rows]
    payload = json.dumps(records, ensure_ascii=False) + "\n"
    return ExecResult(stdout=payload, stderr="", exit_code=0)
905
+
906
+
907
async def cmd_from(args: list[str], ctx: CommandContext) -> ExecResult:
    """Dispatch ``xan from <format>`` to the matching importer.

    Only ``json`` is recognised.  A missing or unknown format produces an
    error result with exit code 1.
    """
    if not args:
        return ExecResult(stdout="", stderr="xan from: no format specified\n", exit_code=1)

    fmt, *rest = args
    if fmt != "json":
        return ExecResult(stdout="", stderr=f"xan from: unknown format '{fmt}'\n", exit_code=1)

    return await cmd_from_json(rest, ctx)
927
+
928
+
929
def _json_value_to_cell(value: object) -> str:
    """Render one decoded JSON value as CSV cell text."""
    if value is None:
        # JSON null (or a key missing from this object) becomes an empty cell.
        return ""
    if isinstance(value, str):
        return value
    # Booleans, numbers, arrays and objects are written as JSON text so the
    # cell does not carry Python repr such as "True" or "{'a': 1}".
    return json.dumps(value, ensure_ascii=False)


async def cmd_from_json(args: list[str], ctx: CommandContext) -> ExecResult:
    """Convert a JSON array of objects to CSV.

    Input comes from the first argument that does not start with ``-``, or
    from ``ctx.stdin`` when there is none (a lone ``-`` is dropped with the
    other dash-prefixed arguments, so it also selects stdin).

    The header is the union of all object keys in first-seen order.  Array
    items that are not objects are skipped.  ``null`` and missing keys yield
    empty cells; strings are copied verbatim; every other value is written as
    JSON text.

    Returns exit code 2 when the file does not exist, exit code 1 for invalid
    JSON or a non-array top level, and empty output for an empty array.
    """
    file_args = [a for a in args if not a.startswith("-")]

    # Read input
    if not file_args:
        content = ctx.stdin
    else:
        try:
            path = ctx.fs.resolve_path(ctx.cwd, file_args[0])
            content = await ctx.fs.read_file(path)
        except FileNotFoundError:
            return ExecResult(
                stdout="",
                stderr=f"xan: {file_args[0]}: No such file or directory\n",
                exit_code=2,
            )

    # Parse JSON
    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        return ExecResult(
            stdout="",
            stderr="xan from json: invalid JSON\n",
            exit_code=1,
        )

    if not isinstance(data, list):
        return ExecResult(
            stdout="",
            stderr="xan from json: expected JSON array\n",
            exit_code=1,
        )

    if not data:
        return ExecResult(stdout="", stderr="", exit_code=0)

    records = [item for item in data if isinstance(item, dict)]

    # dict.fromkeys de-duplicates in O(1) per key while keeping first-seen order.
    all_keys = list(dict.fromkeys(key for item in records for key in item))

    # Create CSV output
    output = io.StringIO(newline="")
    writer = csv.DictWriter(output, fieldnames=all_keys, lineterminator="\n")
    writer.writeheader()
    for item in records:
        writer.writerow({k: _json_value_to_cell(item.get(k)) for k in all_keys})

    return ExecResult(stdout=output.getvalue(), stderr="", exit_code=0)
984
+
985
+
986
async def cmd_rename(args: list[str], ctx: CommandContext) -> ExecResult:
    """Rename columns according to an ``old:new,old2:new2`` specification.

    The first positional argument that does not look like a file path is the
    specification; every other positional argument is passed on as an input
    file.  Pairs without a ``:`` are ignored, and so are pairs whose old name
    is not a header.
    """
    spec = ""
    inputs = []
    for token in args:
        if token.startswith("-"):
            continue
        if spec or _looks_like_file_path(token):
            inputs.append(token)
        else:
            spec = token

    if not spec:
        return ExecResult(
            stdout="",
            stderr="xan rename: no rename specification provided\n",
            exit_code=1,
        )

    headers, data, error = await read_csv_input(inputs, ctx)
    if error:
        return error

    # Build the old -> new mapping; only the first ':' in a pair separates.
    mapping: dict[str, str] = {}
    for pair in spec.split(","):
        old_name, colon, new_name = pair.partition(":")
        if colon:
            mapping[old_name.strip()] = new_name.strip()

    renamed_headers = [mapping.get(name, name) for name in headers]
    renamed_rows = [
        {mapping.get(name, name): row.get(name, "") for name in headers}
        for row in data
    ]

    return ExecResult(stdout=format_csv(renamed_headers, renamed_rows), stderr="", exit_code=0)
1032
+
1033
+
1034
async def cmd_sample(args: list[str], ctx: CommandContext) -> ExecResult:
    """Output a random sample of up to N rows (N defaults to 10).

    Any positional argument that parses as an integer sets N; all other
    positional arguments are passed on as input files.  ``--seed N`` selects
    a dedicated ``random.Random(N)`` generator; a non-integer seed value is
    consumed but otherwise ignored.
    """
    n = 10
    seed: int | None = None
    inputs = []

    remaining = iter(args)
    for token in remaining:
        if token == "--seed":
            raw_seed = next(remaining, None)
            if raw_seed is None:
                # Trailing --seed without a value is skipped like any other flag.
                continue
            try:
                seed = int(raw_seed)
            except ValueError:
                pass
        elif not token.startswith("-"):
            try:
                n = int(token)
            except ValueError:
                inputs.append(token)

    headers, data, error = await read_csv_input(inputs, ctx)
    if error:
        return error

    # Never request more rows than exist.
    size = min(n, len(data))
    source = random if seed is None else random.Random(seed)
    sampled = source.sample(data, size)

    return ExecResult(stdout=format_csv(headers, sampled), stderr="", exit_code=0)
1070
+
1071
+
1072
async def cmd_dedup(args: list[str], ctx: CommandContext) -> ExecResult:
    """Remove duplicate rows, keeping the first occurrence of each.

    With ``-s``/``--select COLUMN`` rows are compared on that column only;
    otherwise on the values of all header columns.  Positional arguments are
    passed on as input files.

    Returns exit code 1 when the selected column is not in the header, so a
    mistyped name no longer silently collapses the output to a single row.
    Errors from reading the input are passed through.
    """
    select_col = ""
    file_args = []

    i = 0
    while i < len(args):
        arg = args[i]
        if arg in ("-s", "--select") and i + 1 < len(args):
            select_col = args[i + 1]
            i += 2
            continue
        elif not arg.startswith("-"):
            file_args.append(arg)
        i += 1

    headers, data, error = await read_csv_input(file_args, ctx)
    if error:
        return error

    if select_col and select_col not in headers:
        return ExecResult(
            stdout="",
            stderr=f"xan dedup: column '{select_col}' not found\n",
            exit_code=1,
        )

    # Columns that make up a row's identity.
    key_cols = [select_col] if select_col else headers

    seen: set[tuple[str, ...]] = set()
    unique_data = []
    for row in data:
        key = tuple(row.get(h, "") for h in key_cols)
        if key not in seen:
            seen.add(key)
            unique_data.append(row)

    return ExecResult(stdout=format_csv(headers, unique_data), stderr="", exit_code=0)
1106
+
1107
+
1108
async def cmd_top(args: list[str], ctx: CommandContext) -> ExecResult:
    """Output the N rows with the highest value in a column (N defaults to 10).

    Any positional argument that parses as an integer sets N.  The first
    non-integer positional argument is the sort column; later ones are passed
    on as input files.  ``-r``/``--reverse`` selects the lowest values
    instead.  Cells that are empty or not numeric compare as 0.
    """
    n = 10
    reverse = False
    sort_col = ""
    inputs = []

    for token in args:
        if token in ("-r", "--reverse"):
            reverse = True
            continue
        if token.startswith("-"):
            continue
        try:
            n = int(token)
        except ValueError:
            if sort_col:
                inputs.append(token)
            else:
                sort_col = token

    if not sort_col:
        return ExecResult(stdout="", stderr="xan top: no column specified\n", exit_code=1)

    headers, data, error = await read_csv_input(inputs, ctx)
    if error:
        return error

    if sort_col not in headers:
        return ExecResult(
            stdout="",
            stderr=f"xan top: column '{sort_col}' not found\n",
            exit_code=1,
        )

    def numeric_value(row: dict) -> float:
        cell = row.get(sort_col, "")
        if not cell:
            return 0
        try:
            return float(cell)
        except ValueError:
            return 0

    # Highest first unless --reverse was given.
    ranked = list(data)
    ranked.sort(key=numeric_value, reverse=not reverse)
    return ExecResult(stdout=format_csv(headers, ranked[:n]), stderr="", exit_code=0)
1159
+
1160
+
1161
async def cmd_transpose(args: list[str], ctx: CommandContext) -> ExecResult:
    """Swap rows and columns.

    Each original header becomes one output row whose ``field`` cell holds
    the header name; each original data row becomes a column named after its
    zero-based position.  An input without headers produces empty output.
    """
    inputs = [name for name in args if not name.startswith("-")]

    headers, data, error = await read_csv_input(inputs, ctx)
    if error:
        return error

    if not headers:
        return ExecResult(stdout="", stderr="", exit_code=0)

    column_ids = [str(position) for position in range(len(data))]
    new_headers = ["field", *column_ids]

    new_data = []
    for name in headers:
        record = {"field": name}
        record.update(
            (column_id, source.get(name, ""))
            for column_id, source in zip(column_ids, data)
        )
        new_data.append(record)

    return ExecResult(stdout=format_csv(new_headers, new_data), stderr="", exit_code=0)
1185
+
1186
+
1187
async def cmd_fixlengths(args: list[str], ctx: CommandContext) -> ExecResult:
    """Fix ragged CSV so every row has as many fields as the first row.

    Short rows are padded with empty strings and long rows are truncated.
    Input comes from the first argument that does not start with ``-``, or
    from ``ctx.stdin`` when there is none (a lone ``-`` is dropped with the
    other dash-prefixed arguments, so it also selects stdin).

    Returns exit code 2 when the file does not exist; blank input produces
    empty output.
    """
    file_args = [a for a in args if not a.startswith("-")]

    # Read raw CSV to handle ragged data
    if not file_args:
        content = ctx.stdin
    else:
        try:
            path = ctx.fs.resolve_path(ctx.cwd, file_args[0])
            content = await ctx.fs.read_file(path)
        except FileNotFoundError:
            return ExecResult(
                stdout="",
                stderr=f"xan: {file_args[0]}: No such file or directory\n",
                exit_code=2,
            )

    if not content.strip():
        return ExecResult(stdout="", stderr="", exit_code=0)

    # Let the csv module see the original line endings.  Pre-splitting on
    # "\n" drops newlines embedded in quoted fields and makes bare "\r" line
    # endings raise csv.Error.
    rows = list(csv.reader(io.StringIO(content.strip(), newline="")))

    if not rows:
        return ExecResult(stdout="", stderr="", exit_code=0)

    # Use header row length as the target
    target_len = len(rows[0])

    # Pad first, then slice: a negative repeat count yields no padding, and
    # the slice truncates rows that are too long.
    fixed_rows = [
        (row + [""] * (target_len - len(row)))[:target_len]
        for row in rows
    ]

    output = io.StringIO(newline="")
    csv.writer(output, lineterminator="\n").writerows(fixed_rows)

    return ExecResult(stdout=output.getvalue(), stderr="", exit_code=0)
1235
+
1236
+
1237
async def cmd_flatten(args: list[str], ctx: CommandContext) -> ExecResult:
    """Print each record vertically, one ``header: value`` line per field.

    Records are separated by a single blank line.  Input without headers or
    without rows produces empty output.
    """
    inputs = [name for name in args if not name.startswith("-")]

    headers, data, error = await read_csv_input(inputs, ctx)
    if error:
        return error

    if not (headers and data):
        return ExecResult(stdout="", stderr="", exit_code=0)

    # One text block per record; blocks are joined by an empty line.
    blocks = [
        "\n".join(f"{name}: {row.get(name, '')}" for name in headers)
        for row in data
    ]
    text = "\n\n".join(blocks) + "\n"

    return ExecResult(stdout=text, stderr="", exit_code=0)
1256
+
1257
+
1258
async def cmd_explode(args: list[str], ctx: CommandContext) -> ExecResult:
    """Split one column's values into multiple rows.

    The first positional argument that does not look like a file path is the
    column; every other positional argument is passed on as an input file.
    ``-d``/``--delimiter`` sets the separator (default ``,``).

    Each row is emitted once per separated piece of the column, with the
    piece whitespace-stripped and all other cells copied.  A row whose cell
    is empty is emitted once.

    Returns exit code 1 when no column is given, when the delimiter is empty,
    or when the column is not in the header.  Errors from reading the input
    are passed through.
    """
    col = ""
    delimiter = ","
    file_args = []

    i = 0
    while i < len(args):
        arg = args[i]
        if arg in ("-d", "--delimiter") and i + 1 < len(args):
            delimiter = args[i + 1]
            i += 2
            continue
        elif not arg.startswith("-"):
            if not col:
                if _looks_like_file_path(arg):
                    file_args.append(arg)
                else:
                    col = arg
            else:
                file_args.append(arg)
        i += 1

    if not col:
        return ExecResult(
            stdout="",
            stderr="xan explode: no column specified\n",
            exit_code=1,
        )

    if not delimiter:
        # str.split("") raises ValueError; report it instead of crashing.
        return ExecResult(
            stdout="",
            stderr="xan explode: delimiter must not be empty\n",
            exit_code=1,
        )

    headers, data, error = await read_csv_input(file_args, ctx)
    if error:
        return error

    if col not in headers:
        return ExecResult(
            stdout="",
            stderr=f"xan explode: column '{col}' not found\n",
            exit_code=1,
        )

    # Explode rows
    new_data = []
    for row in data:
        val = row.get(col, "")
        parts = val.split(delimiter) if val else [""]
        for part in parts:
            new_row = dict(row)
            new_row[col] = part.strip()
            new_data.append(new_row)

    return ExecResult(stdout=format_csv(headers, new_data), stderr="", exit_code=0)
1310
+
1311
+
1312
async def cmd_implode(args: list[str], ctx: CommandContext) -> ExecResult:
    """Combine rows that share a group value, joining one column's values.

    The first positional argument that does not look like a file path is the
    column to join; every other positional argument is passed on as an input
    file.  ``-g``/``--group`` names the grouping column (required) and
    ``-d``/``--delimiter`` the join separator (default ``,``).

    Groups appear in first-seen order.  Each output row is a copy of the
    group's first row with the joined column replaced.

    Returns exit code 1 when the column or group column is missing from the
    arguments or from the header.  Errors from reading the input are passed
    through.
    """
    col = ""
    group_col = ""
    delimiter = ","
    file_args = []

    i = 0
    while i < len(args):
        arg = args[i]
        if arg in ("-g", "--group") and i + 1 < len(args):
            group_col = args[i + 1]
            i += 2
            continue
        elif arg in ("-d", "--delimiter") and i + 1 < len(args):
            delimiter = args[i + 1]
            i += 2
            continue
        elif not arg.startswith("-"):
            if not col:
                if _looks_like_file_path(arg):
                    file_args.append(arg)
                else:
                    col = arg
            else:
                file_args.append(arg)
        i += 1

    if not col:
        return ExecResult(
            stdout="",
            stderr="xan implode: no column specified\n",
            exit_code=1,
        )

    if not group_col:
        return ExecResult(
            stdout="",
            stderr="xan implode: no group column specified (use -g)\n",
            exit_code=1,
        )

    headers, data, error = await read_csv_input(file_args, ctx)
    if error:
        return error

    # Without this check a mistyped group column merges every row into one,
    # and a mistyped target column adds a key that is not in the header.
    for name in (col, group_col):
        if name not in headers:
            return ExecResult(
                stdout="",
                stderr=f"xan implode: column '{name}' not found\n",
                exit_code=1,
            )

    # Group rows, preserving first-seen group order.
    groups: dict[str, list[dict[str, str]]] = {}
    for row in data:
        groups.setdefault(row.get(group_col, ""), []).append(row)

    # Implode: the first row of each group is the base record.
    new_data = []
    for rows in groups.values():
        base = dict(rows[0])
        base[col] = delimiter.join(r.get(col, "") for r in rows)
        new_data.append(base)

    return ExecResult(stdout=format_csv(headers, new_data), stderr="", exit_code=0)
1376
+
1377
+
1378
async def cmd_split(args: list[str], ctx: CommandContext) -> ExecResult:
    """Write the CSV out as numbered chunk files of at most N rows each.

    Any positional argument that parses as an integer sets the chunk size;
    all other positional arguments are passed on as input files.
    ``-o``/``--output`` names the target directory (default
    ``/tmp/xan_split``).  Chunks are written as ``0.csv``, ``1.csv``, ...,
    each with the full header.  A chunk size that is missing or not positive
    is an error (exit code 1).
    """
    chunk_size = 0
    output_dir = "/tmp/xan_split"
    inputs = []

    remaining = iter(args)
    for token in remaining:
        if token in ("-o", "--output"):
            value = next(remaining, None)
            if value is not None:
                output_dir = value
            # A trailing -o without a value is skipped like any other flag.
        elif not token.startswith("-"):
            try:
                chunk_size = int(token)
            except ValueError:
                inputs.append(token)

    if chunk_size <= 0:
        return ExecResult(stdout="", stderr="xan split: no chunk size specified\n", exit_code=1)

    headers, data, error = await read_csv_input(inputs, ctx)
    if error:
        return error

    # Make sure the destination exists; an existing directory is fine.
    target = ctx.fs.resolve_path(ctx.cwd, output_dir)
    try:
        await ctx.fs.mkdir(target, recursive=True)
    except FileExistsError:
        pass

    written = 0
    for start in range(0, len(data), chunk_size):
        rows = data[start : start + chunk_size]
        await ctx.fs.write_file(f"{target}/{written}.csv", format_csv(headers, rows))
        written += 1

    summary = f"Split into {written} files in {output_dir}\n"
    return ExecResult(stdout=summary, stderr="", exit_code=0)
1426
+
1427
+
1428
+ # =============================================================================
1429
+ # Phase 3 Commands - Stubs (Not Yet Implemented)
1430
+ # These commands require expression evaluation which is not yet ported.
1431
+ # =============================================================================
1432
+
1433
+
1434
def _not_implemented(cmd: str) -> ExecResult:
    """Build the shared failure result (exit code 1) for a stubbed subcommand."""
    message = (
        f"xan {cmd}: not yet implemented\n"
        "This command requires expression evaluation which is not yet ported from TypeScript.\n"
    )
    return ExecResult(stdout="", stderr=message, exit_code=1)
1444
+
1445
+
1446
async def cmd_join(args: list[str], ctx: CommandContext) -> ExecResult:
    """Stub for ``xan join`` (join two CSVs on a key column).

    Always returns the not-implemented error; ``args`` and ``ctx`` are unused.

    Still to be built:
    - reading more than one input file
    - matching rows on the key column
    - inner, left, right and full outer joins

    Planned usage: xan join <LEFT_COL> <LEFT_FILE> <RIGHT_COL> <RIGHT_FILE>
    """
    return _not_implemented("join")
1457
+
1458
+
1459
async def cmd_agg(args: list[str], ctx: CommandContext) -> ExecResult:
    """Stub for ``xan agg`` (aggregate column values).

    Always returns the not-implemented error; ``args`` and ``ctx`` are unused.

    Still to be built:
    - an expression parser for aggregation functions (sum, count, avg, min, max, etc.)
    - column selection

    Planned usage: xan agg 'sum(price), count()' data.csv
    """
    return _not_implemented("agg")
1469
+
1470
+
1471
async def cmd_groupby(args: list[str], ctx: CommandContext) -> ExecResult:
    """Stub for ``xan groupby`` (group rows and aggregate).

    Always returns the not-implemented error; ``args`` and ``ctx`` are unused.

    Still to be built:
    - an expression parser for aggregation functions
    - group key handling

    Planned usage: xan groupby category 'sum(price), count()' data.csv
    """
    return _not_implemented("groupby")
1481
+
1482
+
1483
async def cmd_map(args: list[str], ctx: CommandContext) -> ExecResult:
    """Stub for ``xan map`` (add computed columns via expressions).

    Always returns the not-implemented error; ``args`` and ``ctx`` are unused.

    Still to be built:
    - an expression parser for column computations
    - arithmetic, string operations and conditionals

    Planned usage: xan map 'total = price * quantity' data.csv
    """
    return _not_implemented("map")
1493
+
1494
+
1495
async def cmd_transform(args: list[str], ctx: CommandContext) -> ExecResult:
    """Stub for ``xan transform`` (rewrite column values via expressions).

    Always returns the not-implemented error; ``args`` and ``ctx`` are unused.

    Still to be built:
    - an expression parser for transformations
    - in-place column modification

    Planned usage: xan transform 'price = price * 1.1' data.csv
    """
    return _not_implemented("transform")
1505
+
1506
+
1507
async def cmd_pivot(args: list[str], ctx: CommandContext) -> ExecResult:
    """Stub for ``xan pivot`` (reshape data into a pivot table).

    Always returns the not-implemented error; ``args`` and ``ctx`` are unused.

    Still to be built:
    - row key, column key and value column handling
    - aggregation when keys repeat

    Planned usage: xan pivot <ROW_COL> <COL_COL> <VAL_COL> data.csv
    """
    return _not_implemented("pivot")
1517
+
1518
+
1519
class XanCommand:
    """Entry point for ``xan``, the CSV toolkit command."""

    name = "xan"

    async def execute(self, args: list[str], ctx: CommandContext) -> ExecResult:
        """Run the subcommand named by ``args[0]`` with the remaining arguments.

        With no arguments, or with ``--help``/``-h`` anywhere in ``args``,
        the usage text is returned with exit code 0.  An unrecognised
        subcommand yields an error result with exit code 1.
        """
        wants_help = not args or "--help" in args or "-h" in args
        if wants_help:
            usage = (
                "Usage: xan <COMMAND> [OPTIONS] [FILE]\n"
                "CSV toolkit for data manipulation.\n\n"
                "Commands:\n"
                " headers Show column names\n"
                " count Count rows\n"
                " head Show first N rows\n"
                " tail Show last N rows\n"
                " slice Extract row range\n"
                " select Select columns\n"
                " drop Drop columns (inverse of select)\n"
                " rename Rename columns (old:new)\n"
                " filter Filter rows by expression\n"
                " search Filter rows by regex\n"
                " sort Sort rows\n"
                " reverse Reverse row order\n"
                " behead Output without header\n"
                " enum Add index column\n"
                " shuffle Randomly reorder rows\n"
                " sample Random sample of N rows\n"
                " dedup Remove duplicate rows\n"
                " top Top N rows by column value\n"
                " cat Concatenate CSV files\n"
                " transpose Swap rows and columns\n"
                " fixlengths Fix ragged CSV\n"
                " flatten Display records vertically\n"
                " explode Split column to rows\n"
                " implode Combine rows by grouping\n"
                " split Split into multiple files\n"
                " view Pretty print as table\n"
                " stats Show column statistics\n"
                " frequency Count value occurrences\n"
                " to Convert to other formats (json)\n"
                " from Convert from other formats (json)\n\n"
                "Not Yet Implemented (require expression evaluation):\n"
                " join Join two CSVs on key\n"
                " agg Aggregate values\n"
                " groupby Group and aggregate\n"
                " map Add computed columns\n"
                " transform Transform column values\n"
                " pivot Reshape data (pivot table)\n\n"
                "Examples:\n"
                " xan headers data.csv\n"
                " xan count data.csv\n"
                " xan head -n 5 data.csv\n"
                " xan select name,email data.csv\n"
                " xan filter 'age > 30' data.csv\n"
                " xan sort -N price data.csv\n"
                " xan to json data.csv\n"
            )
            return ExecResult(stdout=usage, stderr="", exit_code=0)

        subcommand, *sub_args = args

        # Subcommand name (including the short aliases "f" and "freq") -> handler.
        handlers = {
            "headers": cmd_headers,
            "count": cmd_count,
            "head": cmd_head,
            "tail": cmd_tail,
            "slice": cmd_slice,
            "select": cmd_select,
            "drop": cmd_drop,
            "rename": cmd_rename,
            "filter": cmd_filter,
            "search": cmd_search,
            "sort": cmd_sort,
            "reverse": cmd_reverse,
            "behead": cmd_behead,
            "enum": cmd_enum,
            "shuffle": cmd_shuffle,
            "sample": cmd_sample,
            "dedup": cmd_dedup,
            "top": cmd_top,
            "cat": cmd_cat,
            "transpose": cmd_transpose,
            "fixlengths": cmd_fixlengths,
            "flatten": cmd_flatten,
            "f": cmd_flatten,
            "explode": cmd_explode,
            "implode": cmd_implode,
            "split": cmd_split,
            "view": cmd_view,
            "stats": cmd_stats,
            "frequency": cmd_frequency,
            "freq": cmd_frequency,
            "to": cmd_to,
            "from": cmd_from,
            # Stubbed commands (not yet implemented)
            "join": cmd_join,
            "agg": cmd_agg,
            "groupby": cmd_groupby,
            "map": cmd_map,
            "transform": cmd_transform,
            "pivot": cmd_pivot,
        }

        handler = handlers.get(subcommand)
        if handler is None:
            return ExecResult(
                stdout="",
                stderr=f"xan: unknown command '{subcommand}'\nRun 'xan --help' for usage.\n",
                exit_code=1,
            )
        return await handler(sub_args, ctx)