celltype-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. celltype_cli-0.1.0.dist-info/METADATA +267 -0
  2. celltype_cli-0.1.0.dist-info/RECORD +89 -0
  3. celltype_cli-0.1.0.dist-info/WHEEL +4 -0
  4. celltype_cli-0.1.0.dist-info/entry_points.txt +2 -0
  5. celltype_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
  6. ct/__init__.py +3 -0
  7. ct/agent/__init__.py +0 -0
  8. ct/agent/case_studies.py +426 -0
  9. ct/agent/config.py +523 -0
  10. ct/agent/doctor.py +544 -0
  11. ct/agent/knowledge.py +523 -0
  12. ct/agent/loop.py +99 -0
  13. ct/agent/mcp_server.py +478 -0
  14. ct/agent/orchestrator.py +733 -0
  15. ct/agent/runner.py +656 -0
  16. ct/agent/sandbox.py +481 -0
  17. ct/agent/session.py +145 -0
  18. ct/agent/system_prompt.py +186 -0
  19. ct/agent/trace_store.py +228 -0
  20. ct/agent/trajectory.py +169 -0
  21. ct/agent/types.py +182 -0
  22. ct/agent/workflows.py +462 -0
  23. ct/api/__init__.py +1 -0
  24. ct/api/app.py +211 -0
  25. ct/api/config.py +120 -0
  26. ct/api/engine.py +124 -0
  27. ct/cli.py +1448 -0
  28. ct/data/__init__.py +0 -0
  29. ct/data/compute_providers.json +59 -0
  30. ct/data/cro_database.json +395 -0
  31. ct/data/downloader.py +238 -0
  32. ct/data/loaders.py +252 -0
  33. ct/kb/__init__.py +5 -0
  34. ct/kb/benchmarks.py +147 -0
  35. ct/kb/governance.py +106 -0
  36. ct/kb/ingest.py +415 -0
  37. ct/kb/reasoning.py +129 -0
  38. ct/kb/schema_monitor.py +162 -0
  39. ct/kb/substrate.py +387 -0
  40. ct/models/__init__.py +0 -0
  41. ct/models/llm.py +370 -0
  42. ct/tools/__init__.py +195 -0
  43. ct/tools/_compound_resolver.py +297 -0
  44. ct/tools/biomarker.py +368 -0
  45. ct/tools/cellxgene.py +282 -0
  46. ct/tools/chemistry.py +1371 -0
  47. ct/tools/claude.py +390 -0
  48. ct/tools/clinical.py +1153 -0
  49. ct/tools/clue.py +249 -0
  50. ct/tools/code.py +1069 -0
  51. ct/tools/combination.py +397 -0
  52. ct/tools/compute.py +402 -0
  53. ct/tools/cro.py +413 -0
  54. ct/tools/data_api.py +2114 -0
  55. ct/tools/design.py +295 -0
  56. ct/tools/dna.py +575 -0
  57. ct/tools/experiment.py +604 -0
  58. ct/tools/expression.py +655 -0
  59. ct/tools/files.py +957 -0
  60. ct/tools/genomics.py +1387 -0
  61. ct/tools/http_client.py +146 -0
  62. ct/tools/imaging.py +319 -0
  63. ct/tools/intel.py +223 -0
  64. ct/tools/literature.py +743 -0
  65. ct/tools/network.py +422 -0
  66. ct/tools/notification.py +111 -0
  67. ct/tools/omics.py +3330 -0
  68. ct/tools/ops.py +1230 -0
  69. ct/tools/parity.py +649 -0
  70. ct/tools/pk.py +245 -0
  71. ct/tools/protein.py +678 -0
  72. ct/tools/regulatory.py +643 -0
  73. ct/tools/remote_data.py +179 -0
  74. ct/tools/report.py +181 -0
  75. ct/tools/repurposing.py +376 -0
  76. ct/tools/safety.py +1280 -0
  77. ct/tools/shell.py +178 -0
  78. ct/tools/singlecell.py +533 -0
  79. ct/tools/statistics.py +552 -0
  80. ct/tools/structure.py +882 -0
  81. ct/tools/target.py +901 -0
  82. ct/tools/translational.py +123 -0
  83. ct/tools/viability.py +218 -0
  84. ct/ui/__init__.py +0 -0
  85. ct/ui/markdown.py +31 -0
  86. ct/ui/status.py +258 -0
  87. ct/ui/suggestions.py +567 -0
  88. ct/ui/terminal.py +1456 -0
  89. ct/ui/traces.py +112 -0
ct/tools/files.py ADDED
@@ -0,0 +1,957 @@
1
+ """
2
+ File I/O tools for ct.
3
+
4
+ Read files, write reports/CSV, edit/create/delete files, and search file contents.
5
+ Restricted to safe directories (~/.ct/, config-specified paths, and CWD).
6
+ """
7
+
8
+ import csv
9
+ import io
10
+ import re
11
+ import shutil
12
+ from pathlib import Path
13
+
14
+ from ct.tools import registry
15
+
16
+
17
+ def _allowed_paths(config=None) -> list[Path]:
18
+ """Return list of directories the user is allowed to read from."""
19
+ allowed = [Path.home() / ".ct"]
20
+
21
+ if config:
22
+ for key in ("data.base", "data.depmap", "data.prism", "data.l1000",
23
+ "data.msigdb", "sandbox.output_dir"):
24
+ val = config.get(key)
25
+ if val:
26
+ p = Path(val)
27
+ # Add the directory (or parent if it's a file path)
28
+ allowed.append(p if p.is_dir() else p.parent)
29
+
30
+ # Also allow sandbox extra read dirs (e.g. capsule data directories)
31
+ extra = config.get("sandbox.extra_read_dirs")
32
+ if extra:
33
+ for d in str(extra).split(","):
34
+ d = d.strip()
35
+ if d:
36
+ allowed.append(Path(d))
37
+
38
+ return allowed
39
+
40
+
41
def _is_allowed(path: Path, config=None) -> bool:
    """Return True when *path* resolves under one of the allowed directories."""
    target = path.resolve()
    for root in _allowed_paths(config):
        try:
            target.relative_to(root.resolve())
        except ValueError:
            # Not under this root; try the next one.
            continue
        return True
    return False
51
+
52
+
53
+ def _is_within_cwd(path: Path) -> bool:
54
+ """Check if a resolved path is under the current working directory.
55
+
56
+ Resolves symlinks before checking to prevent traversal via symlinks
57
+ (e.g., ./data -> /etc would be rejected).
58
+ """
59
+ try:
60
+ resolved = path.resolve(strict=False)
61
+ cwd = Path.cwd().resolve()
62
+ resolved.relative_to(cwd)
63
+ # Extra check: if path contains a symlink, verify each component
64
+ # stays within CWD after resolution
65
+ if path.is_symlink():
66
+ target = path.resolve()
67
+ target.relative_to(cwd)
68
+ return True
69
+ except ValueError:
70
+ return False
71
+
72
+
73
# Substrings that mark a path as off-limits for edit/delete/create.
# _is_protected() tests these with a plain "pattern in resolved_path_string"
# check, so ".env" intentionally also matches names that merely contain
# ".env" (e.g. ".env.local").
_PROTECTED_PATTERNS = (
    "/.git/",
    ".env",
)
78
+
79
+
80
def _is_protected(path: Path) -> bool:
    """Return True for paths that must never be edited or deleted.

    Blocks anything under a .ssh directory (except ``*.pub`` public keys),
    any resolved path containing a protected pattern, and private-key-style
    filenames (``id_*`` without a ``.pub`` suffix) anywhere on disk.
    """
    full = str(path.resolve())
    lowered = path.name.lower()

    # Inside .ssh: only public keys are fair game.
    if ".ssh/" in full or ".ssh\\" in full:
        return not lowered.endswith(".pub")

    if any(pattern in full for pattern in _PROTECTED_PATTERNS):
        return True

    # Private keys living outside .ssh are protected as well.
    return lowered.startswith("id_") and not lowered.endswith(".pub")
100
+
101
+
102
+ def _output_dir(config=None) -> Path:
103
+ """Get the output directory, creating it if needed."""
104
+ if config:
105
+ out = config.get("sandbox.output_dir")
106
+ if out:
107
+ p = Path(out)
108
+ p.mkdir(parents=True, exist_ok=True)
109
+ return p
110
+ default = Path.cwd() / "outputs"
111
+ default.mkdir(parents=True, exist_ok=True)
112
+ return default
113
+
114
+
115
+ def _resolve_output_path(out_dir: Path, filename: str) -> tuple[Path | None, str | None]:
116
+ """Resolve an output filename safely within out_dir."""
117
+ raw_name = (filename or "").strip()
118
+ if not raw_name:
119
+ return None, "Filename cannot be empty."
120
+
121
+ rel_path = Path(raw_name)
122
+ if rel_path.is_absolute():
123
+ return None, "Absolute paths are not allowed."
124
+
125
+ resolved = (out_dir / rel_path).resolve()
126
+ try:
127
+ resolved.relative_to(out_dir.resolve())
128
+ except ValueError:
129
+ return None, "Path traversal detected."
130
+
131
+ if resolved.name in {"", ".", ".."}:
132
+ return None, "Filename must point to a file."
133
+
134
+ return resolved, None
135
+
136
+
137
def _resolve_cwd_path(path: str) -> tuple[Path | None, str | None]:
    """Expand *path* and require it to stay inside the working directory.

    Returns ``(path, None)`` when allowed, else ``(None, "path_not_allowed")``.
    """
    candidate = Path(path).expanduser()
    if _is_within_cwd(candidate):
        return candidate, None
    return None, "path_not_allowed"
143
+
144
+
145
@registry.register(
    name="files.read_file",
    description="Read a text file and return its contents",
    category="files",
    parameters={"path": "Path to the file to read"},
    usage_guide=(
        "Use to read data files, prior reports, configuration files, or any file in the "
        "current working directory. Also reads from ~/.ct/ and configured data directories."
    ),
)
def read_file(path: str, _session=None, **kwargs) -> dict:
    """Read a UTF-8 text file and return its contents.

    Access is granted when the path lies inside an allowed directory
    (~/.ct, configured data dirs, extra read dirs) OR under the current
    working directory, and is not on the protected blocklist.

    Returns a dict with a human-readable ``summary`` plus either the file
    ``content``/``lines``, an Excel preview, or an ``error`` code.
    """
    config = _session.config if _session else None
    p = Path(path).expanduser()

    # Either sandbox rule may grant access: configured allow-list OR CWD.
    if not _is_allowed(p, config) and not _is_within_cwd(p):
        return {
            "summary": f"Access denied: {path} is outside allowed directories.",
            "error": "path_not_allowed",
        }

    # The blocklist (private keys, .git, .env) wins even for allowed paths.
    if _is_protected(p):
        return {
            "summary": f"Access denied: {path} is a protected file.",
            "error": "path_protected",
        }

    if not p.exists():
        return {
            "summary": f"File not found: {path}",
            "error": "file_not_found",
        }

    try:
        content = p.read_text(encoding="utf-8")
        # Count is line-terminator based; a trailing newline counts as a line.
        lines = content.count("\n") + 1
        return {
            "summary": f"Read {p.name} ({lines} lines, {len(content)} chars).",
            "path": str(p),
            "content": content,
            "lines": lines,
        }
    except UnicodeDecodeError:
        # Handle common binary tabular formats with a structured preview
        # rather than hard-failing decode.
        suffix = p.suffix.lower()
        if suffix in (".xlsx", ".xls"):
            try:
                import pandas as pd

                xls = pd.ExcelFile(p)
                sheet_names = xls.sheet_names
                if not sheet_names:
                    return {
                        "summary": f"Excel file has no sheets: {p.name}",
                        "path": str(p),
                        "sheets": [],
                    }

                # Preview only: first sheet, at most 50 rows read, 5 returned.
                df = pd.read_excel(p, sheet_name=sheet_names[0], nrows=50)
                rows = len(df)
                cols = [str(c) for c in df.columns]
                preview = df.head(5).to_dict(orient="records")
                return {
                    "summary": (
                        f"Read Excel file {p.name}: {len(sheet_names)} sheet(s), "
                        f"previewed '{sheet_names[0]}' ({rows} rows, {len(cols)} columns in preview)."
                    ),
                    "path": str(p),
                    "format": "excel",
                    "sheets": sheet_names,
                    "sheet": sheet_names[0],
                    "columns": cols,
                    "rows_previewed": rows,
                    "preview": preview,
                }
            except Exception as e:
                # Covers a missing pandas/openpyxl install as well as parse errors.
                return {"summary": f"Error reading Excel file {path}: {e}", "error": str(e)}
        # Non-Excel binary content: report rather than guess at a decoding.
        return {
            "summary": (
                f"{p.name} appears to be a binary/non-UTF8 file. "
                "Use code.execute or a format-specific tool to parse it."
            ),
            "path": str(p),
            "error": "binary_file",
        }
    except Exception as e:
        return {"summary": f"Error reading {path}: {e}", "error": str(e)}
233
+
234
+
235
@registry.register(
    name="files.edit_file",
    description="Edit a file by replacing an exact string match with new content",
    category="files",
    parameters={
        "path": "Path to the file to edit (must be within CWD)",
        "old_string": "Exact string to find and replace (must be unique in the file)",
        "new_string": "Replacement string",
    },
    usage_guide=(
        "Use to make targeted edits to files in the current working directory. "
        "The old_string must appear exactly once in the file for unambiguous replacement."
    ),
)
def edit_file(path: str, old_string: str, new_string: str, **kwargs) -> dict:
    """Apply a single, unambiguous exact-string replacement to a file.

    The file must exist inside the CWD and not be protected; *old_string*
    must occur exactly once, otherwise the edit is refused.
    """
    target = Path(path).expanduser()

    # Sandbox checks first: containment, blocklist, existence.
    if not _is_within_cwd(target):
        return {"summary": f"Access denied: {path} is outside working directory.", "error": "path_not_allowed"}
    if _is_protected(target):
        return {"summary": f"Protected path: {path} cannot be edited.", "error": "path_protected"}
    if not target.exists():
        return {"summary": f"File not found: {path}", "error": "file_not_found"}

    try:
        text = target.read_text(encoding="utf-8")
    except Exception as e:
        return {"summary": f"Error reading {path}: {e}", "error": str(e)}

    occurrences = text.count(old_string)
    if occurrences == 0:
        return {"summary": f"String not found in {target.name}.", "error": "string_not_found"}
    if occurrences > 1:
        # Refuse ambiguous edits; the caller must supply more context.
        return {
            "summary": f"Ambiguous: '{old_string[:50]}...' appears {occurrences} times in {target.name}. Provide more context.",
            "error": "ambiguous_match",
            "match_count": occurrences,
        }

    updated = text.replace(old_string, new_string, 1)
    try:
        target.write_text(updated, encoding="utf-8")
    except Exception as e:
        return {"summary": f"Error writing {path}: {e}", "error": str(e)}

    return {
        "summary": f"Edited {target.name}: replaced {len(old_string)} chars with {len(new_string)} chars.",
        "path": str(target.resolve()),
        "old_length": len(old_string),
        "new_length": len(new_string),
    }
287
+
288
+
289
@registry.register(
    name="files.create_file",
    description="Create a new file with the given content",
    category="files",
    parameters={
        "path": "Path for the new file (must be within CWD)",
        "content": "Content to write to the file",
    },
    usage_guide=(
        "Use to create new files (scripts, configs, data files) in the working directory. "
        "If the file already exists with different text content it is rewritten in place "
        "(identical content is a no-op); unreadable/binary existing files are never "
        "overwritten — use edit_file for targeted modifications."
    ),
)
def create_file(path: str, content: str, **kwargs) -> dict:
    """Create (or idempotently refresh) a text file inside the CWD.

    Behavior when the path already exists:
      * identical content  -> no-op, reported with ``unchanged: True``
      * different text     -> rewritten, reported with ``overwritten: True``
      * unreadable/binary  -> refused with ``error: file_exists``

    BUGFIX (docs): the registered usage_guide previously claimed the tool
    "will not overwrite existing files", contradicting the deliberate
    rewrite-on-change behavior below; the description now matches the code.
    """
    p = Path(path).expanduser()

    if not _is_within_cwd(p):
        return {"summary": f"Access denied: {path} is outside working directory.", "error": "path_not_allowed"}
    if _is_protected(p):
        return {"summary": f"Protected path: {path} cannot be created.", "error": "path_protected"}
    if p.exists():
        try:
            existing = p.read_text(encoding="utf-8")
            if existing == content:
                lines = content.count("\n") + 1
                return {
                    "summary": f"File already exists with identical content: {p.name}.",
                    "path": str(p.resolve()),
                    "lines": lines,
                    "size": len(content),
                    "unchanged": True,
                }
            # Auto-update stale generated artifacts so repeated workflows are idempotent.
            p.write_text(content, encoding="utf-8")
            lines = content.count("\n") + 1
            return {
                "summary": f"Updated existing file {p.name} ({lines} lines, {len(content)} chars).",
                "path": str(p.resolve()),
                "lines": lines,
                "size": len(content),
                "overwritten": True,
            }
        except Exception:
            # Keep default behavior for non-text/unreadable files (and for a
            # failed rewrite): fall through to the file_exists refusal.
            pass
        return {"summary": f"File already exists: {path}. Use edit_file to modify.", "error": "file_exists"}

    try:
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(content, encoding="utf-8")
    except Exception as e:
        return {"summary": f"Error creating {path}: {e}", "error": str(e)}

    lines = content.count("\n") + 1
    return {
        "summary": f"Created {p.name} ({lines} lines, {len(content)} chars).",
        "path": str(p.resolve()),
        "lines": lines,
        "size": len(content),
    }
350
+
351
+
352
@registry.register(
    name="files.delete_file",
    description="Delete a file from the working directory",
    category="files",
    parameters={"path": "Path to the file to delete (must be within CWD)"},
    usage_guide="Use to remove files from the working directory. Cannot delete directories.",
)
def delete_file(path: str, **kwargs) -> dict:
    """Remove a single file from the working directory.

    Directories are refused; protected paths and paths outside the CWD
    are rejected before any filesystem change is made.
    """
    target = Path(path).expanduser()

    if not _is_within_cwd(target):
        return {"summary": f"Access denied: {path} is outside working directory.", "error": "path_not_allowed"}
    if _is_protected(target):
        return {"summary": f"Protected path: {path} cannot be deleted.", "error": "path_protected"}
    if not target.exists():
        return {"summary": f"File not found: {path}", "error": "file_not_found"}
    if target.is_dir():
        return {"summary": f"Cannot delete directory: {path}. Only files.", "error": "is_directory"}

    try:
        # Record the size before unlinking so it can still be reported.
        size = target.stat().st_size
        target.unlink()
    except Exception as e:
        return {"summary": f"Error deleting {path}: {e}", "error": str(e)}

    return {
        "summary": f"Deleted {target.name} ({size} bytes).",
        "path": str(target.resolve()),
    }
382
+
383
+
384
@registry.register(
    name="files.move_file",
    description="Move or rename a file within the working directory",
    category="files",
    parameters={
        "source_path": "Path to source file (must be within CWD)",
        "dest_path": "Path to destination file (must be within CWD)",
        "overwrite": "Whether to overwrite destination if it exists (default false)",
    },
    usage_guide=(
        "Use to rename files or reorganize outputs in the workspace. "
        "Both source and destination must stay inside the current working directory."
    ),
)
def move_file(source_path: str, dest_path: str, overwrite: bool = False, **kwargs) -> dict:
    """Rename/move a single file, keeping both endpoints inside the CWD."""
    src, err = _resolve_cwd_path(source_path)
    if err:
        return {"summary": f"Access denied: {source_path} is outside working directory.", "error": err}
    dst, err = _resolve_cwd_path(dest_path)
    if err:
        return {"summary": f"Access denied: {dest_path} is outside working directory.", "error": err}

    # Validation order: blocklist, source existence/type, destination clobber.
    if _is_protected(src) or _is_protected(dst):
        return {"summary": "Protected path cannot be moved.", "error": "path_protected"}
    if not src.exists():
        return {"summary": f"File not found: {source_path}", "error": "file_not_found"}
    if src.is_dir():
        return {"summary": f"Source is a directory: {source_path}", "error": "is_directory"}
    if dst.exists():
        if not overwrite:
            return {"summary": f"Destination exists: {dest_path}", "error": "file_exists"}
        if dst.is_dir():
            return {"summary": f"Destination is a directory: {dest_path}", "error": "is_directory"}

    try:
        dst.parent.mkdir(parents=True, exist_ok=True)
        # Path.replace atomically overwrites an existing destination file.
        src.replace(dst)
    except Exception as e:
        return {"summary": f"Error moving file: {e}", "error": str(e)}
    return {"summary": f"Moved {src.name} to {dst}", "source": str(src), "destination": str(dst)}
422
+
423
+
424
@registry.register(
    name="files.copy_file",
    description="Copy a file within the working directory",
    category="files",
    parameters={
        "source_path": "Path to source file (must be within CWD)",
        "dest_path": "Path to destination file (must be within CWD)",
        "overwrite": "Whether to overwrite destination if it exists (default false)",
    },
    usage_guide=(
        "Use to duplicate templates, data files, or reports in the workspace "
        "without editing the original."
    ),
)
def copy_file(source_path: str, dest_path: str, overwrite: bool = False, **kwargs) -> dict:
    """Duplicate a single file, keeping both endpoints inside the CWD."""
    src, err = _resolve_cwd_path(source_path)
    if err:
        return {"summary": f"Access denied: {source_path} is outside working directory.", "error": err}
    dst, err = _resolve_cwd_path(dest_path)
    if err:
        return {"summary": f"Access denied: {dest_path} is outside working directory.", "error": err}

    # Validation order mirrors move_file: blocklist, source, destination.
    if _is_protected(src) or _is_protected(dst):
        return {"summary": "Protected path cannot be copied.", "error": "path_protected"}
    if not src.exists():
        return {"summary": f"File not found: {source_path}", "error": "file_not_found"}
    if src.is_dir():
        return {"summary": f"Source is a directory: {source_path}", "error": "is_directory"}
    if dst.exists():
        if not overwrite:
            return {"summary": f"Destination exists: {dest_path}", "error": "file_exists"}
        if dst.is_dir():
            return {"summary": f"Destination is a directory: {dest_path}", "error": "is_directory"}

    try:
        dst.parent.mkdir(parents=True, exist_ok=True)
        # copy2 preserves metadata (timestamps) along with the content.
        shutil.copy2(src, dst)
    except Exception as e:
        return {"summary": f"Error copying file: {e}", "error": str(e)}
    return {"summary": f"Copied {src.name} to {dst}", "source": str(src), "destination": str(dst)}
462
+
463
+
464
@registry.register(
    name="files.create_directory",
    description="Create a directory within the working directory",
    category="files",
    parameters={
        "path": "Directory path to create (must be within CWD)",
        "exist_ok": "If true, do not error when directory already exists (default true)",
    },
    usage_guide="Use to create folders for outputs, reports, and structured project organization.",
)
def create_directory(path: str, exist_ok: bool = True, **kwargs) -> dict:
    """Create a directory (and any missing parents) inside the CWD."""
    target, err = _resolve_cwd_path(path)
    if err:
        return {"summary": f"Access denied: {path} is outside working directory.", "error": err}
    if _is_protected(target):
        return {"summary": f"Protected path: {path} cannot be created.", "error": "path_protected"}
    # A regular file at this path can never become a directory.
    if target.exists() and target.is_file():
        return {"summary": f"Path exists as a file: {path}", "error": "is_file"}
    try:
        target.mkdir(parents=True, exist_ok=bool(exist_ok))
    except FileExistsError:
        # Only reachable when the caller passed exist_ok=False.
        return {"summary": f"Directory already exists: {path}", "error": "file_exists"}
    except Exception as e:
        return {"summary": f"Error creating directory: {e}", "error": str(e)}
    return {"summary": f"Directory ready: {target}", "path": str(target.resolve())}
490
+
491
+
492
@registry.register(
    name="files.extract_archive",
    description=(
        "Extract a ZIP, tar, tar.gz, or tar.bz2 archive. "
        "Supports extracting the full archive or specific files by pattern."
    ),
    category="files",
    parameters={
        "path": "Path to the archive file",
        "destination": "Directory to extract into (default: current working directory)",
        "pattern": "Optional glob pattern to extract only matching files (e.g. '*.mafft', '156083at2759*')",
    },
    usage_guide=(
        "Use to extract ZIP, tar, tar.gz, or tar.bz2 archives. Safer and more reliable "
        "than shell.run for archive extraction. Supports selective extraction via pattern."
    ),
)
def extract_archive(
    path: str,
    destination: str = ".",
    pattern: str = "",
    _session=None,
    **kwargs,
) -> dict:
    """Extract an archive file into a directory under the CWD.

    If *path* does not exist as given, it is retried relative to each
    configured ``sandbox.extra_read_dirs`` entry. The destination is forced
    to stay inside the CWD (absolute paths outside it fall back to CWD).
    When *pattern* is given, only members whose basename matches the glob
    are extracted. Returns a summary plus up to 50 extracted member names.
    """
    # Imports kept local: this tool is rarely called and the modules are
    # only needed here.
    import fnmatch
    import tarfile
    import zipfile
    import logging
    _log = logging.getLogger("ct.tools.files")
    _log.debug("extract_archive: path=%r destination=%r pattern=%r kwargs=%r", path, destination, pattern, kwargs)

    src = Path(path).expanduser()
    if not src.exists():
        # Try relative to extra_read_dirs
        config = _session.config if _session else None
        if config:
            extra = config.get("sandbox.extra_read_dirs")
            if extra:
                for d in str(extra).split(","):
                    candidate = Path(d.strip()) / path
                    if candidate.exists():
                        src = candidate
                        break
        if not src.exists():
            return {"summary": f"Archive not found: {path}", "error": "file_not_found"}

    # Sanitize destination: only allow relative paths under CWD
    dest = Path(destination)
    cwd = Path.cwd()
    if dest.is_absolute():
        try:
            dest.resolve().relative_to(cwd.resolve())
        except ValueError:
            # Absolute path outside CWD — ignore it, use CWD
            _log.warning("Ignoring absolute destination %s, extracting to CWD", dest)
            dest = cwd
    else:
        # Relative path — resolve relative to CWD
        dest = cwd / dest
    try:
        dest.mkdir(parents=True, exist_ok=True)
    except PermissionError:
        _log.warning("Permission denied for %s, falling back to CWD", dest)
        dest = cwd

    extracted = []
    try:
        if src.suffix == ".zip":
            # ZipFile.extract sanitizes absolute paths and ".." components.
            with zipfile.ZipFile(src, "r") as zf:
                members = zf.namelist()
                if pattern:
                    # Glob matches against the member's basename only.
                    members = [m for m in members if fnmatch.fnmatch(Path(m).name, pattern)]
                for m in members:
                    zf.extract(m, dest)
                    extracted.append(m)
        elif src.suffix in (".gz", ".bz2", ".xz", ".tar") or ".tar." in src.name:
            # "r:*" auto-detects the compression; filter="data" rejects
            # absolute paths, traversal, and special files during extraction.
            # NOTE(review): the filter parameter needs a Python build with
            # tarfile extraction filters (3.12+, or a recent security
            # back-port) — confirm against the package's minimum version.
            with tarfile.open(src, "r:*") as tf:
                members = tf.getnames()
                if pattern:
                    members = [m for m in members if fnmatch.fnmatch(Path(m).name, pattern)]
                    for m in members:
                        tf.extract(m, dest, filter="data")
                        extracted.append(m)
                else:
                    tf.extractall(dest, filter="data")
                    extracted = members
        else:
            return {"summary": f"Unsupported archive format: {src.suffix}", "error": "unsupported_format"}
    except Exception as e:
        return {"summary": f"Extraction error: {e}", "error": str(e)}

    summary = f"Extracted {len(extracted)} files from {src.name} to {dest}"
    if pattern:
        summary += f" (pattern: {pattern})"

    return {
        "summary": summary,
        "extracted_count": len(extracted),
        "destination": str(dest.resolve()),
        "files": extracted[:50],  # Cap for large archives
    }
594
+
595
+
596
@registry.register(
    name="files.list_directory",
    description="List directory entries with metadata",
    category="files",
    parameters={
        "path": "Directory path to inspect (default CWD)",
        "recursive": "If true, recurse through subdirectories (default false)",
        "max_entries": "Maximum entries to return (default 200)",
        "show_hidden": "Include dotfiles/directories (default false)",
    },
    usage_guide="Use to inspect workspace structure before reading or modifying files.",
)
def list_directory(
    path: str = "",
    recursive: bool = False,
    max_entries: int = 200,
    show_hidden: bool = False,
    **kwargs,
) -> dict:
    """List directory contents in a safe, bounded way.

    Paths outside the CWD are permitted only when covered by the configured
    allow-list (data dirs, extra read dirs). Returns at most *max_entries*
    entries (clamped to 1..1000), each with path, name, type, and — for
    files — size.
    """
    base = Path(path).expanduser() if path else Path.cwd()
    # If the requested path is missing or outside CWD, retry its basename
    # under CWD (common when callers pass an absolute path from elsewhere).
    if not base.exists() or not _is_within_cwd(base):
        if path:
            cwd_candidate = Path.cwd() / Path(path).name
            if cwd_candidate.exists() and cwd_candidate.is_dir():
                base = cwd_candidate
    if not _is_within_cwd(base) and base != Path.cwd():
        # Check if it's in allowed paths (extra_read_dirs, data dirs)
        config = getattr(kwargs.get("_session"), "config", None) if kwargs.get("_session") else None
        if not _is_allowed(base, config):
            return {"summary": f"Access denied: {path} is outside working directory.", "error": "path_not_allowed"}
    if not base.exists():
        return {"summary": f"Path not found: {base}", "error": "file_not_found"}
    if not base.is_dir():
        return {"summary": f"Not a directory: {base}", "error": "not_directory"}

    max_entries = min(max(int(max_entries), 1), 1000)
    cwd = Path.cwd().resolve()
    entries = []

    iterator = base.rglob("*") if recursive else base.iterdir()
    try:
        for p in sorted(iterator):
            name = p.name
            if not show_hidden and name.startswith("."):
                continue
            try:
                rel = str(p.resolve().relative_to(cwd))
            except ValueError:
                # BUGFIX: entries under allowed directories that live outside
                # the CWD (extra_read_dirs / data dirs) used to be skipped
                # here, so those listings always came back empty. Report the
                # absolute path instead of dropping the entry.
                rel = str(p.resolve())
            item = {
                "path": rel,
                "name": name,
                "type": "dir" if p.is_dir() else "file",
            }
            if p.is_file():
                try:
                    item["size"] = p.stat().st_size
                except OSError:
                    # Unreadable metadata; keep the entry without a size.
                    item["size"] = None
            entries.append(item)
            if len(entries) >= max_entries:
                break
    except Exception as e:
        return {"summary": f"Error listing directory: {e}", "error": str(e)}

    return {
        "summary": f"Listed {len(entries)} entries under {base}",
        "entries": entries,
        "count": len(entries),
        "directory": str(base.resolve()),
    }
669
+
670
+
671
@registry.register(
    name="files.search_files",
    description="Search for files by glob pattern within the working directory",
    category="files",
    parameters={
        "pattern": "Glob pattern (e.g., '**/*.py', '*.csv', 'src/**/*.ts')",
        "path": "Subdirectory to search in (default: CWD)",
    },
    usage_guide="Use to find files by name pattern. Returns file paths, names, and sizes.",
)
def search_files(pattern: str, path: str = "", **kwargs) -> dict:
    """Find files under the CWD whose names match a glob pattern.

    Results are capped at 100 files; anything resolving outside the CWD
    (e.g. via symlinks) is excluded.
    """
    root = Path(path).expanduser() if path else Path.cwd()

    if not _is_within_cwd(root) and root != Path.cwd():
        return {"summary": f"Access denied: {path} is outside working directory.", "error": "path_not_allowed"}

    try:
        cwd = Path.cwd().resolve()
        hits = []
        for candidate in sorted(root.glob(pattern)):
            if not candidate.is_file():
                continue
            try:
                rel = str(candidate.resolve().relative_to(cwd))
            except ValueError:
                continue  # Skip files outside CWD
            hits.append({
                "path": str(candidate.resolve()),
                "name": candidate.name,
                "relative": rel,
                "size": candidate.stat().st_size,
            })
            if len(hits) >= 100:
                break
    except Exception as e:
        return {"summary": f"Search error: {e}", "error": str(e)}

    if not hits:
        return {"summary": f"No files matching '{pattern}'.", "files": [], "count": 0}

    listing = "\n".join(f"  {m['relative']} ({m['size']} bytes)" for m in hits[:20])
    more = f"\n  ... and {len(hits) - 20} more" if len(hits) > 20 else ""
    return {
        "summary": f"Found {len(hits)} files matching '{pattern}':\n{listing}{more}",
        "files": hits,
        "count": len(hits),
    }
718
+
719
+
720
@registry.register(
    name="files.search_content",
    description="Search file contents by regex pattern (like grep)",
    category="files",
    parameters={
        "pattern": "Regex pattern to search for",
        "path": "Subdirectory to search in (default: CWD)",
        "glob": "File glob filter (default: '**/*')",
        "max_results": "Maximum matches to return (default: 50)",
    },
    usage_guide=(
        "Use to search for text patterns across files — find function definitions, "
        "variable usage, TODOs, error messages, etc. Skips binary and large files."
    ),
)
def search_content(pattern: str, path: str = "", glob: str = "**/*",
                   max_results: int = 50, **kwargs) -> dict:
    """Regex content search across files in CWD (grep-like).

    Matches line-by-line, returning at most *max_results* hits with file,
    line number, and a preview capped at 200 characters. Files larger
    than 1 MB, files with common binary extensions, and files resolving
    outside the CWD are skipped.
    """
    base = Path(path).expanduser() if path else Path.cwd()

    if not _is_within_cwd(base) and base != Path.cwd():
        return {"summary": f"Access denied: {path} is outside working directory.", "error": "path_not_allowed"}

    # Compile once up front; an invalid pattern is a user error, not a crash.
    try:
        compiled = re.compile(pattern)
    except re.error as e:
        return {"summary": f"Invalid regex: {e}", "error": str(e)}

    cwd = Path.cwd().resolve()
    matches = []
    files_searched = 0

    try:
        for fp in sorted(base.glob(glob)):
            if not fp.is_file():
                continue
            # Skip large files (>1MB) and likely binary files
            try:
                size = fp.stat().st_size
            except OSError:
                continue
            if size > 1_000_000:
                continue
            # Skip common binary extensions
            if fp.suffix.lower() in ('.pyc', '.pyo', '.so', '.dll', '.exe', '.bin',
                                     '.png', '.jpg', '.jpeg', '.gif', '.ico', '.pdf',
                                     '.zip', '.tar', '.gz', '.bz2', '.whl', '.egg'):
                continue

            # errors="ignore" tolerates stray non-UTF8 bytes in text files.
            try:
                content = fp.read_text(encoding="utf-8", errors="ignore")
            except Exception:
                continue

            files_searched += 1
            for line_num, line in enumerate(content.splitlines(), 1):
                if compiled.search(line):
                    # Report paths relative to CWD; drop anything that
                    # resolves outside it (e.g. via symlink).
                    try:
                        rel = str(fp.resolve().relative_to(cwd))
                    except ValueError:
                        continue
                    preview = line.strip()
                    if len(preview) > 200:
                        preview = preview[:197] + "..."
                    matches.append({
                        "file": rel,
                        "line": line_num,
                        "text": preview,
                    })
                    if len(matches) >= max_results:
                        break
            # Second break exits the file loop once the cap is reached.
            if len(matches) >= max_results:
                break
    except Exception as e:
        return {"summary": f"Search error: {e}", "error": str(e)}

    if not matches:
        return {
            "summary": f"No matches for '{pattern}' in {files_searched} files.",
            "matches": [],
            "count": 0,
            "files_searched": files_searched,
        }

    listing = "\n".join(f"  {m['file']}:{m['line']}: {m['text']}" for m in matches[:15])
    more = f"\n  ... and {len(matches) - 15} more" if len(matches) > 15 else ""
    return {
        "summary": f"Found {len(matches)} matches for '{pattern}' across {files_searched} files:\n{listing}{more}",
        "matches": matches,
        "count": len(matches),
        "files_searched": files_searched,
    }
812
+
813
+
814
@registry.register(
    name="files.write_report",
    description="Write a report to the output directory",
    category="files",
    parameters={
        "content": "Report content (markdown text)",
        "filename": "Output filename (e.g., 'report.md')",
        "format": "Output format: 'markdown' (default) or 'text'",
        "overwrite": "Whether to overwrite existing file (default False)",
    },
    usage_guide=(
        "Use to save analysis results as a formatted report. "
        "Output goes to the configured output directory (./outputs by default)."
    ),
)
def write_report(content: str, filename: str = "report.md",
                 format: str = "markdown", overwrite: bool = False,
                 _session=None, **kwargs) -> dict:
    """Write a report to the output directory.

    Args:
        content: Report body, written verbatim as UTF-8 text.
        filename: Target filename; ".md" is appended when format is
            "markdown" and the name lacks a markdown extension.
        format: "markdown" (default) or "text"; only affects extension fixup.
        overwrite: When False (default), never clobber an existing file --
            a numbered sibling like "report_2.md" is written instead.
        _session: Optional session object; its config supplies the output dir.

    Returns:
        dict with "summary" plus "path"/"size" on success, or an "error" key
        ("invalid_filename" or the exception text) on failure.
    """
    config = _session.config if _session else None
    out_dir = _output_dir(config)

    # Ensure filename has appropriate extension
    if format == "markdown" and not filename.endswith((".md", ".markdown")):
        filename = filename + ".md"

    out_path, error = _resolve_output_path(out_dir, filename)
    if error:
        return {
            # BUG FIX: previously emitted the literal placeholder "(unknown)"
            # instead of the offending filename, making the error useless.
            "summary": f"Invalid filename '{filename}': {error}",
            "error": "invalid_filename",
        }

    if not overwrite and out_path.exists():
        # Derive "stem_2.ext", "stem_3.ext", ... until a free name is found.
        suffix = "".join(out_path.suffixes)
        stem = out_path.name[: -len(suffix)] if suffix else out_path.name
        counter = 2
        candidate = out_path.parent / f"{stem}_{counter}{suffix}"
        while candidate.exists():
            counter += 1
            candidate = out_path.parent / f"{stem}_{counter}{suffix}"
        out_path = candidate

    try:
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(content, encoding="utf-8")
        return {
            "summary": f"Report saved to {out_path}",
            "path": str(out_path),
            "size": len(content),
        }
    except Exception as e:
        return {"summary": f"Error writing report: {e}", "error": str(e)}
867
+
868
+
869
@registry.register(
    name="files.write_csv",
    description="Write structured data as a CSV file",
    category="files",
    parameters={
        "data": "List of dicts to write (each dict = one row)",
        "filename": "Output filename (e.g., 'results.csv')",
    },
    usage_guide=(
        "Use to export structured results (tables, rankings, gene lists) as CSV. "
        "Input is a list of dicts; keys become column headers."
    ),
)
def write_csv(data: list, filename: str = "results.csv",
              _session=None, **kwargs) -> dict:
    """Write structured data as a CSV file in the output directory.

    Args:
        data: List of dicts (keys become column headers) or list of
            lists/tuples (written row-by-row without a header).
        filename: Target filename; ".csv" is appended if missing.
        _session: Optional session object; its config supplies the output dir.

    Returns:
        dict with "summary" plus "path"/"rows" on success, or an "error" key
        ("invalid_filename", "empty_data", or the exception text) on failure.
    """
    config = _session.config if _session else None
    out_dir = _output_dir(config)

    if not filename.endswith(".csv"):
        filename = filename + ".csv"

    out_path, error = _resolve_output_path(out_dir, filename)
    if error:
        return {
            # BUG FIX: previously emitted the literal placeholder "(unknown)"
            # instead of the offending filename, making the error useless.
            "summary": f"Invalid filename '{filename}': {error}",
            "error": "invalid_filename",
        }

    if not data:
        return {"summary": "No data to write.", "error": "empty_data"}

    try:
        out_path.parent.mkdir(parents=True, exist_ok=True)
        # newline="" is required by the csv module so it controls line endings
        # itself; the previous StringIO + write_text round-trip produced
        # doubled "\r\r\n" endings on Windows.
        with out_path.open("w", encoding="utf-8", newline="") as fh:
            if isinstance(data[0], dict):
                # BUG FIX: headers were taken from the first row only, so any
                # later row with an extra key raised ValueError. Use the union
                # of keys across all rows (first-seen order); rows missing a
                # key get an empty cell.
                fieldnames = []
                seen = set()
                for row in data:
                    for key in row:
                        if key not in seen:
                            seen.add(key)
                            fieldnames.append(key)
                writer = csv.DictWriter(fh, fieldnames=fieldnames, restval="")
                writer.writeheader()
                writer.writerows(data)
            else:
                # Fallback: list of lists/tuples
                csv.writer(fh).writerows(data)

        return {
            "summary": f"CSV saved to {out_path} ({len(data)} rows).",
            "path": str(out_path),
            "rows": len(data),
        }
    except Exception as e:
        return {"summary": f"Error writing CSV: {e}", "error": str(e)}
925
+
926
+
927
@registry.register(
    name="files.list_outputs",
    description="List all files in the output directory",
    category="files",
    parameters={},
    usage_guide="Use to see what reports and exports have been generated this session.",
)
def list_outputs(_session=None, **kwargs) -> dict:
    """Enumerate the files currently present in the session output directory."""
    cfg = _session.config if _session else None
    out_dir = _output_dir(cfg)

    entries = []
    if out_dir.exists():
        entries = [
            {"name": p.name, "size": p.stat().st_size, "path": str(p)}
            for p in sorted(out_dir.iterdir())
            if p.is_file()
        ]

    if not entries:
        return {"summary": f"Output directory is empty: {out_dir}", "files": []}

    lines = "\n".join(f" {f['name']} ({f['size']} bytes)" for f in entries)
    return {
        "summary": f"Output directory ({out_dir}):\n{lines}",
        "files": entries,
        "directory": str(out_dir),
    }