celltype-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- celltype_cli-0.1.0.dist-info/METADATA +267 -0
- celltype_cli-0.1.0.dist-info/RECORD +89 -0
- celltype_cli-0.1.0.dist-info/WHEEL +4 -0
- celltype_cli-0.1.0.dist-info/entry_points.txt +2 -0
- celltype_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- ct/__init__.py +3 -0
- ct/agent/__init__.py +0 -0
- ct/agent/case_studies.py +426 -0
- ct/agent/config.py +523 -0
- ct/agent/doctor.py +544 -0
- ct/agent/knowledge.py +523 -0
- ct/agent/loop.py +99 -0
- ct/agent/mcp_server.py +478 -0
- ct/agent/orchestrator.py +733 -0
- ct/agent/runner.py +656 -0
- ct/agent/sandbox.py +481 -0
- ct/agent/session.py +145 -0
- ct/agent/system_prompt.py +186 -0
- ct/agent/trace_store.py +228 -0
- ct/agent/trajectory.py +169 -0
- ct/agent/types.py +182 -0
- ct/agent/workflows.py +462 -0
- ct/api/__init__.py +1 -0
- ct/api/app.py +211 -0
- ct/api/config.py +120 -0
- ct/api/engine.py +124 -0
- ct/cli.py +1448 -0
- ct/data/__init__.py +0 -0
- ct/data/compute_providers.json +59 -0
- ct/data/cro_database.json +395 -0
- ct/data/downloader.py +238 -0
- ct/data/loaders.py +252 -0
- ct/kb/__init__.py +5 -0
- ct/kb/benchmarks.py +147 -0
- ct/kb/governance.py +106 -0
- ct/kb/ingest.py +415 -0
- ct/kb/reasoning.py +129 -0
- ct/kb/schema_monitor.py +162 -0
- ct/kb/substrate.py +387 -0
- ct/models/__init__.py +0 -0
- ct/models/llm.py +370 -0
- ct/tools/__init__.py +195 -0
- ct/tools/_compound_resolver.py +297 -0
- ct/tools/biomarker.py +368 -0
- ct/tools/cellxgene.py +282 -0
- ct/tools/chemistry.py +1371 -0
- ct/tools/claude.py +390 -0
- ct/tools/clinical.py +1153 -0
- ct/tools/clue.py +249 -0
- ct/tools/code.py +1069 -0
- ct/tools/combination.py +397 -0
- ct/tools/compute.py +402 -0
- ct/tools/cro.py +413 -0
- ct/tools/data_api.py +2114 -0
- ct/tools/design.py +295 -0
- ct/tools/dna.py +575 -0
- ct/tools/experiment.py +604 -0
- ct/tools/expression.py +655 -0
- ct/tools/files.py +957 -0
- ct/tools/genomics.py +1387 -0
- ct/tools/http_client.py +146 -0
- ct/tools/imaging.py +319 -0
- ct/tools/intel.py +223 -0
- ct/tools/literature.py +743 -0
- ct/tools/network.py +422 -0
- ct/tools/notification.py +111 -0
- ct/tools/omics.py +3330 -0
- ct/tools/ops.py +1230 -0
- ct/tools/parity.py +649 -0
- ct/tools/pk.py +245 -0
- ct/tools/protein.py +678 -0
- ct/tools/regulatory.py +643 -0
- ct/tools/remote_data.py +179 -0
- ct/tools/report.py +181 -0
- ct/tools/repurposing.py +376 -0
- ct/tools/safety.py +1280 -0
- ct/tools/shell.py +178 -0
- ct/tools/singlecell.py +533 -0
- ct/tools/statistics.py +552 -0
- ct/tools/structure.py +882 -0
- ct/tools/target.py +901 -0
- ct/tools/translational.py +123 -0
- ct/tools/viability.py +218 -0
- ct/ui/__init__.py +0 -0
- ct/ui/markdown.py +31 -0
- ct/ui/status.py +258 -0
- ct/ui/suggestions.py +567 -0
- ct/ui/terminal.py +1456 -0
- ct/ui/traces.py +112 -0
ct/tools/files.py
ADDED
|
@@ -0,0 +1,957 @@
|
|
|
1
|
+
"""
|
|
2
|
+
File I/O tools for ct.
|
|
3
|
+
|
|
4
|
+
Read files, write reports/CSV, edit/create/delete files, and search file contents.
|
|
5
|
+
Restricted to safe directories (~/.ct/, config-specified paths, and CWD).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import csv
|
|
9
|
+
import io
|
|
10
|
+
import re
|
|
11
|
+
import shutil
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from ct.tools import registry
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _allowed_paths(config=None) -> list[Path]:
|
|
18
|
+
"""Return list of directories the user is allowed to read from."""
|
|
19
|
+
allowed = [Path.home() / ".ct"]
|
|
20
|
+
|
|
21
|
+
if config:
|
|
22
|
+
for key in ("data.base", "data.depmap", "data.prism", "data.l1000",
|
|
23
|
+
"data.msigdb", "sandbox.output_dir"):
|
|
24
|
+
val = config.get(key)
|
|
25
|
+
if val:
|
|
26
|
+
p = Path(val)
|
|
27
|
+
# Add the directory (or parent if it's a file path)
|
|
28
|
+
allowed.append(p if p.is_dir() else p.parent)
|
|
29
|
+
|
|
30
|
+
# Also allow sandbox extra read dirs (e.g. capsule data directories)
|
|
31
|
+
extra = config.get("sandbox.extra_read_dirs")
|
|
32
|
+
if extra:
|
|
33
|
+
for d in str(extra).split(","):
|
|
34
|
+
d = d.strip()
|
|
35
|
+
if d:
|
|
36
|
+
allowed.append(Path(d))
|
|
37
|
+
|
|
38
|
+
return allowed
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _is_allowed(path: Path, config=None) -> bool:
    """Return True when *path* falls under any allowed directory."""
    target = path.resolve()
    for root in _allowed_paths(config):
        try:
            target.relative_to(root.resolve())
        except ValueError:
            # Not under this root — try the next one.
            continue
        return True
    return False
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _is_within_cwd(path: Path) -> bool:
|
|
54
|
+
"""Check if a resolved path is under the current working directory.
|
|
55
|
+
|
|
56
|
+
Resolves symlinks before checking to prevent traversal via symlinks
|
|
57
|
+
(e.g., ./data -> /etc would be rejected).
|
|
58
|
+
"""
|
|
59
|
+
try:
|
|
60
|
+
resolved = path.resolve(strict=False)
|
|
61
|
+
cwd = Path.cwd().resolve()
|
|
62
|
+
resolved.relative_to(cwd)
|
|
63
|
+
# Extra check: if path contains a symlink, verify each component
|
|
64
|
+
# stays within CWD after resolution
|
|
65
|
+
if path.is_symlink():
|
|
66
|
+
target = path.resolve()
|
|
67
|
+
target.relative_to(cwd)
|
|
68
|
+
return True
|
|
69
|
+
except ValueError:
|
|
70
|
+
return False
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# Paths that must never be edited or deleted
|
|
74
|
+
_PROTECTED_PATTERNS = (
|
|
75
|
+
"/.git/",
|
|
76
|
+
".env",
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _is_protected(path: Path) -> bool:
|
|
81
|
+
"""Check if a path is in the protected blocklist."""
|
|
82
|
+
resolved = str(path.resolve())
|
|
83
|
+
name = path.name.lower()
|
|
84
|
+
|
|
85
|
+
# .ssh — block private keys but allow .pub
|
|
86
|
+
if ".ssh/" in resolved or ".ssh\\" in resolved:
|
|
87
|
+
# Allow public keys
|
|
88
|
+
if name.endswith(".pub"):
|
|
89
|
+
return False
|
|
90
|
+
return True
|
|
91
|
+
|
|
92
|
+
for pattern in _PROTECTED_PATTERNS:
|
|
93
|
+
if pattern in resolved:
|
|
94
|
+
return True
|
|
95
|
+
|
|
96
|
+
# Block private keys outside .ssh too
|
|
97
|
+
if name.startswith("id_") and not name.endswith(".pub"):
|
|
98
|
+
return True
|
|
99
|
+
return False
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _output_dir(config=None) -> Path:
|
|
103
|
+
"""Get the output directory, creating it if needed."""
|
|
104
|
+
if config:
|
|
105
|
+
out = config.get("sandbox.output_dir")
|
|
106
|
+
if out:
|
|
107
|
+
p = Path(out)
|
|
108
|
+
p.mkdir(parents=True, exist_ok=True)
|
|
109
|
+
return p
|
|
110
|
+
default = Path.cwd() / "outputs"
|
|
111
|
+
default.mkdir(parents=True, exist_ok=True)
|
|
112
|
+
return default
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _resolve_output_path(out_dir: Path, filename: str) -> tuple[Path | None, str | None]:
|
|
116
|
+
"""Resolve an output filename safely within out_dir."""
|
|
117
|
+
raw_name = (filename or "").strip()
|
|
118
|
+
if not raw_name:
|
|
119
|
+
return None, "Filename cannot be empty."
|
|
120
|
+
|
|
121
|
+
rel_path = Path(raw_name)
|
|
122
|
+
if rel_path.is_absolute():
|
|
123
|
+
return None, "Absolute paths are not allowed."
|
|
124
|
+
|
|
125
|
+
resolved = (out_dir / rel_path).resolve()
|
|
126
|
+
try:
|
|
127
|
+
resolved.relative_to(out_dir.resolve())
|
|
128
|
+
except ValueError:
|
|
129
|
+
return None, "Path traversal detected."
|
|
130
|
+
|
|
131
|
+
if resolved.name in {"", ".", ".."}:
|
|
132
|
+
return None, "Filename must point to a file."
|
|
133
|
+
|
|
134
|
+
return resolved, None
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _resolve_cwd_path(path: str) -> tuple[Path | None, str | None]:
    """Expand *path* and require it to live inside the current directory.

    Returns (path, None) when allowed, otherwise (None, "path_not_allowed").
    """
    candidate = Path(path).expanduser()
    if _is_within_cwd(candidate):
        return candidate, None
    return None, "path_not_allowed"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@registry.register(
    name="files.read_file",
    description="Read a text file and return its contents",
    category="files",
    parameters={"path": "Path to the file to read"},
    usage_guide=(
        "Use to read data files, prior reports, configuration files, or any file in the "
        "current working directory. Also reads from ~/.ct/ and configured data directories."
    ),
)
def read_file(path: str, _session=None, **kwargs) -> dict:
    """Read a text file and return its contents.

    Args:
        path: File path; "~" is expanded. Must lie inside the CWD or an
            allowed directory (~/.ct, configured data dirs).
        _session: Optional session whose .config widens the allowed dirs.

    Returns:
        Dict with "summary" plus "content"/"lines"/"path" on success; on
        failure an "error" key (path_not_allowed, path_protected,
        file_not_found, binary_file, or the exception text). Excel files
        get a structured sheet preview instead of raw text.
    """
    config = _session.config if _session else None
    p = Path(path).expanduser()

    # Containment: either an explicitly allowed dir or under the CWD.
    if not _is_allowed(p, config) and not _is_within_cwd(p):
        return {
            "summary": f"Access denied: {path} is outside allowed directories.",
            "error": "path_not_allowed",
        }

    # Never expose secrets (.env, private keys, .git internals).
    if _is_protected(p):
        return {
            "summary": f"Access denied: {path} is a protected file.",
            "error": "path_protected",
        }

    if not p.exists():
        return {
            "summary": f"File not found: {path}",
            "error": "file_not_found",
        }

    try:
        content = p.read_text(encoding="utf-8")
        lines = content.count("\n") + 1
        return {
            "summary": f"Read {p.name} ({lines} lines, {len(content)} chars).",
            "path": str(p),
            "content": content,
            "lines": lines,
        }
    except UnicodeDecodeError:
        # Handle common binary tabular formats with a structured preview
        # rather than hard-failing decode.
        suffix = p.suffix.lower()
        if suffix in (".xlsx", ".xls"):
            try:
                # Imported lazily: pandas is heavy and only needed for Excel.
                import pandas as pd

                xls = pd.ExcelFile(p)
                sheet_names = xls.sheet_names
                if not sheet_names:
                    return {
                        "summary": f"Excel file has no sheets: {p.name}",
                        "path": str(p),
                        "sheets": [],
                    }

                # Only the first sheet is previewed, capped at 50 rows.
                df = pd.read_excel(p, sheet_name=sheet_names[0], nrows=50)
                rows = len(df)
                cols = [str(c) for c in df.columns]
                preview = df.head(5).to_dict(orient="records")
                return {
                    "summary": (
                        f"Read Excel file {p.name}: {len(sheet_names)} sheet(s), "
                        f"previewed '{sheet_names[0]}' ({rows} rows, {len(cols)} columns in preview)."
                    ),
                    "path": str(p),
                    "format": "excel",
                    "sheets": sheet_names,
                    "sheet": sheet_names[0],
                    "columns": cols,
                    "rows_previewed": rows,
                    "preview": preview,
                }
            except Exception as e:
                return {"summary": f"Error reading Excel file {path}: {e}", "error": str(e)}
        # Unknown binary format: tell the caller to use a parser tool.
        return {
            "summary": (
                f"{p.name} appears to be a binary/non-UTF8 file. "
                "Use code.execute or a format-specific tool to parse it."
            ),
            "path": str(p),
            "error": "binary_file",
        }
    except Exception as e:
        return {"summary": f"Error reading {path}: {e}", "error": str(e)}
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
@registry.register(
    name="files.edit_file",
    description="Edit a file by replacing an exact string match with new content",
    category="files",
    parameters={
        "path": "Path to the file to edit (must be within CWD)",
        "old_string": "Exact string to find and replace (must be unique in the file)",
        "new_string": "Replacement string",
    },
    usage_guide=(
        "Use to make targeted edits to files in the current working directory. "
        "The old_string must appear exactly once in the file for unambiguous replacement."
    ),
)
def edit_file(path: str, old_string: str, new_string: str, **kwargs) -> dict:
    """Replace exactly one occurrence of *old_string* with *new_string*."""
    target = Path(path).expanduser()

    # Guard clauses: containment, protection, existence.
    if not _is_within_cwd(target):
        return {"summary": f"Access denied: {path} is outside working directory.", "error": "path_not_allowed"}
    if _is_protected(target):
        return {"summary": f"Protected path: {path} cannot be edited.", "error": "path_protected"}
    if not target.exists():
        return {"summary": f"File not found: {path}", "error": "file_not_found"}

    try:
        text = target.read_text(encoding="utf-8")
    except Exception as e:
        return {"summary": f"Error reading {path}: {e}", "error": str(e)}

    # The match must be unique, otherwise the edit would be ambiguous.
    occurrences = text.count(old_string)
    if occurrences == 0:
        return {"summary": f"String not found in {target.name}.", "error": "string_not_found"}
    if occurrences > 1:
        return {
            "summary": f"Ambiguous: '{old_string[:50]}...' appears {occurrences} times in {target.name}. Provide more context.",
            "error": "ambiguous_match",
            "match_count": occurrences,
        }

    try:
        target.write_text(text.replace(old_string, new_string, 1), encoding="utf-8")
    except Exception as e:
        return {"summary": f"Error writing {path}: {e}", "error": str(e)}

    return {
        "summary": f"Edited {target.name}: replaced {len(old_string)} chars with {len(new_string)} chars.",
        "path": str(target.resolve()),
        "old_length": len(old_string),
        "new_length": len(new_string),
    }
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
@registry.register(
    name="files.create_file",
    description="Create a new file with the given content",
    category="files",
    parameters={
        "path": "Path for the new file (must be within CWD)",
        "content": "Content to write to the file",
    },
    usage_guide=(
        "Use to create new files (scripts, configs, data files) in the working directory. "
        "If the target already exists as readable text, identical content is a no-op and "
        "differing content is updated in place (idempotent re-runs); unreadable/binary "
        "files are never overwritten — use edit_file for targeted modifications."
    ),
)
def create_file(path: str, content: str, **kwargs) -> dict:
    """Create a file, idempotently updating an existing UTF-8 text file.

    Behavior for an existing target:
      * identical content  -> no-op, reported with "unchanged": True
      * different text     -> overwritten, reported with "overwritten": True
      * unreadable/binary  -> refused with error "file_exists"

    (The previous usage_guide claimed existing files are never overwritten,
    which contradicted this deliberate auto-update path.)

    Args:
        path: Target path; must resolve inside the CWD.
        content: Full file content to write (UTF-8).

    Returns:
        Dict with "summary" plus "path"/"lines"/"size" on success, or an
        "error" key (path_not_allowed, path_protected, file_exists, or the
        exception text) on failure.
    """
    p = Path(path).expanduser()

    if not _is_within_cwd(p):
        return {"summary": f"Access denied: {path} is outside working directory.", "error": "path_not_allowed"}
    if _is_protected(p):
        return {"summary": f"Protected path: {path} cannot be created.", "error": "path_protected"}
    if p.exists():
        try:
            existing = p.read_text(encoding="utf-8")
            if existing == content:
                lines = content.count("\n") + 1
                return {
                    "summary": f"File already exists with identical content: {p.name}.",
                    "path": str(p.resolve()),
                    "lines": lines,
                    "size": len(content),
                    "unchanged": True,
                }
            # Auto-update stale generated artifacts so repeated workflows are idempotent.
            p.write_text(content, encoding="utf-8")
            lines = content.count("\n") + 1
            return {
                "summary": f"Updated existing file {p.name} ({lines} lines, {len(content)} chars).",
                "path": str(p.resolve()),
                "lines": lines,
                "size": len(content),
                "overwritten": True,
            }
        except Exception:
            # Keep default behavior for non-text/unreadable files.
            pass
        return {"summary": f"File already exists: {path}. Use edit_file to modify.", "error": "file_exists"}

    try:
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(content, encoding="utf-8")
    except Exception as e:
        return {"summary": f"Error creating {path}: {e}", "error": str(e)}

    lines = content.count("\n") + 1
    return {
        "summary": f"Created {p.name} ({lines} lines, {len(content)} chars).",
        "path": str(p.resolve()),
        "lines": lines,
        "size": len(content),
    }
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
@registry.register(
    name="files.delete_file",
    description="Delete a file from the working directory",
    category="files",
    parameters={"path": "Path to the file to delete (must be within CWD)"},
    usage_guide="Use to remove files from the working directory. Cannot delete directories.",
)
def delete_file(path: str, **kwargs) -> dict:
    """Remove one regular file from the workspace."""
    target = Path(path).expanduser()

    # Guard clauses: containment, protection, existence, file-only.
    if not _is_within_cwd(target):
        return {"summary": f"Access denied: {path} is outside working directory.", "error": "path_not_allowed"}
    if _is_protected(target):
        return {"summary": f"Protected path: {path} cannot be deleted.", "error": "path_protected"}
    if not target.exists():
        return {"summary": f"File not found: {path}", "error": "file_not_found"}
    if target.is_dir():
        return {"summary": f"Cannot delete directory: {path}. Only files.", "error": "is_directory"}

    try:
        # Capture the size before the file disappears, for the report.
        nbytes = target.stat().st_size
        target.unlink()
    except Exception as e:
        return {"summary": f"Error deleting {path}: {e}", "error": str(e)}

    return {
        "summary": f"Deleted {target.name} ({nbytes} bytes).",
        "path": str(target.resolve()),
    }
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
@registry.register(
    name="files.move_file",
    description="Move or rename a file within the working directory",
    category="files",
    parameters={
        "source_path": "Path to source file (must be within CWD)",
        "dest_path": "Path to destination file (must be within CWD)",
        "overwrite": "Whether to overwrite destination if it exists (default false)",
    },
    usage_guide=(
        "Use to rename files or reorganize outputs in the workspace. "
        "Both source and destination must stay inside the current working directory."
    ),
)
def move_file(source_path: str, dest_path: str, overwrite: bool = False, **kwargs) -> dict:
    """Rename/relocate a single file, keeping both ends inside the CWD."""
    source, err = _resolve_cwd_path(source_path)
    if err:
        return {"summary": f"Access denied: {source_path} is outside working directory.", "error": err}
    target, err = _resolve_cwd_path(dest_path)
    if err:
        return {"summary": f"Access denied: {dest_path} is outside working directory.", "error": err}

    if _is_protected(source) or _is_protected(target):
        return {"summary": "Protected path cannot be moved.", "error": "path_protected"}
    if not source.exists():
        return {"summary": f"File not found: {source_path}", "error": "file_not_found"}
    if source.is_dir():
        return {"summary": f"Source is a directory: {source_path}", "error": "is_directory"}
    if target.exists():
        # overwrite gate first, then refuse directory targets.
        if not overwrite:
            return {"summary": f"Destination exists: {dest_path}", "error": "file_exists"}
        if target.is_dir():
            return {"summary": f"Destination is a directory: {dest_path}", "error": "is_directory"}

    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        # Path.replace atomically overwrites an existing destination file.
        source.replace(target)
    except Exception as e:
        return {"summary": f"Error moving file: {e}", "error": str(e)}
    return {"summary": f"Moved {source.name} to {target}", "source": str(source), "destination": str(target)}
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
@registry.register(
    name="files.copy_file",
    description="Copy a file within the working directory",
    category="files",
    parameters={
        "source_path": "Path to source file (must be within CWD)",
        "dest_path": "Path to destination file (must be within CWD)",
        "overwrite": "Whether to overwrite destination if it exists (default false)",
    },
    usage_guide=(
        "Use to duplicate templates, data files, or reports in the workspace "
        "without editing the original."
    ),
)
def copy_file(source_path: str, dest_path: str, overwrite: bool = False, **kwargs) -> dict:
    """Duplicate a single file, keeping both ends inside the CWD."""
    source, err = _resolve_cwd_path(source_path)
    if err:
        return {"summary": f"Access denied: {source_path} is outside working directory.", "error": err}
    target, err = _resolve_cwd_path(dest_path)
    if err:
        return {"summary": f"Access denied: {dest_path} is outside working directory.", "error": err}

    if _is_protected(source) or _is_protected(target):
        return {"summary": "Protected path cannot be copied.", "error": "path_protected"}
    if not source.exists():
        return {"summary": f"File not found: {source_path}", "error": "file_not_found"}
    if source.is_dir():
        return {"summary": f"Source is a directory: {source_path}", "error": "is_directory"}
    if target.exists():
        # overwrite gate first, then refuse directory targets.
        if not overwrite:
            return {"summary": f"Destination exists: {dest_path}", "error": "file_exists"}
        if target.is_dir():
            return {"summary": f"Destination is a directory: {dest_path}", "error": "is_directory"}

    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        # copy2 preserves file metadata (timestamps, mode) alongside content.
        shutil.copy2(source, target)
    except Exception as e:
        return {"summary": f"Error copying file: {e}", "error": str(e)}
    return {"summary": f"Copied {source.name} to {target}", "source": str(source), "destination": str(target)}
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
@registry.register(
    name="files.create_directory",
    description="Create a directory within the working directory",
    category="files",
    parameters={
        "path": "Directory path to create (must be within CWD)",
        "exist_ok": "If true, do not error when directory already exists (default true)",
    },
    usage_guide="Use to create folders for outputs, reports, and structured project organization.",
)
def create_directory(path: str, exist_ok: bool = True, **kwargs) -> dict:
    """Create a directory (and any missing parents) inside the workspace."""
    target, err = _resolve_cwd_path(path)
    if err:
        return {"summary": f"Access denied: {path} is outside working directory.", "error": err}
    if _is_protected(target):
        return {"summary": f"Protected path: {path} cannot be created.", "error": "path_protected"}
    if target.exists() and target.is_file():
        return {"summary": f"Path exists as a file: {path}", "error": "is_file"}

    try:
        target.mkdir(parents=True, exist_ok=bool(exist_ok))
    except FileExistsError:
        # Only reachable when exist_ok was explicitly False.
        return {"summary": f"Directory already exists: {path}", "error": "file_exists"}
    except Exception as e:
        return {"summary": f"Error creating directory: {e}", "error": str(e)}
    return {"summary": f"Directory ready: {target}", "path": str(target.resolve())}
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
@registry.register(
    name="files.extract_archive",
    description=(
        "Extract a ZIP, tar, tar.gz, or tar.bz2 archive. "
        "Supports extracting the full archive or specific files by pattern."
    ),
    category="files",
    parameters={
        "path": "Path to the archive file",
        "destination": "Directory to extract into (default: current working directory)",
        "pattern": "Optional glob pattern to extract only matching files (e.g. '*.mafft', '156083at2759*')",
    },
    usage_guide=(
        "Use to extract ZIP, tar, tar.gz, or tar.bz2 archives. Safer and more reliable "
        "than shell.run for archive extraction. Supports selective extraction via pattern."
    ),
)
def extract_archive(
    path: str,
    destination: str = ".",
    pattern: str = "",
    _session=None,
    **kwargs,
) -> dict:
    """Extract an archive file.

    Args:
        path: Archive path. If it does not exist as given, each configured
            sandbox.extra_read_dirs entry is tried as a base directory.
        destination: Extraction target; absolute paths outside the CWD are
            silently replaced with the CWD (logged as a warning).
        pattern: Optional fnmatch glob applied to member *basenames* only.
        _session: Optional session providing config for extra read dirs.

    Returns:
        Dict with "summary", "extracted_count", "destination", and the
        first 50 member names under "files"; or an "error" key
        (file_not_found, unsupported_format, or the exception text).
    """
    # Lazy imports: archive handling is rarely needed per session.
    import fnmatch
    import tarfile
    import zipfile
    import logging
    _log = logging.getLogger("ct.tools.files")
    _log.debug("extract_archive: path=%r destination=%r pattern=%r kwargs=%r", path, destination, pattern, kwargs)

    src = Path(path).expanduser()
    if not src.exists():
        # Try relative to extra_read_dirs
        config = _session.config if _session else None
        if config:
            extra = config.get("sandbox.extra_read_dirs")
            if extra:
                for d in str(extra).split(","):
                    candidate = Path(d.strip()) / path
                    if candidate.exists():
                        src = candidate
                        break
        if not src.exists():
            return {"summary": f"Archive not found: {path}", "error": "file_not_found"}

    # Sanitize destination: only allow relative paths under CWD
    dest = Path(destination)
    cwd = Path.cwd()
    if dest.is_absolute():
        try:
            dest.resolve().relative_to(cwd.resolve())
        except ValueError:
            # Absolute path outside CWD — ignore it, use CWD
            _log.warning("Ignoring absolute destination %s, extracting to CWD", dest)
            dest = cwd
    else:
        # Relative path — resolve relative to CWD
        dest = cwd / dest
    try:
        dest.mkdir(parents=True, exist_ok=True)
    except PermissionError:
        _log.warning("Permission denied for %s, falling back to CWD", dest)
        dest = cwd

    extracted = []
    try:
        # Dispatch on suffix: .zip vs tar family (.tar, .tar.gz, .tgz-style
        # names containing ".tar.", bare .gz/.bz2/.xz).
        if src.suffix == ".zip":
            with zipfile.ZipFile(src, "r") as zf:
                members = zf.namelist()
                if pattern:
                    # Pattern matches the basename only, not the full member path.
                    members = [m for m in members if fnmatch.fnmatch(Path(m).name, pattern)]
                for m in members:
                    zf.extract(m, dest)
                    extracted.append(m)
        elif src.suffix in (".gz", ".bz2", ".xz", ".tar") or ".tar." in src.name:
            # "r:*" auto-detects the compression scheme.
            with tarfile.open(src, "r:*") as tf:
                members = tf.getnames()
                if pattern:
                    members = [m for m in members if fnmatch.fnmatch(Path(m).name, pattern)]
                    for m in members:
                        # filter="data" rejects absolute paths / traversal
                        # (available on Python 3.12+ and patched 3.8–3.11 —
                        # TODO confirm minimum supported interpreter).
                        tf.extract(m, dest, filter="data")
                        extracted.append(m)
                else:
                    tf.extractall(dest, filter="data")
                    extracted = members
        else:
            return {"summary": f"Unsupported archive format: {src.suffix}", "error": "unsupported_format"}
    except Exception as e:
        return {"summary": f"Extraction error: {e}", "error": str(e)}

    summary = f"Extracted {len(extracted)} files from {src.name} to {dest}"
    if pattern:
        summary += f" (pattern: {pattern})"

    return {
        "summary": summary,
        "extracted_count": len(extracted),
        "destination": str(dest.resolve()),
        "files": extracted[:50],  # Cap for large archives
    }
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
@registry.register(
    name="files.list_directory",
    description="List directory entries with metadata",
    category="files",
    parameters={
        "path": "Directory path to inspect (default CWD)",
        "recursive": "If true, recurse through subdirectories (default false)",
        "max_entries": "Maximum entries to return (default 200)",
        "show_hidden": "Include dotfiles/directories (default false)",
    },
    usage_guide="Use to inspect workspace structure before reading or modifying files.",
)
def list_directory(
    path: str = "",
    recursive: bool = False,
    max_entries: int = 200,
    show_hidden: bool = False,
    **kwargs,
) -> dict:
    """List directory contents in a safe, bounded way.

    Args:
        path: Directory to list; defaults to the current working directory.
        recursive: Recurse into subdirectories when true.
        max_entries: Hard cap on returned entries (clamped to 1..1000).
        show_hidden: Include names starting with "." when true.

    Returns:
        Dict with "summary", "entries" (path/name/type[/size]), "count",
        and "directory"; or an "error" key on failure.
    """
    base = Path(path).expanduser() if path else Path.cwd()
    # If the requested path is missing or escapes the CWD, fall back to a
    # same-named directory directly under the CWD when one exists.
    if not base.exists() or not _is_within_cwd(base):
        if path:
            cwd_candidate = Path.cwd() / Path(path).name
            if cwd_candidate.exists() and cwd_candidate.is_dir():
                base = cwd_candidate
    if not _is_within_cwd(base) and base != Path.cwd():
        # Outside the CWD: only allowed dirs (data dirs, extra_read_dirs) pass.
        # getattr(None, ...) safely yields None when no session was supplied.
        config = getattr(kwargs.get("_session"), "config", None)
        if not _is_allowed(base, config):
            return {"summary": f"Access denied: {path} is outside working directory.", "error": "path_not_allowed"}
    if not base.exists():
        return {"summary": f"Path not found: {base}", "error": "file_not_found"}
    if not base.is_dir():
        return {"summary": f"Not a directory: {base}", "error": "not_directory"}

    max_entries = min(max(int(max_entries), 1), 1000)
    cwd = Path.cwd().resolve()
    entries = []

    iterator = base.rglob("*") if recursive else base.iterdir()
    try:
        for p in sorted(iterator):
            name = p.name
            # NOTE: in recursive mode this filters only on the entry's own
            # name, not on hidden parent directories.
            if not show_hidden and name.startswith("."):
                continue
            try:
                rel = str(p.resolve().relative_to(cwd))
            except ValueError:
                # Fix: entries under an *allowed* directory outside the CWD
                # (extra_read_dirs, data dirs) previously hit this branch and
                # were dropped, so those listings always came back empty.
                # Report them by absolute path instead.
                rel = str(p.resolve())
            item = {
                "path": rel,
                "name": name,
                "type": "dir" if p.is_dir() else "file",
            }
            if p.is_file():
                try:
                    item["size"] = p.stat().st_size
                except OSError:
                    # File vanished or is unreadable; keep the entry anyway.
                    item["size"] = None
            entries.append(item)
            if len(entries) >= max_entries:
                break
    except Exception as e:
        return {"summary": f"Error listing directory: {e}", "error": str(e)}

    return {
        "summary": f"Listed {len(entries)} entries under {base}",
        "entries": entries,
        "count": len(entries),
        "directory": str(base.resolve()),
    }
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
@registry.register(
    name="files.search_files",
    description="Search for files by glob pattern within the working directory",
    category="files",
    parameters={
        "pattern": "Glob pattern (e.g., '**/*.py', '*.csv', 'src/**/*.ts')",
        "path": "Subdirectory to search in (default: CWD)",
    },
    usage_guide="Use to find files by name pattern. Returns file paths, names, and sizes.",
)
def search_files(pattern: str, path: str = "", **kwargs) -> dict:
    """Glob-based file search constrained to the current working directory."""
    base = Path(path).expanduser() if path else Path.cwd()

    if not _is_within_cwd(base) and base != Path.cwd():
        return {"summary": f"Access denied: {path} is outside working directory.", "error": "path_not_allowed"}

    try:
        cwd = Path.cwd().resolve()
        matches = []
        for candidate in sorted(base.glob(pattern)):
            if not candidate.is_file():
                continue
            try:
                rel = str(candidate.resolve().relative_to(cwd))
            except ValueError:
                continue  # Skip files outside CWD
            matches.append({
                "path": str(candidate.resolve()),
                "name": candidate.name,
                "relative": rel,
                "size": candidate.stat().st_size,
            })
            # Bound the result set for very broad patterns.
            if len(matches) >= 100:
                break
    except Exception as e:
        return {"summary": f"Search error: {e}", "error": str(e)}

    if not matches:
        return {"summary": f"No files matching '{pattern}'.", "files": [], "count": 0}

    listing = "\n".join(f"  {m['relative']} ({m['size']} bytes)" for m in matches[:20])
    more = f"\n  ... and {len(matches) - 20} more" if len(matches) > 20 else ""
    return {
        "summary": f"Found {len(matches)} files matching '{pattern}':\n{listing}{more}",
        "files": matches,
        "count": len(matches),
    }
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
@registry.register(
    name="files.search_content",
    description="Search file contents by regex pattern (like grep)",
    category="files",
    parameters={
        "pattern": "Regex pattern to search for",
        "path": "Subdirectory to search in (default: CWD)",
        "glob": "File glob filter (default: '**/*')",
        "max_results": "Maximum matches to return (default: 50)",
    },
    usage_guide=(
        "Use to search for text patterns across files — find function definitions, "
        "variable usage, TODOs, error messages, etc. Skips binary and large files."
    ),
)
def search_content(pattern: str, path: str = "", glob: str = "**/*",
                   max_results: int = 50, **kwargs) -> dict:
    """Regex content search across files in CWD.

    Args:
        pattern: Regular expression matched line-by-line against file text.
        path: Optional subdirectory to search in; defaults to the CWD.
        glob: Glob filter selecting which files to scan (default '**/*').
        max_results: Cap on matches returned (default 50, clamped to 1..500).

    Returns:
        Dict with 'summary', 'matches' (file/line/text dicts), 'count', and
        'files_searched'. On bad regex or I/O failure, 'summary' + 'error'.
    """
    base = Path(path).expanduser() if path else Path.cwd()

    if not _is_within_cwd(base) and base != Path.cwd():
        return {"summary": f"Access denied: {path} is outside working directory.", "error": "path_not_allowed"}

    try:
        compiled = re.compile(pattern)
    except re.error as e:
        return {"summary": f"Invalid regex: {e}", "error": str(e)}

    # Clamp to a sane range (mirrors the directory lister's clamp convention).
    max_results = min(max(int(max_results), 1), 500)

    # Extensions that are almost certainly binary; hoisted out of the loop
    # so the set is built once rather than per file.
    binary_exts = frozenset((
        '.pyc', '.pyo', '.so', '.dll', '.exe', '.bin',
        '.png', '.jpg', '.jpeg', '.gif', '.ico', '.pdf',
        '.zip', '.tar', '.gz', '.bz2', '.whl', '.egg',
    ))

    cwd = Path.cwd().resolve()
    matches = []
    files_searched = 0

    try:
        for fp in sorted(base.glob(glob)):
            if not fp.is_file():
                continue
            # Skip large files (>1MB) — reading them is slow and noisy.
            try:
                size = fp.stat().st_size
            except OSError:
                continue
            if size > 1_000_000:
                continue
            # Skip common binary extensions
            if fp.suffix.lower() in binary_exts:
                continue

            try:
                content = fp.read_text(encoding="utf-8", errors="ignore")
            except Exception:
                continue

            files_searched += 1
            # Compute the CWD-relative path once per file (the original
            # recomputed it on every matching line); a file that resolves
            # outside CWD contributes no matches either way.
            try:
                rel = str(fp.resolve().relative_to(cwd))
            except ValueError:
                continue
            for line_num, line in enumerate(content.splitlines(), 1):
                if compiled.search(line):
                    preview = line.strip()
                    if len(preview) > 200:
                        preview = preview[:197] + "..."
                    matches.append({
                        "file": rel,
                        "line": line_num,
                        "text": preview,
                    })
                    if len(matches) >= max_results:
                        break
            if len(matches) >= max_results:
                break
    except Exception as e:
        return {"summary": f"Search error: {e}", "error": str(e)}

    if not matches:
        return {
            "summary": f"No matches for '{pattern}' in {files_searched} files.",
            "matches": [],
            "count": 0,
            "files_searched": files_searched,
        }

    listing = "\n".join(f"  {m['file']}:{m['line']}: {m['text']}" for m in matches[:15])
    more = f"\n  ... and {len(matches) - 15} more" if len(matches) > 15 else ""
    return {
        "summary": f"Found {len(matches)} matches for '{pattern}' across {files_searched} files:\n{listing}{more}",
        "matches": matches,
        "count": len(matches),
        "files_searched": files_searched,
    }
|
|
812
|
+
|
|
813
|
+
|
|
814
|
+
@registry.register(
    name="files.write_report",
    description="Write a report to the output directory",
    category="files",
    parameters={
        "content": "Report content (markdown text)",
        "filename": "Output filename (e.g., 'report.md')",
        "format": "Output format: 'markdown' (default) or 'text'",
        "overwrite": "Whether to overwrite existing file (default False)",
    },
    usage_guide=(
        "Use to save analysis results as a formatted report. "
        "Output goes to the configured output directory (./outputs by default)."
    ),
)
def write_report(content: str, filename: str = "report.md",
                 format: str = "markdown", overwrite: bool = False,
                 _session=None, **kwargs) -> dict:
    """Write a report to the output directory.

    Args:
        content: Report body, written verbatim as UTF-8.
        filename: Target filename; '.md' is appended for markdown format.
        format: 'markdown' (default) or 'text'. Only affects extension handling.
        overwrite: If False (default), an existing file is never clobbered —
            a numbered sibling ('name_2.md', 'name_3.md', ...) is used instead.
        _session: Optional session object; its .config selects the output dir.

    Returns:
        Dict with 'summary', 'path', and 'size' on success; 'summary' plus
        an 'error' key on failure.
    """
    config = _session.config if _session else None
    out_dir = _output_dir(config)

    # Ensure filename has appropriate extension
    if format == "markdown" and not filename.endswith((".md", ".markdown")):
        filename = filename + ".md"

    out_path, error = _resolve_output_path(out_dir, filename)
    if error:
        # BUG FIX: previous message contained the literal placeholder
        # '(unknown)' instead of the offending filename.
        return {
            "summary": f"Invalid filename '{filename}': {error}",
            "error": "invalid_filename",
        }

    if not overwrite and out_path.exists():
        # Find the next free "stem_N.ext" so existing output is preserved.
        suffix = "".join(out_path.suffixes)
        stem = out_path.name[: -len(suffix)] if suffix else out_path.name
        counter = 2
        candidate = out_path.parent / f"{stem}_{counter}{suffix}"
        while candidate.exists():
            counter += 1
            candidate = out_path.parent / f"{stem}_{counter}{suffix}"
        out_path = candidate

    try:
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(content, encoding="utf-8")
        return {
            "summary": f"Report saved to {out_path}",
            "path": str(out_path),
            "size": len(content),
        }
    except Exception as e:
        return {"summary": f"Error writing report: {e}", "error": str(e)}
|
|
867
|
+
|
|
868
|
+
|
|
869
|
+
@registry.register(
    name="files.write_csv",
    description="Write structured data as a CSV file",
    category="files",
    parameters={
        "data": "List of dicts to write (each dict = one row)",
        "filename": "Output filename (e.g., 'results.csv')",
    },
    usage_guide=(
        "Use to export structured results (tables, rankings, gene lists) as CSV. "
        "Input is a list of dicts; keys become column headers."
    ),
)
def write_csv(data: list, filename: str = "results.csv",
              _session=None, **kwargs) -> dict:
    """Write structured data as CSV.

    Args:
        data: List of dicts (keys become column headers; the header set is
            the first-seen union of keys across all rows, so rows with extra
            keys no longer raise), or a list of lists/tuples as a fallback.
        filename: Target filename; '.csv' is appended when missing.
        _session: Optional session object; its .config selects the output dir.

    Returns:
        Dict with 'summary', 'path', and 'rows' on success; 'summary' plus
        an 'error' key on failure or empty input.
    """
    config = _session.config if _session else None
    out_dir = _output_dir(config)

    if not filename.endswith(".csv"):
        filename = filename + ".csv"

    out_path, error = _resolve_output_path(out_dir, filename)
    if error:
        # BUG FIX: previous message contained the literal placeholder
        # '(unknown)' instead of the offending filename.
        return {
            "summary": f"Invalid filename '{filename}': {error}",
            "error": "invalid_filename",
        }

    if not data:
        return {"summary": "No data to write.", "error": "empty_data"}

    try:
        out_path.parent.mkdir(parents=True, exist_ok=True)
        # newline="" is required by the csv module so the writer controls
        # line endings (write_text would newline-translate on Windows).
        with out_path.open("w", newline="", encoding="utf-8") as fh:
            if isinstance(data[0], dict):
                # Union of keys across all rows, preserving first-seen order,
                # so a row with a key absent from row 0 doesn't raise
                # ValueError inside DictWriter.
                fieldnames: list = []
                seen: set = set()
                for row in data:
                    for key in row:
                        if key not in seen:
                            seen.add(key)
                            fieldnames.append(key)
                writer = csv.DictWriter(fh, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(data)
            else:
                # Fallback: list of lists
                csv.writer(fh).writerows(data)

        return {
            "summary": f"CSV saved to {out_path} ({len(data)} rows).",
            "path": str(out_path),
            "rows": len(data),
        }
    except Exception as e:
        return {"summary": f"Error writing CSV: {e}", "error": str(e)}
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
@registry.register(
    name="files.list_outputs",
    description="List all files in the output directory",
    category="files",
    parameters={},
    usage_guide="Use to see what reports and exports have been generated this session.",
)
def list_outputs(_session=None, **kwargs) -> dict:
    """Enumerate every file currently in the session's output directory.

    Args:
        _session: Optional session object; its .config selects the output dir.

    Returns:
        Dict with 'summary', 'files' (name/size/path dicts), and 'directory';
        an empty directory yields a 'summary' and an empty 'files' list.
    """
    config = _session.config if _session else None
    out_dir = _output_dir(config)

    files = []
    if out_dir.exists():
        files = [
            {"name": entry.name, "size": entry.stat().st_size, "path": str(entry)}
            for entry in sorted(out_dir.iterdir())
            if entry.is_file()
        ]

    if not files:
        return {"summary": f"Output directory is empty: {out_dir}", "files": []}

    listing = "\n".join(f"  {f['name']} ({f['size']} bytes)" for f in files)
    return {
        "summary": f"Output directory ({out_dir}):\n{listing}",
        "files": files,
        "directory": str(out_dir),
    }
|