delimit-cli 3.4.0 → 3.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/delimit-setup.js +23 -0
- package/gateway/ai/backends/tools_data.py +830 -0
- package/gateway/ai/backends/tools_design.py +921 -0
- package/gateway/ai/backends/tools_infra.py +866 -0
- package/gateway/ai/backends/tools_real.py +766 -0
- package/gateway/ai/backends/ui_bridge.py +26 -49
- package/gateway/ai/deliberation.py +387 -0
- package/gateway/ai/ledger_manager.py +207 -0
- package/gateway/ai/server.py +630 -216
- package/package.json +1 -1
|
@@ -0,0 +1,830 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Real implementations for cost, data, and intel tools.
|
|
3
|
+
All tools work WITHOUT external integrations by default, using file-based
|
|
4
|
+
analysis and local storage. Optional cloud API integration when keys are configured.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import csv
|
|
8
|
+
import hashlib
|
|
9
|
+
import io
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
import shutil
|
|
15
|
+
import sqlite3
|
|
16
|
+
import uuid
|
|
17
|
+
from datetime import datetime, timezone
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any, Dict, List, Optional
|
|
20
|
+
|
|
21
|
+
# Module-level logger for the cost/data/intel tool family.
logger = logging.getLogger("delimit.ai.tools_data")

# Root of delimit's local state; overridable via the DELIMIT_HOME env var.
DELIMIT_HOME = Path(os.environ.get("DELIMIT_HOME", os.path.expanduser("~/.delimit")))
BACKUPS_DIR = DELIMIT_HOME / "backups"  # destination root for data_backup()
INTEL_DIR = DELIMIT_HOME / "intel"  # storage root for the intel.* tools
COST_ALERTS_FILE = DELIMIT_HOME / "cost_alerts.json"  # persisted cost_alert() list
DATASETS_FILE = INTEL_DIR / "datasets.json"  # dataset registry (intel_dataset_*)
SNAPSHOTS_DIR = INTEL_DIR / "snapshots"  # one JSON file per ingested snapshot

# Typical VPS monthly pricing estimates (USD)
VPS_COST_ESTIMATES = {
    "small": 5.0,  # 1 vCPU, 1GB RAM
    "medium": 20.0,  # 2 vCPU, 4GB RAM
    "large": 40.0,  # 4 vCPU, 8GB RAM
    "xlarge": 80.0,  # 8 vCPU, 16GB RAM
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _ensure_dir(path: Path) -> None:
|
|
40
|
+
path.mkdir(parents=True, exist_ok=True)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# ═══════════════════════════════════════════════════════════════════════
|
|
44
|
+
# COST TOOLS
|
|
45
|
+
# ═══════════════════════════════════════════════════════════════════════
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def cost_analyze(target: str = ".") -> Dict[str, Any]:
    """Analyze project costs by scanning infrastructure files.

    Heuristically estimates monthly spend for a project tree: containers
    are priced via VPS_COST_ESTIMATES, each compose service at the
    "medium" tier, and each detected PaaS config at a flat $10/month.
    Purely file-based -- no network or cloud APIs are consulted.

    Args:
        target: File-system path of the project to analyze.

    Returns:
        Analysis dict with totals, detected services and recommendations,
        or {"error": ...} when *target* does not exist.
    """
    target_path = Path(target).resolve()
    if not target_path.exists():
        return {"error": "target_not_found", "message": f"Path does not exist: {target}"}

    services = []
    cost_breakdown = []
    recommendations = []
    total_cost = 0.0

    # Scan for Dockerfiles.  Contents are cached so the recommendation pass
    # at the end does not re-read every Dockerfile from disk (previously each
    # file was read twice per any() step).
    dockerfiles = list(target_path.rglob("Dockerfile")) + list(target_path.rglob("Dockerfile.*"))
    dockerfile_contents: List[str] = []
    for df in dockerfiles:
        rel = str(df.relative_to(target_path))
        content = df.read_text(errors="ignore")
        dockerfile_contents.append(content)
        lowered = content.lower()
        # Estimate size category from base image / keywords
        size = "medium"
        if any(kw in lowered for kw in ["alpine", "slim", "distroless"]):
            size = "small"
        elif any(kw in lowered for kw in ["gpu", "cuda", "nvidia"]):
            size = "xlarge"
        est = VPS_COST_ESTIMATES[size]
        services.append({"type": "container", "file": rel, "size_estimate": size})
        cost_breakdown.append({"item": f"Container ({rel})", "monthly_usd": est})
        total_cost += est

    # Scan for docker-compose files; each detected service priced "medium".
    compose_files = (
        list(target_path.rglob("docker-compose.yml"))
        + list(target_path.rglob("docker-compose.yaml"))
        + list(target_path.rglob("compose.yml"))
        + list(target_path.rglob("compose.yaml"))
    )
    for cf in compose_files:
        try:
            content = cf.read_text(errors="ignore")
            # Count services by looking for two-space-indented service blocks
            svc_count = len(re.findall(r"^\s{2}\w[\w-]*:\s*$", content, re.MULTILINE))
            if svc_count == 0:
                # Fallback: count lines that look like service definitions
                svc_count = max(1, content.lower().count("image:"))
            rel = str(cf.relative_to(target_path))
            for i in range(svc_count):
                est = VPS_COST_ESTIMATES["medium"]
                services.append({"type": "compose_service", "file": rel, "index": i})
                cost_breakdown.append({"item": f"Compose service #{i+1} ({rel})", "monthly_usd": est})
                total_cost += est
        except Exception:
            pass  # best-effort: unreadable compose files are skipped

    # Scan for package.json (npm dependencies)
    pkg_files = list(target_path.rglob("package.json"))
    dep_count = 0
    for pf in pkg_files:
        if "node_modules" in str(pf):
            continue
        try:
            data = json.loads(pf.read_text(errors="ignore"))
            deps = len(data.get("dependencies", {}))
            dev_deps = len(data.get("devDependencies", {}))
            dep_count += deps + dev_deps
            services.append({"type": "node_project", "file": str(pf.relative_to(target_path)), "dependencies": deps, "dev_dependencies": dev_deps})
        except Exception:
            pass  # malformed manifests are ignored

    # Scan for requirements.txt (Python dependencies)
    req_files = list(target_path.rglob("requirements.txt")) + list(target_path.rglob("requirements/*.txt"))
    for rf in req_files:
        try:
            # Keep only real requirement lines (skip comments and -r/-e flags).
            lines = [l.strip() for l in rf.read_text(errors="ignore").splitlines() if l.strip() and not l.strip().startswith("#") and not l.strip().startswith("-")]
            dep_count += len(lines)
            services.append({"type": "python_project", "file": str(rf.relative_to(target_path)), "dependencies": len(lines)})
        except Exception:
            pass

    # Scan for pyproject.toml
    pyproject_files = list(target_path.rglob("pyproject.toml"))
    for pf in pyproject_files:
        if "node_modules" in str(pf):
            continue
        try:
            content = pf.read_text(errors="ignore")
            # Quoted requirement specifiers, e.g. "requests>=2".
            dep_lines = re.findall(r'^\s*"[^"]+[><=!]', content, re.MULTILINE)
            if dep_lines:
                dep_count += len(dep_lines)
                services.append({"type": "python_project", "file": str(pf.relative_to(target_path)), "dependencies": len(dep_lines)})
        except Exception:
            pass

    # Detect configured cloud providers from well-known config locations.
    cloud_providers = []
    for ap in (target_path / ".aws", Path.home() / ".aws"):
        if ap.exists():
            cloud_providers.append("aws")
            break

    for gp in (target_path / ".gcloud", Path.home() / ".config" / "gcloud"):
        if gp.exists():
            cloud_providers.append("gcp")
            break

    if (target_path / ".azure").exists() or (Path.home() / ".azure").exists():
        cloud_providers.append("azure")

    # Check for Vercel / Netlify / Railway configs
    for conf, provider in [("vercel.json", "vercel"), ("netlify.toml", "netlify"), ("railway.json", "railway"), ("fly.toml", "fly.io")]:
        if (target_path / conf).exists():
            cloud_providers.append(provider)
            services.append({"type": "paas", "provider": provider, "file": conf})
            cost_breakdown.append({"item": f"PaaS ({provider})", "monthly_usd": 10.0})
            total_cost += 10.0

    # If no services found, report that
    if not services:
        recommendations.append("No infrastructure files detected. Add Dockerfiles or deployment configs for cost estimation.")

    if dep_count > 100:
        recommendations.append(f"High dependency count ({dep_count}). Consider auditing for unused packages to reduce build times and attack surface.")
    # Fix: reuse the cached contents instead of re-reading every Dockerfile.
    if dockerfiles and not any("alpine" in c.lower() or "slim" in c.lower() for c in dockerfile_contents):
        recommendations.append("Consider using Alpine or slim base images to reduce container costs.")

    return {
        "tool": "cost.analyze",
        "target": str(target_path),
        "estimated_monthly_cost": round(total_cost, 2),
        "services_detected": len(services),
        "services": services,
        "dependency_count": dep_count,
        "cloud_providers": cloud_providers,
        "cost_breakdown": cost_breakdown,
        "recommendations": recommendations,
    }
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def cost_optimize(target: str = ".") -> Dict[str, Any]:
    """Find cost optimization opportunities in a project.

    Scans Dockerfiles, npm manifests, image assets and built bundles for
    cheap wins, returning a list of opportunities with rough USD savings.

    Args:
        target: File-system path of the project to scan.

    Returns:
        Dict with the opportunity list and total estimated savings, or
        {"error": ...} when *target* does not exist.
    """
    target_path = Path(target).resolve()
    if not target_path.exists():
        return {"error": "target_not_found", "message": f"Path does not exist: {target}"}

    opportunities = []
    estimated_savings = 0.0

    # Check for oversized Docker images (no multi-stage build)
    dockerfiles = list(target_path.rglob("Dockerfile")) + list(target_path.rglob("Dockerfile.*"))
    for df in dockerfiles:
        content = df.read_text(errors="ignore")
        rel = str(df.relative_to(target_path))
        from_count = len(re.findall(r"^FROM\s+", content, re.MULTILINE | re.IGNORECASE))

        # Fix: match AS as a whole word.  The previous substring test treated
        # any first FROM line containing the letters "AS" (e.g. an image named
        # "ubuntu-base") as a multi-stage build and wrongly skipped the hint.
        first_from_rest = content.upper().split("FROM", 1)[-1].split("\n")[0]
        if from_count == 1 and not re.search(r"\bAS\b", first_from_rest):
            opportunities.append({
                "type": "docker_multistage",
                "file": rel,
                "severity": "medium",
                "description": "Single-stage Docker build. Multi-stage builds can reduce image size by 50-80%.",
                "estimated_savings_usd": 5.0,
            })
            estimated_savings += 5.0

        if not any(kw in content.lower() for kw in ["alpine", "slim", "distroless", "scratch"]):
            opportunities.append({
                "type": "docker_base_image",
                "file": rel,
                "severity": "low",
                "description": "Using full base image. Alpine/slim variants reduce image size and pull costs.",
                "estimated_savings_usd": 2.0,
            })
            estimated_savings += 2.0

        # Check for .dockerignore next to each Dockerfile
        dockerignore = df.parent / ".dockerignore"
        if not dockerignore.exists():
            opportunities.append({
                "type": "missing_dockerignore",
                "file": rel,
                "severity": "low",
                "description": "No .dockerignore found. Build context may include unnecessary files.",
                "estimated_savings_usd": 1.0,
            })
            estimated_savings += 1.0

    # Check for unused dependencies in package.json
    pkg_files = list(target_path.rglob("package.json"))
    for pf in pkg_files:
        if "node_modules" in str(pf):
            continue
        try:
            data = json.loads(pf.read_text(errors="ignore"))
            deps = data.get("dependencies", {})
            rel = str(pf.relative_to(target_path))

            # Scan source files for import references
            src_dir = pf.parent / "src"
            if not src_dir.exists():
                src_dir = pf.parent

            # Collect source text with a list + join (linear, not quadratic
            # string concatenation).
            source_parts: List[str] = []
            for ext in ["*.js", "*.ts", "*.jsx", "*.tsx", "*.mjs"]:
                for sf in src_dir.rglob(ext):
                    if "node_modules" in str(sf) or "dist" in str(sf) or ".next" in str(sf):
                        continue
                    try:
                        source_parts.append(sf.read_text(errors="ignore") + "\n")
                    except Exception:
                        pass
            source_content = "".join(source_parts)

            if source_content:
                potentially_unused = []
                for dep_name in deps:
                    # Check common import patterns
                    patterns = [
                        dep_name,
                        dep_name.replace("-", "_"),
                        dep_name.split("/")[-1],
                    ]
                    if not any(p in source_content for p in patterns):
                        potentially_unused.append(dep_name)

                if potentially_unused:
                    opportunities.append({
                        "type": "unused_npm_dependencies",
                        "file": rel,
                        "severity": "medium",
                        "description": f"Potentially unused dependencies: {', '.join(potentially_unused[:10])}",
                        "count": len(potentially_unused),
                        "packages": potentially_unused[:10],
                        "estimated_savings_usd": len(potentially_unused) * 0.5,
                    })
                    estimated_savings += len(potentially_unused) * 0.5
        except Exception:
            pass  # best-effort: malformed manifests are skipped

    # Check for uncompressed assets
    large_assets = []
    for ext in ["*.png", "*.jpg", "*.jpeg", "*.gif", "*.bmp", "*.svg"]:
        for af in target_path.rglob(ext):
            if "node_modules" in str(af) or ".git" in str(af):
                continue
            try:
                size = af.stat().st_size
                if size > 500_000:  # > 500KB
                    large_assets.append({"file": str(af.relative_to(target_path)), "size_bytes": size})
            except Exception:
                pass

    if large_assets:
        opportunities.append({
            "type": "uncompressed_assets",
            "severity": "low",
            "description": f"Found {len(large_assets)} large image files (>500KB). Consider compression or WebP conversion.",
            "files": large_assets[:10],
            "estimated_savings_usd": 2.0,
        })
        estimated_savings += 2.0

    # Check for uncompressed JS/CSS bundles
    for ext in ["*.js", "*.css"]:
        for bf in target_path.rglob(ext):
            if "node_modules" in str(bf) or ".git" in str(bf):
                continue
            parts = str(bf).lower()
            if any(d in parts for d in ["/dist/", "/build/", "/public/", "/.next/"]):
                try:
                    size = bf.stat().st_size
                    if size > 1_000_000:  # > 1MB
                        gz_path = Path(str(bf) + ".gz")
                        br_path = Path(str(bf) + ".br")
                        if not gz_path.exists() and not br_path.exists():
                            opportunities.append({
                                "type": "uncompressed_bundle",
                                "file": str(bf.relative_to(target_path)),
                                "severity": "medium",
                                "size_bytes": size,
                                "description": f"Large uncompressed bundle ({size // 1024}KB). Enable gzip/brotli compression.",
                                "estimated_savings_usd": 1.0,
                            })
                            estimated_savings += 1.0
                except Exception:
                    pass

    return {
        "tool": "cost.optimize",
        "target": str(target_path),
        "optimization_opportunities": opportunities,
        "opportunity_count": len(opportunities),
        "estimated_savings": round(estimated_savings, 2),
    }
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def cost_alert(action: str = "list", name: Optional[str] = None,
               threshold: Optional[float] = None, alert_id: Optional[str] = None) -> Dict[str, Any]:
    """Manage file-based cost alerts."""
    _ensure_dir(DELIMIT_HOME)

    # Load whatever alert list is currently persisted, tolerating corruption.
    alerts: List[Dict[str, Any]] = []
    if COST_ALERTS_FILE.exists():
        try:
            alerts = json.loads(COST_ALERTS_FILE.read_text())
        except Exception:
            alerts = []

    def _active_count() -> int:
        # Alerts with no explicit flag count as active.
        return sum(1 for a in alerts if a.get("active", True))

    if action == "list":
        return {
            "tool": "cost.alert",
            "action": "list",
            "alerts": alerts,
            "active_count": _active_count(),
        }

    if action == "create":
        if not name or threshold is None:
            return {"error": "missing_params", "message": "create requires 'name' and 'threshold'"}
        new_alert = {
            "id": str(uuid.uuid4())[:8],
            "name": name,
            "threshold_usd": threshold,
            "active": True,
            "created_at": datetime.now(timezone.utc).isoformat(),
        }
        alerts.append(new_alert)
        COST_ALERTS_FILE.write_text(json.dumps(alerts, indent=2))
        return {
            "tool": "cost.alert",
            "action": "create",
            "alert": new_alert,
            "alerts": alerts,
            "active_count": _active_count(),
        }

    if action == "delete":
        if not alert_id:
            return {"error": "missing_params", "message": "delete requires 'alert_id'"}
        remaining = [a for a in alerts if a.get("id") != alert_id]
        if len(remaining) == len(alerts):
            return {"error": "not_found", "message": f"Alert {alert_id} not found"}
        alerts = remaining
        COST_ALERTS_FILE.write_text(json.dumps(alerts, indent=2))
        return {
            "tool": "cost.alert",
            "action": "delete",
            "deleted_id": alert_id,
            "alerts": alerts,
            "active_count": _active_count(),
        }

    if action == "toggle":
        if not alert_id:
            return {"error": "missing_params", "message": "toggle requires 'alert_id'"}
        match = next((a for a in alerts if a.get("id") == alert_id), None)
        if match is None:
            return {"error": "not_found", "message": f"Alert {alert_id} not found"}
        match["active"] = not match.get("active", True)
        COST_ALERTS_FILE.write_text(json.dumps(alerts, indent=2))
        return {
            "tool": "cost.alert",
            "action": "toggle",
            "alert_id": alert_id,
            "alerts": alerts,
            "active_count": _active_count(),
        }

    return {"error": "invalid_action", "message": f"Unknown action: {action}. Use list/create/delete/toggle."}
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
# ═══════════════════════════════════════════════════════════════════════
|
|
418
|
+
# DATA TOOLS
|
|
419
|
+
# ═══════════════════════════════════════════════════════════════════════
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def data_validate(target: str = ".") -> Dict[str, Any]:
    """Validate data files in a directory.

    Checks JSON files for parse errors, CSV files for column-count
    consistency against the header row, and SQLite databases via
    PRAGMA integrity_check.  *target* may be a single file or a tree.

    Args:
        target: Path to a file or directory of data files.

    Returns:
        Dict with valid/invalid counts and a per-file issue list, or
        {"error": ...} when *target* does not exist.
    """
    target_path = Path(target).resolve()
    if not target_path.exists():
        return {"error": "target_not_found", "message": f"Path does not exist: {target}"}

    files_checked = 0
    valid = 0
    invalid = 0
    issues = []

    # If target is a single file, validate just that
    if target_path.is_file():
        file_list = [target_path]
    else:
        file_list = []
        for ext in ["*.json", "*.csv", "*.sqlite", "*.sqlite3", "*.db"]:
            for f in target_path.rglob(ext):
                if "node_modules" in str(f) or ".git" in str(f):
                    continue
                file_list.append(f)

    for fpath in file_list:
        files_checked += 1
        suffix = fpath.suffix.lower()
        rel = str(fpath.relative_to(target_path)) if target_path.is_dir() else fpath.name

        if suffix == ".json":
            try:
                content = fpath.read_text(errors="ignore")
                json.loads(content)
                valid += 1
            except json.JSONDecodeError as e:
                invalid += 1
                issues.append({"file": rel, "type": "json_parse_error", "message": str(e)})
            except Exception as e:
                invalid += 1
                issues.append({"file": rel, "type": "read_error", "message": str(e)})

        elif suffix == ".csv":
            try:
                content = fpath.read_text(errors="ignore")
                reader = csv.reader(io.StringIO(content))
                rows = list(reader)
                if not rows:
                    issues.append({"file": rel, "type": "empty_csv", "message": "CSV file is empty"})
                    invalid += 1
                else:
                    header_len = len(rows[0])
                    # Row numbers start at 2 to match spreadsheet conventions
                    # (row 1 is the header).
                    inconsistent_rows = [i for i, row in enumerate(rows[1:], start=2) if len(row) != header_len]
                    if inconsistent_rows:
                        invalid += 1
                        issues.append({
                            "file": rel,
                            "type": "csv_column_mismatch",
                            "message": f"Expected {header_len} columns, found mismatches on rows: {inconsistent_rows[:10]}",
                            "expected_columns": header_len,
                            "mismatched_rows": inconsistent_rows[:10],
                        })
                    else:
                        valid += 1
            except Exception as e:
                invalid += 1
                issues.append({"file": rel, "type": "csv_error", "message": str(e)})

        elif suffix in (".sqlite", ".sqlite3", ".db"):
            conn = None
            try:
                conn = sqlite3.connect(str(fpath))
                cursor = conn.execute("PRAGMA integrity_check")
                result = cursor.fetchone()
                if result and result[0] == "ok":
                    # Also get table count
                    tables = conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
                    valid += 1
                    if not tables:
                        issues.append({"file": rel, "type": "empty_database", "message": "SQLite database has no tables", "severity": "info"})
                else:
                    invalid += 1
                    issues.append({"file": rel, "type": "sqlite_integrity_failed", "message": str(result)})
            except Exception as e:
                invalid += 1
                issues.append({"file": rel, "type": "sqlite_error", "message": str(e)})
            finally:
                # Fix: previously the connection leaked when integrity_check
                # or the table query raised; always close it.
                if conn is not None:
                    conn.close()

    return {
        "tool": "data.validate",
        "target": str(target_path),
        "files_checked": files_checked,
        "valid": valid,
        "invalid": invalid,
        "issues": issues,
    }
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def data_migrate(target: str = ".") -> Dict[str, Any]:
    """Check for migration files and report status."""
    target_path = Path(target).resolve()
    if not target_path.exists():
        return {"error": "target_not_found", "message": f"Path does not exist: {target}"}

    framework_detected = None
    migrations_found: List[Dict[str, Any]] = []
    pending = 0
    status = "no_migrations"

    def _file_entry(mf: Path) -> Dict[str, Any]:
        # Common record shape for one discovered migration file.
        return {
            "file": str(mf.relative_to(target_path)),
            "name": mf.stem,
            "modified": datetime.fromtimestamp(mf.stat().st_mtime, tz=timezone.utc).isoformat(),
        }

    # Check for Alembic (Python/SQLAlchemy)
    alembic_dir = target_path / "alembic"
    if not alembic_dir.exists():
        alembic_dir = target_path / "migrations" / "versions"
    if alembic_dir.exists():
        framework_detected = "alembic"
        for mf in sorted(alembic_dir.glob("*.py")):
            if mf.name not in ("__init__.py", "env.py"):
                migrations_found.append(_file_entry(mf))

    # Check for Django migrations (any package dir literally named "migrations")
    for md in target_path.rglob("migrations"):
        md_str = str(md)
        if "node_modules" in md_str or ".git" in md_str or "alembic" in md_str:
            continue
        if not (md / "__init__.py").exists():
            continue
        framework_detected = framework_detected or "django"
        for mf in sorted(md.glob("*.py")):
            if mf.name != "__init__.py":
                migrations_found.append(_file_entry(mf))

    # Check for Prisma migrations
    prisma_dir = target_path / "prisma" / "migrations"
    if prisma_dir.exists():
        framework_detected = framework_detected or "prisma"
        for mdir in sorted(prisma_dir.iterdir()):
            if not mdir.is_dir() or mdir.name == "migration_lock.toml":
                continue
            sql_file = mdir / "migration.sql"
            migrations_found.append({
                "file": str(mdir.relative_to(target_path)),
                "name": mdir.name,
                "has_sql": sql_file.exists(),
                "modified": datetime.fromtimestamp(mdir.stat().st_mtime, tz=timezone.utc).isoformat(),
            })

    # Check for Knex/Sequelize migrations (only when nothing else matched)
    knex_dir = target_path / "migrations"
    if knex_dir.exists() and not framework_detected:
        js_migrations = sorted(list(knex_dir.glob("*.js")) + list(knex_dir.glob("*.ts")))
        if js_migrations:
            framework_detected = "knex/sequelize"
            for mf in js_migrations:
                migrations_found.append(_file_entry(mf))

    last_applied = migrations_found[-1]["name"] if migrations_found else None
    if migrations_found:
        status = "migrations_found"
        # Without a DB connection we can only enumerate files, not compute
        # applied vs pending, so pending stays 0.
        pending = 0

    return {
        "tool": "data.migrate",
        "target": str(target_path),
        "framework_detected": framework_detected,
        "migrations_found": len(migrations_found),
        "migrations": migrations_found,
        "pending_migrations": pending,
        "last_migration": last_applied,
        "status": status,
        "note": "Connect a database to determine applied vs pending migrations." if migrations_found else None,
    }
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
def data_backup(target: str = ".") -> Dict[str, Any]:
    """Back up SQLite and JSON data files to ~/.delimit/backups/."""
    target_path = Path(target).resolve()
    if not target_path.exists():
        return {"error": "target_not_found", "message": f"Path does not exist: {target}"}

    # Each backup run gets its own timestamped directory.
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    backup_dir = BACKUPS_DIR / stamp
    _ensure_dir(backup_dir)

    # Well-known config manifests that should never be treated as data.
    config_names = ("package.json", "package-lock.json", "tsconfig.json", "jsconfig.json", "composer.json")

    # Collect data files
    if target_path.is_file():
        data_files = [target_path]
    else:
        data_files = []
        for pattern in ("*.sqlite", "*.sqlite3", "*.db", "*.json"):
            for candidate in target_path.rglob(pattern):
                path_str = str(candidate)
                if "node_modules" in path_str or ".git" in path_str or str(DELIMIT_HOME) in path_str:
                    continue
                # Skip package.json, tsconfig.json etc -- only back up data files
                if candidate.name in config_names:
                    continue
                data_files.append(candidate)

    files_backed_up = []
    total_size = 0
    for src in data_files:
        try:
            size = src.stat().st_size
            rel = src.relative_to(target_path) if target_path.is_dir() else Path(src.name)
            dest = backup_dir / rel
            _ensure_dir(dest.parent)
            shutil.copy2(str(src), str(dest))
            files_backed_up.append({"file": str(rel), "size_bytes": size})
            total_size += size
        except Exception as e:
            files_backed_up.append({"file": str(src.name), "error": str(e)})

    return {
        "tool": "data.backup",
        "target": str(target_path),
        "files_backed_up": len([f for f in files_backed_up if "error" not in f]),
        "files": files_backed_up,
        "backup_path": str(backup_dir),
        "total_size": total_size,
        "total_size_human": _human_size(total_size),
    }
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def _human_size(size_bytes: int) -> str:
|
|
662
|
+
for unit in ["B", "KB", "MB", "GB"]:
|
|
663
|
+
if size_bytes < 1024:
|
|
664
|
+
return f"{size_bytes:.1f} {unit}"
|
|
665
|
+
size_bytes /= 1024
|
|
666
|
+
return f"{size_bytes:.1f} TB"
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
# ═══════════════════════════════════════════════════════════════════════
|
|
670
|
+
# INTEL TOOLS
|
|
671
|
+
# ═══════════════════════════════════════════════════════════════════════
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
def _load_datasets() -> List[Dict[str, Any]]:
    """Read the dataset registry from disk; an absent or unreadable file yields []."""
    _ensure_dir(INTEL_DIR)
    if not DATASETS_FILE.exists():
        return []
    try:
        return json.loads(DATASETS_FILE.read_text())
    except Exception:
        return []
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
def _save_datasets(datasets: List[Dict[str, Any]]) -> None:
    """Persist the dataset registry as pretty-printed JSON."""
    _ensure_dir(INTEL_DIR)
    serialized = json.dumps(datasets, indent=2)
    DATASETS_FILE.write_text(serialized)
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
def intel_snapshot_ingest(data: Dict[str, Any], provenance: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Save JSON data with provenance metadata."""
    _ensure_dir(SNAPSHOTS_DIR)

    snapshot_id = str(uuid.uuid4())[:12]
    timestamp = datetime.now(timezone.utc).isoformat()
    # Checksum of the canonical (sorted-key) serialization, truncated for brevity.
    digest = hashlib.sha256(json.dumps(data, sort_keys=True).encode()).hexdigest()[:16]

    record = {
        "id": snapshot_id,
        "data": data,
        "provenance": provenance or {},
        "ingested_at": timestamp,
        "checksum": digest,
    }

    out_path = SNAPSHOTS_DIR / f"{snapshot_id}.json"
    out_path.write_text(json.dumps(record, indent=2))

    return {
        "tool": "intel.snapshot_ingest",
        "snapshot_id": snapshot_id,
        "ingested_at": timestamp,
        "checksum": digest,
        "storage_path": str(out_path),
    }
|
|
714
|
+
|
|
715
|
+
|
|
716
|
+
def intel_query(dataset_id: Optional[str] = None, query: str = "", parameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Search saved snapshots by keyword/date."""
    _ensure_dir(SNAPSHOTS_DIR)

    params = parameters or {}
    date_from = params.get("date_from")
    date_to = params.get("date_to")
    limit = params.get("limit", 50)
    needle = query.lower()

    hits: List[Dict[str, Any]] = []
    # Walk snapshot files newest-first (names sort lexicographically).
    for snap_file in sorted(SNAPSHOTS_DIR.glob("*.json"), reverse=True):
        try:
            snapshot = json.loads(snap_file.read_text())
        except Exception:
            continue  # skip unreadable snapshot files

        # Filter by dataset_id if specified
        provenance = snapshot.get("provenance", {})
        if dataset_id and provenance.get("dataset_id") != dataset_id:
            continue

        # ISO-8601 strings compare chronologically, so plain string
        # comparison suffices for the date window.
        ingested = snapshot.get("ingested_at", "")
        if (date_from and ingested < date_from) or (date_to and ingested > date_to):
            continue

        # Keyword search across the serialized payload
        payload = snapshot.get("data", {})
        if needle and needle not in json.dumps(payload).lower():
            continue

        hits.append({
            "snapshot_id": snapshot.get("id"),
            "ingested_at": ingested,
            "provenance": provenance,
            "data_preview": _truncate_data(payload),
        })

        if len(hits) >= limit:
            break

    return {
        "tool": "intel.query",
        "query": query,
        "dataset_id": dataset_id,
        "results": hits,
        "total_results": len(hits),
    }
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
def _truncate_data(data: Any, max_len: int = 200) -> Any:
|
|
770
|
+
"""Truncate data for preview."""
|
|
771
|
+
s = json.dumps(data)
|
|
772
|
+
if len(s) <= max_len:
|
|
773
|
+
return data
|
|
774
|
+
return {"_preview": s[:max_len] + "...", "_truncated": True}
|
|
775
|
+
|
|
776
|
+
|
|
777
|
+
def intel_dataset_register(name: str, schema: Optional[Dict[str, Any]] = None,
                           description: Optional[str] = None) -> Dict[str, Any]:
    """Register a new dataset."""
    datasets = _load_datasets()

    # Names are unique; re-registering reports the existing id as an error.
    existing = next((ds for ds in datasets if ds.get("name") == name), None)
    if existing is not None:
        return {"error": "duplicate", "message": f"Dataset '{name}' already registered", "dataset_id": existing["id"]}

    now = datetime.now(timezone.utc).isoformat()
    record = {
        "id": str(uuid.uuid4())[:12],
        "name": name,
        "schema": schema or {},
        "description": description or "",
        "frozen": False,
        "created_at": now,
        "updated_at": now,
    }
    datasets.append(record)
    _save_datasets(datasets)

    return {
        "tool": "intel.dataset_register",
        "dataset": record,
    }
|
|
804
|
+
|
|
805
|
+
|
|
806
|
+
def intel_dataset_list() -> Dict[str, Any]:
    """List all registered datasets."""
    registry = _load_datasets()
    return {
        "tool": "intel.dataset_list",
        "datasets": registry,
        "total": len(registry),
    }
|
|
814
|
+
|
|
815
|
+
|
|
816
|
+
def intel_dataset_freeze(dataset_id: str) -> Dict[str, Any]:
    """Mark a dataset as immutable."""
    datasets = _load_datasets()

    entry = next((ds for ds in datasets if ds.get("id") == dataset_id), None)
    if entry is None:
        return {"error": "not_found", "message": f"Dataset {dataset_id} not found"}

    # Freezing is idempotent: a second call reports the existing state.
    if entry.get("frozen"):
        return {"tool": "intel.dataset_freeze", "dataset_id": dataset_id, "status": "already_frozen"}

    frozen_at = datetime.now(timezone.utc).isoformat()
    entry["frozen"] = True
    entry["frozen_at"] = frozen_at
    entry["updated_at"] = frozen_at
    _save_datasets(datasets)
    return {"tool": "intel.dataset_freeze", "dataset_id": dataset_id, "status": "frozen", "frozen_at": frozen_at}