crewlyze 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +12 -0
- package/.gitattributes +2 -0
- package/CHANGELOG.md +86 -0
- package/Dockerfile +21 -0
- package/LICENSE +21 -0
- package/README.md +139 -0
- package/USAGE.md +106 -0
- package/agents/__init__.py +0 -0
- package/agents/cleaner.py +38 -0
- package/agents/insights.py +44 -0
- package/agents/relation.py +36 -0
- package/agents/visualizer.py +41 -0
- package/assets/badge_crewai.svg +4 -0
- package/assets/badge_matplotlib.svg +4 -0
- package/assets/badge_ollama.svg +4 -0
- package/assets/badge_pandas.svg +4 -0
- package/assets/badge_seaborn.svg +4 -0
- package/assets/branding_image.png +0 -0
- package/assets/complete_workflow.svg +216 -0
- package/assets/favicon.png +0 -0
- package/assets/logo.png +0 -0
- package/assets/stars.svg +12 -0
- package/bin/crewlyze.js +79 -0
- package/config/README.md +129 -0
- package/config/__init__.py +1 -0
- package/config/context.py +16 -0
- package/config/llm_config.py +300 -0
- package/config/metrics_tracker.py +70 -0
- package/crew.py +870 -0
- package/crewlyze-3.1.0.tgz +0 -0
- package/fix_syntax.py +54 -0
- package/main.py +1279 -0
- package/package.json +22 -0
- package/pyproject.toml +32 -0
- package/requirements.txt +33 -0
- package/tools/__init__.py +0 -0
- package/tools/dataset_tools.py +803 -0
- package/ui/__init__.py +3 -0
- package/ui/copilot.py +200 -0
- package/ui/export.py +800 -0
- package/update_appjs.py +54 -0
- package/update_llm.py +21 -0
- package/update_main.py +20 -0
- package/web/app.js +3142 -0
- package/web/index.html +1105 -0
- package/web/style.css +2561 -0
- package/workflows/__init__.py +0 -0
- package/workflows/pipeline.py +254 -0
package/main.py
ADDED
|
@@ -0,0 +1,1279 @@
|
|
|
1
|
+
# Crewlyze
|
|
2
|
+
# Copyright (c) 2025 Sowmiyan S
|
|
3
|
+
# Licensed under the MIT License
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
FastAPI Server backend for the Crewlyze application.
|
|
7
|
+
Serves static HTML/JS/CSS assets and exposes REST APIs + Server-Sent Events (SSE)
|
|
8
|
+
for streaming real-time analysis logs.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
# Switch the standard streams to UTF-8 output. Best-effort: if either call
# fails (for example because the stream object has no ``reconfigure`` method)
# the error is ignored and the current encoding is kept.
try:
    sys.stdout.reconfigure(encoding='utf-8')
    sys.stderr.reconfigure(encoding='utf-8')
except Exception:
    pass
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import re
|
|
22
|
+
import uuid
|
|
23
|
+
import asyncio
|
|
24
|
+
import shutil
|
|
25
|
+
import threading
|
|
26
|
+
import time
|
|
27
|
+
import zipfile
|
|
28
|
+
from io import BytesIO
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import Optional
|
|
31
|
+
|
|
32
|
+
import pandas as pd
|
|
33
|
+
from tools.dataset_tools import read_csv_robust
|
|
34
|
+
|
|
35
|
+
# Monkey patch crewai caching to avoid Nvidia NIM / LiteLLM validation errors:
# ``crewai.llms.cache.mark_cache_breakpoint`` is replaced by an identity
# function that returns the message unchanged. Best-effort -- if the module
# cannot be imported or patched, the failure is ignored.
try:
    import crewai.llms.cache as _crewai_cache
    _crewai_cache.mark_cache_breakpoint = lambda msg: msg
except Exception:
    pass
|
|
41
|
+
|
|
42
|
+
from fastapi import FastAPI, File, UploadFile, Form, BackgroundTasks, HTTPException
|
|
43
|
+
from fastapi.responses import StreamingResponse, FileResponse, HTMLResponse
|
|
44
|
+
from fastapi.staticfiles import StaticFiles
|
|
45
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
46
|
+
|
|
47
|
+
# Regex matching ANSI terminal escape sequences: ESC followed either by one
# character in the ranges ``@-Z`` / ``\-_``, or by a ``[``-introduced sequence
# (any number of ``0-?`` bytes, any number of `` -/`` bytes, one ``@-~`` byte).
ANSI_ESCAPE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')

# Per-session log-stream state, keyed by session ID. Each value is a dict whose
# "in_prompt" flag is True while lines of a prompt block are being ignored.
log_stream_states: dict[str, dict[str, bool]] = {}
|
|
52
|
+
|
|
53
|
+
def clean_log_message(line: str, session_id: Optional[str] = None) -> Optional[str]:
    """Turn one raw log line into a display-ready message, or None to drop it.

    ANSI escape sequences are removed and the line is trimmed. Blank lines,
    lines containing a known noise keyword, prompt-block contents (tracked per
    ``session_id`` in ``log_stream_states``) and ``[DEBUG]:`` / ``[INFO]:``
    lines that do not announce a working agent are dropped. Thought / Action /
    Action Input / Response / Observation lines are re-labelled, tool inputs
    and responses are cut to 150 characters, and lines mentioning "error" or
    "warning" get a matching tag. Everything else is returned trimmed.
    """
    def _clip(value: str) -> str:
        # Long tool inputs/outputs are shortened to 150 characters plus "...".
        if len(value) > 150:
            return value[:150] + "..."
        return value

    text = ANSI_ESCAPE.sub('', line).strip()
    if not text:
        return None

    lowered = text.lower()

    # Substrings that mark system/library noise the user should never see.
    noisy_markers = (
        "scriptruncontext",
        "telemetry_opt_out",
        "otel_sdk_disabled",
        "opentelemetry",
        "urllib3",
        "connectionpool",
        "http/1.1",
        "httpx",
        "backoff",
        "requests.packages",
        "missing scriptruncontext",
        "openai-api-keyword",
        "http request",
        "cooldown",
        "rate limit",
        "max_tokens",
    )
    for marker in noisy_markers:
        if marker in lowered:
            return None

    # Prompt-block suppression is stateful and only applies to named sessions.
    if session_id:
        stream_state = log_stream_states.setdefault(session_id, {"in_prompt": False})

        if "prompt after formatting:" in lowered or "use the following format:" in lowered:
            stream_state["in_prompt"] = True
            return None

        if stream_state["in_prompt"]:
            resume_markers = (
                "thought:",
                "action:",
                "action input:",
                "response:",
                "observation:",
                "entering new",
                "finished chain",
            )
            if not any(marker in lowered for marker in resume_markers):
                return None  # Still inside the prompt block.
            stream_state["in_prompt"] = False

    # Raw crewai/langchain debug lines: keep only the "working agent" notice.
    if text.startswith(("[DEBUG]:", "[INFO]:")):
        if "working agent" not in lowered:
            return None
        agent_name = text.split(":", 2)[-1].strip()
        return f"[Agent] {agent_name} is active..."

    if "entering new crewagentexecutor chain" in lowered:
        return "[Task] Starting agent execution task..."
    if "finished chain" in lowered:
        return "[Task] Execution task completed."

    if text.startswith("Thought:"):
        thought = text[len("Thought:"):].strip()
        return f"[Thought] {thought}"

    if text.startswith("Action:"):
        action = text[len("Action:"):].strip()
        return f"[Calling Tool] {action}"

    if text.startswith("Action Input:"):
        tool_input = _clip(text[len("Action Input:"):].strip())
        return f"[Input] {tool_input}"

    if text.startswith(("Response:", "Observation:")):
        tool_output = _clip(text.partition(":")[2].strip())
        return f"[Tool Response] {tool_output}"

    # "error" takes precedence over "warning" when both words appear.
    if "error" in lowered:
        return f"[Error] {text}"
    if "warning" in lowered:
        return f"[Warning] {text}"

    return text
|
|
146
|
+
|
|
147
|
+
# Core analysis engines -- imported lazily so the server boots even if crewai
# has install issues on this Python version. The actual ImportError surfaces
# only when analysis is triggered. Each placeholder stays None until
# _load_crew() binds it to the imported callable.
_run_crew = None
_apply_runtime_llm_settings = None
_validate_llm_connection = None
_run_copilot_query = None
_export_pdf = None
|
|
155
|
+
|
|
156
|
+
def _load_crew():
    """Bind the lazily imported analysis entry points to their module-level slots.

    Does nothing once ``_run_crew`` has been populated. An import error raised
    by any of the imports propagates to the caller, and in that case none of
    the module-level slots are modified.
    """
    global _run_crew, _apply_runtime_llm_settings, _validate_llm_connection
    global _run_copilot_query, _export_pdf

    if _run_crew is not None:
        return

    # Import everything into locals first so a failing import leaves every
    # global slot untouched.
    from crew import run_crew
    from config.llm_config import apply_runtime_llm_settings, validate_llm_connection
    from ui.copilot import run_copilot_query
    from ui.export import export_pdf

    (
        _run_crew,
        _apply_runtime_llm_settings,
        _validate_llm_connection,
        _run_copilot_query,
        _export_pdf,
    ) = (
        run_crew,
        apply_runtime_llm_settings,
        validate_llm_connection,
        run_copilot_query,
        export_pdf,
    )
|
|
169
|
+
|
|
170
|
+
# Set the telemetry opt-out flags in the process environment at import time.
# NOTE(review): presumably read by crewai / OpenTelemetry to disable telemetry
# and thereby silence the related warnings -- confirm against those libraries.
os.environ["CREWAI_TELEMETRY_OPT_OUT"] = "true"
os.environ["OTEL_SDK_DISABLED"] = "true"
|
|
173
|
+
|
|
174
|
+
# The FastAPI application object; the middleware and route decorators below
# attach to it.
app = FastAPI(
    title="Crewlyze API",
    description="Autonomous Multi-Agent Business Intelligence and Data Engineering Platform",
    version="3.1.0"
)
|
|
179
|
+
|
|
180
|
+
# CORS policy. Allowed origins come from the optional CREWLYZE_CORS_ORIGINS
# environment variable (comma-separated); the default stays "*" for local
# development flexibility. Credentialed requests are only enabled for an
# explicit origin list: pairing a wildcard origin with credentials would let
# any website issue authenticated cross-origin requests, and the CORS
# specification does not allow "*" on credentialed responses.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CREWLYZE_CORS_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
188
|
+
|
|
189
|
+
# ---------------------------------------------------------------------------
|
|
190
|
+
# State & Directory Setup
|
|
191
|
+
# ---------------------------------------------------------------------------
|
|
192
|
+
|
|
193
|
+
# Root of the per-user storage area (~/.crewlyze).
USER_HOME = Path.home() / ".crewlyze"
# Data directory; overridable through the CREWLYZE_DATA_DIR environment variable.
DATA_DIR = Path(os.getenv("CREWLYZE_DATA_DIR", str(USER_HOME / "data")))
# One sub-directory per session/project lives under DATA_DIR/sessions.
SESSIONS_DIR = DATA_DIR / "sessions"
# Outputs directory; overridable through CREWLYZE_OUTPUTS_DIR.
OUTPUTS_DIR = Path(os.getenv("CREWLYZE_OUTPUTS_DIR", str(USER_HOME / "outputs")))

# Create the directory tree at import time (no error if it already exists).
for path in (DATA_DIR, SESSIONS_DIR, OUTPUTS_DIR):
    path.mkdir(exist_ok=True, parents=True)
|
|
200
|
+
|
|
201
|
+
def is_safe_id(id_str: str) -> bool:
    """Return True if *id_str* consists only of ASCII letters, digits, ``_`` or ``-``.

    Used to reject identifiers that could be abused for path traversal. The
    whole string must match: ``re.fullmatch`` is used because ``$`` in a
    ``re.match`` pattern also matches just before a trailing newline, which
    would let ``"abc\\n"`` through. Empty, ``None`` and non-string values are
    rejected.
    """
    if not isinstance(id_str, str) or not id_str:
        return False
    return re.fullmatch(r"[a-zA-Z0-9_-]+", id_str) is not None
|
|
206
|
+
|
|
207
|
+
def is_safe_filename(filename: str) -> bool:
    """Return True if *filename* is a plain file name made of allowed characters.

    Rejected: empty or non-string values, the name ``"."``, anything
    containing ``..``, a path separator (``/`` or ``\\``) or a NUL byte, and
    anything with a character outside the allow-list below. The allow-list
    covers letters, digits, spaces, dashes, dots, underscores, parentheses,
    brackets and the common special symbols that appear in column names.

    ``re.fullmatch`` is used so that a trailing newline cannot slip past the
    end-of-string anchor.
    """
    if not isinstance(filename, str) or not filename:
        return False
    if filename == "." or ".." in filename:
        return False
    if "/" in filename or "\\" in filename or "\0" in filename:
        return False
    return re.fullmatch(r"[a-zA-Z0-9_\-. ()\[\]$,%&+@=;'~#]+", filename) is not None
|
|
217
|
+
|
|
218
|
+
def validate_project_id(project_id: str) -> str:
    """Return *project_id* unchanged when it passes ``is_safe_id``.

    Raises:
        HTTPException: status 400 when the ID is not a safe identifier.
    """
    if is_safe_id(project_id):
        return project_id
    raise HTTPException(status_code=400, detail="Invalid project ID.")
|
|
223
|
+
|
|
224
|
+
def get_safe_session_dir(project_id: str) -> Path:
    """Return the resolved session directory for *project_id* under SESSIONS_DIR.

    The directory is not created. Raises HTTPException (400) when the ID is
    invalid or when the resolved path falls outside the resolved SESSIONS_DIR.
    """
    sessions_root = SESSIONS_DIR.resolve()
    candidate = (sessions_root / validate_project_id(project_id)).resolve()
    # Resolving first means symlinks cannot be used to escape the root.
    if not candidate.is_relative_to(sessions_root):
        raise HTTPException(status_code=400, detail="Path traversal detected.")
    return candidate
|
|
233
|
+
|
|
234
|
+
def get_safe_output_dir(project_id: str) -> Path:
    """Return the resolved output directory for *project_id* under OUTPUTS_DIR.

    The directory is not created. Raises HTTPException (400) when the ID is
    invalid or when the resolved path falls outside the resolved OUTPUTS_DIR.
    """
    outputs_root = OUTPUTS_DIR.resolve()
    candidate = (outputs_root / validate_project_id(project_id)).resolve()
    # Resolving first means symlinks cannot be used to escape the root.
    if not candidate.is_relative_to(outputs_root):
        raise HTTPException(status_code=400, detail="Path traversal detected.")
    return candidate
|
|
243
|
+
|
|
244
|
+
# Serializes reads and writes of each project's metadata.json within this
# process. threading.Lock is not re-entrant, so it must not be held while
# calling another function that acquires it.
_metadata_lock = threading.Lock()
|
|
245
|
+
|
|
246
|
+
def save_project_metadata(project_id: str, meta: dict):
    """Persist *meta* as ``metadata.json`` in the project's session directory.

    The session directory is created if missing. The metadata is serialized
    before any file is touched and then written to a temporary file that
    atomically replaces ``metadata.json``, so a serialization error or an
    interrupted write can no longer leave a truncated metadata file behind.

    Raises:
        HTTPException: 400 (from ``get_safe_session_dir``) for an unsafe ID.
        TypeError / ValueError: if *meta* is not JSON-serializable.
        OSError: if the file cannot be written.
    """
    session_dir = get_safe_session_dir(project_id)
    session_dir.mkdir(parents=True, exist_ok=True)

    # Serialize up front: a failure here must not clobber the existing file.
    payload = json.dumps(meta, indent=2)

    metadata_path = session_dir / "metadata.json"
    tmp_path = session_dir / "metadata.json.tmp"
    with _metadata_lock:
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.write(payload)
        os.replace(tmp_path, metadata_path)
|
|
254
|
+
|
|
255
|
+
def _results_recorded_error(results_path: Path) -> bool:
    """Return True if *results_path* holds a JSON object with an "error" key."""
    try:
        with open(results_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        return False
    return isinstance(data, dict) and "error" in data


def get_project_metadata(project_id: str) -> dict:
    """Load a project's metadata, synthesizing defaults and refreshing the thumbnail.

    Returns ``{}`` when the session directory does not exist. Otherwise
    ``metadata.json`` is read under the metadata lock; a missing, unreadable,
    corrupt or non-object file is treated as absent and default metadata is
    derived from the files in the session directory. In that fallback the
    status is "failed" when ``results.json`` records an error, "completed"
    when results or the done sentinel exist, and "idle" otherwise.

    Side effect: the ``thumbnail`` entry is recomputed to point at the most
    recently modified PNG in the project's output directory (or ``None``), and
    the metadata is saved whenever that value changed.

    Raises:
        HTTPException: 400 for an unsafe *project_id*.
    """
    import urllib.parse

    session_dir = get_safe_session_dir(project_id)
    if not session_dir.exists():
        return {}

    metadata_path = session_dir / "metadata.json"
    meta = {}
    with _metadata_lock:
        if metadata_path.exists():
            try:
                with open(metadata_path, "r", encoding="utf-8") as f:
                    loaded = json.load(f)
            except (OSError, ValueError):
                # Deliberate best-effort: unreadable or corrupt metadata is
                # rebuilt from defaults below instead of failing the request.
                loaded = None
            if isinstance(loaded, dict):
                meta = loaded

    # Default metadata if not present or corrupt (compatibility check).
    if not meta:
        filename = "dataset.csv"
        upload_path = session_dir / "original_upload.csv"
        size = upload_path.stat().st_size if upload_path.exists() else 0

        results_path = session_dir / "results.json"
        if results_path.exists():
            # A failed pipeline run also writes results.json ({"error": ...}).
            status = "failed" if _results_recorded_error(results_path) else "completed"
        elif (session_dir / "done.txt").exists():
            status = "completed"
        else:
            status = "idle"

        meta = {
            "id": project_id,
            "name": f"Project {project_id}",
            "filename": filename,
            "report_title": f"{filename.rsplit('.', 1)[0].replace('_', ' ').title()} Executive Analysis",
            "size": size,
            "created_at": session_dir.stat().st_ctime * 1000,
            "status": status,
            "thumbnail": None
        }

    # Dynamically resolve and update the thumbnail link if generated PNGs exist.
    output_dir = get_safe_output_dir(project_id)
    target_thumb = None
    if output_dir.is_dir():
        png_charts = list(output_dir.glob("*.png"))
        if png_charts:
            newest = max(png_charts, key=lambda p: p.stat().st_mtime)
            target_thumb = f"/api/charts/{project_id}/{urllib.parse.quote(newest.name)}"

    if meta.get("thumbnail") != target_thumb:
        meta["thumbnail"] = target_thumb
        save_project_metadata(project_id, meta)

    return meta
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def parse_bool(value: Optional[str]) -> bool:
    """Interpret a form-style string as a boolean.

    ``None``, the empty string and (case-insensitively, ignoring surrounding
    whitespace) "false", "0", "off" and "no" are False; anything else is True.
    """
    if not value:
        return False
    normalized = str(value).strip().lower()
    falsy_words = {"false", "0", "off", "no", ""}
    return normalized not in falsy_words
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def optimize_goal_grammar(goal: str, provider: str, model: str, api_key: str, env_key_name: str) -> str:
    """Use the runtime-configured LLM to polish the grammar of the project goal.

    Args:
        goal: The user's free-text analysis goal; ``None`` is treated as empty.
        provider, model, api_key, env_key_name: Forwarded to
            ``apply_runtime_llm_settings`` to configure the LLM.

    Returns:
        ``""`` for an empty goal; otherwise the LLM's rewrite with surrounding
        whitespace and quotes removed. The trimmed original goal is returned
        when the LLM call fails for any reason or yields no usable text.
    """
    original = (goal or "").strip()
    if not original:
        return ""
    try:
        from config.llm_config import apply_runtime_llm_settings, get_llm_params
        from crewai import LLM

        apply_runtime_llm_settings(provider, model, api_key or "", env_key_name)
        llm = LLM(**get_llm_params())

        prompt = (
            "You are a professional editor. Improve the grammar, phrasing, and professional tone "
            "of the following data analysis goal. Keep it concise (1-2 sentences). "
            "Return ONLY the corrected goal text, without any introductory text, quotes, or metadata.\n\n"
            f"Goal: {original}"
        )
        response = llm.call([{"role": "user", "content": prompt}])
        if response is None:
            # str(None) would otherwise turn the goal into the text "None".
            return original
        result = response if isinstance(response, str) else str(response)
        polished = result.strip().strip('"').strip("'")
    except Exception as e:
        # Grammar polishing is optional; never let it block the analysis.
        print(f"Grammar optimization failed: {e}")
        return original
    # An empty reply must not erase the user's goal.
    return polished or original
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
# ---------------------------------------------------------------------------
|
|
351
|
+
# Background Task Pipeline
|
|
352
|
+
# ---------------------------------------------------------------------------
|
|
353
|
+
|
|
354
|
+
# Upper bound on analyses that may be scheduled/running at the same time.
MAX_CONCURRENT_ANALYSES = 2
# Number of currently claimed analysis slots; read and written only while
# holding active_analyses_lock.
active_analyses = 0
active_analyses_lock = threading.Lock()
|
|
357
|
+
|
|
358
|
+
def _set_project_status(session_id: str, status: str, thumbnail: Optional[str] = None) -> None:
    """Best-effort update of a project's ``status`` (and optionally ``thumbnail``)."""
    try:
        meta = get_project_metadata(session_id)
        meta["status"] = status
        if thumbnail is not None:
            meta["thumbnail"] = thumbnail
        save_project_metadata(session_id, meta)
    except Exception as exc:
        # Status bookkeeping must never abort the pipeline; report and carry on.
        print(f"Could not set project status to {status!r}: {exc}", file=sys.stderr)


def _serialize_crew_result(result: dict) -> dict:
    """Convert the dict returned by ``run_crew`` into a JSON-friendly structure."""
    # Re-map Plotly charts into serializable JSON dictionaries.
    plotly_serializable = []
    for chart in result.get("plotly_charts", []):
        try:
            plotly_serializable.append({
                "title": chart["title"],
                "fig_json": json.loads(chart["fig"].to_json()),
            })
        except Exception as exc:
            # One broken figure should not discard the whole result.
            print(f"Skipping a Plotly chart that could not be serialized: {exc}")

    dataframe = result["dataframe"]
    # First 100 rows as the data preview; inf/NaN become "" so the JSON is valid.
    preview = (
        dataframe.head(100)
        .replace([float('inf'), float('-inf')], float('nan'))
        .fillna("")
        .to_dict(orient="records")
    )
    return {
        "cleaning_steps": result["cleaning_steps"],
        "relations": result["relations"],
        "insights": result["insights"],
        "code": result.get("code", ""),
        "output_dir": result["output_dir"],
        "plotly_charts": plotly_serializable,
        # Static PNG charts found in the output directory.
        "png_charts": [f.name for f in Path(result["output_dir"]).glob("*.png")],
        "rows_count": int(dataframe.shape[0]),
        "cols_count": int(dataframe.shape[1]),
        "numeric_count": int(len(dataframe.select_dtypes(include=["number"]).columns)),
        "cat_count": int(len(dataframe.select_dtypes(include=["object"]).columns)),
        "preview": preview,
    }


def run_crew_in_background(
    session_id: str,
    csv_path: str,
    provider: str,
    model: str,
    api_key: str,
    env_key_name: str,
    cooldown: int,
    selected_tasks: list[str],
    deep_analysis: bool,
    report_title: str,
):
    """Run the CrewAI pipeline for one session and persist its outcome.

    Stdout produced while the pipeline runs is redirected into the session's
    ``stdout.log``; the result (or ``{"error": ...}`` on failure) is written
    to ``results.json``; ``done.txt`` is written after the log file has been
    closed so log streams can terminate; the project status in the metadata is
    moved through "running" to "completed" or "failed".

    The caller is expected to have claimed one concurrency slot
    (``active_analyses``); this function always releases it, including when
    validation fails or an unexpected error occurs before the pipeline starts.

    ``selected_tasks`` and ``deep_analysis`` are honoured as passed. The
    ``SELECTED_TASKS`` / ``DEEP_ANALYSIS`` environment variables are consulted
    only as a fallback (no tasks given) or as an additional opt-in (deep
    analysis), which keeps environment-based configuration working.

    NOTE(review): ``contextlib.redirect_stdout`` swaps ``sys.stdout`` for the
    whole process, so output of analyses running concurrently in other threads
    can end up in the wrong log file.

    Raises:
        ValueError: if *session_id* is unsafe or *csv_path* lies outside the
            session directory.
    """
    global active_analyses

    import contextlib
    import traceback
    import urllib.parse

    try:
        if not is_safe_id(session_id):
            raise ValueError("Invalid session ID.")
        session_dir = (SESSIONS_DIR / session_id).resolve()
        resolved_csv = Path(csv_path).resolve()
        try:
            resolved_csv.relative_to(session_dir)
        except ValueError:
            raise ValueError("Path traversal detected in CSV path.")

        # 1. Inject thread-isolated LLM configurations and context variables
        from config.context import (
            current_session_id,
            current_session_csv,
            current_session_output_dir,
            current_llm_provider,
            current_llm_model,
            current_llm_api_key,
            current_llm_env_key_name,
            current_cooldown,
            current_deep_analysis,
        )
        current_session_id.set(session_id)
        current_session_csv.set(str(resolved_csv))
        current_session_output_dir.set(str((OUTPUTS_DIR / session_id).resolve()))
        current_llm_provider.set(provider)
        current_llm_model.set(model)
        current_llm_api_key.set(api_key or "")
        current_llm_env_key_name.set(env_key_name or "")
        current_cooldown.set(cooldown)
        current_deep_analysis.set(deep_analysis)

        # Save or update the report title and goal in project metadata
        try:
            meta = get_project_metadata(session_id)
            if report_title:
                meta["report_title"] = report_title.strip()

            user_goal = meta.get("goal", "")
            if user_goal.strip():
                print("Optimizing goal grammar...")
                opt_goal = optimize_goal_grammar(user_goal, provider, model, api_key, env_key_name)
                meta["optimized_goal"] = opt_goal
                print(f"Optimized goal: {opt_goal}")
            else:
                meta["optimized_goal"] = ""

            save_project_metadata(session_id, meta)
        except Exception as e:
            print(f"Error handling metadata goal/title: {e}")

        log_path = session_dir / "stdout.log"
        done_path = session_dir / "done.txt"
        results_path = session_dir / "results.json"

        # Clean up previous state
        done_path.unlink(missing_ok=True)
        results_path.unlink(missing_ok=True)

        _set_project_status(session_id, "running")

        # Prefer the explicit arguments; fall back to the environment.
        env_tasks = [t.strip() for t in os.getenv("SELECTED_TASKS", "").split(",") if t.strip()]
        tasks = list(selected_tasks) if selected_tasks else env_tasks
        env_deep = os.getenv("DEEP_ANALYSIS", "false").lower() in {"true", "1", "yes", "on"}
        deep_flag = bool(deep_analysis) or env_deep

        # 2. Redirect stdout and kickoff
        try:
            with open(log_path, "w", encoding="utf-8", errors="replace") as log_file:
                with contextlib.redirect_stdout(log_file):
                    try:
                        print("Initializing multi-agent workflows...")
                        _load_crew()

                        result = _run_crew(
                            csv_path,
                            session_id=session_id,
                            selected_tasks=tasks or None,
                            deep_analysis=deep_flag,
                        )

                        serializable_result = _serialize_crew_result(result)
                        with open(results_path, "w", encoding="utf-8") as f:
                            # default=str: preview rows may hold values such as
                            # timestamps that json cannot encode natively.
                            json.dump(serializable_result, f, indent=2, default=str)

                        print("\nAnalysis complete! Ready to render dashboard.")

                        png_charts_list = serializable_result["png_charts"]
                        thumbnail = None
                        if png_charts_list:
                            thumbnail = f"/api/charts/{session_id}/{urllib.parse.quote(png_charts_list[0])}"
                        _set_project_status(session_id, "completed", thumbnail)

                    except Exception as e:
                        print(f"\nPipeline failed: {e}", file=sys.stderr)
                        traceback.print_exc(file=log_file)

                        with open(results_path, "w", encoding="utf-8") as f:
                            json.dump({"error": str(e)}, f, indent=2)

                        _set_project_status(session_id, "failed")
        finally:
            # Write done sentinel to stop EventSource streams. Done after the
            # log file is closed so every log line is on disk first.
            with open(done_path, "w") as f:
                f.write("done")
    finally:
        # Always hand the concurrency slot back, whatever happened above.
        with active_analyses_lock:
            active_analyses = max(0, active_analyses - 1)
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
# ---------------------------------------------------------------------------
|
|
537
|
+
# API Endpoints
|
|
538
|
+
# ---------------------------------------------------------------------------
|
|
539
|
+
|
|
540
|
+
@app.post("/api/upload")
async def upload_file(file: UploadFile = File(...)):
    """Store an uploaded dataset and register a new session for it.

    A 12-character hexadecimal session ID is generated, the upload is saved as
    ``original_upload.csv`` in the new session directory, a fresh
    ``stdout.log`` is written and default project metadata is saved
    (best-effort).

    The client-supplied file name is only used for display: it is reduced to
    its final path component, and "dataset.csv" is used when no name was sent.

    Returns:
        A dict with ``session_id``, ``filename`` and ``size`` (bytes).

    Raises:
        HTTPException: 400 when the uploaded file is empty.
    """
    session_id = uuid.uuid4().hex[:12]
    session_dir = get_safe_session_dir(session_id)
    session_dir.mkdir(parents=True, exist_ok=True)

    file_path = session_dir / "original_upload.csv"
    with open(file_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    size = file_path.stat().st_size
    if size == 0:
        # Nothing to analyse; do not leave an unusable session behind.
        shutil.rmtree(session_dir, ignore_errors=True)
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    # Pre-configure fresh log files
    log_path = session_dir / "stdout.log"
    with open(log_path, "w", encoding="utf-8") as f:
        f.write("Dataset uploaded successfully.\n")

    # The name comes from the client: keep only the last path component
    # (either separator style) and fall back to a default when it is missing.
    display_name = re.split(r"[\\/]", file.filename or "")[-1].strip() or "dataset.csv"
    proj_name = (
        display_name.rsplit('.', 1)[0].replace('_', ' ').replace('-', ' ').title()
        or display_name
    )

    # Save default project metadata
    try:
        meta = {
            "id": session_id,
            "name": proj_name,
            "filename": display_name,
            "size": size,
            "created_at": time.time() * 1000,
            "status": "idle"
        }
        save_project_metadata(session_id, meta)
    except Exception as exc:
        # The upload itself succeeded; metadata is rebuilt lazily if missing.
        print(f"Could not save metadata for session {session_id}: {exc}", file=sys.stderr)

    return {
        "session_id": session_id,
        "filename": display_name,
        "size": size
    }
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
@app.post("/api/validate-key")
async def validate_api_key(
    provider: str = Form(...),
    model: str = Form(...),
    api_key: Optional[str] = Form(""),
):
    """Check LLM provider credentials before an analysis is started.

    Returns the dict produced by the connection validator when it reports
    ``valid``. Raises HTTPException 503 when the analysis stack cannot be
    imported and 400 (with the validator's message) when validation fails.
    """
    try:
        _load_crew()
    except ImportError as exc:
        raise HTTPException(status_code=503, detail=f"CrewAI not available: {exc}")

    outcome = _validate_llm_connection(provider, model, api_key or "")
    if outcome.get("valid"):
        return outcome
    raise HTTPException(status_code=400, detail=outcome.get("message", "Validation failed."))
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
@app.post("/api/analyze")
async def trigger_analysis(
    background_tasks: BackgroundTasks,
    session_id: str = Form(...),
    provider: str = Form(...),
    model: str = Form(...),
    api_key: Optional[str] = Form(""),
    cooldown: int = Form(5),
    selected_tasks: str = Form(""),
    deep_analysis: str = Form("false"),
    report_title: str = Form("")
):
    """Schedule the CrewAI analysis for an uploaded session as a background task.

    ``selected_tasks`` is a comma-separated list; when empty, all four tasks
    (cleaning, relations, insights, visualization) are selected.

    Before the task is scheduled, the previous run's ``done.txt`` and
    ``results.json`` are removed. The background task only starts after this
    response has been sent, so without this a client that opens the log stream
    or polls the results right away would still see the old run's sentinel
    and results.

    Returns:
        ``{"status": "started", "session_id": ...}``.

    Raises:
        HTTPException: 400 for an invalid session or missing upload, 429 when
            ``MAX_CONCURRENT_ANALYSES`` analyses are already active.
    """
    global active_analyses

    session_dir = get_safe_session_dir(session_id)
    csv_path = session_dir / "original_upload.csv"

    if not csv_path.exists():
        raise HTTPException(status_code=400, detail="Session upload not found.")

    # Match provider key name
    if provider == "ollama":
        env_key_name = "OLLAMA_BASE_URL"
    elif provider in ("nvidia", "minimax"):
        env_key_name = "NVIDIA_API_KEY"
    else:
        env_key_name = f"{provider.upper()}_API_KEY"

    task_list = [task.strip() for task in selected_tasks.split(",") if task.strip()]
    if not task_list:
        task_list = ["cleaning", "relations", "insights", "visualization"]

    deep = deep_analysis.strip().lower() in {"true", "1", "yes", "on"}

    # Persist report title if provided
    try:
        meta = get_project_metadata(session_id)
        if report_title.strip():
            meta["report_title"] = report_title.strip()
        save_project_metadata(session_id, meta)
    except Exception as exc:
        # Best-effort: the background task stores the title again.
        print(f"Could not persist report title for {session_id}: {exc}", file=sys.stderr)

    # Concurrency control checks
    with active_analyses_lock:
        if active_analyses >= MAX_CONCURRENT_ANALYSES:
            raise HTTPException(
                status_code=429,
                detail="Server is busy. Maximum concurrent analyses limit reached. Please try again later."
            )
        active_analyses += 1

    # Spawn thread-safe background execution
    try:
        # Drop the previous run's sentinel/results so clients do not mistake
        # them for the outcome of the run being scheduled now.
        (session_dir / "done.txt").unlink(missing_ok=True)
        (session_dir / "results.json").unlink(missing_ok=True)

        background_tasks.add_task(
            run_crew_in_background,
            session_id=session_id,
            csv_path=str(csv_path),
            provider=provider,
            model=model,
            api_key=api_key,
            env_key_name=env_key_name,
            cooldown=cooldown,
            selected_tasks=task_list,
            deep_analysis=deep,
            report_title=report_title.strip(),
        )
    except Exception:
        # Scheduling failed: give the slot claimed above back.
        with active_analyses_lock:
            active_analyses = max(0, active_analyses - 1)
        raise

    return {"status": "started", "session_id": session_id}
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
@app.get("/api/analyze/stream")
async def stream_analysis_logs(session_id: str):
    """Stream the session's ``stdout.log`` as Server-Sent Events (SSE).

    Each complete log line is passed through ``clean_log_message`` and, unless
    dropped, sent as one ``data:`` event. The stream ends with ``[EOF]`` once
    the ``done.txt`` sentinel exists and the rest of the log has been sent.

    Handled explicitly:
    * the log file not existing yet (keeps waiting instead of failing on open);
    * the log being truncated and rewritten while it is being followed -- the
      pipeline reopens it in "w" mode -- by restarting from the beginning;
    * a line that has only been partially written, which is held back until
      its newline arrives;
    * removal of the per-session prompt-filter state when the stream ends.

    Raises:
        HTTPException: 400 for an unsafe session ID, 404 for an unknown session.
    """
    session_dir = get_safe_session_dir(session_id)
    if not session_dir.is_dir():
        raise HTTPException(status_code=404, detail="Session not found.")
    log_path = session_dir / "stdout.log"
    done_path = session_dir / "done.txt"

    # Reset streaming state
    log_stream_states[session_id] = {"in_prompt": False}

    async def log_generator():
        try:
            # Wait (up to ~5 s) for stdout.log to appear.
            for _ in range(50):
                if log_path.exists():
                    break
                await asyncio.sleep(0.1)

            if not log_path.exists():
                yield "data: [Initializing pipeline...]\n\n"
                while not log_path.exists():
                    if done_path.exists():
                        # Finished without ever producing a log.
                        yield "data: [EOF]\n\n"
                        return
                    await asyncio.sleep(0.1)

            with open(log_path, "r", encoding="utf-8", errors="replace") as f:
                pending = ""  # text of a line whose newline has not arrived yet
                while True:
                    line = f.readline()
                    if line:
                        pending += line
                        if pending.endswith("\n"):
                            cleaned = clean_log_message(pending, session_id=session_id)
                            pending = ""
                            if cleaned is not None:
                                yield f"data: {cleaned}\n\n"
                        continue

                    # Nothing new. If the file is now shorter than what was
                    # already consumed it has been truncated: start over.
                    if os.fstat(f.fileno()).st_size < f.buffer.tell():
                        f.seek(0)
                        pending = ""
                        continue

                    # Look for done flag
                    if done_path.exists():
                        # Read final trailing lines
                        trailing = pending + f.read()
                        for trail_line in trailing.split("\n"):
                            cleaned_trail = clean_log_message(trail_line, session_id=session_id)
                            if cleaned_trail is not None:
                                yield f"data: {cleaned_trail}\n\n"
                        yield "data: [EOF]\n\n"
                        break

                    await asyncio.sleep(0.1)
        finally:
            # Do not keep per-session filter state around after the stream ends.
            log_stream_states.pop(session_id, None)

    return StreamingResponse(log_generator(), media_type="text/event-stream")
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
@app.get("/api/results")
async def get_results(session_id: str):
    """Return the cached analysis results for a session.

    Returns ``{"ready": False, "status": "pending"}`` while ``results.json``
    does not exist or cannot be parsed yet (it may be read while the pipeline
    is still writing it). A result holding an ``"error"`` key is returned
    as-is; a successful result is returned with ``"ready": True`` added.

    Raises:
        HTTPException: 400 for an unsafe session ID.
    """
    session_dir = get_safe_session_dir(session_id)
    results_path = session_dir / "results.json"
    pending = {"ready": False, "status": "pending"}

    if not results_path.exists():
        return pending

    try:
        with open(results_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # Half-written or just-removed file: report "not ready" instead of a 500.
        return pending

    if not isinstance(data, dict):
        return pending
    if "error" in data:
        return data
    data["ready"] = True
    return data
|
|
731
|
+
|
|
732
|
+
|
|
733
|
+
@app.post("/api/copilot")
async def ask_copilot(
    session_id: str = Form(...),
    query: str = Form(...),
    provider: str = Form(...),
    model: str = Form(...),
    api_key: Optional[str] = Form("")
):
    """Answer a natural-language question about the session's dataset.

    Applies the requested LLM provider settings, picks ``cleaned.csv`` (or
    ``original_upload.csv`` when no cleaned file exists), binds the session
    context variables and delegates to ``_run_copilot_query``.

    Returns a dict with ``success``, ``text`` and ``plot_url``; ``plot_url``
    is ``None`` unless the runner reported a plot path.

    Raises:
        HTTPException: 400 when neither CSV exists for the session.
    """
    # Providers whose settings variable does not follow "<PROVIDER>_API_KEY".
    special_env_keys = {
        "ollama": "OLLAMA_BASE_URL",
        "nvidia": "NVIDIA_API_KEY",
        "minimax": "NVIDIA_API_KEY",
    }
    env_key_name = special_env_keys.get(provider, f"{provider.upper()}_API_KEY")
    _load_crew()
    _apply_runtime_llm_settings(provider, model, api_key or "", env_key_name)

    session_dir = get_safe_session_dir(session_id)
    output_dir = get_safe_output_dir(session_id)

    # Prefer the cleaned dataset; fall back to the raw upload.
    for candidate_name in ("cleaned.csv", "original_upload.csv"):
        csv_path = session_dir / candidate_name
        if csv_path.exists():
            break
    else:
        raise HTTPException(status_code=400, detail="Dataset not uploaded.")

    # Bind the per-request context variables before running the query.
    from config.context import current_session_csv, current_session_output_dir
    csv_location = str(csv_path)
    output_location = str(output_dir)
    current_session_csv.set(csv_location)
    current_session_output_dir.set(output_location)

    res = _run_copilot_query(query, csv_location, output_location)

    # Translate the plot's filesystem path into the chart-serving endpoint URL.
    plot_url = None
    plot_path = res.get("plot_path")
    if plot_path:
        import urllib.parse
        encoded_name = urllib.parse.quote(Path(plot_path).name)
        plot_url = f"/api/charts/{session_id}/{encoded_name}"

    return {"success": res["success"], "text": res["text"], "plot_url": plot_url}
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
@app.get("/api/export-pdf")
async def get_pdf_report(session_id: str, report_title: Optional[str] = None):
    """Generate the executive PDF report for a session and stream it back.

    Args:
        session_id: Session whose ``results.json`` and ``cleaned.csv`` are used.
        report_title: Optional title override; blank values fall back to the
            project metadata (``report_title``, then ``name``).

    Raises:
        HTTPException: 400 when the results or the cleaned CSV are missing, or
            when ``results.json`` lacks the sections the report needs;
            500 when PDF generation itself fails.
    """
    session_dir = get_safe_session_dir(session_id)
    results_path = session_dir / "results.json"
    cleaned_csv = session_dir / "cleaned.csv"

    if not results_path.exists() or not cleaned_csv.exists():
        raise HTTPException(status_code=400, detail="Data analysis results not available.")

    with open(results_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # results.json can exist without a finished analysis (an error payload, or
    # only "relations" saved through the tweak-relations endpoint). Report
    # that as "not available" instead of failing with an unhandled KeyError.
    required_sections = ("cleaning_steps", "relations", "insights")
    if not isinstance(data, dict) or any(key not in data for key in required_sections):
        raise HTTPException(status_code=400, detail="Data analysis results not available.")

    meta = get_project_metadata(session_id)
    # A whitespace-only override is treated as "no override".
    title = (
        (report_title or "").strip()
        or meta.get("report_title")
        or meta.get("name")
        or "Analysis Report"
    )
    goal = meta.get("optimized_goal") or meta.get("goal") or ""

    # Format result structure for reportlab builder
    df = read_csv_robust(cleaned_csv)
    report_dict = {
        "dataframe": df,
        "cleaning_steps": data["cleaning_steps"],
        "relations": data["relations"],
        "insights": data["insights"],
        "code": data.get("code", ""),
        "output_dir": str(get_safe_output_dir(session_id)),
        "report_title": title,
        "goal": goal,
    }

    try:
        _load_crew()
        pdf_bytes = _export_pdf(report_dict)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"PDF generation failed: {e}") from e

    # Restrict the download name to characters that are safe unquoted in the header.
    filename = re.sub(r"[^a-zA-Z0-9_-]", "_", title.lower())[:60] or f"report_{session_id}"
    return StreamingResponse(
        BytesIO_iterator(pdf_bytes),
        media_type="application/pdf",
        headers={"Content-Disposition": f"attachment; filename={filename}.pdf"}
    )
|
|
825
|
+
|
|
826
|
+
|
|
827
|
+
@app.get("/api/charts/{session_id}/{filename}")
async def serve_chart(session_id: str, filename: str):
    """Serve a generated chart file from the session's output directory.

    Raises:
        HTTPException: 400 for an unsafe filename or a path that escapes the
            output directory; 404 when the chart is not an existing file.
    """
    if not is_safe_filename(filename):
        raise HTTPException(status_code=400, detail="Invalid filename.")

    # Resolve the base directory too: comparing a resolved chart path against
    # an unresolved (relative or symlinked) base would reject every request.
    output_dir = get_safe_output_dir(session_id).resolve()
    chart_path = (output_dir / filename).resolve()
    try:
        chart_path.relative_to(output_dir)
    except ValueError:
        raise HTTPException(status_code=400, detail="Path traversal detected.") from None

    # is_file() rather than exists(): a directory must not reach FileResponse.
    if not chart_path.is_file():
        raise HTTPException(status_code=404, detail="Chart not found.")
    return FileResponse(chart_path)
|
|
841
|
+
|
|
842
|
+
|
|
843
|
+
# ---------------------------------------------------------------------------
|
|
844
|
+
# Utility Streams
|
|
845
|
+
# ---------------------------------------------------------------------------
|
|
846
|
+
|
|
847
|
+
def BytesIO_iterator(data_bytes: bytes):
    """Yield ``data_bytes`` as a single chunk so it can back a streaming response."""
    yield from (data_bytes,)
|
|
850
|
+
|
|
851
|
+
|
|
852
|
+
# ---------------------------------------------------------------------------
|
|
853
|
+
# Ollama Models Fetch
|
|
854
|
+
# ---------------------------------------------------------------------------
|
|
855
|
+
|
|
856
|
+
@app.get("/api/ollama-models")
async def list_ollama_models(base_url: str = "http://localhost:11434"):
    """List the models reported by an Ollama server's ``/api/tags`` endpoint.

    Args:
        base_url: Base URL of the Ollama service.

    Returns:
        ``{"models": [...]}`` with every name prefixed by ``ollama/``. A fixed
        default list is returned when the service is unreachable, answers with
        a non-200 status, or reports no models.
    """
    import requests

    url = base_url.rstrip("/") + "/api/tags"
    try:
        # requests is synchronous; run it in the default executor so a slow or
        # unreachable server does not stall the event loop for the whole timeout.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None, lambda: requests.get(url, timeout=2.0)
        )
        if response.status_code == 200:
            data = response.json()
            models = [m["name"] for m in (data.get("models") or [])]
            if models:
                prefixed = [m if m.startswith("ollama/") else f"ollama/{m}" for m in models]
                return {"models": prefixed}
    except Exception:
        # Deliberate best effort: any failure falls through to the defaults below.
        pass
    # Fallback defaults if Ollama service is unreachable or empty
    return {"models": ["ollama/llama3", "ollama/mistral", "ollama/gemma2"]}
|
|
873
|
+
|
|
874
|
+
|
|
875
|
+
# ---------------------------------------------------------------------------
|
|
876
|
+
# Metrics & Configurations APIs
|
|
877
|
+
# ---------------------------------------------------------------------------
|
|
878
|
+
|
|
879
|
+
def get_local_config_path() -> Path:
    """Return the path of ``config.json`` directly under ``USER_HOME``."""
    return USER_HOME.joinpath("config.json")
|
|
881
|
+
|
|
882
|
+
@app.get("/api/metrics")
async def get_performance_metrics():
    """Return the value produced by ``config.metrics_tracker.get_metrics``."""
    # Imported lazily, at request time, exactly like the original handler.
    from config import metrics_tracker
    return metrics_tracker.get_metrics()
|
|
886
|
+
|
|
887
|
+
@app.get("/api/config")
async def get_local_config():
    """Return the saved local configuration with secret-looking values masked.

    A value is masked when it is truthy and its key contains ``key``,
    ``secret`` or ``token`` (case-insensitive): values longer than eight
    characters keep their first and last four characters around ``...``,
    shorter ones become ``********``.

    Returns ``{}`` when the config file is missing, unreadable, or is not a
    JSON object.
    """
    cfg_path = get_local_config_path()
    if not cfg_path.exists():
        return {}
    try:
        with open(cfg_path, "r", encoding="utf-8") as f:
            cfg = json.load(f)
    except (OSError, ValueError):
        # Unreadable or corrupt config is reported as "no config".
        return {}
    if not isinstance(cfg, dict):
        return {}

    sensitive_words = ("key", "secret", "token")
    masked = {}
    for k, v in cfg.items():
        if v and any(word in k.lower() for word in sensitive_words):
            # Mask the textual form: slicing a non-string value (number,
            # nested object) used to raise and blank out the whole response.
            text = str(v)
            masked[k] = f"{text[:4]}...{text[-4:]}" if len(text) > 8 else "********"
        else:
            masked[k] = v
    return masked
|
|
904
|
+
|
|
905
|
+
@app.post("/api/config")
async def save_local_config(
    provider: str = Form(...),
    api_key: Optional[str] = Form(""),
    base_url: Optional[str] = Form("")
):
    """Persist provider settings to the local config file and the environment.

    * ``ollama`` stores ``base_url`` under ``OLLAMA_BASE_URL``.
    * ``nvidia`` / ``minimax`` store the key under ``NVIDIA_API_KEY``; every
      other provider uses ``<PROVIDER>_API_KEY``.
    * An empty ``api_key`` removes the stored key; a masked placeholder (as
      returned by ``GET /api/config``) leaves the stored key untouched.
    * ``custom`` additionally stores a non-empty ``base_url`` under
      ``CUSTOM_BASE_URL``.

    Raises:
        HTTPException: 500 when the config cannot be written or applied.
    """
    cfg_path = get_local_config_path()
    cfg_path.parent.mkdir(parents=True, exist_ok=True)
    cfg = {}
    if cfg_path.exists():
        try:
            with open(cfg_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            if isinstance(loaded, dict):
                cfg = loaded
        except (OSError, ValueError):
            # Unreadable or corrupt config: start from an empty one.
            pass

    api_key = (api_key or "").strip()
    base_url = (base_url or "").strip()
    cleared_key = None

    if provider == "ollama":
        cfg["OLLAMA_BASE_URL"] = base_url
    else:
        if provider in ("nvidia", "minimax"):
            key_name = "NVIDIA_API_KEY"
        else:
            key_name = f"{provider.upper()}_API_KEY"

        if not api_key:
            # Only forget environment values that this config had stored.
            if key_name in cfg:
                cleared_key = key_name
            cfg.pop(key_name, None)
        elif api_key != "********" and "..." not in api_key:
            # GET /api/config masks keys as "abcd...wxyz" or "********".
            # Such a placeholder sent back by the client must never replace
            # the real key (the former endswith("...") test missed both).
            cfg[key_name] = api_key

        if base_url and provider == "custom":
            cfg["CUSTOM_BASE_URL"] = base_url

    try:
        with open(cfg_path, "w", encoding="utf-8") as f:
            json.dump(cfg, f, indent=2)
        if cleared_key is not None:
            # Drop the stale credential from the running process as well.
            os.environ.pop(cleared_key, None)
        for k, v in cfg.items():
            os.environ[k] = str(v)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to write config: {e}") from e
    return {"status": "success"}
|
|
947
|
+
|
|
948
|
+
|
|
949
|
+
# ---------------------------------------------------------------------------
|
|
950
|
+
# Project Management APIs
|
|
951
|
+
# ---------------------------------------------------------------------------
|
|
952
|
+
|
|
953
|
+
@app.get("/api/projects")
async def list_projects():
    """Return the metadata of every project under ``SESSIONS_DIR``, newest first.

    Directories whose metadata is empty, or whose metadata lookup raises, are
    left out of the listing.
    """
    found = []
    entries = SESSIONS_DIR.iterdir() if SESSIONS_DIR.exists() else ()
    for entry in entries:
        if not entry.is_dir():
            continue
        try:
            meta = get_project_metadata(entry.name)
        except Exception:
            # Unreadable projects are skipped rather than failing the listing.
            continue
        if meta:
            found.append(meta)
    # Descending creation time; entries without "created_at" sort last.
    return sorted(found, key=lambda item: item.get("created_at", 0), reverse=True)
|
|
969
|
+
|
|
970
|
+
@app.post("/api/projects")
async def create_project(
    name: str = Form(...),
    report_title: str = Form(""),
    goal: str = Form(""),
    file: UploadFile = File(...)
):
    """Create a new project: store the uploaded CSV, seed its log and metadata.

    Args:
        name: Display name of the project; must not be blank.
        report_title: Optional report title; defaults to
            ``"<name> Executive Analysis"``.
        goal: Optional analysis goal.
        file: The dataset upload, saved as ``original_upload.csv``.

    Returns:
        The metadata dict that was saved for the new project.

    Raises:
        HTTPException: 400 when ``name`` is empty after trimming.
    """
    project_name = name.strip()
    if not project_name:
        # Reject before anything is created on disk.
        raise HTTPException(status_code=400, detail="Project name must not be empty.")

    project_id = uuid.uuid4().hex[:12]
    session_dir = get_safe_session_dir(project_id)
    session_dir.mkdir(parents=True, exist_ok=True)

    file_path = session_dir / "original_upload.csv"
    with open(file_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    # Pre-configure fresh log files. Written as UTF-8 because the log
    # streaming endpoint reads stdout.log back with encoding="utf-8".
    log_path = session_dir / "stdout.log"
    with open(log_path, "w", encoding="utf-8") as f:
        f.write("Project created. Dataset uploaded successfully.\n")

    meta = {
        "id": project_id,
        "name": project_name,
        "report_title": report_title.strip() or f"{project_name} Executive Analysis",
        "goal": goal.strip(),
        "optimized_goal": "",
        "filename": file.filename,
        "size": file_path.stat().st_size,
        "created_at": time.time() * 1000,  # milliseconds since the epoch
        "status": "idle"
    }
    save_project_metadata(project_id, meta)

    return meta
|
|
1005
|
+
|
|
1006
|
+
@app.post("/api/projects/{project_id}/rename")
async def rename_project(project_id: str, name: str = Form(...)):
    """Rename an existing project and return its updated metadata.

    Raises:
        HTTPException: 404 when the project directory does not exist;
            400 when ``name`` is empty after trimming.
    """
    session_dir = get_safe_session_dir(project_id)
    if not session_dir.exists():
        raise HTTPException(status_code=404, detail="Project not found")

    new_name = name.strip()
    if not new_name:
        raise HTTPException(status_code=400, detail="Project name must not be empty.")

    # Tolerate a missing/empty metadata result instead of failing on item assignment.
    meta = get_project_metadata(project_id) or {}
    meta["name"] = new_name
    save_project_metadata(project_id, meta)

    return meta
|
|
1018
|
+
|
|
1019
|
+
|
|
1020
|
+
@app.post("/api/projects/{project_id}/tweak-relations")
async def tweak_relations(project_id: str, relations_text: str = Form(...)):
    """Store user-edited relationship text in the project's ``results.json``.

    The file is created when it does not exist yet; an unreadable or
    non-object file is replaced by one holding only ``relations``.

    Raises:
        HTTPException: 404 when the project directory does not exist.
    """
    session_dir = get_safe_session_dir(project_id)
    if not session_dir.exists():
        raise HTTPException(status_code=404, detail="Project not found")

    results_path = session_dir / "results.json"

    # Ensure results.json structure is present even if not analysed yet
    res_data = {}
    if results_path.exists():
        try:
            with open(results_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # Corrupt or unreadable cache: fall back to a fresh document.
            loaded = None
        # Only a JSON object can take the "relations" key.
        if isinstance(loaded, dict):
            res_data = loaded

    res_data["relations"] = relations_text.strip()

    with open(results_path, "w", encoding="utf-8") as f:
        json.dump(res_data, f, indent=2)

    return {"status": "success", "relations": res_data["relations"]}
|
|
1044
|
+
|
|
1045
|
+
@app.delete("/api/projects/{project_id}")
async def delete_project(project_id: str):
    """Remove a project's session directory and, if present, its output directory.

    Raises:
        HTTPException: 404 when the session directory does not exist.
    """
    session_dir = get_safe_session_dir(project_id)
    output_dir = get_safe_output_dir(project_id)

    if not session_dir.exists():
        raise HTTPException(status_code=404, detail="Project not found")

    # Removal is best effort: individual filesystem errors are ignored.
    for doomed in (session_dir, output_dir):
        if doomed.exists():
            shutil.rmtree(doomed, ignore_errors=True)

    return dict(status="deleted", id=project_id)
|
|
1059
|
+
|
|
1060
|
+
|
|
1061
|
+
@app.get("/api/projects/{project_id}/export-zip")
async def export_project_zip(project_id: str):
    """Bundle a project's session files and outputs into a downloadable ZIP.

    Session files are stored under ``session/`` and output files under
    ``outputs/`` inside the archive, which is built in memory.

    Raises:
        HTTPException: 404 when the session directory does not exist.
    """
    session_dir = get_safe_session_dir(project_id)
    output_dir = get_safe_output_dir(project_id)
    if not session_dir.exists():
        raise HTTPException(status_code=404, detail="Project not found")

    archive = BytesIO()
    with zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as bundle:
        # Same walk for both trees; only the archive prefix differs.
        for prefix, base_dir in (("session", session_dir), ("outputs", output_dir)):
            if not base_dir.exists():
                continue
            for root, _subdirs, names in os.walk(base_dir):
                for entry_name in names:
                    source = Path(root) / entry_name
                    bundle.write(source, arcname=Path(prefix) / source.relative_to(base_dir))

    meta = get_project_metadata(project_id)
    project_label = meta.get("name", "project").lower()
    safe_name = re.sub(r"[^a-zA-Z0-9_-]", "_", project_label)
    filename = f"{safe_name}_{project_id}.zip"
    return StreamingResponse(
        BytesIO_iterator(archive.getvalue()),
        media_type="application/x-zip-compressed",
        headers={"Content-Disposition": f"attachment; filename={filename}"}
    )
|
|
1094
|
+
|
|
1095
|
+
|
|
1096
|
+
@app.post("/api/projects/import-zip")
async def import_project_zip(file: UploadFile = File(...)):
    """Import a project from an exported ZIP and register it.

    The archive is unpacked into a temporary directory, checked for unsafe
    entry paths, and its ``session/`` and ``outputs/`` files are copied into
    a project directory. The project ID from ``session/metadata.json`` is
    reused unless a project with that ID already exists, in which case a new
    ID is generated and ``" (Copy)"`` is appended to the name.

    Returns:
        The metadata dict written for the imported project.

    Raises:
        HTTPException: 400 for unsafe or malformed archives and for any other
            import failure.
    """
    zip_contents = await file.read()
    zip_buffer = BytesIO(zip_contents)

    project_id = uuid.uuid4().hex[:12]
    temp_dir = DATA_DIR / "temp_import" / project_id
    temp_dir.mkdir(parents=True, exist_ok=True)

    target_project_id = project_id
    # These are only ever bound to directories created by this import, so the
    # failure cleanup below can never delete a pre-existing project.
    session_dir = None
    output_dir = None

    def _discard_partial_import():
        """Remove whatever this import has created so far."""
        for created in (session_dir, output_dir):
            if created and created.exists():
                shutil.rmtree(created, ignore_errors=True)

    try:
        with zipfile.ZipFile(zip_buffer, "r") as zip_file:
            # Zip Slip check:
            temp_root = temp_dir.resolve()
            for member in zip_file.infolist():
                entry = member.filename
                if ".." in entry or entry.startswith(("/", "\\")):
                    raise HTTPException(status_code=400, detail=f"Invalid zip entry: {entry}")
                try:
                    (temp_dir / entry).resolve().relative_to(temp_root)
                except ValueError:
                    raise HTTPException(status_code=400, detail=f"Zip Slip detected: {entry}") from None
            zip_file.extractall(temp_dir)

        # Verify metadata.json exists
        meta_file = temp_dir / "session" / "metadata.json"
        if not meta_file.exists():
            raise HTTPException(status_code=400, detail="Invalid zip format: missing metadata.json")

        with open(meta_file, "r", encoding="utf-8") as f:
            meta = json.load(f)
        if not isinstance(meta, dict):
            raise HTTPException(status_code=400, detail="Invalid zip format: metadata.json must be an object")

        orig_project_id = meta.get("id")
        if orig_project_id:
            if not is_safe_id(orig_project_id):
                raise HTTPException(status_code=400, detail="Invalid project ID in metadata.")
            target_project_id = orig_project_id

        # Check if project conflicts. If so, generate new ID. The candidate is
        # kept in a separate variable until it is known not to exist.
        candidate_dir = get_safe_session_dir(target_project_id)
        if candidate_dir.exists():
            target_project_id = uuid.uuid4().hex[:12]
            candidate_dir = get_safe_session_dir(target_project_id)
            meta["id"] = target_project_id
            meta["name"] = f"{meta.get('name', 'Imported')} (Copy)"

        session_dir = candidate_dir
        session_dir.mkdir(parents=True, exist_ok=True)

        # Copy session files (top-level files only; unsafe names are skipped)
        for item in (temp_dir / "session").iterdir():
            if item.is_file() and is_safe_filename(item.name):
                shutil.copy2(item, session_dir / item.name)

        # Copy outputs
        outputs_source = temp_dir / "outputs"
        if outputs_source.exists():
            output_dir = get_safe_output_dir(target_project_id)
            output_dir.mkdir(parents=True, exist_ok=True)
            for item in outputs_source.iterdir():
                if item.is_file() and is_safe_filename(item.name):
                    shutil.copy2(item, output_dir / item.name)

        # Update metadata.json
        meta["id"] = target_project_id
        if meta.get("thumbnail"):
            # Update thumbnail link with new project ID
            thumb_parts = meta["thumbnail"].split("/")
            if len(thumb_parts) >= 5:
                thumb_parts[3] = target_project_id
                meta["thumbnail"] = "/".join(thumb_parts)

        with open(session_dir / "metadata.json", "w", encoding="utf-8") as f:
            json.dump(meta, f, indent=2)

        return meta
    except HTTPException:
        # Validation errors raised above already carry the right status and
        # detail; re-wrapping them as "Import failed: ..." garbled the message.
        _discard_partial_import()
        raise
    except Exception as e:
        _discard_partial_import()
        raise HTTPException(status_code=400, detail=f"Import failed: {str(e)}") from e
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)
|
|
1184
|
+
|
|
1185
|
+
|
|
1186
|
+
@app.get("/api/projects/{project_id}/preview")
async def get_dynamic_preview(project_id: str):
    """Return a preview of the project's current CSV.

    Reads ``cleaned.csv`` when it exists, otherwise ``original_upload.csv``,
    and returns the column names, column dtypes, row/column counts and the
    first 100 rows (missing values replaced by ``""``). When ``results.json``
    exists, its ``preview``, ``rows_count`` and ``cols_count`` entries are
    refreshed as a best-effort side effect.

    Raises:
        HTTPException: 404 when the project or its CSV is missing;
            500 when the CSV cannot be loaded.
    """
    session_dir = get_safe_session_dir(project_id)
    if not session_dir.exists():
        raise HTTPException(status_code=404, detail="Project not found")

    cleaned_csv = session_dir / "cleaned.csv"
    original_csv = session_dir / "original_upload.csv"
    csv_path = cleaned_csv if cleaned_csv.exists() else original_csv

    if not csv_path.exists():
        raise HTTPException(status_code=404, detail="CSV not found.")

    try:
        df = read_csv_robust(str(csv_path))
        rows_count, cols_count = df.shape
        preview = df.head(100).fillna("").to_dict(orient="records")
        col_types = {col: str(dtype) for col, dtype in df.dtypes.items()}
        columns = list(df.columns)

        # Update cache in results.json if it exists
        results_path = session_dir / "results.json"
        if results_path.exists():
            try:
                with open(results_path, "r", encoding="utf-8") as f:
                    res_data = json.load(f)
                res_data["preview"] = preview
                res_data["rows_count"] = rows_count
                res_data["cols_count"] = cols_count
                # Serialize BEFORE opening for writing: mode "w" truncates the
                # file, so a value json cannot encode would otherwise leave
                # results.json empty or half-written.
                serialized = json.dumps(res_data, indent=2)
                with open(results_path, "w", encoding="utf-8") as f:
                    f.write(serialized)
            except Exception:
                # The cache refresh is optional; the preview is still returned.
                pass

        return {
            "columns": columns,
            "col_types": col_types,
            "rows_count": rows_count,
            "cols_count": cols_count,
            "preview": preview
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to load preview: {str(e)}") from e
|
|
1230
|
+
|
|
1231
|
+
|
|
1232
|
+
@app.get("/api/projects/{project_id}/download-csv")
async def download_project_csv(project_id: str):
    """Download the project's dataset as ``<original name>_cleaned.csv``.

    Serves ``cleaned.csv`` when it exists and falls back to
    ``original_upload.csv`` otherwise; the download name carries the
    ``_cleaned`` suffix in both cases.

    Raises:
        HTTPException: 404 when the project or its CSV is missing.
    """
    session_dir = get_safe_session_dir(project_id)
    if not session_dir.exists():
        raise HTTPException(status_code=404, detail="Project not found")

    cleaned_csv = session_dir / "cleaned.csv"
    original_csv = session_dir / "original_upload.csv"
    csv_path = cleaned_csv if cleaned_csv.exists() else original_csv

    if not csv_path.exists():
        raise HTTPException(status_code=404, detail="CSV not found.")

    try:
        meta = get_project_metadata(project_id)
        # "filename" is stored from the upload and may be None or empty;
        # `or` covers both, where .get's default only covers a missing key.
        orig_name = meta.get("filename") or "dataset.csv"
    except Exception:
        orig_name = "dataset.csv"

    # Drop the last extension; a name such as ".csv" would leave nothing.
    base_name = str(orig_name).rsplit(".", 1)[0] or "dataset"
    download_filename = f"{base_name}_cleaned.csv"

    return FileResponse(csv_path, media_type="text/csv", filename=download_filename)
|
|
1256
|
+
|
|
1257
|
+
|
|
1258
|
+
# ---------------------------------------------------------------------------
|
|
1259
|
+
# Frontend Static Mounts
|
|
1260
|
+
# ---------------------------------------------------------------------------
|
|
1261
|
+
|
|
1262
|
+
# Directory containing this module; the bundled frontend lives next to it.
BASE_DIR = Path(__file__).resolve().parent
web_dir = BASE_DIR.joinpath("web")
assets_dir = BASE_DIR.joinpath("assets")

# NOTE(review): "/assets" is registered before the catch-all "/" mount;
# presumably the order matters for route matching, so keep it.
app.mount("/assets", StaticFiles(directory=str(assets_dir)), name="assets")
# html=True lets the root mount serve the web app's HTML pages.
app.mount("/", StaticFiles(directory=str(web_dir), html=True), name="web")
|
|
1268
|
+
|
|
1269
|
+
|
|
1270
|
+
# ── Server Boot ─────────────────────────────────────────────────────────────
|
|
1271
|
+
|
|
1272
|
+
if __name__ == "__main__":
    import uvicorn

    # Startup banner, then serve on port 8000 with auto-reload.
    banner_rule = "=" * 50
    print("\n" + banner_rule)
    print("Crewlyze Web Platform")
    print("Local URL: http://localhost:8000")
    print(banner_rule + "\n")
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
|