crewlyze 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +12 -0
- package/.gitattributes +2 -0
- package/CHANGELOG.md +86 -0
- package/Dockerfile +21 -0
- package/LICENSE +21 -0
- package/README.md +139 -0
- package/USAGE.md +106 -0
- package/agents/__init__.py +0 -0
- package/agents/cleaner.py +38 -0
- package/agents/insights.py +44 -0
- package/agents/relation.py +36 -0
- package/agents/visualizer.py +41 -0
- package/assets/badge_crewai.svg +4 -0
- package/assets/badge_matplotlib.svg +4 -0
- package/assets/badge_ollama.svg +4 -0
- package/assets/badge_pandas.svg +4 -0
- package/assets/badge_seaborn.svg +4 -0
- package/assets/branding_image.png +0 -0
- package/assets/complete_workflow.svg +216 -0
- package/assets/favicon.png +0 -0
- package/assets/logo.png +0 -0
- package/assets/stars.svg +12 -0
- package/bin/crewlyze.js +79 -0
- package/config/README.md +129 -0
- package/config/__init__.py +1 -0
- package/config/context.py +16 -0
- package/config/llm_config.py +300 -0
- package/config/metrics_tracker.py +70 -0
- package/crew.py +870 -0
- package/crewlyze-3.1.0.tgz +0 -0
- package/fix_syntax.py +54 -0
- package/main.py +1279 -0
- package/package.json +22 -0
- package/pyproject.toml +32 -0
- package/requirements.txt +33 -0
- package/tools/__init__.py +0 -0
- package/tools/dataset_tools.py +803 -0
- package/ui/__init__.py +3 -0
- package/ui/copilot.py +200 -0
- package/ui/export.py +800 -0
- package/update_appjs.py +54 -0
- package/update_llm.py +21 -0
- package/update_main.py +20 -0
- package/web/app.js +3142 -0
- package/web/index.html +1105 -0
- package/web/style.css +2561 -0
- package/workflows/__init__.py +0 -0
- package/workflows/pipeline.py +254 -0
package/ui/__init__.py
ADDED
package/ui/copilot.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# Crewlyze
|
|
2
|
+
# Copyright (c) 2025 Sowmiyan S
|
|
3
|
+
# Licensed under the MIT License
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Interactive AI Data Copilot module.
|
|
7
|
+
|
|
8
|
+
Accepts a natural language query from the user, generates Python code via LLM,
|
|
9
|
+
executes it securely in an isolated subprocess, and returns text results and
|
|
10
|
+
new dynamically generated visualizations.
|
|
11
|
+
|
|
12
|
+
Key improvements:
|
|
13
|
+
- Column names, dtypes, and per-column statistics are injected into the prompt
|
|
14
|
+
so the LLM generates accurate, runnable code without guessing column names.
|
|
15
|
+
- Supports /column slash command prefix for column-aware queries.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import os
|
|
19
|
+
import re
|
|
20
|
+
import sys
|
|
21
|
+
import textwrap
|
|
22
|
+
import uuid
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
|
|
25
|
+
import pandas as pd
|
|
26
|
+
from crewai import LLM
|
|
27
|
+
from config.llm_config import get_llm_params
|
|
28
|
+
from tools.dataset_tools import _run_in_subprocess, _strip_markdown_fences, read_csv_robust
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
# Column schema builder
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
def _build_column_context(csv_path: str, max_rows: int = 500) -> str:
|
|
36
|
+
"""
|
|
37
|
+
Load the CSV and build a compact column schema string for injection into
|
|
38
|
+
the LLM prompt. Includes dtypes, missing%, and key statistics so the LLM
|
|
39
|
+
can write correct, runnable pandas code without hallucinating column names.
|
|
40
|
+
"""
|
|
41
|
+
try:
|
|
42
|
+
df = read_csv_robust(csv_path, nrows=max_rows)
|
|
43
|
+
except Exception as exc:
|
|
44
|
+
return f"[Could not load dataset: {exc}]"
|
|
45
|
+
|
|
46
|
+
lines = [
|
|
47
|
+
f"Dataset: {max_rows if len(df) == max_rows else len(df)} rows × {len(df.columns)} columns",
|
|
48
|
+
"",
|
|
49
|
+
"Columns (name | dtype | missing% | stats):",
|
|
50
|
+
]
|
|
51
|
+
for col in df.columns:
|
|
52
|
+
dtype = df[col].dtype
|
|
53
|
+
miss_pct = round(df[col].isnull().sum() / max(len(df), 1) * 100, 1)
|
|
54
|
+
if pd.api.types.is_numeric_dtype(dtype):
|
|
55
|
+
stats = (
|
|
56
|
+
f"min={df[col].min():.4g}, "
|
|
57
|
+
f"mean={df[col].mean():.4g}, "
|
|
58
|
+
f"max={df[col].max():.4g}, "
|
|
59
|
+
f"std={df[col].std():.4g}"
|
|
60
|
+
)
|
|
61
|
+
else:
|
|
62
|
+
top3 = df[col].dropna().value_counts().head(3).index.tolist()
|
|
63
|
+
stats = "top: " + ", ".join(str(v) for v in top3) if top3 else "—"
|
|
64
|
+
lines.append(f" - {col!r}: {dtype} | missing={miss_pct}% | {stats}")
|
|
65
|
+
|
|
66
|
+
lines.append("")
|
|
67
|
+
lines.append("Sample rows (first 3):")
|
|
68
|
+
for _, row in df.head(3).iterrows():
|
|
69
|
+
lines.append(" " + str(dict(row)))
|
|
70
|
+
|
|
71
|
+
return "\n".join(lines)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
# Main copilot entry point
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
def _build_copilot_prompt(query: str, csv_path: str, column_context: str, plot_path: Path) -> str:
    """Render the code-generation prompt for one copilot query."""
    # Dedent the template BEFORE substituting values: the schema text is
    # multi-line and its continuation lines start at column 0, which would
    # otherwise leave textwrap.dedent with an empty common margin (a no-op).
    template = textwrap.dedent("""
        You are an expert AI Data Analyst. You have access to a CSV dataset at:
        FILE_PATH = {file_path!r}

        === DATASET SCHEMA ===
        {column_context}
        =====================

        USER QUERY: "{query}"

        INSTRUCTIONS:
        1. Read the dataset: df = pd.read_csv(FILE_PATH)
        2. Use ONLY the column names listed in the dataset schema (exact spelling, case-sensitive).
        3. Perform any required analysis, aggregation, computation, or modifications.
        4. Print a clear, detailed, and nicely formatted answer to stdout detailing the results or actions taken.
           - Use rich Markdown formatting (e.g. Markdown tables, bulleted lists, bold text, headers) to structure the output like a professional report.
           - If the user asks for a table or for N values, print a Markdown table.
        5. If the query asks to modify, clean, fix, rename, delete columns, drop rows, replace missing values, or update values in the dataset:
           - Perform the operation on the DataFrame `df`.
           - Save the modified DataFrame back to the CSV file at the end of the script: `df.to_csv(FILE_PATH, index=False)`.
           - Print a confirmation message to stdout using Markdown (e.g., bulleted list) explaining exactly what dataset modifications were made.
        6. If the query asks for a chart/plot/graph:
           - You can use either Matplotlib/Seaborn OR Plotly.
           - If using Matplotlib/Seaborn: Call `import matplotlib; matplotlib.use('Agg')` BEFORE importing pyplot. Save with `plt.savefig({plot_file!r})`.
           - If using Plotly: Do NOT use `fig.write_image()`. Instead, you MUST export the figure using the Kaleido API directly:
             ```python
             import kaleido
             kaleido.write_fig_sync(fig, {plot_file!r})
             ```
           - Generate a professional chart. Apply any specific styles, colors, layouts, grids, or palettes requested by the user.

        Return ONLY valid Python code inside a ```python ... ``` block.
        Do NOT include explanations or text outside the code block.
    """).strip()
    # `!r` emits a correctly escaped Python string literal, so a path that
    # contains a quote character cannot break the generated script.
    return template.format(
        file_path=Path(csv_path).as_posix(),
        column_context=column_context,
        query=query,
        plot_file=plot_path.as_posix(),
    )


def run_copilot_query(query: str, csv_path: str, output_dir_str: str) -> dict:
    """
    Answer a natural-language query about a CSV by running LLM-generated code.

    The column schema produced by ``_build_column_context`` is injected into
    the prompt, the model's reply is stripped of Markdown fences and executed
    through ``_run_in_subprocess``, and the captured output is returned.

    Args:
        query: The user's natural-language request.
        csv_path: Path of the CSV file the generated code should read
            (and, for modification requests, overwrite).
        output_dir_str: Directory in which a generated plot is saved; it is
            created if missing and earlier ``copilot_plot_*.png`` files in it
            are removed.

    Returns:
        A dict with keys ``success`` (bool), ``text`` (answer or error
        message) and ``plot_path`` (str path of a non-empty plot written by
        the generated code, else ``None``). Errors are reported through this
        dict rather than raised.
    """
    # 0. A blank query has nothing to answer; do not spend an LLM call or
    #    execute generated code for it.
    if not query or not query.strip():
        return {
            "success": False,
            "text": "Please enter a query for the copilot.",
            "plot_path": None,
        }

    # 1. Initialise LLM from current session env vars
    try:
        llm = LLM(**get_llm_params())
    except Exception as exc:
        return {
            "success": False,
            "text": f"LLM not configured: {exc}\nSet your API key in the sidebar.",
            "plot_path": None,
        }

    try:
        # 2. Build column context (prevents wrong-column NameErrors)
        column_context = _build_column_context(csv_path)

        # 3. Prepare plot output path
        output_dir = Path(output_dir_str)
        output_dir.mkdir(parents=True, exist_ok=True)
        plot_path = output_dir / f"copilot_plot_{_uuid_short()}.png"

        # Clean up previous copilot plots (best-effort: a locked or already
        # removed file must not block the new query).
        for prev in output_dir.glob("copilot_plot_*.png"):
            try:
                prev.unlink(missing_ok=True)
            except OSError:
                pass

        # 4. Build LLM prompt with full column context
        prompt = _build_copilot_prompt(query, csv_path, column_context, plot_path)

        # 5. Generate code
        response = llm.call([{"role": "user", "content": prompt}])
        raw_code = response if isinstance(response, str) else str(response)
        code = _strip_markdown_fences(raw_code)

        if not code.strip():
            return {
                "success": False,
                "text": "The model returned empty code. Try rephrasing your query.",
                "plot_path": None,
            }

        # 6. Execute in subprocess
        # NOTE(review): this runs model-generated code, and the prompt
        # explicitly allows it to overwrite the CSV. Isolation is whatever
        # `_run_in_subprocess` provides — confirm it is adequate for
        # untrusted queries.
        success, exec_output = _run_in_subprocess(code)

        if not success:
            return {
                "success": False,
                "text": f"Execution error:\n```\n{exec_output}\n```",
                "plot_path": None,
            }

        # Only report a plot the generated code actually wrote.
        plot_saved = plot_path.exists() and plot_path.stat().st_size > 0
        final_plot_path = str(plot_path) if plot_saved else None

        printed = exec_output.strip()
        if printed in ("", "(no output)"):
            answer_text = "Query executed successfully (no text output)."
        else:
            answer_text = printed
        return {"success": True, "text": answer_text, "plot_path": final_plot_path}

    except Exception as exc:
        # Top-level boundary: callers expect a result dict, never an exception.
        return {
            "success": False,
            "text": f"Copilot error: {exc}",
            "plot_path": None,
        }
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _uuid_short() -> str:
|
|
188
|
+
return uuid.uuid4().hex[:6]
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
# ---------------------------------------------------------------------------
|
|
192
|
+
# Column list helper (used by the /column slash picker in app.py)
|
|
193
|
+
# ---------------------------------------------------------------------------
|
|
194
|
+
|
|
195
|
+
def get_column_names(csv_path: str) -> list[str]:
    """
    List the CSV's column names without loading any data rows.

    Only the header is read (``nrows=0``). Any failure while reading or
    extracting the columns yields an empty list instead of an exception.
    """
    try:
        header_only = read_csv_robust(csv_path, nrows=0)
        return [*header_only.columns]
    except Exception:
        return []
|