crewlyze 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/.dockerignore +12 -0
  2. package/.gitattributes +2 -0
  3. package/CHANGELOG.md +86 -0
  4. package/Dockerfile +21 -0
  5. package/LICENSE +21 -0
  6. package/README.md +139 -0
  7. package/USAGE.md +106 -0
  8. package/agents/__init__.py +0 -0
  9. package/agents/cleaner.py +38 -0
  10. package/agents/insights.py +44 -0
  11. package/agents/relation.py +36 -0
  12. package/agents/visualizer.py +41 -0
  13. package/assets/badge_crewai.svg +4 -0
  14. package/assets/badge_matplotlib.svg +4 -0
  15. package/assets/badge_ollama.svg +4 -0
  16. package/assets/badge_pandas.svg +4 -0
  17. package/assets/badge_seaborn.svg +4 -0
  18. package/assets/branding_image.png +0 -0
  19. package/assets/complete_workflow.svg +216 -0
  20. package/assets/favicon.png +0 -0
  21. package/assets/logo.png +0 -0
  22. package/assets/stars.svg +12 -0
  23. package/bin/crewlyze.js +79 -0
  24. package/config/README.md +129 -0
  25. package/config/__init__.py +1 -0
  26. package/config/context.py +16 -0
  27. package/config/llm_config.py +300 -0
  28. package/config/metrics_tracker.py +70 -0
  29. package/crew.py +870 -0
  30. package/crewlyze-3.1.0.tgz +0 -0
  31. package/fix_syntax.py +54 -0
  32. package/main.py +1279 -0
  33. package/package.json +22 -0
  34. package/pyproject.toml +32 -0
  35. package/requirements.txt +33 -0
  36. package/tools/__init__.py +0 -0
  37. package/tools/dataset_tools.py +803 -0
  38. package/ui/__init__.py +3 -0
  39. package/ui/copilot.py +200 -0
  40. package/ui/export.py +800 -0
  41. package/update_appjs.py +54 -0
  42. package/update_llm.py +21 -0
  43. package/update_main.py +20 -0
  44. package/web/app.js +3142 -0
  45. package/web/index.html +1105 -0
  46. package/web/style.css +2561 -0
  47. package/workflows/__init__.py +0 -0
  48. package/workflows/pipeline.py +254 -0
package/ui/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ # Crewlyze
2
+ # Copyright (c) 2025 Sowmiyan S
3
+ # Licensed under the MIT License
package/ui/copilot.py ADDED
@@ -0,0 +1,200 @@
1
+ # Crewlyze
2
+ # Copyright (c) 2025 Sowmiyan S
3
+ # Licensed under the MIT License
4
+
5
+ """
6
+ Interactive AI Data Copilot module.
7
+
8
+ Accepts a natural language query from the user, generates Python code via LLM,
9
+ executes it securely in an isolated subprocess, and returns text results and
10
+ new dynamically generated visualizations.
11
+
12
+ Key improvements:
13
+ - Column names, dtypes, and per-column statistics are injected into the prompt
14
+ so the LLM generates accurate, runnable code without guessing column names.
15
+ - Supports /column slash command prefix for column-aware queries.
16
+ """
17
+
18
+ import os
19
+ import re
20
+ import sys
21
+ import textwrap
22
+ import uuid
23
+ from pathlib import Path
24
+
25
+ import pandas as pd
26
+ from crewai import LLM
27
+ from config.llm_config import get_llm_params
28
+ from tools.dataset_tools import _run_in_subprocess, _strip_markdown_fences, read_csv_robust
29
+
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # Column schema builder
33
+ # ---------------------------------------------------------------------------
34
+
35
def _build_column_context(csv_path: str, max_rows: int = 500) -> str:
    """
    Build a compact schema summary of a CSV file for injection into the LLM prompt.

    At most ``max_rows`` rows are requested from ``read_csv_robust``, so every
    figure below (row count, missing%, statistics) describes that loaded
    sample rather than necessarily the whole file.

    Args:
        csv_path: Path of the CSV file to summarise.
        max_rows: Maximum number of rows to load for the summary.

    Returns:
        A newline-joined string containing the sample size, one line per
        column (name, dtype, missing percentage, key statistics) and the
        first three rows. If the file cannot be loaded, a single bracketed
        ``[Could not load dataset: ...]`` message is returned instead of
        raising.
    """
    try:
        df = read_csv_robust(csv_path, nrows=max_rows)
    except Exception as exc:
        return f"[Could not load dataset: {exc}]"

    def _stat(series, method_name: str) -> str:
        # pandas treats boolean columns as numeric, but their min()/max()
        # scalars do not accept a numeric format spec; casting to float first
        # keeps one odd column from crashing the whole schema summary.
        try:
            return f"{float(getattr(series, method_name)()):.4g}"
        except (TypeError, ValueError):
            return "n/a"

    row_count = len(df)
    lines = [
        f"Dataset: {row_count} rows × {len(df.columns)} columns",
        "",
        "Columns (name | dtype | missing% | stats):",
    ]
    for col in df.columns:
        series = df[col]
        dtype = series.dtype
        # max(..., 1) avoids a division by zero for an empty frame.
        miss_pct = round(series.isnull().sum() / max(row_count, 1) * 100, 1)
        if pd.api.types.is_numeric_dtype(dtype):
            stats = (
                f"min={_stat(series, 'min')}, "
                f"mean={_stat(series, 'mean')}, "
                f"max={_stat(series, 'max')}, "
                f"std={_stat(series, 'std')}"
            )
        else:
            top3 = series.dropna().value_counts().head(3).index.tolist()
            stats = "top: " + ", ".join(str(v) for v in top3) if top3 else "—"
        lines.append(f" - {col!r}: {dtype} | missing={miss_pct}% | {stats}")

    lines.append("")
    lines.append("Sample rows (first 3):")
    # to_dict(orient="records") keeps each column's own dtype and yields plain
    # Python scalars, so the rendered rows contain no NumPy scalar wrappers
    # and integer columns are not upcast to float as with iterrows().
    for record in df.head(3).to_dict(orient="records"):
        lines.append(" " + str(record))

    return "\n".join(lines)
72
+
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # Main copilot entry point
76
+ # ---------------------------------------------------------------------------
77
+
78
def _build_copilot_prompt(query: str, csv_path: str, plot_path: Path, column_context: str) -> str:
    """Render the code-generation prompt sent to the LLM for one copilot query."""
    # The static template is dedented BEFORE values are substituted. The
    # schema text is multi-line and starts at column 0, so dedenting after
    # interpolation would find no common margin and leave the source
    # indentation in the prompt.
    template = textwrap.dedent("""
        You are an expert AI Data Analyst. You have access to a CSV dataset at:
        FILE_PATH = {file_path!r}

        === DATASET SCHEMA ===
        {column_context}
        =====================

        USER QUERY: "{query}"

        INSTRUCTIONS:
        1. Read the dataset: df = pd.read_csv(FILE_PATH)
        2. Use ONLY the column names listed in the dataset schema (exact spelling, case-sensitive).
        3. Perform any required analysis, aggregation, computation, or modifications.
        4. Print a clear, detailed, and nicely formatted answer to stdout detailing the results or actions taken.
           - Use rich Markdown formatting (e.g. Markdown tables, bulleted lists, bold text, headers) to structure the output like a professional report.
           - If the user asks for a table or for N values, print a Markdown table.
        5. If the query asks to modify, clean, fix, rename, delete columns, drop rows, replace missing values, or update values in the dataset:
           - Perform the operation on the DataFrame `df`.
           - Save the modified DataFrame back to the CSV file at the end of the script: `df.to_csv(FILE_PATH, index=False)`.
           - Print a confirmation message to stdout using Markdown (e.g., bulleted list) explaining exactly what dataset modifications were made.
        6. If the query asks for a chart/plot/graph:
           - You can use either Matplotlib/Seaborn OR Plotly.
           - If using Matplotlib/Seaborn: Call `import matplotlib; matplotlib.use('Agg')` BEFORE importing pyplot. Save with `plt.savefig({plot_file!r})`.
           - If using Plotly: Do NOT use `fig.write_image()`. Instead, you MUST export the figure using the Kaleido API directly:
             ```python
             import kaleido
             kaleido.write_fig_sync(fig, {plot_file!r})
             ```
           - Generate a professional chart. Apply any specific styles, colors, layouts, grids, or palettes requested by the user.

        Return ONLY valid Python code inside a ```python ... ``` block.
        Do NOT include explanations or text outside the code block.
    """).strip()
    # !r emits a valid Python string literal even when a path contains a
    # quote character; for ordinary paths the output is the same
    # single-quoted text as before.
    return template.format(
        file_path=Path(csv_path).as_posix(),
        column_context=column_context,
        query=query,
        plot_file=plot_path.as_posix(),
    )


def run_copilot_query(query: str, csv_path: str, output_dir_str: str) -> dict:
    """
    Answer a natural-language query about a CSV by generating and running code.

    The current LLM is asked to write Python for the query, the code is run
    through ``_run_in_subprocess``, and the captured output is returned
    together with the path of any plot the code saved. The column schema
    (names, dtypes, stats) is injected into the prompt so the generated code
    uses real column names.

    NOTE(review): the executed code is LLM-generated and the prompt allows it
    to overwrite ``csv_path``; isolation depends entirely on
    ``_run_in_subprocess``, which is defined in another module.

    Args:
        query: The user's question or instruction.
        csv_path: Path of the dataset the generated code should read.
        output_dir_str: Directory for the generated plot; created if missing.
            Earlier ``copilot_plot_*.png`` files in it are deleted.

    Returns:
        A dict with keys ``success`` (bool), ``text`` (answer or error
        message) and ``plot_path`` (str path of the saved plot, or ``None``).
        Failures are reported through this dict rather than raised.
    """
    # 1. Initialise LLM from current session env vars
    try:
        llm_params = get_llm_params()
        llm = LLM(**llm_params)
    except Exception as exc:
        return {
            "success": False,
            "text": f"LLM not configured: {exc}\nSet your API key in the sidebar.",
            "plot_path": None,
        }

    # Everything from here on is guarded, so filesystem or schema failures
    # are reported as a structured error instead of escaping to the caller.
    try:
        # 2. Build column context (prevents wrong-column NameErrors)
        column_context = _build_column_context(csv_path)

        # 3. Prepare plot output path
        output_dir = Path(output_dir_str)
        output_dir.mkdir(parents=True, exist_ok=True)
        plot_path = output_dir / f"copilot_plot_{_uuid_short()}.png"

        # Clean up previous copilot plots (best-effort: a file that cannot be
        # removed must not block the new query).
        for prev in output_dir.glob("copilot_plot_*.png"):
            try:
                prev.unlink(missing_ok=True)
            except OSError:
                pass

        # 4. Build LLM prompt with full column context
        prompt = _build_copilot_prompt(query, csv_path, plot_path, column_context)

        # 5. Generate code
        response = llm.call([{"role": "user", "content": prompt}])
        raw_code = response if isinstance(response, str) else str(response)
        code = _strip_markdown_fences(raw_code)

        if not code.strip():
            return {
                "success": False,
                "text": "The model returned empty code. Try rephrasing your query.",
                "plot_path": None,
            }

        # 6. Execute the generated code in a subprocess
        success, exec_output = _run_in_subprocess(code)

        if not success:
            return {
                "success": False,
                "text": f"Execution error:\n```\n{exec_output}\n```",
                "plot_path": None,
            }

        # Only report a plot that was actually written and is non-empty.
        plot_saved = plot_path.exists() and plot_path.stat().st_size > 0
        final_plot_path = str(plot_path) if plot_saved else None

        cleaned_output = exec_output.strip()
        if cleaned_output in ("", "(no output)"):
            answer_text = "Query executed successfully (no text output)."
        else:
            answer_text = cleaned_output
        return {"success": True, "text": answer_text, "plot_path": final_plot_path}

    except Exception as exc:
        return {
            "success": False,
            "text": f"Copilot error: {exc}",
            "plot_path": None,
        }
185
+
186
+
187
+ def _uuid_short() -> str:
188
+ return uuid.uuid4().hex[:6]
189
+
190
+
191
+ # ---------------------------------------------------------------------------
192
+ # Column list helper (used by the /column slash picker in app.py)
193
+ # ---------------------------------------------------------------------------
194
+
195
def get_column_names(csv_path: str) -> list[str]:
    """
    List the column names of the CSV at ``csv_path``.

    Only the header is requested (``nrows=0``). Any failure while loading or
    reading the columns yields an empty list instead of an exception.
    """
    try:
        header_frame = read_csv_robust(csv_path, nrows=0)
        names = list(header_frame.columns)
    except Exception:
        names = []
    return names