m8flow 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundled/backend/Dockerfile +41 -0
- package/bundled/backend/add_nodes.py +416 -0
- package/bundled/backend/api/routes/appstate.py +102 -0
- package/bundled/backend/api/routes/flows.py +64 -5
- package/bundled/backend/api/routes/nodes.py +25 -1
- package/bundled/backend/core/code_validator.py +2 -0
- package/bundled/backend/core/executor.py +19 -3
- package/bundled/backend/main.py +16 -4
- package/bundled/backend/requirements.txt +27 -6
- package/bundled/backend/services/llm_service.py +984 -108
- package/bundled/backend/services/self_healer.py +1 -1
- package/bundled/backend/temp.json +0 -0
- package/bundled/backend/templates.json +0 -0
- package/bundled/backend/templates.py +2907 -745
- package/bundled/backend/warmup.py +65 -0
- package/bundled/frontend-dist/assets/index-CKUZ27n8.css +1 -0
- package/bundled/frontend-dist/assets/index-DNaB6zf0.js +46 -0
- package/bundled/frontend-dist/index.html +2 -2
- package/lib/backend.js +184 -35
- package/lib/ports.js +42 -0
- package/lib/run.js +42 -15
- package/lib/setup.js +143 -59
- package/package.json +5 -4
- package/scripts/check-docker.js +35 -0
- package/bundled/frontend-dist/assets/index-BAQ3lKsy.css +0 -1
- package/bundled/frontend-dist/assets/index-CZCCzeUC.js +0 -41
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""LLM service — OpenRouter-powered flow generation with per-task model routing."""
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
+
import time
|
|
4
5
|
import httpx
|
|
5
6
|
from contextvars import ContextVar
|
|
6
7
|
from config import config
|
|
@@ -23,38 +24,102 @@ _OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
|
|
|
23
24
|
_TIMEOUT_GENERATE = 120
|
|
24
25
|
_TIMEOUT_UPDATE = 180
|
|
25
26
|
|
|
26
|
-
# Per-task model routing
|
|
27
|
+
# Per-task model routing — slugs verified live against OpenRouter API (May 2026)
|
|
27
28
|
_MODELS = {
|
|
28
|
-
|
|
29
|
-
"
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
"
|
|
35
|
-
|
|
36
|
-
|
|
29
|
+
# Reasoning/Architecting — best available free reasoning model
|
|
30
|
+
"architect": "nvidia/nemotron-3-super-120b-a12b:free",
|
|
31
|
+
|
|
32
|
+
# Core Generation/Updating — large, instruction-tuned free model
|
|
33
|
+
"generate": "meta-llama/llama-3.3-70b-instruct:free",
|
|
34
|
+
"refine": "meta-llama/llama-3.3-70b-instruct:free",
|
|
35
|
+
"update": "meta-llama/llama-3.3-70b-instruct:free",
|
|
36
|
+
|
|
37
|
+
# Debugging/Healing — strong reasoning for bug analysis
|
|
38
|
+
"debug": "nvidia/nemotron-3-super-120b-a12b:free",
|
|
39
|
+
"heal": "nvidia/nemotron-3-super-120b-a12b:free",
|
|
40
|
+
|
|
41
|
+
# Explaining/Suggesting — fast free model
|
|
42
|
+
"explain": "openai/gpt-oss-20b:free",
|
|
43
|
+
"suggest": "openai/gpt-oss-20b:free",
|
|
44
|
+
|
|
45
|
+
# Safety Nets — verified live fallbacks
|
|
46
|
+
"fallback": "google/gemma-4-31b-it:free",
|
|
47
|
+
"lastresort": "meta-llama/llama-3.2-3b-instruct:free",
|
|
37
48
|
}
|
|
38
49
|
|
|
50
|
+
# ── Rate-limit cooldown cache ─────────────────────────────────────────────────
|
|
51
|
+
# Maps model_slug -> timestamp of last 429. Models in cooldown are skipped for
|
|
52
|
+
# _RATE_LIMIT_TTL seconds so we jump straight to a working model instead of
|
|
53
|
+
# burning time on a known-rate-limited one.
|
|
54
|
+
_RATE_LIMIT_CACHE: dict[str, float] = {}
|
|
55
|
+
_RATE_LIMIT_TTL = 90 # seconds
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _is_rate_limited(model: str) -> bool:
    """Report whether ``model`` is still inside its rate-limit cooldown.

    The timestamp stored for the model in ``_RATE_LIMIT_CACHE`` is compared
    with the current wall-clock time. An entry younger than
    ``_RATE_LIMIT_TTL`` seconds means the model is still cooling down; an
    older entry is evicted so the model becomes eligible again.

    Args:
        model: Model slug used as the cache key.

    Returns:
        True while the cooldown is active, False otherwise (including when
        the model has no recorded rate limit).
    """
    marked_at = _RATE_LIMIT_CACHE.get(model)
    if marked_at is not None:
        elapsed = time.time() - marked_at
        if elapsed < _RATE_LIMIT_TTL:
            return True
        # Cooldown has lapsed: drop the stale entry so the model is retried.
        del _RATE_LIMIT_CACHE[model]
    return False
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _mark_rate_limited(model: str) -> None:
    """Start (or restart) the rate-limit cooldown for ``model``.

    Stores the current wall-clock time under the model slug in
    ``_RATE_LIMIT_CACHE`` and emits a debug log line with the cooldown length.

    Args:
        model: Model slug that has just been rate limited.
    """
    now = time.time()
    _RATE_LIMIT_CACHE[model] = now
    logger.debug("Rate-limit cooldown started for %s (%ds)", model, _RATE_LIMIT_TTL)
|
|
73
|
+
|
|
39
74
|
|
|
40
75
|
# ── Catalogue helpers ──────────────────────────────────────────────────────────
|
|
41
76
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
77
|
+
_CATALOGUE_CACHE: str | None = None # built-in templates, parsed once
|
|
78
|
+
|
|
79
|
+
def _template_catalogue(custom_components: list[dict] | None = None) -> str:
    """Build the component catalogue text: id, category, inputs, outputs.

    Built-in templates are parsed once and the rendered text is stored in the
    module-level ``_CATALOGUE_CACHE``. Custom components are rendered on every
    call and appended after the cached built-in section.

    Args:
        custom_components: Optional list of custom component dicts. Each may
            carry ``id``, ``label`` and a ``schema`` dict with ``inputs`` and
            ``outputs`` lists. Entries with a missing/``None`` schema, or
            inputs/outputs that are not dicts with a ``name``, are tolerated
            (the malformed parts are skipped) instead of raising.

    Returns:
        The catalogue as a single newline-separated string.
    """
    global _CATALOGUE_CACHE

    def _render(header: str, data_ins: list, field_ins: list, outs: list) -> str:
        # Shared three-line entry layout for built-in and custom components.
        return (
            f"{header}\n"
            f" inputs : {data_ins or '(none)'} fields: {field_ins or '(none)'}\n"
            f" outputs: {outs or '(none)'}"
        )

    def _named(items) -> list[dict]:
        # Keep only well-formed port descriptions (dicts that carry a name).
        return [p for p in (items or []) if isinstance(p, dict) and "name" in p]

    # ── Built-ins: parse once, then serve from cache ──────────────────────────
    if _CATALOGUE_CACHE is None:
        from core.parser import parse_node_code

        lines: list[str] = []
        for t in TEMPLATES:
            try:
                schema = parse_node_code(t["code"])
                data_ins = [i.name for i in schema.inputs if i.kind == "data"]
                field_ins = [
                    f"{i.name}:{i.kind}={i.default}"
                    for i in schema.inputs
                    if i.kind != "data"
                ]
                outs = [o.name for o in schema.outputs]
            except Exception:
                # Best effort: an unparsable template is still listed, just
                # without port details. Log it so the failure is discoverable.
                logger.debug(
                    "Could not parse template %r for the catalogue",
                    t.get("id"),
                    exc_info=True,
                )
                data_ins, field_ins, outs = [], [], []
            lines.append(_render(f"{t['id']} [{t['category']}]", data_ins, field_ins, outs))
        _CATALOGUE_CACHE = "\n".join(lines)
        logger.debug("Template catalogue cached (%d templates)", len(TEMPLATES))

    if not custom_components:
        return _CATALOGUE_CACHE

    # ── Custom nodes: always fresh ────────────────────────────────────────────
    custom_lines = ["\n=== USER CUSTOM COMPONENTS (Preferred if applicable) ==="]
    for c in custom_components:
        # `or {}` also covers an explicit `"schema": None`.
        schema = c.get("schema") or {}
        inputs = _named(schema.get("inputs"))
        outputs = _named(schema.get("outputs"))
        data_ins = [i["name"] for i in inputs if i.get("kind") == "data"]
        field_ins = [
            f"{i['name']}:{i.get('kind')}={i.get('default')}"
            for i in inputs
            if i.get("kind") != "data"
        ]
        outs = [o["name"] for o in outputs]
        custom_lines.append(
            _render(f"{c.get('id')} [Custom] \"{c.get('label')}\"", data_ins, field_ins, outs)
        )
    return _CATALOGUE_CACHE + "\n".join(custom_lines)
|
|
58
123
|
|
|
59
124
|
|
|
60
125
|
def _allowed_type_ids() -> set[str]:
|
|
@@ -65,6 +130,366 @@ def _allowed_type_ids() -> set[str]:
|
|
|
65
130
|
return ids
|
|
66
131
|
|
|
67
132
|
|
|
133
|
+
# ── Pre-flight analysis ────────────────────────────────────────────────────────
|
|
134
|
+
|
|
135
|
+
def _determine_pre_flight(profile: dict, prompt: str, context: str | None, csv_path: str | None = None) -> dict:
|
|
136
|
+
"""
|
|
137
|
+
Deterministic data-driven analysis computed BEFORE any LLM call.
|
|
138
|
+
|
|
139
|
+
Uses pandas to analyse the ACTUAL data (not just regex on the prompt), so the
|
|
140
|
+
LLM receives ground-truth facts — not guesses — about the task type and
|
|
141
|
+
preprocessing requirements.
|
|
142
|
+
"""
|
|
143
|
+
import re
|
|
144
|
+
import pandas as pd
|
|
145
|
+
import numpy as np
|
|
146
|
+
|
|
147
|
+
dtypes = profile.get("dtypes", {})
|
|
148
|
+
missing = profile.get("missing", {})
|
|
149
|
+
numeric_summary = profile.get("numeric_summary", {})
|
|
150
|
+
categorical_summary = profile.get("categorical_summary", {})
|
|
151
|
+
shape = profile.get("shape", [0, 0])
|
|
152
|
+
|
|
153
|
+
# ── Step 1: Find the target column ────────────────────────────────────────
|
|
154
|
+
# Priority: explicit mention in prompt/context > heuristic column names.
|
|
155
|
+
|
|
156
|
+
target_hint: str | None = None
|
|
157
|
+
search_text = (prompt + " " + (context or "")).lower()
|
|
158
|
+
|
|
159
|
+
# (a) Regex extraction from user text
|
|
160
|
+
for pattern in [
|
|
161
|
+
r"predict\s+(?:the\s+)?['\"]?(\w+)['\"]?",
|
|
162
|
+
r"target\s+(?:(?:column|variable|col)\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
|
|
163
|
+
r"classif(?:y|ication)\s+(?:the\s+)?['\"]?(\w+)['\"]?",
|
|
164
|
+
r"label\s+(?:column\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
|
|
165
|
+
r"output\s+(?:column\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
|
|
166
|
+
r"y\s*=\s*['\"]?(\w+)['\"]?",
|
|
167
|
+
]:
|
|
168
|
+
m = re.search(pattern, search_text)
|
|
169
|
+
if m:
|
|
170
|
+
candidate = m.group(1)
|
|
171
|
+
# Validate the candidate actually exists in the data
|
|
172
|
+
if candidate in dtypes or candidate in numeric_summary or candidate in categorical_summary:
|
|
173
|
+
target_hint = candidate
|
|
174
|
+
break
|
|
175
|
+
|
|
176
|
+
# (b) If still unknown, use heuristic column-name scoring on the real columns
|
|
177
|
+
if target_hint is None:
|
|
178
|
+
TARGET_KEYWORDS = [
|
|
179
|
+
"target", "label", "class", "output", "y", "result",
|
|
180
|
+
"outcome", "diagnosis", "status", "type", "category",
|
|
181
|
+
"survived", "churn", "default", "fraud", "price",
|
|
182
|
+
"salary", "value", "score", "sales", "revenue", "cost",
|
|
183
|
+
"medv", "charges", "fare",
|
|
184
|
+
]
|
|
185
|
+
all_columns = list(dtypes.keys())
|
|
186
|
+
best_col: str | None = None
|
|
187
|
+
best_score = -1
|
|
188
|
+
|
|
189
|
+
for col in all_columns:
|
|
190
|
+
col_lower = col.lower().replace("_", " ").replace("-", " ")
|
|
191
|
+
score = 0
|
|
192
|
+
|
|
193
|
+
# Keyword match against column name
|
|
194
|
+
for kw in TARGET_KEYWORDS:
|
|
195
|
+
if kw in col_lower:
|
|
196
|
+
score += 3
|
|
197
|
+
break
|
|
198
|
+
# Last column is commonly the target in many datasets
|
|
199
|
+
if col == all_columns[-1]:
|
|
200
|
+
score += 2
|
|
201
|
+
# Column mentioned in prompt text
|
|
202
|
+
if col_lower in search_text or col.lower() in search_text:
|
|
203
|
+
score += 4
|
|
204
|
+
|
|
205
|
+
if score > best_score:
|
|
206
|
+
best_score = score
|
|
207
|
+
best_col = col
|
|
208
|
+
|
|
209
|
+
if best_col and best_score >= 2:
|
|
210
|
+
target_hint = best_col
|
|
211
|
+
|
|
212
|
+
# ── Step 2: Determine problem type from ACTUAL DATA ───────────────────────
|
|
213
|
+
# Initialize all new keys upfront to avoid KeyErrors downstream.
|
|
214
|
+
problem_type = "unknown"
|
|
215
|
+
target_analysis: dict = {}
|
|
216
|
+
is_imbalanced = False
|
|
217
|
+
needs_outlier_removal = False
|
|
218
|
+
|
|
219
|
+
if target_hint and csv_path:
|
|
220
|
+
try:
|
|
221
|
+
df = pd.read_csv(csv_path, nrows=5000)
|
|
222
|
+
|
|
223
|
+
# ── Outlier Detection: scan all numeric feature columns ──────────
|
|
224
|
+
for col in df.select_dtypes(include=[np.number]).columns:
|
|
225
|
+
if col == target_hint:
|
|
226
|
+
continue
|
|
227
|
+
col_series = df[col].dropna()
|
|
228
|
+
if len(col_series) == 0:
|
|
229
|
+
continue
|
|
230
|
+
col_mean = col_series.mean()
|
|
231
|
+
col_max = col_series.max()
|
|
232
|
+
# Flag if max is >10x the mean AND mean is non-trivially positive
|
|
233
|
+
if col_mean > 0 and col_max > 10 * col_mean:
|
|
234
|
+
needs_outlier_removal = True
|
|
235
|
+
break # one outlier column is enough to flag the dataset
|
|
236
|
+
|
|
237
|
+
if target_hint in df.columns:
|
|
238
|
+
col_data = df[target_hint].dropna()
|
|
239
|
+
dtype = col_data.dtype
|
|
240
|
+
|
|
241
|
+
if dtype == object or str(dtype) == "category":
|
|
242
|
+
# String/category column → always classification
|
|
243
|
+
n_unique = col_data.nunique()
|
|
244
|
+
problem_type = "classification"
|
|
245
|
+
target_analysis = {
|
|
246
|
+
"dtype": str(dtype),
|
|
247
|
+
"unique_values": int(n_unique),
|
|
248
|
+
"sample_values": col_data.unique()[:5].tolist(),
|
|
249
|
+
"reasoning": f"Categorical dtype with {n_unique} unique string values → classification",
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
elif dtype == bool or (dtype == int and col_data.nunique() <= 2):
|
|
253
|
+
# Boolean or binary integer → classification
|
|
254
|
+
problem_type = "classification"
|
|
255
|
+
target_analysis = {
|
|
256
|
+
"dtype": str(dtype),
|
|
257
|
+
"unique_values": int(col_data.nunique()),
|
|
258
|
+
"sample_values": col_data.unique()[:5].tolist(),
|
|
259
|
+
"reasoning": "Binary (0/1 or True/False) target → classification",
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
elif np.issubdtype(dtype, np.integer):
|
|
263
|
+
n_unique = col_data.nunique()
|
|
264
|
+
n_total = len(col_data)
|
|
265
|
+
unique_ratio = n_unique / max(n_total, 1)
|
|
266
|
+
if n_unique <= 20 or unique_ratio < 0.05:
|
|
267
|
+
problem_type = "classification"
|
|
268
|
+
target_analysis = {
|
|
269
|
+
"dtype": str(dtype),
|
|
270
|
+
"unique_values": int(n_unique),
|
|
271
|
+
"sample_values": sorted(col_data.unique().tolist())[:10],
|
|
272
|
+
"reasoning": f"Integer with only {n_unique} unique values ({unique_ratio:.1%} of rows) → likely class labels → classification",
|
|
273
|
+
}
|
|
274
|
+
else:
|
|
275
|
+
problem_type = "regression"
|
|
276
|
+
target_analysis = {
|
|
277
|
+
"dtype": str(dtype),
|
|
278
|
+
"unique_values": int(n_unique),
|
|
279
|
+
"min": float(col_data.min()),
|
|
280
|
+
"max": float(col_data.max()),
|
|
281
|
+
"mean": float(col_data.mean()),
|
|
282
|
+
"reasoning": f"Integer with {n_unique} unique values (high cardinality) → continuous → regression",
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
elif np.issubdtype(dtype, np.floating):
|
|
286
|
+
n_unique = col_data.nunique()
|
|
287
|
+
problem_type = "regression"
|
|
288
|
+
target_analysis = {
|
|
289
|
+
"dtype": str(dtype),
|
|
290
|
+
"unique_values": int(n_unique),
|
|
291
|
+
"min": float(col_data.min()),
|
|
292
|
+
"max": float(col_data.max()),
|
|
293
|
+
"mean": float(col_data.mean()),
|
|
294
|
+
"std": float(col_data.std()),
|
|
295
|
+
"reasoning": f"Floating-point target with {n_unique} unique values → continuous → regression",
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
else:
|
|
299
|
+
# Fallback: try to convert and check cardinality
|
|
300
|
+
try:
|
|
301
|
+
as_numeric = pd.to_numeric(col_data, errors="coerce")
|
|
302
|
+
if as_numeric.isna().mean() < 0.1:
|
|
303
|
+
n_unique = as_numeric.nunique()
|
|
304
|
+
problem_type = "regression" if n_unique > 20 else "classification"
|
|
305
|
+
target_analysis = {
|
|
306
|
+
"dtype": str(dtype),
|
|
307
|
+
"unique_values": int(n_unique),
|
|
308
|
+
"reasoning": f"Converted to numeric; {n_unique} unique values → {'regression' if n_unique > 20 else 'classification'}",
|
|
309
|
+
}
|
|
310
|
+
else:
|
|
311
|
+
problem_type = "classification"
|
|
312
|
+
target_analysis = {
|
|
313
|
+
"dtype": str(dtype),
|
|
314
|
+
"reasoning": "Could not convert to numeric → treating as classification",
|
|
315
|
+
}
|
|
316
|
+
except Exception:
|
|
317
|
+
problem_type = "classification"
|
|
318
|
+
|
|
319
|
+
# ── Class Imbalance Check (classification only) ──────────────
|
|
320
|
+
if problem_type == "classification":
|
|
321
|
+
try:
|
|
322
|
+
class_freqs = col_data.value_counts(normalize=True)
|
|
323
|
+
if class_freqs.min() < 0.10:
|
|
324
|
+
is_imbalanced = True
|
|
325
|
+
except Exception:
|
|
326
|
+
pass
|
|
327
|
+
|
|
328
|
+
except Exception as exc:
|
|
329
|
+
logger.warning("Pre-flight target analysis failed: %s", exc)
|
|
330
|
+
# Fall back to dtype-only heuristic using profile data
|
|
331
|
+
if target_hint:
|
|
332
|
+
dtype_str = str(dtypes.get(target_hint, "")).lower()
|
|
333
|
+
if any(t in dtype_str for t in ("object", "category", "bool", "str")):
|
|
334
|
+
problem_type = "classification"
|
|
335
|
+
elif target_hint in categorical_summary:
|
|
336
|
+
problem_type = "classification" if categorical_summary[target_hint].get("unique", 99) < 15 else "regression"
|
|
337
|
+
elif target_hint in numeric_summary:
|
|
338
|
+
problem_type = "regression"
|
|
339
|
+
|
|
340
|
+
elif target_hint:
|
|
341
|
+
# No CSV path — fall back to profile-based heuristic
|
|
342
|
+
dtype_str = str(dtypes.get(target_hint, "")).lower()
|
|
343
|
+
if any(t in dtype_str for t in ("object", "category", "bool", "str")):
|
|
344
|
+
problem_type = "classification"
|
|
345
|
+
elif target_hint in categorical_summary:
|
|
346
|
+
problem_type = "classification" if categorical_summary.get(target_hint, {}).get("unique", 99) < 15 else "regression"
|
|
347
|
+
elif target_hint in numeric_summary:
|
|
348
|
+
problem_type = "regression"
|
|
349
|
+
|
|
350
|
+
# ── Step 3: Preprocessing flags ───────────────────────────────────────────
|
|
351
|
+
# Detect categorical columns that need encoding (exclude the target itself)
|
|
352
|
+
cat_cols = {c for c in categorical_summary if c != target_hint}
|
|
353
|
+
num_cols = {c for c in numeric_summary if c != target_hint}
|
|
354
|
+
|
|
355
|
+
missing_cols: dict[str, float] = {
|
|
356
|
+
col: round(info.get("pct", 0), 1)
|
|
357
|
+
for col, info in missing.items()
|
|
358
|
+
if info.get("pct", 0) > 0
|
|
359
|
+
}
|
|
360
|
+
cardinality: dict[str, int] = {
|
|
361
|
+
col: info.get("unique", 0)
|
|
362
|
+
for col, info in categorical_summary.items()
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
needs_encoding = len(cat_cols) > 0
|
|
366
|
+
needs_scaling = len(num_cols) >= 2
|
|
367
|
+
needs_imputation = len(missing_cols) > 0
|
|
368
|
+
|
|
369
|
+
# ── Step 4: Recommend specific model based on problem type + data size ────
|
|
370
|
+
n_rows = shape[0] if shape else 0
|
|
371
|
+
n_cols = shape[1] if len(shape) > 1 else 0
|
|
372
|
+
|
|
373
|
+
if problem_type == "classification":
|
|
374
|
+
recommended_model = "random_forest_classifier" if n_rows >= 1000 else "logistic_regression"
|
|
375
|
+
recommended_metric_node = "classification_report"
|
|
376
|
+
elif problem_type == "regression":
|
|
377
|
+
recommended_model = "random_forest_regressor" if n_rows >= 1000 else "linear_regression"
|
|
378
|
+
recommended_metric_node = "regression_metrics"
|
|
379
|
+
else:
|
|
380
|
+
recommended_model = "random_forest_classifier"
|
|
381
|
+
recommended_metric_node = "classification_report"
|
|
382
|
+
|
|
383
|
+
return {
|
|
384
|
+
"target_hint": target_hint,
|
|
385
|
+
"problem_type": problem_type,
|
|
386
|
+
"target_analysis": target_analysis,
|
|
387
|
+
"missing_cols": missing_cols,
|
|
388
|
+
"cardinality": cardinality,
|
|
389
|
+
"needs_encoding": needs_encoding,
|
|
390
|
+
"needs_scaling": needs_scaling,
|
|
391
|
+
"needs_imputation": needs_imputation,
|
|
392
|
+
"needs_outlier_removal": needs_outlier_removal,
|
|
393
|
+
"is_imbalanced": is_imbalanced,
|
|
394
|
+
"n_rows": n_rows,
|
|
395
|
+
"n_cols": n_cols,
|
|
396
|
+
"categorical_cols": sorted(cat_cols),
|
|
397
|
+
"numeric_cols": sorted(num_cols),
|
|
398
|
+
"recommended_model": recommended_model,
|
|
399
|
+
"recommended_metric": recommended_metric_node,
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def _pre_flight_block(pf: dict) -> str:
    """Format the pre-flight analysis as an authoritative prompt block.

    Every key is read tolerantly, so a partial pre-flight dict yields a block
    with neutral defaults instead of raising ``KeyError``. For a complete dict
    the output is unchanged.

    Args:
        pf: Pre-flight analysis dict. Keys read: ``target_analysis``,
            ``problem_type``, ``target_hint``, ``n_rows``, ``n_cols``,
            ``missing_cols``, ``categorical_cols``, ``numeric_cols``,
            ``needs_encoding``, ``needs_scaling``, ``needs_imputation``,
            ``needs_outlier_removal``, ``is_imbalanced``,
            ``recommended_model`` and ``recommended_metric``.

    Returns:
        The newline-joined, box-framed text block.
    """
    analysis = pf.get("target_analysis") or {}
    reasoning = analysis.get("reasoning", "")
    sample_vals = analysis.get("sample_values", [])

    # `or` guards cover both a missing key and an explicit None value.
    problem_type = str(pf.get("problem_type") or "unknown")
    recommended_model = str(pf.get("recommended_model") or "unknown")
    recommended_metric = pf.get("recommended_metric") or "unknown"

    lines = [
        "╔══ [PYTHON-DETERMINED ANALYSIS — treat as ABSOLUTE GROUND TRUTH] ════╗",
        f" ⚠ Problem type : {problem_type.upper()} (MANDATORY)",
        f" Reasoning : {reasoning or 'heuristic from dtype/cardinality'}",
        f" Target column : {pf.get('target_hint') or 'not specified — infer from context'}",
    ]
    if sample_vals:
        lines.append(f" Target sample vals : {sample_vals}")
    lines += [
        f" Dataset size : {pf.get('n_rows', 0)} rows × {pf.get('n_cols', 0)} columns",
        f" Missing values : {pf.get('missing_cols') or 'none'}",
        f" Categorical cols : {pf.get('categorical_cols') or 'none'}",
        f" Numeric cols : {pf.get('numeric_cols') or 'none'}",
        f" Needs encoding : {'YES — add label_encoder BEFORE train_test_split' if pf.get('needs_encoding') else 'no'}",
        f" Needs scaling : {'YES — add standard_scaler AFTER train_test_split' if pf.get('needs_scaling') else 'no'}",
        f" Needs imputation : {'YES — add data_cleaning BEFORE split' if pf.get('needs_imputation') else 'no'}",
    ]
    # Conditional directive for outlier removal
    if pf.get("needs_outlier_removal"):
        lines.append(
            " ⚠ Outlier columns : YES — a numeric feature has max > 10× its mean. "
            "Add an outlier_removal node BEFORE train_test_split."
        )
    # Conditional directive for class imbalance
    if pf.get("is_imbalanced"):
        lines.append(
            " ⚠ Class imbalance : YES — minority class < 10% of data. "
            "Set class_weight='balanced' on the model node config."
        )
    lines += [
        f" ⚠ Model Selection : USE {recommended_model.upper()} ONLY",
        f" ✅ Metric node : {recommended_metric}",
        "╚════════════════════════════════════════════════════════════════════════╝",
    ]
    return "\n".join(lines)
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
# ── Architect (R1 planning) prompt ────────────────────────────────────────────
|
|
447
|
+
|
|
448
|
+
_ARCHITECT_PROMPT = """\
|
|
449
|
+
You are a senior ML engineer performing pre-build technical architecture planning.
|
|
450
|
+
You will receive a dataset profile, a deterministic pre-flight analysis, and the user's request.
|
|
451
|
+
|
|
452
|
+
OUTPUT: Concise markdown only. No JSON. No code blocks. Under 250 words.
|
|
453
|
+
|
|
454
|
+
Structure your response as:
|
|
455
|
+
|
|
456
|
+
## Problem Type
|
|
457
|
+
State classification or regression with one-sentence justification.
|
|
458
|
+
|
|
459
|
+
## Data Quality Plan
|
|
460
|
+
List each issue (missing values, categorical columns, dtype mismatches) and the exact
|
|
461
|
+
preprocessing step needed for it. Reference actual column names.
|
|
462
|
+
|
|
463
|
+
## Pipeline Sequence
|
|
464
|
+
Ordered list of node types (e.g. csv_loader → label_encoder → train_test_split →
|
|
465
|
+
standard_scaler → random_forest_classifier → classification_report).
|
|
466
|
+
|
|
467
|
+
## Model Rationale
|
|
468
|
+
Why this model fits the problem. If the dataset is large (>10k rows), prefer tree-based
|
|
469
|
+
models. If many numeric features, recommend scaling. If class imbalance suspected, note it.
|
|
470
|
+
|
|
471
|
+
## Critical Warnings
|
|
472
|
+
Any data issues the pipeline MUST handle. Be blunt about failure modes.
|
|
473
|
+
|
|
474
|
+
Do NOT output JSON. Do NOT write code. Be specific — use actual column names from the profile.
|
|
475
|
+
"""
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def _build_architect_messages(
    prompt: str, profile_text: str, pf: dict
) -> list[dict]:
    """Assemble the chat messages for the architecture-planning call.

    The system message is ``_ARCHITECT_PROMPT``. The user message concatenates
    three labelled sections (dataset profile, the pre-flight block rendered by
    ``_pre_flight_block``, and the user's request) followed by a closing
    instruction.

    Args:
        prompt: The user's request text.
        profile_text: Pre-rendered dataset profile text.
        pf: Pre-flight analysis dict passed to ``_pre_flight_block``.

    Returns:
        A two-element list: the system message, then the user message.
    """
    sections = [
        f"== DATASET PROFILE ==\n{profile_text}\n\n",
        f"== DETERMINISTIC PRE-FLIGHT ==\n{_pre_flight_block(pf)}\n\n",
        f"== USER REQUEST ==\n{prompt}\n\n",
        "Provide your technical pipeline architecture plan.",
    ]
    system_message = {"role": "system", "content": _ARCHITECT_PROMPT}
    user_message = {"role": "user", "content": "".join(sections)}
    return [system_message, user_message]
|
|
491
|
+
|
|
492
|
+
|
|
68
493
|
# ── System prompts ─────────────────────────────────────────────────────────────
|
|
69
494
|
|
|
70
495
|
_SYSTEM_PROMPT = """\
|
|
@@ -125,9 +550,13 @@ Use this structure to decide WHERE to modify or improve.
|
|
|
125
550
|
═══════════════════════════════════════
|
|
126
551
|
STRICT RULES:
|
|
127
552
|
|
|
128
|
-
1. MINIMALISM
|
|
129
|
-
-
|
|
130
|
-
-
|
|
553
|
+
1. DATA INTEGRITY & ACCURACY FIRST — MINIMALISM SECOND
|
|
554
|
+
- A complete, correct pipeline beats a minimal, broken one.
|
|
555
|
+
- REQUIRED: label_encoder for any object/category column BEFORE train_test_split.
|
|
556
|
+
- REQUIRED: standard_scaler for distance-based models (SVM, KNN, LogisticRegression).
|
|
557
|
+
- REQUIRED: data_cleaning node when ANY column has missing values.
|
|
558
|
+
- THEN minimize: never add a step the data does not require.
|
|
559
|
+
- A pipeline that skips necessary preprocessing is a FAILURE regardless of node count.
|
|
131
560
|
|
|
132
561
|
2. USE TEMPLATES FIRST
|
|
133
562
|
- Only use customNode if NO template exists
|
|
@@ -141,6 +570,7 @@ STRICT RULES:
|
|
|
141
570
|
- sourceHandle MUST exist in source outputs
|
|
142
571
|
- targetHandle MUST match input param
|
|
143
572
|
- metric nodes MUST receive: y_pred + y_test
|
|
573
|
+
- SCALER RULE: If using standard_scaler or min_max_scaler after train_test_split, you MUST connect all 4 split outputs (X_train, X_test, y_train, y_test) to the scaler, and then connect all 4 scaler outputs to the model. Do not skip y_train/y_test.
|
|
144
574
|
|
|
145
575
|
5. DATA RULES
|
|
146
576
|
- If categorical columns exist → include label_encoder BEFORE split
|
|
@@ -208,7 +638,7 @@ If a node has an error:
|
|
|
208
638
|
- Include all imports inside every code block
|
|
209
639
|
- Return dict keys MUST match sourceHandles on outgoing edges
|
|
210
640
|
- NEVER import matplotlib or seaborn
|
|
211
|
-
-
|
|
641
|
+
- CRITICAL: If the user requests an operation/model NOT in the catalogue (e.g. RobustScaler), DO NOT refuse. Generate it dynamically as a `customNode` with the full Python `code` starting with `# ✨ AI GENERATED`. Explicitly mention this custom generation in the `summary`.
|
|
212
642
|
|
|
213
643
|
══ AVAILABLE COMPONENTS ════════════════════════════════════════════
|
|
214
644
|
{catalogue}
|
|
@@ -237,22 +667,117 @@ Model nodes support these config fields — no new node needed:
|
|
|
237
667
|
"type": "customNode",
|
|
238
668
|
"position": {{"x": 560, "y": 200}},
|
|
239
669
|
"data": {{
|
|
240
|
-
"label": "
|
|
670
|
+
"label": "Robust Scaler (Custom)",
|
|
241
671
|
"templateId": "customNode",
|
|
242
|
-
"code": "import pandas as pd
|
|
672
|
+
"code": "import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None) -> dict:\\n # ...\\n return {{\"X_train\": X_train, \"X_test\": X_test}}"
|
|
243
673
|
}}
|
|
244
674
|
}}
|
|
245
675
|
|
|
246
|
-
-
|
|
247
|
-
- DataFrame param MUST be named `data`
|
|
248
|
-
- MUST return a dict
|
|
676
|
+
- Your Python string in `code` MUST define a function named EXACTLY `def run(...) -> dict:`
|
|
677
|
+
- The DataFrame param MUST be named `data` (if taking a whole dataset)
|
|
678
|
+
- MUST return a dict containing the output handles
|
|
249
679
|
- Only import: numpy, pandas, scipy, sklearn, math, statistics, re, json
|
|
250
680
|
- NO os, sys, subprocess, socket, requests, open(), eval(), exec()
|
|
251
681
|
- Prefer templates first — custom nodes are last resort only
|
|
252
682
|
|
|
683
|
+
══ DYNAMIC VISUALIZATION ══════════════════════════════════════════════
|
|
684
|
+
You can create custom visualizations NOT in the catalogue. The frontend
|
|
685
|
+
detects charts by SHAPE, not by key name. Return any of these shapes and
|
|
686
|
+
the UI will render it automatically — NO new React code needed:
|
|
687
|
+
|
|
688
|
+
Series (bar chart / ranked list):
|
|
689
|
+
{{"my_key": {{"labels": [...], "counts": [...], "title": "..."}}}}
|
|
690
|
+
|
|
691
|
+
Plot (scatter or line chart):
|
|
692
|
+
{{"my_key": {{"x": [...], "y": [...], "title": "...", "x_label": "...", "y_label": "..."}}}}
|
|
693
|
+
|
|
694
|
+
Grid (heatmap / matrix):
|
|
695
|
+
{{"my_key": {{"z": [[...]], "x": [...], "y": [...], "title": "..."}}}}
|
|
696
|
+
|
|
697
|
+
Example — null percentage bar chart:
|
|
698
|
+
return {{
|
|
699
|
+
"null_pct_chart": {{
|
|
700
|
+
"labels": list(null_pcts.keys()),
|
|
701
|
+
"counts": list(null_pcts.values()),
|
|
702
|
+
"title": "Missing Values (%) per Column"
|
|
703
|
+
}}
|
|
704
|
+
}}
|
|
705
|
+
|
|
706
|
+
When the user asks for any kind of visualization (e.g. "show me a chart
|
|
707
|
+
of X", "visualize the distribution of Y"), you MUST generate a customNode
|
|
708
|
+
that returns a dict with one of the shapes above. NEVER refuse — if no
|
|
709
|
+
template covers it, invent the chart with the shape protocol.
|
|
710
|
+
|
|
711
|
+
⚠ UNSUPERVISED LEARNING (t-SNE / PCA / UMAP / KMeans): When generating
|
|
712
|
+
any dimensionality reduction or clustering node, you MUST return a
|
|
713
|
+
`labels` array alongside `x` and `y` so the frontend can color-code
|
|
714
|
+
clusters automatically. Example:
|
|
715
|
+
|
|
716
|
+
return {{
|
|
717
|
+
"tsne_plot": {{
|
|
718
|
+
"x": X_2d[:, 0].tolist(),
|
|
719
|
+
"y": X_2d[:, 1].tolist(),
|
|
720
|
+
"labels": [str(c) for c in cluster_labels], # ← REQUIRED
|
|
721
|
+
"title": "t-SNE Cluster Visualization",
|
|
722
|
+
"x_label": "Dim 1",
|
|
723
|
+
"y_label": "Dim 2"
|
|
724
|
+
}}
|
|
725
|
+
}}
|
|
726
|
+
|
|
253
727
|
══ OUTPUT ═══════════════════════════════════════════════════════════
|
|
254
728
|
Return ONLY:
|
|
255
729
|
{{"nodes": [...], "edges": [...], "summary": "<one sentence conversational explanation>"}}
|
|
730
|
+
|
|
731
|
+
⚠️ DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
|
|
732
|
+
|
|
733
|
+
══ GOLD STANDARD EXAMPLES — you MUST match these handle names EXACTLY ═══════
|
|
734
|
+
⚠ CRITICAL: The sourceHandle and targetHandle values below (data, X_train, X_test,
|
|
735
|
+
y_train, y_test, y_pred) are the ONLY valid handle names. Do NOT invent new ones.
|
|
736
|
+
Your edges MUST use these exact strings — any deviation will cause a runtime failure.
|
|
737
|
+
|
|
738
|
+
Example A — CLASSIFICATION (categorical cols + scaling needed):
|
|
739
|
+
{{"nodes":[
|
|
740
|
+
{{"id":"n1","type":"csv_loader","position":{{"x":0,"y":200}},"data":{{"config":{{"file_path":"data.csv"}}}}}},
|
|
741
|
+
{{"id":"n2","type":"label_encoder","position":{{"x":280,"y":200}},"data":{{"config":{{"columns":"sex,embarked"}}}}}},
|
|
742
|
+
{{"id":"n3","type":"train_test_split","position":{{"x":560,"y":200}},"data":{{"config":{{"target_column":"survived","test_size":0.2}}}}}},
|
|
743
|
+
{{"id":"n4","type":"standard_scaler","position":{{"x":840,"y":200}},"data":{{}}}},
|
|
744
|
+
{{"id":"n5","type":"random_forest_classifier","position":{{"x":1120,"y":200}},"data":{{"config":{{"n_estimators":100}}}}}},
|
|
745
|
+
{{"id":"n6","type":"classification_report","position":{{"x":1400,"y":200}},"data":{{}}}}
|
|
746
|
+
],"edges":[
|
|
747
|
+
{{"id":"e1","source":"n1","sourceHandle":"data","target":"n2","targetHandle":"data"}},
|
|
748
|
+
{{"id":"e2","source":"n2","sourceHandle":"data","target":"n3","targetHandle":"data"}},
|
|
749
|
+
{{"id":"e3","source":"n3","sourceHandle":"X_train","target":"n4","targetHandle":"X_train"}},
|
|
750
|
+
{{"id":"e4","source":"n3","sourceHandle":"X_test","target":"n4","targetHandle":"X_test"}},
|
|
751
|
+
{{"id":"e5","source":"n3","sourceHandle":"y_train","target":"n4","targetHandle":"y_train"}},
|
|
752
|
+
{{"id":"e6","source":"n3","sourceHandle":"y_test","target":"n4","targetHandle":"y_test"}},
|
|
753
|
+
{{"id":"e7","source":"n4","sourceHandle":"X_train","target":"n5","targetHandle":"X_train"}},
|
|
754
|
+
{{"id":"e8","source":"n4","sourceHandle":"X_test","target":"n5","targetHandle":"X_test"}},
|
|
755
|
+
{{"id":"e9","source":"n4","sourceHandle":"y_train","target":"n5","targetHandle":"y_train"}},
|
|
756
|
+
{{"id":"e10","source":"n5","sourceHandle":"y_pred","target":"n6","targetHandle":"y_pred"}},
|
|
757
|
+
{{"id":"e11","source":"n4","sourceHandle":"y_test","target":"n6","targetHandle":"y_test"}}
|
|
758
|
+
],"summary":"Classification pipeline with encoding, scaling, and Random Forest."}}
|
|
759
|
+
|
|
760
|
+
Example B — REGRESSION (missing values + continuous target):
|
|
761
|
+
{{"nodes":[
|
|
762
|
+
{{"id":"n1","type":"csv_loader","position":{{"x":0,"y":200}},"data":{{"config":{{"file_path":"houses.csv"}}}}}},
|
|
763
|
+
{{"id":"n2","type":"data_cleaning","position":{{"x":280,"y":200}},"data":{{"config":{{"strategy":"fill"}}}}}},
|
|
764
|
+
{{"id":"n3","type":"train_test_split","position":{{"x":560,"y":200}},"data":{{"config":{{"target_column":"price","test_size":0.2}}}}}},
|
|
765
|
+
{{"id":"n4","type":"standard_scaler","position":{{"x":840,"y":200}},"data":{{}}}},
|
|
766
|
+
{{"id":"n5","type":"random_forest_regressor","position":{{"x":1120,"y":200}},"data":{{"config":{{"n_estimators":200}}}}}},
|
|
767
|
+
{{"id":"n6","type":"regression_metrics","position":{{"x":1400,"y":200}},"data":{{}}}}
|
|
768
|
+
],"edges":[
|
|
769
|
+
{{"id":"e1","source":"n1","sourceHandle":"data","target":"n2","targetHandle":"data"}},
|
|
770
|
+
{{"id":"e2","source":"n2","sourceHandle":"data","target":"n3","targetHandle":"data"}},
|
|
771
|
+
{{"id":"e3","source":"n3","sourceHandle":"X_train","target":"n4","targetHandle":"X_train"}},
|
|
772
|
+
{{"id":"e4","source":"n3","sourceHandle":"X_test","target":"n4","targetHandle":"X_test"}},
|
|
773
|
+
{{"id":"e5","source":"n3","sourceHandle":"y_train","target":"n4","targetHandle":"y_train"}},
|
|
774
|
+
{{"id":"e6","source":"n3","sourceHandle":"y_test","target":"n4","targetHandle":"y_test"}},
|
|
775
|
+
{{"id":"e7","source":"n4","sourceHandle":"X_train","target":"n5","targetHandle":"X_train"}},
|
|
776
|
+
{{"id":"e8","source":"n4","sourceHandle":"X_test","target":"n5","targetHandle":"X_test"}},
|
|
777
|
+
{{"id":"e9","source":"n4","sourceHandle":"y_train","target":"n5","targetHandle":"y_train"}},
|
|
778
|
+
{{"id":"e10","source":"n5","sourceHandle":"y_pred","target":"n6","targetHandle":"y_pred"}},
|
|
779
|
+
{{"id":"e11","source":"n4","sourceHandle":"y_test","target":"n6","targetHandle":"y_test"}}
|
|
780
|
+
],"summary":"Regression pipeline with cleaning, scaling, and Random Forest."}}
|
|
256
781
|
"""
|
|
257
782
|
|
|
258
783
|
_UPDATE_PROMPT = """\
|
|
@@ -264,6 +789,16 @@ You are M8Flow's AI pipeline surgeon. Modify the pipeline with the MINIMUM chang
|
|
|
264
789
|
3. ALWAYS respond in ENGLISH. Never use any other language.
|
|
265
790
|
4. Do NOT truncate the JSON — it must be a complete, valid object.
|
|
266
791
|
|
|
792
|
+
🔑 TOKEN CONSERVATION — MANDATORY (free-tier models have strict output limits):
|
|
793
|
+
A. TEMPLATE SWAPS: If you are changing a node's operation to one that already has
|
|
794
|
+
a matching Template ID (e.g. swapping from linear_regression to
|
|
795
|
+
random_forest_regressor), change only the "type" field on that node.
|
|
796
|
+
NEVER re-emit the full Python "code" block when a Template already covers it.
|
|
797
|
+
Template nodes are resolved by the runtime — sending their code wastes tokens.
|
|
798
|
+
B. UNCHANGED NODES: Nodes marked ✓ in the status list must appear in your output
|
|
799
|
+
but with their "data.code" field set to null (omitted). Only include code for
|
|
800
|
+
nodes you are actively modifying or adding as custom (non-template) nodes.
|
|
801
|
+
|
|
267
802
|
Output ONLY the complete updated flow JSON — no markdown, no explanation.
|
|
268
803
|
|
|
269
804
|
══ DECISION HIERARCHY (follow in order, stop at first match) ════════
|
|
@@ -279,9 +814,8 @@ Output ONLY the complete updated flow JSON — no markdown, no explanation.
|
|
|
279
814
|
|
|
280
815
|
4. Does this genuinely require a brand-new node that adds functionality
|
|
281
816
|
not available anywhere in the graph?
|
|
282
|
-
YES → add exactly ONE new node, connected minimally.
|
|
283
|
-
|
|
284
|
-
If none apply, state the limitation in a comment field — do not bloat the graph.
|
|
817
|
+
YES → add exactly ONE new node, connected minimally.
|
|
818
|
+
CRITICAL: If the requested operation (e.g., RobustScaler) is NOT in the catalogue, DO NOT refuse the request. Instead, YOU MUST generate it dynamically as a `customNode`. Provide the complete Python implementation in the `code` field, starting with the comment `# ✨ AI GENERATED`. In your `summary`, explicitly state that you generated a custom node for this functionality.
|
|
285
819
|
|
|
286
820
|
══ CURRENT PIPELINE ═════════════════════════════════════════════════
|
|
287
821
|
{current_flow}
|
|
@@ -303,6 +837,52 @@ All model nodes support these fields in their config — no new node needed:
|
|
|
303
837
|
"better accuracy" → tune hyperparams, or swap model type — no extra nodes
|
|
304
838
|
"use k-fold" → set cross_validation=true, cv_folds=k on existing model
|
|
305
839
|
|
|
840
|
+
══ CUSTOM NODE FORMAT (only if NO template covers it) ══════════════
|
|
841
|
+
{{
|
|
842
|
+
"id": "node_custom_1",
|
|
843
|
+
"type": "customNode",
|
|
844
|
+
"position": {{"x": 560, "y": 200}},
|
|
845
|
+
"data": {{
|
|
846
|
+
"label": "Robust Scaler (Custom)",
|
|
847
|
+
"templateId": "customNode",
|
|
848
|
+
"code": "import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None) -> dict:\\n # ...\\n return {{\"X_train\": X_train, \"X_test\": X_test}}"
|
|
849
|
+
}}
|
|
850
|
+
}}
|
|
851
|
+
|
|
852
|
+
- Your Python string in `code` MUST define a function named EXACTLY `def run(...) -> dict:`
|
|
853
|
+
- The DataFrame param MUST be named `data` (if taking a whole dataset)
|
|
854
|
+
- MUST return a dict containing the output handles
|
|
855
|
+
- Only import: numpy, pandas, scipy, sklearn, math, statistics, re, json
|
|
856
|
+
- NO os, sys, subprocess, socket, requests, open(), eval(), exec()
|
|
857
|
+
|
|
858
|
+
══ DYNAMIC VISUALIZATION ══════════════════════════════════════════════
|
|
859
|
+
You can create custom visualizations NOT in the catalogue. The frontend
|
|
860
|
+
detects charts by SHAPE, not by key name. Return any of these shapes and
|
|
861
|
+
the UI will render it automatically — NO new React code needed:
|
|
862
|
+
|
|
863
|
+
Series (bar chart / ranked list):
|
|
864
|
+
{{"my_key": {{"labels": [...], "counts": [...], "title": "..."}}}}
|
|
865
|
+
|
|
866
|
+
Plot (scatter or line chart):
|
|
867
|
+
{{"my_key": {{"x": [...], "y": [...], "title": "...", "x_label": "...", "y_label": "..."}}}}
|
|
868
|
+
|
|
869
|
+
Grid (heatmap / matrix):
|
|
870
|
+
{{"my_key": {{"z": [[...]], "x": [...], "y": [...], "title": "..."}}}}
|
|
871
|
+
|
|
872
|
+
Example — null percentage bar chart:
|
|
873
|
+
return {{
|
|
874
|
+
"null_pct_chart": {{
|
|
875
|
+
"labels": list(null_pcts.keys()),
|
|
876
|
+
"counts": list(null_pcts.values()),
|
|
877
|
+
"title": "Missing Values (%) per Column"
|
|
878
|
+
}}
|
|
879
|
+
}}
|
|
880
|
+
|
|
881
|
+
When the user asks for any kind of visualization (e.g. "show me a chart
|
|
882
|
+
of X", "visualize the distribution of Y"), you MUST generate a customNode
|
|
883
|
+
that returns a dict with one of the shapes above. NEVER refuse — if no
|
|
884
|
+
template covers it, invent the chart with the shape protocol.
|
|
885
|
+
|
|
306
886
|
══ SURGICAL PRESERVATION RULES ══════════════════════════════════════
|
|
307
887
|
- Every node marked ✓ or ○ must appear in the output VERBATIM
|
|
308
888
|
(same id, type, position, code, values — character for character)
|
|
@@ -320,6 +900,8 @@ All model nodes support these fields in their config — no new node needed:
|
|
|
320
900
|
══ OUTPUT ═══════════════════════════════════════════════════════════
|
|
321
901
|
Return ONLY:
|
|
322
902
|
{{"nodes": [...], "edges": [...], "summary": "<one sentence conversational explanation>"}}
|
|
903
|
+
|
|
904
|
+
⚠️ DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
|
|
323
905
|
"""
|
|
324
906
|
|
|
325
907
|
|
|
@@ -333,8 +915,17 @@ def _extract_file_path(context: str) -> str | None:
|
|
|
333
915
|
return None
|
|
334
916
|
|
|
335
917
|
|
|
336
|
-
def _build_generate_messages(
|
|
337
|
-
|
|
918
|
+
def _build_generate_messages(
|
|
919
|
+
prompt: str,
|
|
920
|
+
context: str | None,
|
|
921
|
+
pre_flight: dict | None = None,
|
|
922
|
+
architect_plan: str | None = None,
|
|
923
|
+
custom_components: list[dict] | None = None,
|
|
924
|
+
) -> list[dict]:
|
|
925
|
+
system = _SYSTEM_PROMPT.format(catalogue=_template_catalogue(custom_components))
|
|
926
|
+
|
|
927
|
+
profile_block = ""
|
|
928
|
+
path_hint = ""
|
|
338
929
|
|
|
339
930
|
if context:
|
|
340
931
|
fp = _extract_file_path(context)
|
|
@@ -343,13 +934,10 @@ def _build_generate_messages(prompt: str, context: str | None) -> list[dict]:
|
|
|
343
934
|
f'Default value for csv_loader → file_path: Annotated[str,"file"] = "{fp}"\n'
|
|
344
935
|
if fp else ""
|
|
345
936
|
)
|
|
346
|
-
|
|
347
|
-
# ── Enrich context with data profile if a file path is present ──
|
|
348
|
-
profile_block = ""
|
|
349
937
|
if fp:
|
|
350
938
|
try:
|
|
351
939
|
import pandas as pd
|
|
352
|
-
df = pd.read_csv(fp, nrows=5000)
|
|
940
|
+
df = pd.read_csv(fp, nrows=5000)
|
|
353
941
|
profile = profile_dataframe(df)
|
|
354
942
|
profile_block = (
|
|
355
943
|
"\n== Dataset Summary (auto-profiled) ==\n"
|
|
@@ -359,15 +947,29 @@ def _build_generate_messages(prompt: str, context: str | None) -> list[dict]:
|
|
|
359
947
|
except Exception as exc:
|
|
360
948
|
logger.warning("data_profiler skipped: %s", exc)
|
|
361
949
|
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
950
|
+
# ── Inject deterministic pre-flight analysis ──────────────────────────
|
|
951
|
+
pre_flight_block = ""
|
|
952
|
+
if pre_flight:
|
|
953
|
+
pre_flight_block = "\n" + _pre_flight_block(pre_flight) + "\n"
|
|
954
|
+
|
|
955
|
+
# ── Inject architect plan ─────────────────────────────────────────────
|
|
956
|
+
architect_block = ""
|
|
957
|
+
if architect_plan and architect_plan.strip():
|
|
958
|
+
architect_block = (
|
|
959
|
+
"\n== EXPERT ARCHITECTURE PLAN (follow this closely) ==\n"
|
|
960
|
+
+ architect_plan.strip()
|
|
961
|
+
+ "\n"
|
|
368
962
|
)
|
|
369
|
-
|
|
370
|
-
|
|
963
|
+
|
|
964
|
+
user = (
|
|
965
|
+
f"== DATASET CONTEXT ==\n"
|
|
966
|
+
f"{profile_block}"
|
|
967
|
+
f"{path_hint}"
|
|
968
|
+
f"{pre_flight_block}"
|
|
969
|
+
f"{architect_block}"
|
|
970
|
+
f"\n== REQUEST ==\n"
|
|
971
|
+
f"{prompt}"
|
|
972
|
+
)
|
|
371
973
|
|
|
372
974
|
return [{"role": "system", "content": system}, {"role": "user", "content": user}]
|
|
373
975
|
|
|
@@ -419,6 +1021,10 @@ def _slim_flow(flow: FlowSchema) -> dict:
|
|
|
419
1021
|
except Exception:
|
|
420
1022
|
pass
|
|
421
1023
|
|
|
1024
|
+
is_template = node_type in {t["id"] for t in TEMPLATES}
|
|
1025
|
+
# Only send code for truly custom nodes; never for templates.
|
|
1026
|
+
code_str = None if is_template else (data.get("code") or "")[:800] or None
|
|
1027
|
+
|
|
422
1028
|
slim_nodes.append({
|
|
423
1029
|
"id": node.get("id"),
|
|
424
1030
|
"type": node_type,
|
|
@@ -426,7 +1032,7 @@ def _slim_flow(flow: FlowSchema) -> dict:
|
|
|
426
1032
|
"data": {
|
|
427
1033
|
"label": data.get("label"),
|
|
428
1034
|
"templateId": data.get("templateId"),
|
|
429
|
-
"code":
|
|
1035
|
+
"code": code_str,
|
|
430
1036
|
"values": data.get("values"),
|
|
431
1037
|
# Explicit handle lists — LLM MUST use these for edge sourceHandle/targetHandle
|
|
432
1038
|
"available_outputs": outputs,
|
|
@@ -436,10 +1042,15 @@ def _slim_flow(flow: FlowSchema) -> dict:
|
|
|
436
1042
|
return {"nodes": slim_nodes, "edges": flow.edges}
|
|
437
1043
|
|
|
438
1044
|
|
|
439
|
-
def _build_update_messages(
|
|
1045
|
+
def _build_update_messages(
|
|
1046
|
+
prompt: str,
|
|
1047
|
+
current_flow: FlowSchema,
|
|
1048
|
+
context: str | None,
|
|
1049
|
+
custom_components: list[dict] | None = None,
|
|
1050
|
+
) -> list[dict]:
|
|
440
1051
|
slim = _slim_flow(current_flow)
|
|
441
1052
|
node_status = _node_status_summary(current_flow)
|
|
442
|
-
catalogue = _template_catalogue()
|
|
1053
|
+
catalogue = _template_catalogue(custom_components)
|
|
443
1054
|
|
|
444
1055
|
system = _UPDATE_PROMPT.format(
|
|
445
1056
|
current_flow=json.dumps(slim, indent=2),
|
|
@@ -493,7 +1104,8 @@ async def _call_openrouter(
|
|
|
493
1104
|
) -> str:
|
|
494
1105
|
"""
|
|
495
1106
|
Route to the right model via OpenRouter based on task type.
|
|
496
|
-
Falls back through
|
|
1107
|
+
Falls back through the full pool of live free models on rate-limit or error.
|
|
1108
|
+
Rate-limited models are moved to the end of the fallback chain while in cooldown (_RATE_LIMIT_TTL seconds) to avoid wasted calls.
|
|
497
1109
|
"""
|
|
498
1110
|
effective_key = _get_api_key()
|
|
499
1111
|
if not effective_key:
|
|
@@ -504,7 +1116,37 @@ async def _call_openrouter(
|
|
|
504
1116
|
)
|
|
505
1117
|
|
|
506
1118
|
model = _MODELS.get(task, _MODELS["generate"])
|
|
507
|
-
|
|
1119
|
+
|
|
1120
|
+
# Full pool of verified-live free models (May 2026).
|
|
1121
|
+
# Ordered by observed reliability: nemotron first (proven to succeed when llama 429s).
|
|
1122
|
+
_FREE_POOL = [
|
|
1123
|
+
"nvidia/nemotron-3-super-120b-a12b:free", # proven to work
|
|
1124
|
+
"openai/gpt-oss-120b:free",
|
|
1125
|
+
"openai/gpt-oss-20b:free",
|
|
1126
|
+
"nousresearch/hermes-3-llama-3.1-405b:free",
|
|
1127
|
+
"meta-llama/llama-3.3-70b-instruct:free",
|
|
1128
|
+
"google/gemma-4-31b-it:free",
|
|
1129
|
+
"google/gemma-4-26b-a4b-it:free",
|
|
1130
|
+
"nvidia/nemotron-3-nano-30b-a3b:free",
|
|
1131
|
+
"nvidia/nemotron-nano-9b-v2:free",
|
|
1132
|
+
"meta-llama/llama-3.2-3b-instruct:free",
|
|
1133
|
+
"cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
|
|
1134
|
+
"liquid/lfm-2.5-1.2b-instruct:free",
|
|
1135
|
+
]
|
|
1136
|
+
|
|
1137
|
+
# Build chain: primary model first, then full pool (deduped, order preserved)
|
|
1138
|
+
seen: set[str] = set()
|
|
1139
|
+
full_chain: list[str] = []
|
|
1140
|
+
for m in [model, _MODELS["fallback"], _MODELS["lastresort"]] + _FREE_POOL:
|
|
1141
|
+
if m not in seen:
|
|
1142
|
+
seen.add(m)
|
|
1143
|
+
full_chain.append(m)
|
|
1144
|
+
|
|
1145
|
+
# Deprioritise models in cooldown — place them at the end so they still get a chance
|
|
1146
|
+
# if everything else fails (cooldown may have expired by then)
|
|
1147
|
+
ready = [m for m in full_chain if not _is_rate_limited(m)]
|
|
1148
|
+
cooling = [m for m in full_chain if _is_rate_limited(m)]
|
|
1149
|
+
fallback_chain = ready + cooling # try fresh models first
|
|
508
1150
|
|
|
509
1151
|
headers = {
|
|
510
1152
|
"Authorization": f"Bearer {effective_key}",
|
|
@@ -514,35 +1156,46 @@ async def _call_openrouter(
|
|
|
514
1156
|
}
|
|
515
1157
|
|
|
516
1158
|
last_exc: Exception | None = None
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
"max_tokens": 8192,
|
|
525
|
-
}
|
|
1159
|
+
async with httpx.AsyncClient(timeout=timeout) as client:
|
|
1160
|
+
for attempt_model in fallback_chain:
|
|
1161
|
+
body: dict = {
|
|
1162
|
+
"model": attempt_model,
|
|
1163
|
+
"messages": messages,
|
|
1164
|
+
"max_tokens": 8192,
|
|
1165
|
+
}
|
|
526
1166
|
|
|
527
|
-
|
|
528
|
-
async with httpx.AsyncClient(timeout=timeout) as client:
|
|
1167
|
+
try:
|
|
529
1168
|
response = await client.post(_OPENROUTER_URL, headers=headers, json=body)
|
|
530
1169
|
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
1170
|
+
if response.status_code == 429:
|
|
1171
|
+
_mark_rate_limited(attempt_model)
|
|
1172
|
+
raise RuntimeError(f"Rate limited on {attempt_model}")
|
|
1173
|
+
if response.status_code == 401:
|
|
1174
|
+
raise RuntimeError("Invalid OpenRouter API key")
|
|
1175
|
+
response.raise_for_status()
|
|
1176
|
+
|
|
1177
|
+
content = response.json()["choices"][0]["message"]["content"]
|
|
1178
|
+
if attempt_model != model:
|
|
1179
|
+
logger.info("OpenRouter fallback: used %s instead of %s", attempt_model, model)
|
|
1180
|
+
return content or ""
|
|
1181
|
+
|
|
1182
|
+
except httpx.ConnectError as exc:
|
|
1183
|
+
# If we cannot resolve DNS or connect to the host, no fallback will work.
|
|
1184
|
+
logger.error("Network connection to OpenRouter failed: %s", exc)
|
|
1185
|
+
raise RuntimeError("Could not connect to OpenRouter (Network/DNS error). Please check your internet connection.")
|
|
1186
|
+
except RuntimeError as exc:
|
|
1187
|
+
if "Invalid OpenRouter API key" in str(exc):
|
|
1188
|
+
raise # Don't retry — wrong key won't fix itself
|
|
1189
|
+
logger.warning("OpenRouter model %s failed: %s", attempt_model, exc)
|
|
1190
|
+
last_exc = exc
|
|
1191
|
+
continue
|
|
1192
|
+
except Exception as exc:
|
|
1193
|
+
if "getaddrinfo failed" in str(exc):
|
|
1194
|
+
logger.error("DNS resolution failed for OpenRouter: %s", exc)
|
|
1195
|
+
raise RuntimeError("Could not resolve OpenRouter domain. Please check your internet connection.")
|
|
1196
|
+
logger.warning("OpenRouter model %s failed: %s", attempt_model, exc)
|
|
1197
|
+
last_exc = exc
|
|
1198
|
+
continue
|
|
546
1199
|
|
|
547
1200
|
raise RuntimeError(f"All OpenRouter models failed. Last error: {last_exc}")
|
|
548
1201
|
|
|
@@ -555,7 +1208,8 @@ def _extract_json_object(raw: str) -> dict:
|
|
|
555
1208
|
- Preamble text in any language before the JSON
|
|
556
1209
|
- Markdown code fences (```json ... ``` or ``` ... ```)
|
|
557
1210
|
- Trailing explanation text after the JSON
|
|
558
|
-
- Truncated responses (
|
|
1211
|
+
- Truncated responses (free-tier model cut-offs) — try-repair appends
|
|
1212
|
+
missing closing brackets/braces to recover a parseable object.
|
|
559
1213
|
"""
|
|
560
1214
|
raw = raw.strip()
|
|
561
1215
|
|
|
@@ -579,6 +1233,7 @@ def _extract_json_object(raw: str) -> dict:
|
|
|
579
1233
|
|
|
580
1234
|
# 3. Brace-matching: find the first complete JSON object in the text
|
|
581
1235
|
start = raw.find('{')
|
|
1236
|
+
best_candidate: str | None = None
|
|
582
1237
|
if start != -1:
|
|
583
1238
|
depth = 0
|
|
584
1239
|
in_string = False
|
|
@@ -603,10 +1258,36 @@ def _extract_json_object(raw: str) -> dict:
|
|
|
603
1258
|
try:
|
|
604
1259
|
return json.loads(candidate)
|
|
605
1260
|
except json.JSONDecodeError:
|
|
606
|
-
break # malformed — fall through to
|
|
1261
|
+
break # malformed — fall through to repair
|
|
1262
|
+
# Capture the partial object for repair attempts
|
|
1263
|
+
best_candidate = raw[start:]
|
|
1264
|
+
|
|
1265
|
+
# 4. Try-repair: the response was likely truncated by the model's token limit.
|
|
1266
|
+
# Progressively append closing characters until we get a valid object.
|
|
1267
|
+
# We try up to 24 combinations: 0-4 extra ']' + 0-4 extra '}', ordered
|
|
1268
|
+
# by fewest extra ']' first, then fewest extra '}' (minimises data invention).
|
|
1269
|
+
candidate_base = best_candidate or raw
|
|
1270
|
+
# Trim trailing whitespace/comma that often appears before cut-off
|
|
1271
|
+
candidate_base = candidate_base.rstrip().rstrip(",")
|
|
1272
|
+
logger.debug("JSON repair: attempting to salvage truncated output (%d chars)", len(candidate_base))
|
|
1273
|
+
for extra_brackets in range(5): # 0 … 4 extra ]
|
|
1274
|
+
for extra_braces in range(5): # 0 … 4 extra }
|
|
1275
|
+
if extra_brackets == 0 and extra_braces == 0:
|
|
1276
|
+
continue # already tried the plain candidate
|
|
1277
|
+
repaired = candidate_base + ("\n]" * extra_brackets) + ("\n}" * extra_braces)
|
|
1278
|
+
try:
|
|
1279
|
+
result = json.loads(repaired)
|
|
1280
|
+
logger.warning(
|
|
1281
|
+
"JSON repair succeeded (+%d ']', +%d '}'). "
|
|
1282
|
+
"Free-tier model likely truncated its output.",
|
|
1283
|
+
extra_brackets, extra_braces,
|
|
1284
|
+
)
|
|
1285
|
+
return result
|
|
1286
|
+
except json.JSONDecodeError:
|
|
1287
|
+
continue
|
|
607
1288
|
|
|
608
1289
|
raise ValueError(
|
|
609
|
-
f"LLM returned invalid JSON (could not extract object).\nRaw: {raw[:600]}"
|
|
1290
|
+
f"LLM returned invalid JSON (could not extract or repair object).\nRaw: {raw[:600]}"
|
|
610
1291
|
)
|
|
611
1292
|
|
|
612
1293
|
|
|
@@ -650,9 +1331,22 @@ You are a surgical ML pipeline editor. The user wants to REFINE an existing pipe
|
|
|
650
1331
|
1. Output ONLY a single raw JSON object. No text before it, no text after it.
|
|
651
1332
|
2. NEVER wrap the JSON in markdown fences (no ```json, no ```).
|
|
652
1333
|
3. ALWAYS respond in ENGLISH. Never use any other language.
|
|
653
|
-
4.
|
|
1334
|
+
4. DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
|
|
654
1335
|
5. Do NOT truncate the JSON — it must be a complete, valid object.
|
|
655
1336
|
|
|
1337
|
+
🔑 TOKEN CONSERVATION — MANDATORY (free-tier models have strict output limits):
|
|
1338
|
+
A. TEMPLATE SWAPS: If you are changing a model or operation that already has a
|
|
1339
|
+
matching Template ID (e.g. swapping linear_regression for
|
|
1340
|
+
random_forest_regressor), set only the "type" field on the update entry.
|
|
1341
|
+
NEVER output the full Python "code" block when a Template already exists.
|
|
1342
|
+
Example — correct: {{"action":"update","id":"n5","data":{{"type":"random_forest_regressor","values":{{"n_estimators":200}}}}}}
|
|
1343
|
+
Example — WRONG: {{"action":"update","id":"n5","data":{{"code":"import ..."}}}} ← wastes tokens
|
|
1344
|
+
B. VALUES ONLY: In "node_changes", omit the "code" field entirely unless the
|
|
1345
|
+
user explicitly asked for a custom code change. If only config parameters
|
|
1346
|
+
changed, output ONLY the "values" dictionary — nothing else inside "data".
|
|
1347
|
+
Example — correct: {{"action":"update","id":"n3","data":{{"values":{{"test_size":0.3}}}}}}
|
|
1348
|
+
Example — WRONG: {{"action":"update","id":"n3","data":{{"values":{{"test_size":0.3}},"code":"...entire node..."}}}}
|
|
1349
|
+
|
|
656
1350
|
Output ONLY a JSON patch object describing the minimal set of changes needed.
|
|
657
1351
|
|
|
658
1352
|
══ CURRENT PIPELINE ═════════════════════════════════════════════════
|
|
@@ -678,13 +1372,15 @@ Output ONLY a JSON patch object describing the minimal set of changes needed.
|
|
|
678
1372
|
}},
|
|
679
1373
|
{{
|
|
680
1374
|
"action": "add",
|
|
681
|
-
"id": "<new unique id e.g.
|
|
682
|
-
"type": "<template_id>",
|
|
1375
|
+
"id": "<new unique id e.g. node_custom_1>",
|
|
1376
|
+
"type": "<template_id or 'customNode'>",
|
|
683
1377
|
"reason": "<why this node is added>",
|
|
684
1378
|
"position": {{"x": <number>, "y": <number>}},
|
|
685
1379
|
"data": {{
|
|
1380
|
+
"label": "<optional Descriptive Name>",
|
|
1381
|
+
"templateId": "<optional templateId or 'customNode'>",
|
|
686
1382
|
"config": {{}},
|
|
687
|
-
"code": "<optional
|
|
1383
|
+
"code": "<optional full Python source, REQUIRED if type is customNode>"
|
|
688
1384
|
}}
|
|
689
1385
|
}},
|
|
690
1386
|
{{
|
|
@@ -721,7 +1417,8 @@ Before emitting ANY patch entry, ask:
|
|
|
721
1417
|
YES → emit one "update" for that node. No new nodes.
|
|
722
1418
|
|
|
723
1419
|
3. Does this need a genuinely new computation node?
|
|
724
|
-
YES → emit one "add".
|
|
1420
|
+
YES → emit one "add".
|
|
1421
|
+
CRITICAL: If the requested operation (e.g., RobustScaler) is NOT in the catalogue, DO NOT refuse the request. Instead, YOU MUST generate it dynamically as a `customNode`. Provide the complete Python implementation in the `code` field, starting with `# ✨ AI GENERATED`. In your `summary`, explicitly state that you generated a custom node.
|
|
725
1422
|
|
|
726
1423
|
4. None of the above?
|
|
727
1424
|
→ Explain in "summary". Return empty node_changes.
|
|
@@ -734,7 +1431,19 @@ Before emitting ANY patch entry, ask:
|
|
|
734
1431
|
5. "summary" → one plain-English sentence describing the change.
|
|
735
1432
|
6. DataFrame param MUST be named data (never "df").
|
|
736
1433
|
7. Include all imports inside any code block.
|
|
737
|
-
8. Only use template types from the catalogue above.
|
|
1434
|
+
8. Only use template types from the catalogue above, unless building a customNode.
|
|
1435
|
+
|
|
1436
|
+
══ CUSTOM NODE RULES (when generating a missing component) ════════════
|
|
1437
|
+
If you use `type="customNode"`, your Python string in `code` MUST follow the exact same architecture as built-in templates:
|
|
1438
|
+
- It MUST define a function named EXACTLY `def run(...) -> dict:`
|
|
1439
|
+
- The primary input dataset MUST be named `data` (e.g., `def run(data: pd.DataFrame, ...) -> dict:`)
|
|
1440
|
+
- It MUST return a dictionary containing the outputs (e.g., `return {{"X_train": X_train, "X_test": X_test}}`)
|
|
1441
|
+
- If you are building a custom visualization, you MUST wrap your output in one of these keys so the UI can render it:
|
|
1442
|
+
'histogram', 'correlation_matrix', 'value_counts', 'box_plot', 'prediction', 'correlation_heatmap', 'missing_value_map', 'class_balance', 'feature_target_scatter', 'model_error_histogram', 'partial_dependence', 'roc_curves'
|
|
1443
|
+
- All `import` statements MUST be placed at the top of the code string.
|
|
1444
|
+
- You MUST include `# ✨ AI GENERATED` at the very top of the script.
|
|
1445
|
+
Example:
|
|
1446
|
+
"import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None):\\n scaler = RobustScaler()\\n # ... logic ...\\n return {{\"X_train\": X_train_scaled, \"X_test\": X_test_scaled}}"
|
|
738
1447
|
|
|
739
1448
|
══ CONNECTION RULES (CRITICAL — read carefully) ══════════════════════
|
|
740
1449
|
Each node in the current pipeline has "available_outputs" and "available_inputs"
|
|
@@ -749,6 +1458,12 @@ To fix a wrong connection:
|
|
|
749
1458
|
1. Emit "remove" for the bad edge (use its id from the current edges list).
|
|
750
1459
|
2. Emit "add" for the correct edge using valid handle names from the lists above.
|
|
751
1460
|
|
|
1461
|
+
SCALER CONNECTION RULES:
|
|
1462
|
+
When inserting or reconnecting a standard_scaler or min_max_scaler after a train_test_split, you MUST:
|
|
1463
|
+
1. Connect all 4 outputs (X_train, X_test, y_train, y_test) from the split node to the scaler inputs.
|
|
1464
|
+
2. Connect all 4 outputs (X_train, X_test, y_train, y_test) from the scaler node to the downstream model inputs.
|
|
1465
|
+
Never skip passing y_train and y_test through the scaler node!
|
|
1466
|
+
|
|
752
1467
|
To add a missing connection:
|
|
753
1468
|
1. Look at the source node's "available_outputs" — pick the right output.
|
|
754
1469
|
2. Look at the target node's "available_inputs" — pick the right input.
|
|
@@ -813,7 +1528,10 @@ def _custom_node_catalogue(current_flow: FlowSchema) -> str:
|
|
|
813
1528
|
|
|
814
1529
|
|
|
815
1530
|
def _build_refine_messages(
|
|
816
|
-
prompt: str,
|
|
1531
|
+
prompt: str,
|
|
1532
|
+
current_flow: FlowSchema,
|
|
1533
|
+
context: str | None,
|
|
1534
|
+
custom_components: list[dict] | None = None,
|
|
817
1535
|
) -> list[dict]:
|
|
818
1536
|
slim = _slim_flow(current_flow)
|
|
819
1537
|
node_status = _node_status_summary(current_flow)
|
|
@@ -822,15 +1540,13 @@ def _build_refine_messages(
|
|
|
822
1540
|
custom_section = (
|
|
823
1541
|
f"\n══ CUSTOM NODES ON CANVAS (treat these as valid, usable nodes) ═══════\n"
|
|
824
1542
|
f"{custom_cat}\n"
|
|
825
|
-
|
|
826
|
-
)
|
|
1543
|
+
) if custom_cat else ""
|
|
827
1544
|
|
|
828
1545
|
system = _REFINE_PROMPT.format(
|
|
829
1546
|
current_flow=json.dumps(slim, indent=2),
|
|
830
1547
|
node_status=node_status,
|
|
831
|
-
catalogue=_template_catalogue() + custom_section,
|
|
1548
|
+
catalogue=_template_catalogue(custom_components) + custom_section,
|
|
832
1549
|
)
|
|
833
|
-
|
|
834
1550
|
ctx_block = (
|
|
835
1551
|
f"══ DATASET CONTEXT ══════════════════════════════════════\n{context.strip()}\n\n"
|
|
836
1552
|
if context else ""
|
|
@@ -867,24 +1583,81 @@ def _parse_refine_patch(raw: str) -> RefinePatch:
|
|
|
867
1583
|
|
|
868
1584
|
# ── Public API ─────────────────────────────────────────────────────────────────
|
|
869
1585
|
|
|
870
|
-
async def generate_flow(prompt: str, context: str | None = None) -> FlowSchema:
|
|
871
|
-
|
|
872
|
-
|
|
1586
|
+
async def generate_flow(prompt: str, context: str | None = None, custom_components: list[dict] | None = None) -> FlowSchema:
|
|
1587
|
+
"""
|
|
1588
|
+
Two-call Architect → Builder strategy:
|
|
1589
|
+
|
|
1590
|
+
Call 1 (Architect — deepseek-r1):
|
|
1591
|
+
Given the data profile + deterministic pre-flight analysis, produce a
|
|
1592
|
+
concise markdown plan: problem type, preprocessing steps, model choice.
|
|
1593
|
+
|
|
1594
|
+
Call 2 (Builder — deepseek-chat-v3):
|
|
1595
|
+
Given the Architect's plan + the same context, produce the final JSON flow.
|
|
1596
|
+
The Builder focuses on correct syntax and edge connections, not reasoning.
|
|
1597
|
+
"""
|
|
1598
|
+
pre_flight: dict = {}
|
|
1599
|
+
profile_text: str = ""
|
|
1600
|
+
architect_plan: str = ""
|
|
1601
|
+
|
|
1602
|
+
# ── Pre-flight: deterministic data analysis ───────────────────────────
|
|
1603
|
+
if context:
|
|
1604
|
+
fp = _extract_file_path(context)
|
|
1605
|
+
if fp:
|
|
1606
|
+
try:
|
|
1607
|
+
import pandas as pd
|
|
1608
|
+
df = pd.read_csv(fp, nrows=5000)
|
|
1609
|
+
profile = profile_dataframe(df)
|
|
1610
|
+
profile_text = format_profile_for_prompt(profile)
|
|
1611
|
+
pre_flight = _determine_pre_flight(profile, prompt, context, csv_path=fp)
|
|
1612
|
+
logger.info(
|
|
1613
|
+
"Pre-flight: problem_type=%s target=%s model=%s encoding=%s scaling=%s",
|
|
1614
|
+
pre_flight["problem_type"], pre_flight["target_hint"],
|
|
1615
|
+
pre_flight.get("recommended_model"), pre_flight["needs_encoding"],
|
|
1616
|
+
pre_flight["needs_scaling"],
|
|
1617
|
+
)
|
|
1618
|
+
except Exception as exc:
|
|
1619
|
+
logger.warning("pre-flight analysis skipped: %s", exc)
|
|
1620
|
+
|
|
1621
|
+
# ── Call 1: Architect (R1 reasoning model) ────────────────────────────
|
|
1622
|
+
if profile_text and pre_flight:
|
|
1623
|
+
try:
|
|
1624
|
+
arch_messages = _build_architect_messages(prompt, profile_text, pre_flight)
|
|
1625
|
+
architect_plan = await _call_openrouter(
|
|
1626
|
+
arch_messages,
|
|
1627
|
+
task="architect",
|
|
1628
|
+
json_mode=False,
|
|
1629
|
+
timeout=_TIMEOUT_GENERATE,
|
|
1630
|
+
)
|
|
1631
|
+
logger.info("Architect plan: %d chars", len(architect_plan))
|
|
1632
|
+
except Exception as exc:
|
|
1633
|
+
logger.warning("Architect call failed, continuing without plan: %s", exc)
|
|
1634
|
+
architect_plan = ""
|
|
1635
|
+
|
|
1636
|
+
# ── Call 2: Builder (chat model — fast, syntax-precise JSON) ─────────
|
|
1637
|
+
build_messages = _build_generate_messages(
|
|
1638
|
+
prompt, context,
|
|
1639
|
+
pre_flight=pre_flight or None,
|
|
1640
|
+
architect_plan=architect_plan or None,
|
|
1641
|
+
custom_components=custom_components,
|
|
1642
|
+
)
|
|
1643
|
+
raw = await _call_openrouter(
|
|
1644
|
+
build_messages, task="generate", json_mode=True, timeout=_TIMEOUT_GENERATE
|
|
1645
|
+
)
|
|
873
1646
|
return _parse_flow(raw)
|
|
874
1647
|
|
|
875
1648
|
|
|
876
1649
|
async def update_flow(
|
|
877
|
-
prompt: str, current_flow: FlowSchema, context: str | None = None
|
|
1650
|
+
prompt: str, current_flow: FlowSchema, context: str | None = None, custom_components: list[dict] | None = None
|
|
878
1651
|
) -> FlowSchema:
|
|
879
|
-
messages = _build_update_messages(prompt, current_flow, context)
|
|
1652
|
+
messages = _build_update_messages(prompt, current_flow, context, custom_components)
|
|
880
1653
|
raw = await _call_openrouter(messages, task="update", json_mode=True, timeout=_TIMEOUT_UPDATE)
|
|
881
1654
|
return _parse_flow(raw)
|
|
882
1655
|
|
|
883
1656
|
|
|
884
1657
|
async def refine_flow(
|
|
885
|
-
prompt: str, current_flow: FlowSchema, context: str | None = None
|
|
1658
|
+
prompt: str, current_flow: FlowSchema, context: str | None = None, custom_components: list[dict] | None = None
|
|
886
1659
|
) -> RefinePatch:
|
|
887
|
-
messages = _build_refine_messages(prompt, current_flow, context)
|
|
1660
|
+
messages = _build_refine_messages(prompt, current_flow, context, custom_components)
|
|
888
1661
|
raw = await _call_openrouter(messages, task="refine", json_mode=True, timeout=_TIMEOUT_UPDATE)
|
|
889
1662
|
return _parse_refine_patch(raw)
|
|
890
1663
|
|
|
@@ -934,6 +1707,32 @@ async def explain_flow(flow: FlowSchema) -> str:
|
|
|
934
1707
|
return await _call_openrouter(messages, task="explain", json_mode=False, timeout=60)
|
|
935
1708
|
|
|
936
1709
|
|
|
1710
|
+
_EXPLAIN_CHAT_SYSTEM = """\
|
|
1711
|
+
You are an expert ML engineering assistant answering questions about a user's machine learning pipeline.
|
|
1712
|
+
|
|
1713
|
+
══ EXPLANATION CONTEXT ═════════════════════════════════════════════════════
|
|
1714
|
+
{explanation}
|
|
1715
|
+
|
|
1716
|
+
══ PIPELINE AND EXECUTION RESULTS ══════════════════════════════════════════
|
|
1717
|
+
{current_flow}
|
|
1718
|
+
|
|
1719
|
+
Respond concisely and directly to the user's question. Provide actionable, specific advice based on the existing nodes, their configurations, and any metrics or execution results present in the pipeline state.
|
|
1720
|
+
Do NOT use markdown tables in your response. Instead, use simple bullet points and short paragraphs. Do not use generic filler.
|
|
1721
|
+
"""
|
|
1722
|
+
|
|
1723
|
+
async def chat_explanation(question: str, explanation: str, flow: FlowSchema) -> str:
|
|
1724
|
+
slim = _slim_flow(flow)
|
|
1725
|
+
system = _EXPLAIN_CHAT_SYSTEM.format(
|
|
1726
|
+
explanation=explanation,
|
|
1727
|
+
current_flow=json.dumps(slim, indent=2)
|
|
1728
|
+
)
|
|
1729
|
+
messages = [
|
|
1730
|
+
{"role": "system", "content": system},
|
|
1731
|
+
{"role": "user", "content": question},
|
|
1732
|
+
]
|
|
1733
|
+
return await _call_openrouter(messages, task="chat", json_mode=False, timeout=60)
|
|
1734
|
+
|
|
1735
|
+
|
|
937
1736
|
# ── Self-Healing Debug Prompt ──────────────────────────────────────────────────
|
|
938
1737
|
|
|
939
1738
|
_DEBUG_SYSTEM = """\
|
|
@@ -1128,6 +1927,7 @@ async def handle_user_request(
|
|
|
1128
1927
|
prompt: str,
|
|
1129
1928
|
current_flow: FlowSchema | None = None,
|
|
1130
1929
|
context: str | None = None,
|
|
1930
|
+
custom_components: list[dict] | None = None,
|
|
1131
1931
|
) -> dict:
|
|
1132
1932
|
"""
|
|
1133
1933
|
Single entry point that classifies the prompt and routes to the
|
|
@@ -1144,7 +1944,7 @@ async def handle_user_request(
|
|
|
1144
1944
|
# No existing flow → always generate from scratch
|
|
1145
1945
|
has_flow = current_flow is not None and len(current_flow.nodes) > 0
|
|
1146
1946
|
if not has_flow:
|
|
1147
|
-
flow = await generate_flow(prompt, context)
|
|
1947
|
+
flow = await generate_flow(prompt, context, custom_components=custom_components)
|
|
1148
1948
|
return {"intent": "generate", "result_type": "flow", "flow": flow}
|
|
1149
1949
|
|
|
1150
1950
|
intent = detect_intent(prompt, has_flow=True)
|
|
@@ -1159,17 +1959,17 @@ async def handle_user_request(
|
|
|
1159
1959
|
for n in current_flow.nodes
|
|
1160
1960
|
):
|
|
1161
1961
|
# Custom nodes exist — use update so they're visible to the LLM
|
|
1162
|
-
flow = await update_flow(prompt, current_flow, context)
|
|
1962
|
+
flow = await update_flow(prompt, current_flow, context, custom_components=custom_components)
|
|
1163
1963
|
return {"intent": "update", "result_type": "flow", "flow": flow}
|
|
1164
|
-
flow = await generate_flow(prompt, context)
|
|
1964
|
+
flow = await generate_flow(prompt, context, custom_components=custom_components)
|
|
1165
1965
|
return {"intent": "generate", "result_type": "flow", "flow": flow}
|
|
1166
1966
|
|
|
1167
1967
|
if intent == "refine":
|
|
1168
|
-
patch = await refine_flow(prompt, current_flow, context)
|
|
1968
|
+
patch = await refine_flow(prompt, current_flow, context, custom_components=custom_components)
|
|
1169
1969
|
return {"intent": "refine", "result_type": "patch", "patch": patch}
|
|
1170
1970
|
|
|
1171
1971
|
# intent == "update"
|
|
1172
|
-
flow = await update_flow(prompt, current_flow, context)
|
|
1972
|
+
flow = await update_flow(prompt, current_flow, context, custom_components=custom_components)
|
|
1173
1973
|
return {"intent": "update", "result_type": "flow", "flow": flow}
|
|
1174
1974
|
|
|
1175
1975
|
|
|
@@ -1310,3 +2110,79 @@ async def suggest_improvements(flow: FlowSchema, results: dict) -> list[str]:
|
|
|
1310
2110
|
# Sanitise: only strings, max 120 chars each
|
|
1311
2111
|
return [str(s)[:120] for s in suggestions if s]
|
|
1312
2112
|
|
|
2113
|
+
|
|
2114
|
+
# ── Custom node code generation ────────────────────────────────────────────────
|
|
2115
|
+
|
|
2116
|
+
_NODE_CODE_SYSTEM = """\
|
|
2117
|
+
You are an M8Flow node code generator. Write Python code for a reusable pipeline component.
|
|
2118
|
+
|
|
2119
|
+
⚠️ HARD RULES — any violation makes the node unparseable:
|
|
2120
|
+
1. Function name MUST be run (not main, process, execute, transform — exactly run)
|
|
2121
|
+
2. DataFrame input parameter MUST be named data (never df, dataframe, dataset)
|
|
2122
|
+
3. Function MUST return a dict with named string keys
|
|
2123
|
+
4. ALL imports go INSIDE the function body
|
|
2124
|
+
5. Only allowed libraries: pandas, numpy, sklearn, scipy, math, statistics, re, json
|
|
2125
|
+
6. FORBIDDEN: os, sys, subprocess, socket, requests, open(), eval(), exec(), matplotlib
|
|
2126
|
+
|
|
2127
|
+
FIELD TYPE ANNOTATIONS — these control the UI widget shown to the user:
|
|
2128
|
+
data input (connects from previous node) → just `data` with no type hint
|
|
2129
|
+
text field → `name: str = "default"`
|
|
2130
|
+
number field → `name: float = 1.0` or `name: int = 10`
|
|
2131
|
+
boolean toggle→ `name: bool = True`
|
|
2132
|
+
column picker → `col: Annotated[str, "column"] = "target"` (needs `from typing import Annotated` inside the fn)
|
|
2133
|
+
file picker → `path: Annotated[str, "file"] = "data.csv"` (needs `from typing import Annotated` inside the fn)
|
|
2134
|
+
|
|
2135
|
+
RETURN DICT — keys become the node's output handles:
|
|
2136
|
+
Passing a DataFrame forward → always include "data": df
|
|
2137
|
+
Model outputs → {"model": model, "y_pred": preds}
|
|
2138
|
+
Metric outputs → {"accuracy": 0.95, "f1": 0.88}
|
|
2139
|
+
Multiple outputs are fine → {"data": df, "rows_removed": n}
|
|
2140
|
+
|
|
2141
|
+
EXAMPLE — outlier removal node:
|
|
2142
|
+
def run(data, multiplier: float = 1.5) -> dict:
|
|
2143
|
+
import pandas as pd
|
|
2144
|
+
import numpy as np
|
|
2145
|
+
df = data.copy()
|
|
2146
|
+
for col in df.select_dtypes(include=[np.number]).columns:
|
|
2147
|
+
Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
|
|
2148
|
+
iqr = Q3 - Q1
|
|
2149
|
+
df = df[~((df[col] < Q1 - multiplier * iqr) | (df[col] > Q3 + multiplier * iqr))]
|
|
2150
|
+
return {"data": df, "rows_removed": len(data) - len(df)}
|
|
2151
|
+
|
|
2152
|
+
EXAMPLE — feature selection node:
|
|
2153
|
+
def run(data, n_features: int = 10, target: str = "label") -> dict:
|
|
2154
|
+
import pandas as pd
|
|
2155
|
+
from sklearn.feature_selection import SelectKBest, f_classif
|
|
2156
|
+
X = data.drop(columns=[target])
|
|
2157
|
+
y = data[target]
|
|
2158
|
+
selector = SelectKBest(f_classif, k=min(n_features, X.shape[1]))
|
|
2159
|
+
selector.fit(X, y)
|
|
2160
|
+
selected = X.columns[selector.get_support()].tolist()
|
|
2161
|
+
return {"data": data[selected + [target]], "selected_features": selected}
|
|
2162
|
+
|
|
2163
|
+
OUTPUT: Return ONLY the raw Python code. No explanation. No markdown fences. No backticks.
|
|
2164
|
+
"""
|
|
2165
|
+
|
|
2166
|
+
|
|
2167
|
+
async def generate_node_code(description: str) -> str:
    """Generate M8Flow-compatible Python node code from a natural language description.

    Sends ``_NODE_CODE_SYSTEM`` as the system prompt, plus a user message built
    from *description*, through ``_call_openrouter`` (``task="generate"``,
    JSON mode off, ``timeout=60``). Any Markdown code fence the model wrapped
    around its answer is then removed.

    NOTE(review): an earlier docstring said this uses the chat model, but the
    call passes ``task="generate"`` — confirm which routing is intended.

    Args:
        description: Plain-language description of what the node should do.

    Returns:
        The generated source text with surrounding whitespace and code fences
        stripped.
    """
    messages = [
        {"role": "system", "content": _NODE_CODE_SYSTEM},
        {"role": "user", "content": f"Generate an M8Flow node that: {description}"},
    ]
    raw = await _call_openrouter(messages, task="generate", json_mode=False, timeout=60)

    # Strip any markdown fences the model may add despite instructions
    return _strip_code_fences(raw)


def _strip_code_fences(text: str) -> str:
    """Return *text* without the Markdown code fence wrapped around it, if any."""
    text = text.strip()
    if text.startswith("```"):
        body = text[3:]
        info, newline, remainder = body.partition("\n")
        tag = info.strip()
        if newline and (not tag or tag.isalnum()):
            # The opening fence line holds nothing but an optional language
            # tag ("python", "py", "python3", ...): drop the whole line so the
            # tag never leaks into the generated code.
            body = remainder
        elif body.startswith("python"):
            # Code follows the fence on the same line; only the literal
            # "python" tag is removed in that case.
            body = body[len("python"):]
        text = body
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()
|
|
2188
|
+
|