m8flow 1.0.1 → 1.1.0
This diff shows the published contents of two versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those released versions.
- package/bundled/backend/Dockerfile +41 -0
- package/bundled/backend/add_nodes.py +416 -0
- package/bundled/backend/api/routes/appstate.py +102 -0
- package/bundled/backend/api/routes/flows.py +64 -5
- package/bundled/backend/api/routes/nodes.py +25 -1
- package/bundled/backend/core/code_validator.py +2 -0
- package/bundled/backend/core/executor.py +19 -3
- package/bundled/backend/main.py +16 -4
- package/bundled/backend/requirements.txt +27 -6
- package/bundled/backend/services/llm_service.py +957 -98
- package/bundled/backend/services/self_healer.py +1 -1
- package/bundled/backend/storage/__init__.py +0 -0
- package/bundled/backend/storage/memory.py +16 -0
- package/bundled/backend/temp.json +0 -0
- package/bundled/backend/templates.json +0 -0
- package/bundled/backend/templates.py +2907 -745
- package/bundled/backend/warmup.py +65 -0
- package/bundled/frontend-dist/assets/index-CKUZ27n8.css +1 -0
- package/bundled/frontend-dist/assets/index-DNaB6zf0.js +46 -0
- package/bundled/frontend-dist/index.html +2 -2
- package/lib/backend.js +155 -35
- package/lib/run.js +18 -7
- package/lib/setup.js +119 -59
- package/package.json +3 -2
- package/scripts/build.js +1 -2
- package/scripts/check-docker.js +35 -0
- package/bundled/frontend-dist/assets/index-Dm2J6DQp.js +0 -41
- package/bundled/frontend-dist/assets/index-xKOV3MGm.css +0 -1
package/bundled/backend/services/llm_service.py

@@ -1,6 +1,7 @@
 """LLM service — OpenRouter-powered flow generation with per-task model routing."""
 import json
 import logging
+import time
 import httpx
 from contextvars import ContextVar
 from config import config
@@ -23,23 +24,57 @@ _OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
 _TIMEOUT_GENERATE = 120
 _TIMEOUT_UPDATE = 180
 
-# Per-task model routing
+# Per-task model routing — slugs verified live against OpenRouter API (May 2026)
 _MODELS = {
-
-    "
-
-
-    "
-    "
-    "
-
-
+    # Reasoning/Architecting — best available free reasoning model
+    "architect": "nvidia/nemotron-3-super-120b-a12b:free",
+
+    # Core Generation/Updating — large, instruction-tuned free model
+    "generate": "meta-llama/llama-3.3-70b-instruct:free",
+    "refine": "meta-llama/llama-3.3-70b-instruct:free",
+    "update": "meta-llama/llama-3.3-70b-instruct:free",
+
+    # Debugging/Healing — strong reasoning for bug analysis
+    "debug": "nvidia/nemotron-3-super-120b-a12b:free",
+    "heal": "nvidia/nemotron-3-super-120b-a12b:free",
+
+    # Explaining/Suggesting — fast free model
+    "explain": "openai/gpt-oss-20b:free",
+    "suggest": "openai/gpt-oss-20b:free",
+
+    # Safety Nets — verified live fallbacks
+    "fallback": "google/gemma-4-31b-it:free",
+    "lastresort": "meta-llama/llama-3.2-3b-instruct:free",
 }
 
+# ── Rate-limit cooldown cache ─────────────────────────────────────────────────
+# Maps model_slug -> timestamp of last 429. Models in cooldown are skipped for
+# _RATE_LIMIT_TTL seconds so we jump straight to a working model instead of
+# burning time on a known-rate-limited one.
+_RATE_LIMIT_CACHE: dict[str, float] = {}
+_RATE_LIMIT_TTL = 90  # seconds
+
+
+def _is_rate_limited(model: str) -> bool:
+    """Return True if this model returned 429 within the last _RATE_LIMIT_TTL seconds."""
+    ts = _RATE_LIMIT_CACHE.get(model)
+    if ts is None:
+        return False
+    if time.time() - ts < _RATE_LIMIT_TTL:
+        return True
+    del _RATE_LIMIT_CACHE[model]  # TTL expired — clear and allow retry
+    return False
+
+
+def _mark_rate_limited(model: str) -> None:
+    """Record that this model returned 429 right now."""
+    _RATE_LIMIT_CACHE[model] = time.time()
+    logger.debug("Rate-limit cooldown started for %s (%ds)", model, _RATE_LIMIT_TTL)
+
 
 # ── Catalogue helpers ──────────────────────────────────────────────────────────
 
-def _template_catalogue() -> str:
+def _template_catalogue(custom_components: list[dict] | None = None) -> str:
     """Detailed catalogue: id, category, inputs, outputs."""
     from core.parser import parse_node_code
     lines: list[str] = []
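The cooldown cache added above is small enough to exercise on its own. A minimal standalone sketch of the same mechanism (the model slug is illustrative, and this mirrors the hunk rather than importing the package):

```python
import time

_RATE_LIMIT_CACHE: dict[str, float] = {}
_RATE_LIMIT_TTL = 90  # seconds, same value as in the hunk above

def _mark_rate_limited(model: str) -> None:
    # Record the timestamp of the 429 response
    _RATE_LIMIT_CACHE[model] = time.time()

def _is_rate_limited(model: str) -> bool:
    ts = _RATE_LIMIT_CACHE.get(model)
    if ts is None:
        return False
    if time.time() - ts < _RATE_LIMIT_TTL:
        return True
    del _RATE_LIMIT_CACHE[model]  # TTL expired, eligible for retry again
    return False

slug = "meta-llama/llama-3.3-70b-instruct:free"
_mark_rate_limited(slug)
assert _is_rate_limited(slug)       # inside the 90 s window: skipped
_RATE_LIMIT_CACHE[slug] -= 91       # simulate the TTL elapsing
assert not _is_rate_limited(slug)   # cooldown cleared, model is retried
```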
@@ -49,11 +84,24 @@ def _template_catalogue() -> str:
         field_ins = [f"{i.name}:{i.kind}={i.default}" for i in schema.inputs if i.kind != "data"]
         outs = [o.name for o in schema.outputs]
         lines.append(
-            f"
-            f"
-            f"
-            f"    outputs : {outs or '(none)'}"
+            f"{t['id']} [{t['category']}]\n"
+            f"    inputs : {data_ins or '(none)'}   fields: {field_ins or '(none)'}\n"
+            f"    outputs: {outs or '(none)'}"
         )
+
+    if custom_components:
+        lines.append("\n=== USER CUSTOM COMPONENTS (Preferred if applicable) ===")
+        for c in custom_components:
+            schema = c.get("schema", {})
+            data_ins = [i["name"] for i in schema.get("inputs", []) if i.get("kind") == "data"]
+            field_ins = [f"{i['name']}:{i.get('kind')}={i.get('default')}" for i in schema.get("inputs", []) if i.get("kind") != "data"]
+            outs = [o["name"] for o in schema.get("outputs", [])]
+            lines.append(
+                f"{c.get('id')} [Custom] \"{c.get('label')}\"\n"
+                f"    inputs : {data_ins or '(none)'}   fields: {field_ins or '(none)'}\n"
+                f"    outputs: {outs or '(none)'}"
+            )
+
     return "\n".join(lines)
 
 
@@ -65,6 +113,366 @@ def _allowed_type_ids() -> set[str]:
     return ids
 
 
+# ── Pre-flight analysis ────────────────────────────────────────────────────────
+
+def _determine_pre_flight(profile: dict, prompt: str, context: str | None, csv_path: str | None = None) -> dict:
+    """
+    Deterministic data-driven analysis computed BEFORE any LLM call.
+
+    Uses pandas to analyse the ACTUAL data (not just regex on the prompt), so the
+    LLM receives ground-truth facts — not guesses — about the task type and
+    preprocessing requirements.
+    """
+    import re
+    import pandas as pd
+    import numpy as np
+
+    dtypes = profile.get("dtypes", {})
+    missing = profile.get("missing", {})
+    numeric_summary = profile.get("numeric_summary", {})
+    categorical_summary = profile.get("categorical_summary", {})
+    shape = profile.get("shape", [0, 0])
+
+    # ── Step 1: Find the target column ────────────────────────────────────────
+    # Priority: explicit mention in prompt/context > heuristic column names.
+
+    target_hint: str | None = None
+    search_text = (prompt + " " + (context or "")).lower()
+
+    # (a) Regex extraction from user text
+    for pattern in [
+        r"predict\s+(?:the\s+)?['\"]?(\w+)['\"]?",
+        r"target\s+(?:(?:column|variable|col)\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
+        r"classif(?:y|ication)\s+(?:the\s+)?['\"]?(\w+)['\"]?",
+        r"label\s+(?:column\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
+        r"output\s+(?:column\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
+        r"y\s*=\s*['\"]?(\w+)['\"]?",
+    ]:
+        m = re.search(pattern, search_text)
+        if m:
+            candidate = m.group(1)
+            # Validate the candidate actually exists in the data
+            if candidate in dtypes or candidate in numeric_summary or candidate in categorical_summary:
+                target_hint = candidate
+                break
+
+    # (b) If still unknown, use heuristic column-name scoring on the real columns
+    if target_hint is None:
+        TARGET_KEYWORDS = [
+            "target", "label", "class", "output", "y", "result",
+            "outcome", "diagnosis", "status", "type", "category",
+            "survived", "churn", "default", "fraud", "price",
+            "salary", "value", "score", "sales", "revenue", "cost",
+            "medv", "charges", "fare",
+        ]
+        all_columns = list(dtypes.keys())
+        best_col: str | None = None
+        best_score = -1
+
+        for col in all_columns:
+            col_lower = col.lower().replace("_", " ").replace("-", " ")
+            score = 0
+
+            # Keyword match against column name
+            for kw in TARGET_KEYWORDS:
+                if kw in col_lower:
+                    score += 3
+                    break
+            # Last column is commonly the target in many datasets
+            if col == all_columns[-1]:
+                score += 2
+            # Column mentioned in prompt text
+            if col_lower in search_text or col.lower() in search_text:
+                score += 4
+
+            if score > best_score:
+                best_score = score
+                best_col = col
+
+        if best_col and best_score >= 2:
+            target_hint = best_col
+
+    # ── Step 2: Determine problem type from ACTUAL DATA ───────────────────────
+    # Initialize all new keys upfront to avoid KeyErrors downstream.
+    problem_type = "unknown"
+    target_analysis: dict = {}
+    is_imbalanced = False
+    needs_outlier_removal = False
+
+    if target_hint and csv_path:
+        try:
+            df = pd.read_csv(csv_path, nrows=5000)
+
+            # ── Outlier Detection: scan all numeric feature columns ──────────
+            for col in df.select_dtypes(include=[np.number]).columns:
+                if col == target_hint:
+                    continue
+                col_series = df[col].dropna()
+                if len(col_series) == 0:
+                    continue
+                col_mean = col_series.mean()
+                col_max = col_series.max()
+                # Flag if max is >10x the mean AND mean is non-trivially positive
+                if col_mean > 0 and col_max > 10 * col_mean:
+                    needs_outlier_removal = True
+                    break  # one outlier column is enough to flag the dataset
+
+            if target_hint in df.columns:
+                col_data = df[target_hint].dropna()
+                dtype = col_data.dtype
+
+                if dtype == object or str(dtype) == "category":
+                    # String/category column → always classification
+                    n_unique = col_data.nunique()
+                    problem_type = "classification"
+                    target_analysis = {
+                        "dtype": str(dtype),
+                        "unique_values": int(n_unique),
+                        "sample_values": col_data.unique()[:5].tolist(),
+                        "reasoning": f"Categorical dtype with {n_unique} unique string values → classification",
+                    }
+
+                elif dtype == bool or (dtype == int and col_data.nunique() <= 2):
+                    # Boolean or binary integer → classification
+                    problem_type = "classification"
+                    target_analysis = {
+                        "dtype": str(dtype),
+                        "unique_values": int(col_data.nunique()),
+                        "sample_values": col_data.unique()[:5].tolist(),
+                        "reasoning": "Binary (0/1 or True/False) target → classification",
+                    }
+
+                elif np.issubdtype(dtype, np.integer):
+                    n_unique = col_data.nunique()
+                    n_total = len(col_data)
+                    unique_ratio = n_unique / max(n_total, 1)
+                    if n_unique <= 20 or unique_ratio < 0.05:
+                        problem_type = "classification"
+                        target_analysis = {
+                            "dtype": str(dtype),
+                            "unique_values": int(n_unique),
+                            "sample_values": sorted(col_data.unique().tolist())[:10],
+                            "reasoning": f"Integer with only {n_unique} unique values ({unique_ratio:.1%} of rows) → likely class labels → classification",
+                        }
+                    else:
+                        problem_type = "regression"
+                        target_analysis = {
+                            "dtype": str(dtype),
+                            "unique_values": int(n_unique),
+                            "min": float(col_data.min()),
+                            "max": float(col_data.max()),
+                            "mean": float(col_data.mean()),
+                            "reasoning": f"Integer with {n_unique} unique values (high cardinality) → continuous → regression",
+                        }
+
+                elif np.issubdtype(dtype, np.floating):
+                    n_unique = col_data.nunique()
+                    problem_type = "regression"
+                    target_analysis = {
+                        "dtype": str(dtype),
+                        "unique_values": int(n_unique),
+                        "min": float(col_data.min()),
+                        "max": float(col_data.max()),
+                        "mean": float(col_data.mean()),
+                        "std": float(col_data.std()),
+                        "reasoning": f"Floating-point target with {n_unique} unique values → continuous → regression",
+                    }
+
+                else:
+                    # Fallback: try to convert and check cardinality
+                    try:
+                        as_numeric = pd.to_numeric(col_data, errors="coerce")
+                        if as_numeric.isna().mean() < 0.1:
+                            n_unique = as_numeric.nunique()
+                            problem_type = "regression" if n_unique > 20 else "classification"
+                            target_analysis = {
+                                "dtype": str(dtype),
+                                "unique_values": int(n_unique),
+                                "reasoning": f"Converted to numeric; {n_unique} unique values → {'regression' if n_unique > 20 else 'classification'}",
+                            }
+                        else:
+                            problem_type = "classification"
+                            target_analysis = {
+                                "dtype": str(dtype),
+                                "reasoning": "Could not convert to numeric → treating as classification",
+                            }
+                    except Exception:
+                        problem_type = "classification"
+
+                # ── Class Imbalance Check (classification only) ──────────────
+                if problem_type == "classification":
+                    try:
+                        class_freqs = col_data.value_counts(normalize=True)
+                        if class_freqs.min() < 0.10:
+                            is_imbalanced = True
+                    except Exception:
+                        pass
+
+        except Exception as exc:
+            logger.warning("Pre-flight target analysis failed: %s", exc)
+            # Fall back to dtype-only heuristic using profile data
+            if target_hint:
+                dtype_str = str(dtypes.get(target_hint, "")).lower()
+                if any(t in dtype_str for t in ("object", "category", "bool", "str")):
+                    problem_type = "classification"
+                elif target_hint in categorical_summary:
+                    problem_type = "classification" if categorical_summary[target_hint].get("unique", 99) < 15 else "regression"
+                elif target_hint in numeric_summary:
+                    problem_type = "regression"
+
+    elif target_hint:
+        # No CSV path — fall back to profile-based heuristic
+        dtype_str = str(dtypes.get(target_hint, "")).lower()
+        if any(t in dtype_str for t in ("object", "category", "bool", "str")):
+            problem_type = "classification"
+        elif target_hint in categorical_summary:
+            problem_type = "classification" if categorical_summary.get(target_hint, {}).get("unique", 99) < 15 else "regression"
+        elif target_hint in numeric_summary:
+            problem_type = "regression"
+
+    # ── Step 3: Preprocessing flags ───────────────────────────────────────────
+    # Detect categorical columns that need encoding (exclude the target itself)
+    cat_cols = {c for c in categorical_summary if c != target_hint}
+    num_cols = {c for c in numeric_summary if c != target_hint}
+
+    missing_cols: dict[str, float] = {
+        col: round(info.get("pct", 0), 1)
+        for col, info in missing.items()
+        if info.get("pct", 0) > 0
+    }
+    cardinality: dict[str, int] = {
+        col: info.get("unique", 0)
+        for col, info in categorical_summary.items()
+    }
+
+    needs_encoding = len(cat_cols) > 0
+    needs_scaling = len(num_cols) >= 2
+    needs_imputation = len(missing_cols) > 0
+
+    # ── Step 4: Recommend specific model based on problem type + data size ────
+    n_rows = shape[0] if shape else 0
+    n_cols = shape[1] if len(shape) > 1 else 0
+
+    if problem_type == "classification":
+        recommended_model = "random_forest_classifier" if n_rows >= 1000 else "logistic_regression"
+        recommended_metric_node = "classification_report"
+    elif problem_type == "regression":
+        recommended_model = "random_forest_regressor" if n_rows >= 1000 else "linear_regression"
+        recommended_metric_node = "regression_metrics"
+    else:
+        recommended_model = "random_forest_classifier"
+        recommended_metric_node = "classification_report"
+
+    return {
+        "target_hint": target_hint,
+        "problem_type": problem_type,
+        "target_analysis": target_analysis,
+        "missing_cols": missing_cols,
+        "cardinality": cardinality,
+        "needs_encoding": needs_encoding,
+        "needs_scaling": needs_scaling,
+        "needs_imputation": needs_imputation,
+        "needs_outlier_removal": needs_outlier_removal,
+        "is_imbalanced": is_imbalanced,
+        "n_rows": n_rows,
+        "n_cols": n_cols,
+        "categorical_cols": sorted(cat_cols),
+        "numeric_cols": sorted(num_cols),
+        "recommended_model": recommended_model,
+        "recommended_metric": recommended_metric_node,
+    }
+
+
+def _pre_flight_block(pf: dict) -> str:
+    """Format the pre-flight analysis as an authoritative, bossy prompt block."""
+    analysis = pf.get("target_analysis", {})
+    reasoning = analysis.get("reasoning", "")
+    sample_vals = analysis.get("sample_values", [])
+
+    lines = [
+        "╔══ [PYTHON-DETERMINED ANALYSIS — treat as ABSOLUTE GROUND TRUTH] ════╗",
+        f" ⚠ Problem type     : {pf['problem_type'].upper()} (MANDATORY)",
+        f"   Reasoning        : {reasoning or 'heuristic from dtype/cardinality'}",
+        f"   Target column    : {pf['target_hint'] or 'not specified — infer from context'}",
+    ]
+    if sample_vals:
+        lines.append(f"   Target sample vals : {sample_vals}")
+    lines += [
+        f"   Dataset size     : {pf['n_rows']} rows × {pf['n_cols']} columns",
+        f"   Missing values   : {pf['missing_cols'] or 'none'}",
+        f"   Categorical cols : {pf.get('categorical_cols') or 'none'}",
+        f"   Numeric cols     : {pf.get('numeric_cols') or 'none'}",
+        f"   Needs encoding   : {'YES — add label_encoder BEFORE train_test_split' if pf['needs_encoding'] else 'no'}",
+        f"   Needs scaling    : {'YES — add standard_scaler AFTER train_test_split' if pf['needs_scaling'] else 'no'}",
+        f"   Needs imputation : {'YES — add data_cleaning BEFORE split' if pf['needs_imputation'] else 'no'}",
+    ]
+    # Conditional directive for outlier removal
+    if pf.get("needs_outlier_removal"):
+        lines.append(
+            " ⚠ Outlier columns  : YES — a numeric feature has max > 10× its mean. "
+            "Add an outlier_removal node BEFORE train_test_split."
+        )
+    # Conditional directive for class imbalance
+    if pf.get("is_imbalanced"):
+        lines.append(
+            " ⚠ Class imbalance  : YES — minority class < 10% of data. "
+            "Set class_weight='balanced' on the model node config."
+        )
+    lines += [
+        f" ⚠ Model Selection  : USE {pf.get('recommended_model', 'unknown').upper()} ONLY",
+        f" ✅ Metric node      : {pf.get('recommended_metric', 'unknown')}",
+        "╚════════════════════════════════════════════════════════════════════════╝",
+    ]
+    return "\n".join(lines)
+
+
+# ── Architect (R1 planning) prompt ────────────────────────────────────────────
+
+_ARCHITECT_PROMPT = """\
+You are a senior ML engineer performing pre-build technical architecture planning.
+You will receive a dataset profile, a deterministic pre-flight analysis, and the user's request.
+
+OUTPUT: Concise markdown only. No JSON. No code blocks. Under 250 words.
+
+Structure your response as:
+
+## Problem Type
+State classification or regression with one-sentence justification.
+
+## Data Quality Plan
+List each issue (missing values, categorical columns, dtype mismatches) and the exact
+preprocessing step needed for it. Reference actual column names.
+
+## Pipeline Sequence
+Ordered list of node types (e.g. csv_loader → label_encoder → train_test_split →
+standard_scaler → random_forest_classifier → classification_report).
+
+## Model Rationale
+Why this model fits the problem. If the dataset is large (>10k rows), prefer tree-based
+models. If many numeric features, recommend scaling. If class imbalance suspected, note it.
+
+## Critical Warnings
+Any data issues the pipeline MUST handle. Be blunt about failure modes.
+
+Do NOT output JSON. Do NOT write code. Be specific — use actual column names from the profile.
+"""
+
+
+def _build_architect_messages(
+    prompt: str, profile_text: str, pf: dict
+) -> list[dict]:
+    user = (
+        f"== DATASET PROFILE ==\n{profile_text}\n\n"
+        f"== DETERMINISTIC PRE-FLIGHT ==\n{_pre_flight_block(pf)}\n\n"
+        f"== USER REQUEST ==\n{prompt}\n\n"
+        "Provide your technical pipeline architecture plan."
+    )
+    return [
+        {"role": "system", "content": _ARCHITECT_PROMPT},
+        {"role": "user", "content": user},
+    ]
+
+
 # ── System prompts ─────────────────────────────────────────────────────────────
 
 _SYSTEM_PROMPT = """\
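To make the Step 2 decision rule concrete, here is a reduced standalone sketch of the dtype/cardinality heuristic `_determine_pre_flight` applies to the target column (toy data; the bool/binary branch and the object-conversion fallback of the real function are omitted):

```python
import numpy as np
import pandas as pd

def infer_problem_type(target: pd.Series) -> str:
    """Core of the pre-flight rule: strings classify, sparse ints classify, floats regress."""
    target = target.dropna()
    if target.dtype == object or str(target.dtype) == "category":
        return "classification"          # string labels are always classes
    if np.issubdtype(target.dtype, np.integer):
        n_unique = target.nunique()
        ratio = n_unique / max(len(target), 1)
        # few distinct integers look like class labels; many look continuous
        return "classification" if n_unique <= 20 or ratio < 0.05 else "regression"
    if np.issubdtype(target.dtype, np.floating):
        return "regression"              # continuous values
    return "unknown"

print(infer_problem_type(pd.Series(["yes", "no", "yes"])))          # classification
print(infer_problem_type(pd.Series([0, 1, 1, 0] * 500)))            # classification
print(infer_problem_type(pd.Series(np.linspace(10.0, 99.5, 400))))  # regression
```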
@@ -125,9 +533,13 @@ Use this structure to decide WHERE to modify or improve.
 ═══════════════════════════════════════
 STRICT RULES:
 
-1. MINIMALISM
-   -
-   -
+1. DATA INTEGRITY & ACCURACY FIRST — MINIMALISM SECOND
+   - A complete, correct pipeline beats a minimal, broken one.
+   - REQUIRED: label_encoder for any object/category column BEFORE train_test_split.
+   - REQUIRED: standard_scaler for distance-based models (SVM, KNN, LogisticRegression).
+   - REQUIRED: data_cleaning node when ANY column has missing values.
+   - THEN minimize: never add a step the data does not require.
+   - A pipeline that skips necessary preprocessing is a FAILURE regardless of node count.
 
 2. USE TEMPLATES FIRST
    - Only use customNode if NO template exists
@@ -141,6 +553,7 @@ STRICT RULES:
    - sourceHandle MUST exist in source outputs
    - targetHandle MUST match input param
    - metric nodes MUST receive: y_pred + y_test
+   - SCALER RULE: If using standard_scaler or min_max_scaler after train_test_split, you MUST connect all 4 split outputs (X_train, X_test, y_train, y_test) to the scaler, and then connect all 4 scaler outputs to the model. Do not skip y_train/y_test.
 
 5. DATA RULES
    - If categorical columns exist → include label_encoder BEFORE split
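The new SCALER RULE is mechanical enough to check programmatically. A sketch of a validator over the edge format used by the gold-standard examples later in this prompt (the function and variable names here are ours, not part of the package):

```python
REQUIRED_HANDLES = {"X_train", "X_test", "y_train", "y_test"}

def scaler_fully_wired(edges: list[dict], split_id: str, scaler_id: str) -> bool:
    """True only if all four split outputs reach the scaler on matching handles."""
    wired = {
        e["sourceHandle"]
        for e in edges
        if e["source"] == split_id
        and e["target"] == scaler_id
        and e["sourceHandle"] == e["targetHandle"]
    }
    return REQUIRED_HANDLES <= wired

edges = [
    {"source": "n3", "sourceHandle": h, "target": "n4", "targetHandle": h}
    for h in ("X_train", "X_test", "y_train")  # y_test deliberately skipped
]
print(scaler_fully_wired(edges, "n3", "n4"))   # False: violates the rule above
```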
@@ -208,7 +621,7 @@ If a node has an error:
 - Include all imports inside every code block
 - Return dict keys MUST match sourceHandles on outgoing edges
 - NEVER import matplotlib or seaborn
--
+- CRITICAL: If the user requests an operation/model NOT in the catalogue (e.g. RobustScaler), DO NOT refuse. Generate it dynamically as a `customNode` with the full Python `code` starting with `# ✨ AI GENERATED`. Explicitly mention this custom generation in the `summary`.
 
 ══ AVAILABLE COMPONENTS ════════════════════════════════════════════
 {catalogue}
@@ -237,22 +650,117 @@ Model nodes support these config fields — no new node needed:
   "type": "customNode",
   "position": {{"x": 560, "y": 200}},
   "data": {{
-    "label": "
+    "label": "Robust Scaler (Custom)",
     "templateId": "customNode",
-    "code": "import pandas as pd
+    "code": "import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None) -> dict:\\n    # ...\\n    return {{\"X_train\": X_train, \"X_test\": X_test}}"
   }}
 }}
 
--
-- DataFrame param MUST be named `data`
-- MUST return a dict
+- Your Python string in `code` MUST define a function named EXACTLY `def run(...) -> dict:`
+- The DataFrame param MUST be named `data` (if taking a whole dataset)
+- MUST return a dict containing the output handles
 - Only import: numpy, pandas, scipy, sklearn, math, statistics, re, json
 - NO os, sys, subprocess, socket, requests, open(), eval(), exec()
 - Prefer templates first — custom nodes are last resort only
 
+══ DYNAMIC VISUALIZATION ══════════════════════════════════════════════
+You can create custom visualizations NOT in the catalogue. The frontend
+detects charts by SHAPE, not by key name. Return any of these shapes and
+the UI will render it automatically — NO new React code needed:
+
+Series (bar chart / ranked list):
+  {{"my_key": {{"labels": [...], "counts": [...], "title": "..."}}}}
+
+Plot (scatter or line chart):
+  {{"my_key": {{"x": [...], "y": [...], "title": "...", "x_label": "...", "y_label": "..."}}}}
+
+Grid (heatmap / matrix):
+  {{"my_key": {{"z": [[...]], "x": [...], "y": [...], "title": "..."}}}}
+
+Example — null percentage bar chart:
+  return {{
+      "null_pct_chart": {{
+          "labels": list(null_pcts.keys()),
+          "counts": list(null_pcts.values()),
+          "title": "Missing Values (%) per Column"
+      }}
+  }}
+
+When the user asks for any kind of visualization (e.g. "show me a chart
+of X", "visualize the distribution of Y"), you MUST generate a customNode
+that returns a dict with one of the shapes above. NEVER refuse — if no
+template covers it, invent the chart with the shape protocol.
+
+⚠ UNSUPERVISED LEARNING (t-SNE / PCA / UMAP / KMeans): When generating
+any dimensionality reduction or clustering node, you MUST return a
+`labels` array alongside `x` and `y` so the frontend can color-code
+clusters automatically. Example:
+
+  return {{
+      "tsne_plot": {{
+          "x": X_2d[:, 0].tolist(),
+          "y": X_2d[:, 1].tolist(),
+          "labels": [str(c) for c in cluster_labels],  # ← REQUIRED
+          "title": "t-SNE Cluster Visualization",
+          "x_label": "Dim 1",
+          "y_label": "Dim 2"
+      }}
+  }}
+
 ══ OUTPUT ═══════════════════════════════════════════════════════════
 Return ONLY:
 {{"nodes": [...], "edges": [...], "summary": "<one sentence conversational explanation>"}}
+
+⚠️ DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
+
+══ GOLD STANDARD EXAMPLES — you MUST match these handle names EXACTLY ═══════
+⚠ CRITICAL: The sourceHandle and targetHandle values below (data, X_train, X_test,
+y_train, y_test, y_pred) are the ONLY valid handle names. Do NOT invent new ones.
+Your edges MUST use these exact strings — any deviation will cause a runtime failure.
+
+Example A — CLASSIFICATION (categorical cols + scaling needed):
+{{"nodes":[
+  {{"id":"n1","type":"csv_loader","position":{{"x":0,"y":200}},"data":{{"config":{{"file_path":"data.csv"}}}}}},
+  {{"id":"n2","type":"label_encoder","position":{{"x":280,"y":200}},"data":{{"config":{{"columns":"sex,embarked"}}}}}},
+  {{"id":"n3","type":"train_test_split","position":{{"x":560,"y":200}},"data":{{"config":{{"target_column":"survived","test_size":0.2}}}}}},
+  {{"id":"n4","type":"standard_scaler","position":{{"x":840,"y":200}},"data":{{}}}},
+  {{"id":"n5","type":"random_forest_classifier","position":{{"x":1120,"y":200}},"data":{{"config":{{"n_estimators":100}}}}}},
+  {{"id":"n6","type":"classification_report","position":{{"x":1400,"y":200}},"data":{{}}}}
+],"edges":[
+  {{"id":"e1","source":"n1","sourceHandle":"data","target":"n2","targetHandle":"data"}},
+  {{"id":"e2","source":"n2","sourceHandle":"data","target":"n3","targetHandle":"data"}},
+  {{"id":"e3","source":"n3","sourceHandle":"X_train","target":"n4","targetHandle":"X_train"}},
+  {{"id":"e4","source":"n3","sourceHandle":"X_test","target":"n4","targetHandle":"X_test"}},
+  {{"id":"e5","source":"n3","sourceHandle":"y_train","target":"n4","targetHandle":"y_train"}},
+  {{"id":"e6","source":"n3","sourceHandle":"y_test","target":"n4","targetHandle":"y_test"}},
+  {{"id":"e7","source":"n4","sourceHandle":"X_train","target":"n5","targetHandle":"X_train"}},
+  {{"id":"e8","source":"n4","sourceHandle":"X_test","target":"n5","targetHandle":"X_test"}},
+  {{"id":"e9","source":"n4","sourceHandle":"y_train","target":"n5","targetHandle":"y_train"}},
+  {{"id":"e10","source":"n5","sourceHandle":"y_pred","target":"n6","targetHandle":"y_pred"}},
+  {{"id":"e11","source":"n4","sourceHandle":"y_test","target":"n6","targetHandle":"y_test"}}
+],"summary":"Classification pipeline with encoding, scaling, and Random Forest."}}
+
+Example B — REGRESSION (missing values + continuous target):
+{{"nodes":[
+  {{"id":"n1","type":"csv_loader","position":{{"x":0,"y":200}},"data":{{"config":{{"file_path":"houses.csv"}}}}}},
+  {{"id":"n2","type":"data_cleaning","position":{{"x":280,"y":200}},"data":{{"config":{{"strategy":"fill"}}}}}},
+  {{"id":"n3","type":"train_test_split","position":{{"x":560,"y":200}},"data":{{"config":{{"target_column":"price","test_size":0.2}}}}}},
+  {{"id":"n4","type":"standard_scaler","position":{{"x":840,"y":200}},"data":{{}}}},
+  {{"id":"n5","type":"random_forest_regressor","position":{{"x":1120,"y":200}},"data":{{"config":{{"n_estimators":200}}}}}},
+  {{"id":"n6","type":"regression_metrics","position":{{"x":1400,"y":200}},"data":{{}}}}
+],"edges":[
+  {{"id":"e1","source":"n1","sourceHandle":"data","target":"n2","targetHandle":"data"}},
+  {{"id":"e2","source":"n2","sourceHandle":"data","target":"n3","targetHandle":"data"}},
+  {{"id":"e3","source":"n3","sourceHandle":"X_train","target":"n4","targetHandle":"X_train"}},
+  {{"id":"e4","source":"n3","sourceHandle":"X_test","target":"n4","targetHandle":"X_test"}},
+  {{"id":"e5","source":"n3","sourceHandle":"y_train","target":"n4","targetHandle":"y_train"}},
+  {{"id":"e6","source":"n3","sourceHandle":"y_test","target":"n4","targetHandle":"y_test"}},
+  {{"id":"e7","source":"n4","sourceHandle":"X_train","target":"n5","targetHandle":"X_train"}},
+  {{"id":"e8","source":"n4","sourceHandle":"X_test","target":"n5","targetHandle":"X_test"}},
+  {{"id":"e9","source":"n4","sourceHandle":"y_train","target":"n5","targetHandle":"y_train"}},
+  {{"id":"e10","source":"n5","sourceHandle":"y_pred","target":"n6","targetHandle":"y_pred"}},
+  {{"id":"e11","source":"n4","sourceHandle":"y_test","target":"n6","targetHandle":"y_test"}}
+],"summary":"Regression pipeline with cleaning, scaling, and Random Forest."}}
 """
 
 _UPDATE_PROMPT = """\
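As a concrete instance of the shape protocol described in the DYNAMIC VISUALIZATION block, here is a sketch of what a generated customNode body could look like for the Series shape (names are illustrative; that the frontend renders this automatically is the prompt's claim, not something verified here):

```python
import pandas as pd

# ✨ AI GENERATED (illustrative): null-percentage bar chart via the Series shape
def run(data: pd.DataFrame = None) -> dict:
    null_pcts = (data.isna().mean() * 100).round(1)
    return {
        "null_pct_chart": {
            "labels": null_pcts.index.tolist(),
            "counts": null_pcts.values.tolist(),
            "title": "Missing Values (%) per Column",
        }
    }

demo = pd.DataFrame({"age": [1, None, 3], "fare": [7.2, 8.1, None]})
print(run(demo)["null_pct_chart"])  # {'labels': ['age', 'fare'], 'counts': [33.3, 33.3], ...}
```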
@@ -264,6 +772,16 @@ You are M8Flow's AI pipeline surgeon. Modify the pipeline with the MINIMUM chang
 3. ALWAYS respond in ENGLISH. Never use any other language.
 4. Do NOT truncate the JSON — it must be a complete, valid object.
 
+🔑 TOKEN CONSERVATION — MANDATORY (free-tier models have strict output limits):
+A. TEMPLATE SWAPS: If you are changing a node's operation to one that already has
+   a matching Template ID (e.g. swapping from linear_regression to
+   random_forest_regressor), change only the "type" field on that node.
+   NEVER re-emit the full Python "code" block when a Template already covers it.
+   Template nodes are resolved by the runtime — sending their code wastes tokens.
+B. UNCHANGED NODES: Nodes marked ✓ in the status list must appear in your output
+   but with their "data.code" field set to null (omitted). Only include code for
+   nodes you are actively modifying or adding as custom (non-template) nodes.
+
 Output ONLY the complete updated flow JSON — no markdown, no explanation.
 
 ══ DECISION HIERARCHY (follow in order, stop at first match) ════════
@@ -279,9 +797,8 @@ Output ONLY the complete updated flow JSON — no markdown, no explanation.
 
 4. Does this genuinely require a brand-new node that adds functionality
    not available anywhere in the graph?
-   YES → add exactly ONE new node, connected minimally.
-
-   If none apply, state the limitation in a comment field — do not bloat the graph.
+   YES → add exactly ONE new node, connected minimally.
+   CRITICAL: If the requested operation (e.g., RobustScaler) is NOT in the catalogue, DO NOT refuse the request. Instead, YOU MUST generate it dynamically as a `customNode`. Provide the complete Python implementation in the `code` field, starting with the comment `# ✨ AI GENERATED`. In your `summary`, explicitly state that you generated a custom node for this functionality.
 
 ══ CURRENT PIPELINE ═════════════════════════════════════════════════
 {current_flow}
@@ -303,6 +820,52 @@ All model nodes support these fields in their config — no new node needed:
 "better accuracy" → tune hyperparams, or swap model type — no extra nodes
 "use k-fold" → set cross_validation=true, cv_folds=k on existing model
 
+══ CUSTOM NODE FORMAT (only if NO template covers it) ══════════════
+{{
+  "id": "node_custom_1",
+  "type": "customNode",
+  "position": {{"x": 560, "y": 200}},
+  "data": {{
+    "label": "Robust Scaler (Custom)",
+    "templateId": "customNode",
+    "code": "import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None) -> dict:\\n    # ...\\n    return {{\"X_train\": X_train, \"X_test\": X_test}}"
+  }}
+}}
+
+- Your Python string in `code` MUST define a function named EXACTLY `def run(...) -> dict:`
+- The DataFrame param MUST be named `data` (if taking a whole dataset)
+- MUST return a dict containing the output handles
+- Only import: numpy, pandas, scipy, sklearn, math, statistics, re, json
+- NO os, sys, subprocess, socket, requests, open(), eval(), exec()
+
+══ DYNAMIC VISUALIZATION ══════════════════════════════════════════════
+You can create custom visualizations NOT in the catalogue. The frontend
+detects charts by SHAPE, not by key name. Return any of these shapes and
+the UI will render it automatically — NO new React code needed:
+
+Series (bar chart / ranked list):
+  {{"my_key": {{"labels": [...], "counts": [...], "title": "..."}}}}
+
+Plot (scatter or line chart):
+  {{"my_key": {{"x": [...], "y": [...], "title": "...", "x_label": "...", "y_label": "..."}}}}
+
+Grid (heatmap / matrix):
+  {{"my_key": {{"z": [[...]], "x": [...], "y": [...], "title": "..."}}}}
+
+Example — null percentage bar chart:
+  return {{
+      "null_pct_chart": {{
+          "labels": list(null_pcts.keys()),
+          "counts": list(null_pcts.values()),
+          "title": "Missing Values (%) per Column"
+      }}
+  }}
+
+When the user asks for any kind of visualization (e.g. "show me a chart
+of X", "visualize the distribution of Y"), you MUST generate a customNode
+that returns a dict with one of the shapes above. NEVER refuse — if no
+template covers it, invent the chart with the shape protocol.
+
 ══ SURGICAL PRESERVATION RULES ══════════════════════════════════════
 - Every node marked ✓ or ○ must appear in the output VERBATIM
   (same id, type, position, code, values — character for character)
@@ -320,6 +883,8 @@ All model nodes support these fields in their config — no new node needed:
 ══ OUTPUT ═══════════════════════════════════════════════════════════
 Return ONLY:
 {{"nodes": [...], "edges": [...], "summary": "<one sentence conversational explanation>"}}
+
+⚠️ DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
 """
 
 
@@ -333,8 +898,17 @@ def _extract_file_path(context: str) -> str | None:
     return None
 
 
-def _build_generate_messages(
-
+def _build_generate_messages(
+    prompt: str,
+    context: str | None,
+    pre_flight: dict | None = None,
+    architect_plan: str | None = None,
+    custom_components: list[dict] | None = None,
+) -> list[dict]:
+    system = _SYSTEM_PROMPT.format(catalogue=_template_catalogue(custom_components))
+
+    profile_block = ""
+    path_hint = ""
 
     if context:
         fp = _extract_file_path(context)
@@ -343,13 +917,10 @@ def _build_generate_messages(prompt: str, context: str | None) -> list[dict]:
             f'Default value for csv_loader → file_path: Annotated[str,"file"] = "{fp}"\n'
             if fp else ""
         )
-
-        # ── Enrich context with data profile if a file path is present ──
-        profile_block = ""
         if fp:
             try:
                 import pandas as pd
-                df = pd.read_csv(fp, nrows=5000)
+                df = pd.read_csv(fp, nrows=5000)
                 profile = profile_dataframe(df)
                 profile_block = (
                     "\n== Dataset Summary (auto-profiled) ==\n"
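For intuition, a minimal sketch of the kind of profile dict this enrichment step feeds downstream (the real `profile_dataframe` lives elsewhere in the package; the keys here simply mirror what `_determine_pre_flight` reads, so the helper name and exact fields are assumptions):

```python
import pandas as pd

def profile_dataframe_sketch(df: pd.DataFrame) -> dict:
    """Toy profiler producing the keys the pre-flight analysis consumes."""
    numeric = df.select_dtypes("number")
    categorical = df.select_dtypes(exclude="number")
    return {
        "shape": list(df.shape),
        "dtypes": {c: str(t) for c, t in df.dtypes.items()},
        "missing": {c: {"pct": float(df[c].isna().mean() * 100)} for c in df.columns},
        "numeric_summary": {c: {"mean": float(numeric[c].mean())} for c in numeric.columns},
        "categorical_summary": {c: {"unique": int(categorical[c].nunique())} for c in categorical.columns},
    }

print(profile_dataframe_sketch(pd.DataFrame({"age": [22, None, 35], "sex": ["m", "f", "f"]})))
```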
@@ -359,15 +930,29 @@ def _build_generate_messages(prompt: str, context: str | None) -> list[dict]:
             except Exception as exc:
                 logger.warning("data_profiler skipped: %s", exc)
 
-
-
-
-
-
-
+    # ── Inject deterministic pre-flight analysis ──────────────────────────
+    pre_flight_block = ""
+    if pre_flight:
+        pre_flight_block = "\n" + _pre_flight_block(pre_flight) + "\n"
+
+    # ── Inject architect plan ─────────────────────────────────────────────
+    architect_block = ""
+    if architect_plan and architect_plan.strip():
+        architect_block = (
+            "\n== EXPERT ARCHITECTURE PLAN (follow this closely) ==\n"
+            + architect_plan.strip()
+            + "\n"
         )
-
-
+
+    user = (
+        f"== DATASET CONTEXT ==\n"
+        f"{profile_block}"
+        f"{path_hint}"
+        f"{pre_flight_block}"
+        f"{architect_block}"
+        f"\n== REQUEST ==\n"
+        f"{prompt}"
+    )
 
     return [{"role": "system", "content": system}, {"role": "user", "content": user}]
 
@@ -419,6 +1004,10 @@ def _slim_flow(flow: FlowSchema) -> dict:
         except Exception:
             pass
 
+        is_template = node_type in {t["id"] for t in TEMPLATES}
+        # Only send code for truly custom nodes; never for templates.
+        code_str = None if is_template else (data.get("code") or "")[:800] or None
+
         slim_nodes.append({
             "id": node.get("id"),
             "type": node_type,
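The `code_str` expression above packs three behaviours into one line. A sketch of the same logic with toy inputs (`TEMPLATES` here is a stand-in for the package's real template registry):

```python
TEMPLATES = [{"id": "csv_loader"}, {"id": "train_test_split"}]  # stand-in registry
template_ids = {t["id"] for t in TEMPLATES}

def slim_code(node_type: str, code: str | None) -> str | None:
    """None for template nodes; custom code truncated to 800 chars; empty becomes None."""
    if node_type in template_ids:
        return None
    return (code or "")[:800] or None

print(slim_code("csv_loader", "def run(): ..."))    # None: template code is never sent
print(slim_code("customNode", ""))                  # None: empty string collapses
print(len(slim_code("customNode", "x" * 1000)))     # 800: long custom code is truncated
```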
@@ -426,7 +1015,7 @@ def _slim_flow(flow: FlowSchema) -> dict:
             "data": {
                 "label": data.get("label"),
                 "templateId": data.get("templateId"),
-                "code":
+                "code": code_str,
                 "values": data.get("values"),
                 # Explicit handle lists — LLM MUST use these for edge sourceHandle/targetHandle
                 "available_outputs": outputs,
@@ -436,10 +1025,15 @@ def _slim_flow(flow: FlowSchema) -> dict:
     return {"nodes": slim_nodes, "edges": flow.edges}
 
 
-def _build_update_messages(
+def _build_update_messages(
+    prompt: str,
+    current_flow: FlowSchema,
+    context: str | None,
+    custom_components: list[dict] | None = None,
+) -> list[dict]:
     slim = _slim_flow(current_flow)
     node_status = _node_status_summary(current_flow)
-    catalogue = _template_catalogue()
+    catalogue = _template_catalogue(custom_components)
 
     system = _UPDATE_PROMPT.format(
         current_flow=json.dumps(slim, indent=2),
@@ -493,7 +1087,8 @@ async def _call_openrouter(
 ) -> str:
     """
     Route to the right model via OpenRouter based on task type.
-    Falls back through
+    Falls back through the full pool of live free models on rate-limit or error.
+    Rate-limited models are skipped for _RATE_LIMIT_TTL seconds to avoid wasted calls.
     """
     effective_key = _get_api_key()
     if not effective_key:
@@ -504,7 +1099,37 @@ async def _call_openrouter(
     )
 
     model = _MODELS.get(task, _MODELS["generate"])
-
+
+    # Full pool of verified-live free models (May 2026).
+    # Ordered by observed reliability: nemotron first (proven to succeed when llama 429s).
+    _FREE_POOL = [
+        "nvidia/nemotron-3-super-120b-a12b:free",  # proven to work
+        "openai/gpt-oss-120b:free",
+        "openai/gpt-oss-20b:free",
+        "nousresearch/hermes-3-llama-3.1-405b:free",
+        "meta-llama/llama-3.3-70b-instruct:free",
+        "google/gemma-4-31b-it:free",
+        "google/gemma-4-26b-a4b-it:free",
+        "nvidia/nemotron-3-nano-30b-a3b:free",
+        "nvidia/nemotron-nano-9b-v2:free",
+        "meta-llama/llama-3.2-3b-instruct:free",
+        "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
+        "liquid/lfm-2.5-1.2b-instruct:free",
+    ]
+
+    # Build chain: primary model first, then full pool (deduped, order preserved)
+    seen: set[str] = set()
+    full_chain: list[str] = []
+    for m in [model, _MODELS["fallback"], _MODELS["lastresort"]] + _FREE_POOL:
+        if m not in seen:
+            seen.add(m)
+            full_chain.append(m)
+
+    # Skip models in cooldown — place them at the end so they still get a chance
+    # if everything else fails (cooldown may have expired by then)
+    ready = [m for m in full_chain if not _is_rate_limited(m)]
+    cooling = [m for m in full_chain if _is_rate_limited(m)]
+    fallback_chain = ready + cooling  # try fresh models first
 
     headers = {
         "Authorization": f"Bearer {effective_key}",
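A standalone sketch of the chain construction above: deduplicate while preserving order, then partition into ready vs. cooling models (model slugs reused from the pool purely for illustration):

```python
def build_chain(primary: str, pool: list[str], cooling: set[str]) -> list[str]:
    """Primary first, then the pool, deduplicated in order; cooling models go last."""
    seen: set[str] = set()
    chain: list[str] = []
    for m in [primary, *pool]:
        if m not in seen:
            seen.add(m)
            chain.append(m)
    ready = [m for m in chain if m not in cooling]
    waiting = [m for m in chain if m in cooling]
    return ready + waiting  # rate-limited models still get a final chance

chain = build_chain(
    "meta-llama/llama-3.3-70b-instruct:free",
    ["nvidia/nemotron-3-super-120b-a12b:free", "meta-llama/llama-3.3-70b-instruct:free"],
    cooling={"meta-llama/llama-3.3-70b-instruct:free"},
)
print(chain)  # the 429'd primary is deduped and moved behind the fresh model
```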
@@ -514,35 +1139,46 @@ async def _call_openrouter(
     }
 
     last_exc: Exception | None = None
-
-
-
-
-
-
-
-        "max_tokens": 8192,
-    }
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        for attempt_model in fallback_chain:
+            body: dict = {
+                "model": attempt_model,
+                "messages": messages,
+                "max_tokens": 8192,
+            }
 
-
-    async with httpx.AsyncClient(timeout=timeout) as client:
+            try:
                 response = await client.post(_OPENROUTER_URL, headers=headers, json=body)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                if response.status_code == 429:
+                    _mark_rate_limited(attempt_model)
+                    raise RuntimeError(f"Rate limited on {attempt_model}")
+                if response.status_code == 401:
+                    raise RuntimeError("Invalid OpenRouter API key")
+                response.raise_for_status()
+
+                content = response.json()["choices"][0]["message"]["content"]
+                if attempt_model != model:
+                    logger.info("OpenRouter fallback: used %s instead of %s", attempt_model, model)
+                return content or ""
+
+            except httpx.ConnectError as exc:
+                # If we cannot resolve DNS or connect to the host, no fallback will work.
+                logger.error("Network connection to OpenRouter failed: %s", exc)
+                raise RuntimeError("Could not connect to OpenRouter (Network/DNS error). Please check your internet connection.")
+            except RuntimeError as exc:
+                if "Invalid OpenRouter API key" in str(exc):
+                    raise  # Don't retry — wrong key won't fix itself
+                logger.warning("OpenRouter model %s failed: %s", attempt_model, exc)
+                last_exc = exc
+                continue
+            except Exception as exc:
+                if "getaddrinfo failed" in str(exc):
+                    logger.error("DNS resolution failed for OpenRouter: %s", exc)
+                    raise RuntimeError("Could not resolve OpenRouter domain. Please check your internet connection.")
+                logger.warning("OpenRouter model %s failed: %s", attempt_model, exc)
+                last_exc = exc
+                continue
 
     raise RuntimeError(f"All OpenRouter models failed. Last error: {last_exc}")
 
@@ -555,7 +1191,8 @@ def _extract_json_object(raw: str) -> dict:
     - Preamble text in any language before the JSON
    - Markdown code fences (```json ... ``` or ``` ... ```)
     - Trailing explanation text after the JSON
-    - Truncated responses (
+    - Truncated responses (free-tier model cut-offs) — try-repair appends
+      missing closing brackets/braces to recover a parseable object.
     """
     raw = raw.strip()
 
@@ -579,6 +1216,7 @@ def _extract_json_object(raw: str) -> dict:
 
     # 3. Brace-matching: find the first complete JSON object in the text
     start = raw.find('{')
+    best_candidate: str | None = None
     if start != -1:
         depth = 0
         in_string = False
@@ -603,10 +1241,36 @@ def _extract_json_object(raw: str) -> dict:
                 try:
                     return json.loads(candidate)
                 except json.JSONDecodeError:
-                    break  # malformed — fall through to
+                    break  # malformed — fall through to repair
+        # Capture the partial object for repair attempts
+        best_candidate = raw[start:]
+
+    # 4. Try-repair: the response was likely truncated by the model's token limit.
+    #    Progressively append closing characters until we get a valid object.
+    #    We try up to 24 combinations: 0-4 extra ']' + 0-4 extra '}', ordered
+    #    by shortest repair first (minimises data invention).
+    candidate_base = best_candidate or raw
+    # Trim trailing whitespace/comma that often appears before cut-off
+    candidate_base = candidate_base.rstrip().rstrip(",")
+    logger.debug("JSON repair: attempting to salvage truncated output (%d chars)", len(candidate_base))
+    for extra_brackets in range(5):  # 0 … 4 extra ]
+        for extra_braces in range(5):  # 0 … 4 extra }
+            if extra_brackets == 0 and extra_braces == 0:
+                continue  # already tried the plain candidate
+            repaired = candidate_base + ("\n]" * extra_brackets) + ("\n}" * extra_braces)
+            try:
+                result = json.loads(repaired)
+                logger.warning(
+                    "JSON repair succeeded (+%d ']', +%d '}'). "
+                    "Free-tier model likely truncated its output.",
+                    extra_brackets, extra_braces,
+                )
+                return result
+            except json.JSONDecodeError:
+                continue
 
     raise ValueError(
-        f"LLM returned invalid JSON (could not extract object).\nRaw: {raw[:600]}"
+        f"LLM returned invalid JSON (could not extract or repair object).\nRaw: {raw[:600]}"
     )
 
 
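The repair loop above is easy to exercise in isolation. A minimal sketch with a deliberately truncated response (hypothetical input; same append-and-retry idea, brackets before braces):

```python
import json

def repair_truncated(raw: str) -> dict:
    """Append up to 4 ']' then up to 4 '}' until json.loads succeeds."""
    base = raw.rstrip().rstrip(",")  # drop the comma that often precedes a cut-off
    for n_brackets in range(5):
        for n_braces in range(5):
            try:
                return json.loads(base + "]" * n_brackets + "}" * n_braces)
            except json.JSONDecodeError:
                continue
    raise ValueError("unrepairable")

truncated = '{"nodes": [{"id": "n1"}, {"id": "n2"},'  # cut off mid-list
print(repair_truncated(truncated))  # {'nodes': [{'id': 'n1'}, {'id': 'n2'}]}
```

Note that this scheme (like the hunk it sketches) always appends brackets before braces, so a response cut off inside a nested dict, where a `}` must come first, stays unrepairable and falls through to the ValueError.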
@@ -650,9 +1314,22 @@ You are a surgical ML pipeline editor. The user wants to REFINE an existing pipe
 1. Output ONLY a single raw JSON object. No text before it, no text after it.
 2. NEVER wrap the JSON in markdown fences (no ```json, no ```).
 3. ALWAYS respond in ENGLISH. Never use any other language.
-4.
+4. DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
 5. Do NOT truncate the JSON — it must be a complete, valid object.
 
+🔑 TOKEN CONSERVATION — MANDATORY (free-tier models have strict output limits):
+A. TEMPLATE SWAPS: If you are changing a model or operation that already has a
+   matching Template ID (e.g. swapping linear_regression for
+   random_forest_regressor), set only the "type" field on the update entry.
+   NEVER output the full Python "code" block when a Template already exists.
+   Example — correct: {{"action":"update","id":"n5","data":{{"type":"random_forest_regressor","values":{{"n_estimators":200}}}}}}
+   Example — WRONG:   {{"action":"update","id":"n5","data":{{"code":"import ..."}}}} ← wastes tokens
+B. VALUES ONLY: In "node_changes", omit the "code" field entirely unless the
+   user explicitly asked for a custom code change. If only config parameters
+   changed, output ONLY the "values" dictionary — nothing else inside "data".
+   Example — correct: {{"action":"update","id":"n3","data":{{"values":{{"test_size":0.3}}}}}}
+   Example — WRONG:   {{"action":"update","id":"n3","data":{{"values":{{"test_size":0.3}},"code":"...entire node..."}}}}
+
 Output ONLY a JSON patch object describing the minimal set of changes needed.
 
 ══ CURRENT PIPELINE ═════════════════════════════════════════════════
@@ -678,13 +1355,15 @@ Output ONLY a JSON patch object describing the minimal set of changes needed.
   }},
   {{
     "action": "add",
-    "id": "<new unique id e.g.
-    "type": "<template_id>",
+    "id": "<new unique id e.g. node_custom_1>",
+    "type": "<template_id or 'customNode'>",
     "reason": "<why this node is added>",
     "position": {{"x": <number>, "y": <number>}},
     "data": {{
+      "label": "<optional Descriptive Name>",
+      "templateId": "<optional templateId or 'customNode'>",
       "config": {{}},
-      "code": "<optional
+      "code": "<optional full Python source, REQUIRED if type is customNode>"
     }}
   }},
   {{
@@ -721,7 +1400,8 @@ Before emitting ANY patch entry, ask:
    YES → emit one "update" for that node. No new nodes.
 
 3. Does this need a genuinely new computation node?
-   YES → emit one "add".
+   YES → emit one "add".
+   CRITICAL: If the requested operation (e.g., RobustScaler) is NOT in the catalogue, DO NOT refuse the request. Instead, YOU MUST generate it dynamically as a `customNode`. Provide the complete Python implementation in the `code` field, starting with `# ✨ AI GENERATED`. In your `summary`, explicitly state that you generated a custom node.
 
 4. None of the above?
    → Explain in "summary". Return empty node_changes.
@@ -734,7 +1414,19 @@ Before emitting ANY patch entry, ask:
 5. "summary" → one plain-English sentence describing the change.
 6. DataFrame param MUST be named data (never "df").
 7. Include all imports inside any code block.
-8. Only use template types from the catalogue above.
+8. Only use template types from the catalogue above, unless building a customNode.
+
+══ CUSTOM NODE RULES (when generating a missing component) ════════════
+If you use `type="customNode"`, your Python string in `code` MUST follow the exact same architecture as built-in templates:
+- It MUST define a function named EXACTLY `def run(...) -> dict:`
+- The primary input dataset MUST be named `data` (e.g., `def run(data: pd.DataFrame, ...) -> dict:`)
+- It MUST return a dictionary containing the outputs (e.g., `return {{"X_train": X_train, "X_test": X_test}}`)
+- If you are building a custom visualization, you MUST wrap your output in one of these keys so the UI can render it:
+  'histogram', 'correlation_matrix', 'value_counts', 'box_plot', 'prediction', 'correlation_heatmap', 'missing_value_map', 'class_balance', 'feature_target_scatter', 'model_error_histogram', 'partial_dependence', 'roc_curves'
+- All `import` statements MUST be placed at the top of the code string.
+- You MUST include `# ✨ AI GENERATED` at the very top of the script.
+Example:
+"import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None):\\n scaler = RobustScaler()\\n # ... logic ...\\n return {{\"X_train\": X_train_scaled, \"X_test\": X_test_scaled}}"
 
 ══ CONNECTION RULES (CRITICAL — read carefully) ══════════════════════
 Each node in the current pipeline has "available_outputs" and "available_inputs"
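
Unescaped, the example code string in the new CUSTOM NODE RULES corresponds to roughly the following script. The `# ... logic ...` placeholder is filled in here with one plausible implementation (ours, not the package's):

    import pandas as pd
    from sklearn.preprocessing import RobustScaler

    # ✨ AI GENERATED
    def run(data=None, X_train=None, X_test=None):
        scaler = RobustScaler()
        # Fit on the training split only, then apply the same transform to test
        X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train),
                                      columns=X_train.columns, index=X_train.index)
        X_test_scaled = pd.DataFrame(scaler.transform(X_test),
                                     columns=X_test.columns, index=X_test.index)
        return {"X_train": X_train_scaled, "X_test": X_test_scaled}
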
@@ -749,6 +1441,12 @@ To fix a wrong connection:
 1. Emit "remove" for the bad edge (use its id from the current edges list).
 2. Emit "add" for the correct edge using valid handle names from the lists above.
 
+SCALER CONNECTION RULES:
+When inserting or reconnecting a standard_scaler or min_max_scaler after a train_test_split, you MUST:
+1. Connect all 4 outputs (X_train, X_test, y_train, y_test) from the split node to the scaler inputs.
+2. Connect all 4 outputs (X_train, X_test, y_train, y_test) from the scaler node to the downstream model inputs.
+Never skip passing y_train and y_test through the scaler node!
+
 To add a missing connection:
 1. Look at the source node's "available_outputs" — pick the right output.
 2. Look at the target node's "available_inputs" — pick the right input.
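
In patch form, these scaler rules imply eight edge additions (four into the scaler, four out). The exact edge schema is defined elsewhere in this prompt; assuming edge entries carry source/target node ids plus handle names, the first pair might look like this (node ids and field names are hypothetical):

    {"action": "add", "edge": {"source": "split_1", "sourceHandle": "X_train",
                               "target": "scaler_1", "targetHandle": "X_train"}}
    {"action": "add", "edge": {"source": "split_1", "sourceHandle": "y_train",
                               "target": "scaler_1", "targetHandle": "y_train"}}
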
@@ -813,7 +1511,10 @@ def _custom_node_catalogue(current_flow: FlowSchema) -> str:
 
 
 def _build_refine_messages(
-    prompt: str,
+    prompt: str,
+    current_flow: FlowSchema,
+    context: str | None,
+    custom_components: list[dict] | None = None,
 ) -> list[dict]:
     slim = _slim_flow(current_flow)
     node_status = _node_status_summary(current_flow)
@@ -822,15 +1523,13 @@ def _build_refine_messages(
     custom_section = (
         f"\n══ CUSTOM NODES ON CANVAS (treat these as valid, usable nodes) ═══════\n"
         f"{custom_cat}\n"
-
-    )
+    ) if custom_cat else ""
 
     system = _REFINE_PROMPT.format(
         current_flow=json.dumps(slim, indent=2),
         node_status=node_status,
-        catalogue=_template_catalogue() + custom_section,
+        catalogue=_template_catalogue(custom_components) + custom_section,
     )
-
     ctx_block = (
         f"══ DATASET CONTEXT ══════════════════════════════════════\n{context.strip()}\n\n"
         if context else ""
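
The `) if custom_cat else ""` change is behavioral, not cosmetic: previously the CUSTOM NODES header was interpolated into the catalogue even when `custom_cat` was empty, and the conditional expression now collapses the whole section to an empty string. The same shape, reduced to its essentials (the helper name is hypothetical):

    custom_section = make_section(custom_cat) if custom_cat else ""
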
@@ -867,24 +1566,81 @@ def _parse_refine_patch(raw: str) -> RefinePatch:
 
 # ── Public API ─────────────────────────────────────────────────────────────────
 
-async def generate_flow(prompt: str, context: str | None = None) -> FlowSchema:
-
-
+async def generate_flow(prompt: str, context: str | None = None, custom_components: list[dict] | None = None) -> FlowSchema:
+    """
+    Two-call Architect → Builder strategy:
+
+    Call 1 (Architect — deepseek-r1):
+        Given the data profile + deterministic pre-flight analysis, produce a
+        concise markdown plan: problem type, preprocessing steps, model choice.
+
+    Call 2 (Builder — deepseek-chat-v3):
+        Given the Architect's plan + the same context, produce the final JSON flow.
+        The Builder focuses on correct syntax and edge connections, not reasoning.
+    """
+    pre_flight: dict = {}
+    profile_text: str = ""
+    architect_plan: str = ""
+
+    # ── Pre-flight: deterministic data analysis ───────────────────────────
+    if context:
+        fp = _extract_file_path(context)
+        if fp:
+            try:
+                import pandas as pd
+                df = pd.read_csv(fp, nrows=5000)
+                profile = profile_dataframe(df)
+                profile_text = format_profile_for_prompt(profile)
+                pre_flight = _determine_pre_flight(profile, prompt, context, csv_path=fp)
+                logger.info(
+                    "Pre-flight: problem_type=%s target=%s model=%s encoding=%s scaling=%s",
+                    pre_flight["problem_type"], pre_flight["target_hint"],
+                    pre_flight.get("recommended_model"), pre_flight["needs_encoding"],
+                    pre_flight["needs_scaling"],
+                )
+            except Exception as exc:
+                logger.warning("pre-flight analysis skipped: %s", exc)
+
+    # ── Call 1: Architect (R1 reasoning model) ────────────────────────────
+    if profile_text and pre_flight:
+        try:
+            arch_messages = _build_architect_messages(prompt, profile_text, pre_flight)
+            architect_plan = await _call_openrouter(
+                arch_messages,
+                task="architect",
+                json_mode=False,
+                timeout=_TIMEOUT_GENERATE,
+            )
+            logger.info("Architect plan: %d chars", len(architect_plan))
+        except Exception as exc:
+            logger.warning("Architect call failed, continuing without plan: %s", exc)
+            architect_plan = ""
+
+    # ── Call 2: Builder (chat model — fast, syntax-precise JSON) ─────────
+    build_messages = _build_generate_messages(
+        prompt, context,
+        pre_flight=pre_flight or None,
+        architect_plan=architect_plan or None,
+        custom_components=custom_components,
+    )
+    raw = await _call_openrouter(
+        build_messages, task="generate", json_mode=True, timeout=_TIMEOUT_GENERATE
+    )
     return _parse_flow(raw)
 
 
 async def update_flow(
-    prompt: str, current_flow: FlowSchema, context: str | None = None
+    prompt: str, current_flow: FlowSchema, context: str | None = None, custom_components: list[dict] | None = None
 ) -> FlowSchema:
-    messages = _build_update_messages(prompt, current_flow, context)
+    messages = _build_update_messages(prompt, current_flow, context, custom_components)
     raw = await _call_openrouter(messages, task="update", json_mode=True, timeout=_TIMEOUT_UPDATE)
     return _parse_flow(raw)
 
 
 async def refine_flow(
-    prompt: str, current_flow: FlowSchema, context: str | None = None
+    prompt: str, current_flow: FlowSchema, context: str | None = None, custom_components: list[dict] | None = None
 ) -> RefinePatch:
-    messages = _build_refine_messages(prompt, current_flow, context)
+    messages = _build_refine_messages(prompt, current_flow, context, custom_components)
     raw = await _call_openrouter(messages, task="refine", json_mode=True, timeout=_TIMEOUT_UPDATE)
     return _parse_refine_patch(raw)
 
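
A minimal caller sketch for the new `generate_flow` signature, assuming a configured OpenRouter key and an asyncio context; the prompt text and the context string's file-path format are illustrative, since the format `_extract_file_path` expects is not shown in this hunk:

    import asyncio

    async def main():
        flow = await generate_flow(
            prompt="Train a classifier on churn.csv and show a confusion matrix",
            context="file_path: ./data/churn.csv",  # hypothetical context format
            custom_components=None,
        )
        print(len(flow.nodes), "nodes generated")

    asyncio.run(main())
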
@@ -934,6 +1690,32 @@ async def explain_flow(flow: FlowSchema) -> str:
     return await _call_openrouter(messages, task="explain", json_mode=False, timeout=60)
 
 
+_EXPLAIN_CHAT_SYSTEM = """\
+You are an expert ML engineering assistant answering questions about a user's machine learning pipeline.
+
+══ EXPLANATION CONTEXT ═════════════════════════════════════════════════════
+{explanation}
+
+══ PIPELINE AND EXECUTION RESULTS ══════════════════════════════════════════
+{current_flow}
+
+Respond concisely and directly to the user's question. Provide actionable, specific advice based on the existing nodes, their configurations, and any metrics or execution results present in the pipeline state.
+Do NOT use markdown tables in your response. Instead, use simple bullet points and short paragraphs. Do not use generic filler.
+"""
+
+async def chat_explanation(question: str, explanation: str, flow: FlowSchema) -> str:
+    slim = _slim_flow(flow)
+    system = _EXPLAIN_CHAT_SYSTEM.format(
+        explanation=explanation,
+        current_flow=json.dumps(slim, indent=2)
+    )
+    messages = [
+        {"role": "system", "content": system},
+        {"role": "user", "content": question},
+    ]
+    return await _call_openrouter(messages, task="chat", json_mode=False, timeout=60)
+
+
 # ── Self-Healing Debug Prompt ──────────────────────────────────────────────────
 
 _DEBUG_SYSTEM = """\
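
Usage of `chat_explanation` is a single awaited call; the question and explanation arguments here are placeholders:

    answer = await chat_explanation(
        question="Why is the model's F1 so low?",
        explanation=previous_explanation_text,  # e.g. the string returned by explain_flow
        flow=current_flow,
    )
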
@@ -1128,6 +1910,7 @@ async def handle_user_request(
     prompt: str,
     current_flow: FlowSchema | None = None,
     context: str | None = None,
+    custom_components: list[dict] | None = None,
 ) -> dict:
     """
     Single entry point that classifies the prompt and routes to the
@@ -1144,7 +1927,7 @@ async def handle_user_request(
     # No existing flow → always generate from scratch
     has_flow = current_flow is not None and len(current_flow.nodes) > 0
     if not has_flow:
-        flow = await generate_flow(prompt, context)
+        flow = await generate_flow(prompt, context, custom_components=custom_components)
         return {"intent": "generate", "result_type": "flow", "flow": flow}
 
     intent = detect_intent(prompt, has_flow=True)
@@ -1159,17 +1942,17 @@ async def handle_user_request(
             for n in current_flow.nodes
         ):
             # Custom nodes exist — use update so they're visible to the LLM
-            flow = await update_flow(prompt, current_flow, context)
+            flow = await update_flow(prompt, current_flow, context, custom_components=custom_components)
             return {"intent": "update", "result_type": "flow", "flow": flow}
-        flow = await generate_flow(prompt, context)
+        flow = await generate_flow(prompt, context, custom_components=custom_components)
         return {"intent": "generate", "result_type": "flow", "flow": flow}
 
     if intent == "refine":
-        patch = await refine_flow(prompt, current_flow, context)
+        patch = await refine_flow(prompt, current_flow, context, custom_components=custom_components)
        return {"intent": "refine", "result_type": "patch", "patch": patch}
 
     # intent == "update"
-    flow = await update_flow(prompt, current_flow, context)
+    flow = await update_flow(prompt, current_flow, context, custom_components=custom_components)
     return {"intent": "update", "result_type": "flow", "flow": flow}
 
 
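
Because `handle_user_request` returns a full flow for "generate"/"update" intents but only a patch for "refine", callers need to dispatch on `result_type`. A sketch, where `apply_patch` is a hypothetical caller-side helper:

    result = await handle_user_request(prompt, current_flow=flow, context=ctx,
                                       custom_components=components)
    if result["result_type"] == "flow":
        canvas_state = result["flow"]                # replace the whole pipeline
    else:                                            # result_type == "patch"
        apply_patch(canvas_state, result["patch"])   # merge the minimal changes
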
@@ -1310,3 +2093,79 @@ async def suggest_improvements(flow: FlowSchema, results: dict) -> list[str]:
     # Sanitise: only strings, max 120 chars each
     return [str(s)[:120] for s in suggestions if s]
 
+
+# ── Custom node code generation ────────────────────────────────────────────────
+
+_NODE_CODE_SYSTEM = """\
+You are an M8Flow node code generator. Write Python code for a reusable pipeline component.
+
+⚠️ HARD RULES — any violation makes the node unparseable:
+1. Function name MUST be run (not main, process, execute, transform — exactly run)
+2. DataFrame input parameter MUST be named data (never df, dataframe, dataset)
+3. Function MUST return a dict with named string keys
+4. ALL imports go INSIDE the function body
+5. Only allowed libraries: pandas, numpy, sklearn, scipy, math, statistics, re, json
+6. FORBIDDEN: os, sys, subprocess, socket, requests, open(), eval(), exec(), matplotlib
+
+FIELD TYPE ANNOTATIONS — these control the UI widget shown to the user:
+    data input (connects from previous node) → just `data` with no type hint
+    text field     → `name: str = "default"`
+    number field   → `name: float = 1.0` or `name: int = 10`
+    boolean toggle → `name: bool = True`
+    column picker  → `col: Annotated[str, "column"] = "target"` (needs `from typing import Annotated` inside the fn)
+    file picker    → `path: Annotated[str, "file"] = "data.csv"` (needs `from typing import Annotated` inside the fn)
+
+RETURN DICT — keys become the node's output handles:
+    Passing a DataFrame forward → always include "data": df
+    Model outputs  → {"model": model, "y_pred": preds}
+    Metric outputs → {"accuracy": 0.95, "f1": 0.88}
+    Multiple outputs are fine → {"data": df, "rows_removed": n}
+
+EXAMPLE — outlier removal node:
+def run(data, multiplier: float = 1.5) -> dict:
+    import pandas as pd
+    import numpy as np
+    df = data.copy()
+    for col in df.select_dtypes(include=[np.number]).columns:
+        Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
+        iqr = Q3 - Q1
+        df = df[~((df[col] < Q1 - multiplier * iqr) | (df[col] > Q3 + multiplier * iqr))]
+    return {"data": df, "rows_removed": len(data) - len(df)}
+
+EXAMPLE — feature selection node:
+def run(data, n_features: int = 10, target: str = "label") -> dict:
+    import pandas as pd
+    from sklearn.feature_selection import SelectKBest, f_classif
+    X = data.drop(columns=[target])
+    y = data[target]
+    selector = SelectKBest(f_classif, k=min(n_features, X.shape[1]))
+    selector.fit(X, y)
+    selected = X.columns[selector.get_support()].tolist()
+    return {"data": data[selected + [target]], "selected_features": selected}
+
+OUTPUT: Return ONLY the raw Python code. No explanation. No markdown fences. No backticks.
+"""
+
+
+async def generate_node_code(description: str) -> str:
+    """
+    Generate M8Flow-compatible Python node code from a natural language description.
+    Uses the chat model (fast, syntax-precise) with the node code system prompt.
+    """
+    messages = [
+        {"role": "system", "content": _NODE_CODE_SYSTEM},
+        {"role": "user", "content": f"Generate an M8Flow node that: {description}"},
+    ]
+    raw = await _call_openrouter(messages, task="generate", json_mode=False, timeout=60)
+
+    # Strip any markdown fences the model may add despite instructions
+    raw = raw.strip()
+    for fence in ("```python", "```"):
+        if raw.startswith(fence):
+            raw = raw[len(fence):]
+            break
+    if raw.endswith("```"):
+        raw = raw[:-3]
+
+    return raw.strip()
+
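
The fence stripping at the end of `generate_node_code` is order-sensitive: the "```python" prefix must be tested before the bare "```" prefix, or the loop would leave the word "python" at the start of the code. A standalone reproduction for clarity (the helper name is ours, not the package's):

    def _strip_fences(raw: str) -> str:
        raw = raw.strip()
        for fence in ("```python", "```"):   # longest prefix first
            if raw.startswith(fence):
                raw = raw[len(fence):]
                break
        if raw.endswith("```"):
            raw = raw[:-3]
        return raw.strip()

    assert _strip_fences("```python\ndef run(data):\n    return {\"data\": data}\n```") \
           == 'def run(data):\n    return {"data": data}'
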