m8flow 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  """LLM service — OpenRouter-powered flow generation with per-task model routing."""
2
2
  import json
3
3
  import logging
4
+ import time
4
5
  import httpx
5
6
  from contextvars import ContextVar
6
7
  from config import config
@@ -23,23 +24,57 @@ _OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
23
24
  _TIMEOUT_GENERATE = 120
24
25
  _TIMEOUT_UPDATE = 180
25
26
 
26
- # Per-task model routing (verified OpenRouter slugs no :free on paid-tier models)
27
+ # Per-task model routing slugs verified live against OpenRouter API (May 2026)
27
28
  _MODELS = {
28
- "generate": "deepseek/deepseek-chat-v3-0324",
29
- "refine": "deepseek/deepseek-chat-v3-0324",
30
- "update": "deepseek/deepseek-chat-v3-0324",
31
- "debug": "deepseek/deepseek-r1",
32
- "heal": "deepseek/deepseek-r1",
33
- "explain": "google/gemini-2.0-flash-001",
34
- "suggest": "google/gemini-2.0-flash-001",
35
- "fallback": "meta-llama/llama-3.3-70b-instruct:free",
36
- "lastresort": "qwen/qwen3-coder:free",
29
+ # Reasoning/Architecting — best available free reasoning model
30
+ "architect": "nvidia/nemotron-3-super-120b-a12b:free",
31
+
32
+ # Core Generation/Updating — large, instruction-tuned free model
33
+ "generate": "meta-llama/llama-3.3-70b-instruct:free",
34
+ "refine": "meta-llama/llama-3.3-70b-instruct:free",
35
+ "update": "meta-llama/llama-3.3-70b-instruct:free",
36
+
37
+ # Debugging/Healing — strong reasoning for bug analysis
38
+ "debug": "nvidia/nemotron-3-super-120b-a12b:free",
39
+ "heal": "nvidia/nemotron-3-super-120b-a12b:free",
40
+
41
+ # Explaining/Suggesting — fast free model
42
+ "explain": "openai/gpt-oss-20b:free",
43
+ "suggest": "openai/gpt-oss-20b:free",
44
+
45
+ # Safety Nets — verified live fallbacks
46
+ "fallback": "google/gemma-4-31b-it:free",
47
+ "lastresort": "meta-llama/llama-3.2-3b-instruct:free",
37
48
  }
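Routing is resolved further down with `_MODELS.get(task, _MODELS["generate"])`, so an illustrative lookup against the table above behaves like this (the second line assumes a task with no entry):

    _MODELS.get("debug", _MODELS["generate"])   # -> "nvidia/nemotron-3-super-120b-a12b:free"
    _MODELS.get("chat", _MODELS["generate"])    # unknown task, falls back to the "generate" slug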
38
49
 
50
+ # ── Rate-limit cooldown cache ─────────────────────────────────────────────────
51
+ # Maps model_slug -> timestamp of last 429. Models in cooldown are skipped for
52
+ # _RATE_LIMIT_TTL seconds so we jump straight to a working model instead of
53
+ # burning time on a known-rate-limited one.
54
+ _RATE_LIMIT_CACHE: dict[str, float] = {}
55
+ _RATE_LIMIT_TTL = 90 # seconds
56
+
57
+
58
+ def _is_rate_limited(model: str) -> bool:
59
+ """Return True if this model returned 429 within the last _RATE_LIMIT_TTL seconds."""
60
+ ts = _RATE_LIMIT_CACHE.get(model)
61
+ if ts is None:
62
+ return False
63
+ if time.time() - ts < _RATE_LIMIT_TTL:
64
+ return True
65
+ del _RATE_LIMIT_CACHE[model] # TTL expired — clear and allow retry
66
+ return False
67
+
68
+
69
+ def _mark_rate_limited(model: str) -> None:
70
+ """Record that this model returned 429 right now."""
71
+ _RATE_LIMIT_CACHE[model] = time.time()
72
+ logger.debug("Rate-limit cooldown started for %s (%ds)", model, _RATE_LIMIT_TTL)
73
+
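A minimal sketch of how these two helpers are meant to work together; `call_model` and `RateLimitedError` are hypothetical stand-ins, and the real integration lives in `_call_openrouter` below, which reorders its fallback chain rather than skipping cooled-down models outright:

    def _first_available(candidates: list[str]) -> str:
        for m in candidates:
            if _is_rate_limited(m):
                continue                      # still inside the 90 s cooldown, skip without an HTTP call
            try:
                return call_model(m)          # hypothetical request helper
            except RateLimitedError:          # hypothetical wrapper around a 429 response
                _mark_rate_limited(m)         # start the cooldown for this slug
        raise RuntimeError("every candidate model is rate-limited or failing")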
39
74
 
40
75
  # ── Catalogue helpers ──────────────────────────────────────────────────────────
41
76
 
42
- def _template_catalogue() -> str:
77
+ def _template_catalogue(custom_components: list[dict] | None = None) -> str:
43
78
  """Detailed catalogue: id, category, inputs, outputs."""
44
79
  from core.parser import parse_node_code
45
80
  lines: list[str] = []
@@ -49,11 +84,24 @@ def _template_catalogue() -> str:
49
84
  field_ins = [f"{i.name}:{i.kind}={i.default}" for i in schema.inputs if i.kind != "data"]
50
85
  outs = [o.name for o in schema.outputs]
51
86
  lines.append(
52
- f" {t['id']} [{t['category']}] \"{t['label']}\"\n"
53
- f" data-inputs : {data_ins or '(none)'}\n"
54
- f" fields : {field_ins or '(none)'}\n"
55
- f" outputs : {outs or '(none)'}"
87
+ f"{t['id']} [{t['category']}]\n"
88
+ f" inputs : {data_ins or '(none)'} fields: {field_ins or '(none)'}\n"
89
+ f" outputs: {outs or '(none)'}"
56
90
  )
91
+
92
+ if custom_components:
93
+ lines.append("\n=== USER CUSTOM COMPONENTS (Preferred if applicable) ===")
94
+ for c in custom_components:
95
+ schema = c.get("schema", {})
96
+ data_ins = [i["name"] for i in schema.get("inputs", []) if i.get("kind") == "data"]
97
+ field_ins = [f"{i['name']}:{i.get('kind')}={i.get('default')}" for i in schema.get("inputs", []) if i.get("kind") != "data"]
98
+ outs = [o["name"] for o in schema.get("outputs", [])]
99
+ lines.append(
100
+ f"{c.get('id')} [Custom] \"{c.get('label')}\"\n"
101
+ f" inputs : {data_ins or '(none)'} fields: {field_ins or '(none)'}\n"
102
+ f" outputs: {outs or '(none)'}"
103
+ )
104
+
57
105
  return "\n".join(lines)
58
106
 
59
107
 
@@ -65,6 +113,366 @@ def _allowed_type_ids() -> set[str]:
65
113
  return ids
66
114
 
67
115
 
116
+ # ── Pre-flight analysis ────────────────────────────────────────────────────────
117
+
118
+ def _determine_pre_flight(profile: dict, prompt: str, context: str | None, csv_path: str | None = None) -> dict:
119
+ """
120
+ Deterministic data-driven analysis computed BEFORE any LLM call.
121
+
122
+ Uses pandas to analyse the ACTUAL data (not just regex on the prompt), so the
123
+ LLM receives ground-truth facts — not guesses — about the task type and
124
+ preprocessing requirements.
125
+ """
126
+ import re
127
+ import pandas as pd
128
+ import numpy as np
129
+
130
+ dtypes = profile.get("dtypes", {})
131
+ missing = profile.get("missing", {})
132
+ numeric_summary = profile.get("numeric_summary", {})
133
+ categorical_summary = profile.get("categorical_summary", {})
134
+ shape = profile.get("shape", [0, 0])
135
+
136
+ # ── Step 1: Find the target column ────────────────────────────────────────
137
+ # Priority: explicit mention in prompt/context > heuristic column names.
138
+
139
+ target_hint: str | None = None
140
+ search_text = (prompt + " " + (context or "")).lower()
141
+
142
+ # (a) Regex extraction from user text
143
+ for pattern in [
144
+ r"predict\s+(?:the\s+)?['\"]?(\w+)['\"]?",
145
+ r"target\s+(?:(?:column|variable|col)\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
146
+ r"classif(?:y|ication)\s+(?:the\s+)?['\"]?(\w+)['\"]?",
147
+ r"label\s+(?:column\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
148
+ r"output\s+(?:column\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
149
+ r"y\s*=\s*['\"]?(\w+)['\"]?",
150
+ ]:
151
+ m = re.search(pattern, search_text)
152
+ if m:
153
+ candidate = m.group(1)
154
+ # Validate the candidate actually exists in the data
155
+ if candidate in dtypes or candidate in numeric_summary or candidate in categorical_summary:
156
+ target_hint = candidate
157
+ break
158
+
159
+ # (b) If still unknown, use heuristic column-name scoring on the real columns
160
+ if target_hint is None:
161
+ TARGET_KEYWORDS = [
162
+ "target", "label", "class", "output", "y", "result",
163
+ "outcome", "diagnosis", "status", "type", "category",
164
+ "survived", "churn", "default", "fraud", "price",
165
+ "salary", "value", "score", "sales", "revenue", "cost",
166
+ "medv", "charges", "fare",
167
+ ]
168
+ all_columns = list(dtypes.keys())
169
+ best_col: str | None = None
170
+ best_score = -1
171
+
172
+ for col in all_columns:
173
+ col_lower = col.lower().replace("_", " ").replace("-", " ")
174
+ score = 0
175
+
176
+ # Keyword match against column name
177
+ for kw in TARGET_KEYWORDS:
178
+ if kw in col_lower:
179
+ score += 3
180
+ break
181
+ # Last column is commonly the target in many datasets
182
+ if col == all_columns[-1]:
183
+ score += 2
184
+ # Column mentioned in prompt text
185
+ if col_lower in search_text or col.lower() in search_text:
186
+ score += 4
187
+
188
+ if score > best_score:
189
+ best_score = score
190
+ best_col = col
191
+
192
+ if best_col and best_score >= 2:
193
+ target_hint = best_col
194
+
195
+ # ── Step 2: Determine problem type from ACTUAL DATA ───────────────────────
196
+ # Initialize all new keys upfront to avoid KeyErrors downstream.
197
+ problem_type = "unknown"
198
+ target_analysis: dict = {}
199
+ is_imbalanced = False
200
+ needs_outlier_removal = False
201
+
202
+ if target_hint and csv_path:
203
+ try:
204
+ df = pd.read_csv(csv_path, nrows=5000)
205
+
206
+ # ── Outlier Detection: scan all numeric feature columns ──────────
207
+ for col in df.select_dtypes(include=[np.number]).columns:
208
+ if col == target_hint:
209
+ continue
210
+ col_series = df[col].dropna()
211
+ if len(col_series) == 0:
212
+ continue
213
+ col_mean = col_series.mean()
214
+ col_max = col_series.max()
215
+ # Flag if the max is >10x the mean and the mean is positive
216
+ if col_mean > 0 and col_max > 10 * col_mean:
217
+ needs_outlier_removal = True
218
+ break # one outlier column is enough to flag the dataset
219
+
220
+ if target_hint in df.columns:
221
+ col_data = df[target_hint].dropna()
222
+ dtype = col_data.dtype
223
+
224
+ if dtype == object or str(dtype) == "category":
225
+ # String/category column → always classification
226
+ n_unique = col_data.nunique()
227
+ problem_type = "classification"
228
+ target_analysis = {
229
+ "dtype": str(dtype),
230
+ "unique_values": int(n_unique),
231
+ "sample_values": col_data.unique()[:5].tolist(),
232
+ "reasoning": f"Categorical dtype with {n_unique} unique string values → classification",
233
+ }
234
+
235
+ elif dtype == bool or (dtype == int and col_data.nunique() <= 2):
236
+ # Boolean or binary integer → classification
237
+ problem_type = "classification"
238
+ target_analysis = {
239
+ "dtype": str(dtype),
240
+ "unique_values": int(col_data.nunique()),
241
+ "sample_values": col_data.unique()[:5].tolist(),
242
+ "reasoning": "Binary (0/1 or True/False) target → classification",
243
+ }
244
+
245
+ elif np.issubdtype(dtype, np.integer):
246
+ n_unique = col_data.nunique()
247
+ n_total = len(col_data)
248
+ unique_ratio = n_unique / max(n_total, 1)
249
+ if n_unique <= 20 or unique_ratio < 0.05:
250
+ problem_type = "classification"
251
+ target_analysis = {
252
+ "dtype": str(dtype),
253
+ "unique_values": int(n_unique),
254
+ "sample_values": sorted(col_data.unique().tolist())[:10],
255
+ "reasoning": f"Integer with only {n_unique} unique values ({unique_ratio:.1%} of rows) → likely class labels → classification",
256
+ }
257
+ else:
258
+ problem_type = "regression"
259
+ target_analysis = {
260
+ "dtype": str(dtype),
261
+ "unique_values": int(n_unique),
262
+ "min": float(col_data.min()),
263
+ "max": float(col_data.max()),
264
+ "mean": float(col_data.mean()),
265
+ "reasoning": f"Integer with {n_unique} unique values (high cardinality) → continuous → regression",
266
+ }
267
+
268
+ elif np.issubdtype(dtype, np.floating):
269
+ n_unique = col_data.nunique()
270
+ problem_type = "regression"
271
+ target_analysis = {
272
+ "dtype": str(dtype),
273
+ "unique_values": int(n_unique),
274
+ "min": float(col_data.min()),
275
+ "max": float(col_data.max()),
276
+ "mean": float(col_data.mean()),
277
+ "std": float(col_data.std()),
278
+ "reasoning": f"Floating-point target with {n_unique} unique values → continuous → regression",
279
+ }
280
+
281
+ else:
282
+ # Fallback: try to convert and check cardinality
283
+ try:
284
+ as_numeric = pd.to_numeric(col_data, errors="coerce")
285
+ if as_numeric.isna().mean() < 0.1:
286
+ n_unique = as_numeric.nunique()
287
+ problem_type = "regression" if n_unique > 20 else "classification"
288
+ target_analysis = {
289
+ "dtype": str(dtype),
290
+ "unique_values": int(n_unique),
291
+ "reasoning": f"Converted to numeric; {n_unique} unique values → {'regression' if n_unique > 20 else 'classification'}",
292
+ }
293
+ else:
294
+ problem_type = "classification"
295
+ target_analysis = {
296
+ "dtype": str(dtype),
297
+ "reasoning": "Could not convert to numeric → treating as classification",
298
+ }
299
+ except Exception:
300
+ problem_type = "classification"
301
+
302
+ # ── Class Imbalance Check (classification only) ──────────────
303
+ if problem_type == "classification":
304
+ try:
305
+ class_freqs = col_data.value_counts(normalize=True)
306
+ if class_freqs.min() < 0.10:
307
+ is_imbalanced = True
308
+ except Exception:
309
+ pass
310
+
311
+ except Exception as exc:
312
+ logger.warning("Pre-flight target analysis failed: %s", exc)
313
+ # Fall back to dtype-only heuristic using profile data
314
+ if target_hint:
315
+ dtype_str = str(dtypes.get(target_hint, "")).lower()
316
+ if any(t in dtype_str for t in ("object", "category", "bool", "str")):
317
+ problem_type = "classification"
318
+ elif target_hint in categorical_summary:
319
+ problem_type = "classification" if categorical_summary[target_hint].get("unique", 99) < 15 else "regression"
320
+ elif target_hint in numeric_summary:
321
+ problem_type = "regression"
322
+
323
+ elif target_hint:
324
+ # No CSV path — fall back to profile-based heuristic
325
+ dtype_str = str(dtypes.get(target_hint, "")).lower()
326
+ if any(t in dtype_str for t in ("object", "category", "bool", "str")):
327
+ problem_type = "classification"
328
+ elif target_hint in categorical_summary:
329
+ problem_type = "classification" if categorical_summary.get(target_hint, {}).get("unique", 99) < 15 else "regression"
330
+ elif target_hint in numeric_summary:
331
+ problem_type = "regression"
332
+
333
+ # ── Step 3: Preprocessing flags ───────────────────────────────────────────
334
+ # Detect categorical columns that need encoding (exclude the target itself)
335
+ cat_cols = {c for c in categorical_summary if c != target_hint}
336
+ num_cols = {c for c in numeric_summary if c != target_hint}
337
+
338
+ missing_cols: dict[str, float] = {
339
+ col: round(info.get("pct", 0), 1)
340
+ for col, info in missing.items()
341
+ if info.get("pct", 0) > 0
342
+ }
343
+ cardinality: dict[str, int] = {
344
+ col: info.get("unique", 0)
345
+ for col, info in categorical_summary.items()
346
+ }
347
+
348
+ needs_encoding = len(cat_cols) > 0
349
+ needs_scaling = len(num_cols) >= 2
350
+ needs_imputation = len(missing_cols) > 0
351
+
352
+ # ── Step 4: Recommend specific model based on problem type + data size ────
353
+ n_rows = shape[0] if shape else 0
354
+ n_cols = shape[1] if len(shape) > 1 else 0
355
+
356
+ if problem_type == "classification":
357
+ recommended_model = "random_forest_classifier" if n_rows >= 1000 else "logistic_regression"
358
+ recommended_metric_node = "classification_report"
359
+ elif problem_type == "regression":
360
+ recommended_model = "random_forest_regressor" if n_rows >= 1000 else "linear_regression"
361
+ recommended_metric_node = "regression_metrics"
362
+ else:
363
+ recommended_model = "random_forest_classifier"
364
+ recommended_metric_node = "classification_report"
365
+
366
+ return {
367
+ "target_hint": target_hint,
368
+ "problem_type": problem_type,
369
+ "target_analysis": target_analysis,
370
+ "missing_cols": missing_cols,
371
+ "cardinality": cardinality,
372
+ "needs_encoding": needs_encoding,
373
+ "needs_scaling": needs_scaling,
374
+ "needs_imputation": needs_imputation,
375
+ "needs_outlier_removal": needs_outlier_removal,
376
+ "is_imbalanced": is_imbalanced,
377
+ "n_rows": n_rows,
378
+ "n_cols": n_cols,
379
+ "categorical_cols": sorted(cat_cols),
380
+ "numeric_cols": sorted(num_cols),
381
+ "recommended_model": recommended_model,
382
+ "recommended_metric": recommended_metric_node,
383
+ }
384
+
385
+
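As a concrete illustration, a Titanic-style CSV with a binary `survived` column would produce a pre-flight dict roughly like the following (values are invented for the example; the keys match the return statement above):

    {
        "target_hint": "survived",
        "problem_type": "classification",
        "target_analysis": {"dtype": "int64", "unique_values": 2, "sample_values": [0, 1],
                            "reasoning": "Binary (0/1 or True/False) target → classification"},
        "missing_cols": {"age": 19.9}, "cardinality": {"sex": 2, "embarked": 3},
        "needs_encoding": True, "needs_scaling": True, "needs_imputation": True,
        "needs_outlier_removal": False, "is_imbalanced": False,
        "n_rows": 891, "n_cols": 12,
        "categorical_cols": ["embarked", "sex"], "numeric_cols": ["age", "fare", "pclass"],
        "recommended_model": "logistic_regression",
        "recommended_metric": "classification_report",
    }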
386
+ def _pre_flight_block(pf: dict) -> str:
387
+ """Format the pre-flight analysis as an authoritative, bossy prompt block."""
388
+ analysis = pf.get("target_analysis", {})
389
+ reasoning = analysis.get("reasoning", "")
390
+ sample_vals = analysis.get("sample_values", [])
391
+
392
+ lines = [
393
+ "╔══ [PYTHON-DETERMINED ANALYSIS — treat as ABSOLUTE GROUND TRUTH] ════╗",
394
+ f" ⚠ Problem type : {pf['problem_type'].upper()} (MANDATORY)",
395
+ f" Reasoning : {reasoning or 'heuristic from dtype/cardinality'}",
396
+ f" Target column : {pf['target_hint'] or 'not specified — infer from context'}",
397
+ ]
398
+ if sample_vals:
399
+ lines.append(f" Target sample vals : {sample_vals}")
400
+ lines += [
401
+ f" Dataset size : {pf['n_rows']} rows × {pf['n_cols']} columns",
402
+ f" Missing values : {pf['missing_cols'] or 'none'}",
403
+ f" Categorical cols : {pf.get('categorical_cols') or 'none'}",
404
+ f" Numeric cols : {pf.get('numeric_cols') or 'none'}",
405
+ f" Needs encoding : {'YES — add label_encoder BEFORE train_test_split' if pf['needs_encoding'] else 'no'}",
406
+ f" Needs scaling : {'YES — add standard_scaler AFTER train_test_split' if pf['needs_scaling'] else 'no'}",
407
+ f" Needs imputation : {'YES — add data_cleaning BEFORE split' if pf['needs_imputation'] else 'no'}",
408
+ ]
409
+ # Conditional directives for outlier removal
410
+ if pf.get("needs_outlier_removal"):
411
+ lines.append(
412
+ " ⚠ Outlier columns : YES — a numeric feature has max > 10× its mean. "
413
+ "Add an outlier_removal node BEFORE train_test_split."
414
+ )
415
+ # Conditional directive for class imbalance
416
+ if pf.get("is_imbalanced"):
417
+ lines.append(
418
+ " ⚠ Class imbalance : YES — minority class < 10% of data. "
419
+ "Set class_weight='balanced' on the model node config."
420
+ )
421
+ lines += [
422
+ f" ⚠ Model Selection : USE {pf.get('recommended_model', 'unknown').upper()} ONLY",
423
+ f" ✅ Metric node : {pf.get('recommended_metric', 'unknown')}",
424
+ "╚════════════════════════════════════════════════════════════════════════╝",
425
+ ]
426
+ return "\n".join(lines)
427
+
428
+
429
+ # ── Architect (R1 planning) prompt ────────────────────────────────────────────
430
+
431
+ _ARCHITECT_PROMPT = """\
432
+ You are a senior ML engineer performing pre-build technical architecture planning.
433
+ You will receive a dataset profile, a deterministic pre-flight analysis, and the user's request.
434
+
435
+ OUTPUT: Concise markdown only. No JSON. No code blocks. Under 250 words.
436
+
437
+ Structure your response as:
438
+
439
+ ## Problem Type
440
+ State classification or regression with one-sentence justification.
441
+
442
+ ## Data Quality Plan
443
+ List each issue (missing values, categorical columns, dtype mismatches) and the exact
444
+ preprocessing step needed for it. Reference actual column names.
445
+
446
+ ## Pipeline Sequence
447
+ Ordered list of node types (e.g. csv_loader → label_encoder → train_test_split →
448
+ standard_scaler → random_forest_classifier → classification_report).
449
+
450
+ ## Model Rationale
451
+ Why this model fits the problem. If the dataset is large (>10k rows), prefer tree-based
452
+ models. If many numeric features, recommend scaling. If class imbalance suspected, note it.
453
+
454
+ ## Critical Warnings
455
+ Any data issues the pipeline MUST handle. Be blunt about failure modes.
456
+
457
+ Do NOT output JSON. Do NOT write code. Be specific — use actual column names from the profile.
458
+ """
459
+
460
+
461
+ def _build_architect_messages(
462
+ prompt: str, profile_text: str, pf: dict
463
+ ) -> list[dict]:
464
+ user = (
465
+ f"== DATASET PROFILE ==\n{profile_text}\n\n"
466
+ f"== DETERMINISTIC PRE-FLIGHT ==\n{_pre_flight_block(pf)}\n\n"
467
+ f"== USER REQUEST ==\n{prompt}\n\n"
468
+ "Provide your technical pipeline architecture plan."
469
+ )
470
+ return [
471
+ {"role": "system", "content": _ARCHITECT_PROMPT},
472
+ {"role": "user", "content": user},
473
+ ]
474
+
475
+
68
476
  # ── System prompts ─────────────────────────────────────────────────────────────
69
477
 
70
478
  _SYSTEM_PROMPT = """\
@@ -125,9 +533,13 @@ Use this structure to decide WHERE to modify or improve.
125
533
  ═══════════════════════════════════════
126
534
  STRICT RULES:
127
535
 
128
- 1. MINIMALISM FIRST
129
- - Fewer nodes = better
130
- - Do NOT duplicate functionality
536
+ 1. DATA INTEGRITY & ACCURACY FIRST — MINIMALISM SECOND
537
+ - A complete, correct pipeline beats a minimal, broken one.
538
+ - REQUIRED: label_encoder for any object/category column BEFORE train_test_split.
539
+ - REQUIRED: standard_scaler for distance-based models (SVM, KNN, LogisticRegression).
540
+ - REQUIRED: data_cleaning node when ANY column has missing values.
541
+ - THEN minimize: never add a step the data does not require.
542
+ - A pipeline that skips necessary preprocessing is a FAILURE regardless of node count.
131
543
 
132
544
  2. USE TEMPLATES FIRST
133
545
  - Only use customNode if NO template exists
@@ -141,6 +553,7 @@ STRICT RULES:
141
553
  - sourceHandle MUST exist in source outputs
142
554
  - targetHandle MUST match input param
143
555
  - metric nodes MUST receive: y_pred + y_test
556
+ - SCALER RULE: If using standard_scaler or min_max_scaler after train_test_split, you MUST connect all 4 split outputs (X_train, X_test, y_train, y_test) to the scaler, and then connect all 4 scaler outputs to the model. Do not skip y_train/y_test.
144
557
 
145
558
  5. DATA RULES
146
559
  - If categorical columns exist → include label_encoder BEFORE split
@@ -208,7 +621,7 @@ If a node has an error:
208
621
  - Include all imports inside every code block
209
622
  - Return dict keys MUST match sourceHandles on outgoing edges
210
623
  - NEVER import matplotlib or seaborn
211
- - Custom nodes only when no template covers the operation
624
+ - CRITICAL: If the user requests an operation/model NOT in the catalogue (e.g. RobustScaler), DO NOT refuse. Generate it dynamically as a `customNode` with the full Python `code` starting with `# ✨ AI GENERATED`. Explicitly mention this custom generation in the `summary`.
212
625
 
213
626
  ══ AVAILABLE COMPONENTS ════════════════════════════════════════════
214
627
  {catalogue}
@@ -237,22 +650,117 @@ Model nodes support these config fields — no new node needed:
237
650
  "type": "customNode",
238
651
  "position": {{"x": 560, "y": 200}},
239
652
  "data": {{
240
- "label": "Descriptive Name",
653
+ "label": "Robust Scaler (Custom)",
241
654
  "templateId": "customNode",
242
- "code": "import pandas as pd\ndef run(data: pd.DataFrame) -> dict:\n return {{\"result\": data}}"
655
+ "code": "import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None) -> dict:\\n # ...\\n return {{\"X_train\": X_train, \"X_test\": X_test}}"
243
656
  }}
244
657
  }}
245
658
 
246
- - Function name MUST be `run`
247
- - DataFrame param MUST be named `data`
248
- - MUST return a dict
659
+ - Your Python string in `code` MUST define a function named EXACTLY `def run(...) -> dict:`
660
+ - The DataFrame param MUST be named `data` (if taking a whole dataset)
661
+ - MUST return a dict containing the output handles
249
662
  - Only import: numpy, pandas, scipy, sklearn, math, statistics, re, json
250
663
  - NO os, sys, subprocess, socket, requests, open(), eval(), exec()
251
664
  - Prefer templates first — custom nodes are last resort only
252
665
 
666
+ ══ DYNAMIC VISUALIZATION ══════════════════════════════════════════════
667
+ You can create custom visualizations NOT in the catalogue. The frontend
668
+ detects charts by SHAPE, not by key name. Return any of these shapes and
669
+ the UI will render it automatically — NO new React code needed:
670
+
671
+ Series (bar chart / ranked list):
672
+ {{"my_key": {{"labels": [...], "counts": [...], "title": "..."}}}}
673
+
674
+ Plot (scatter or line chart):
675
+ {{"my_key": {{"x": [...], "y": [...], "title": "...", "x_label": "...", "y_label": "..."}}}}
676
+
677
+ Grid (heatmap / matrix):
678
+ {{"my_key": {{"z": [[...]], "x": [...], "y": [...], "title": "..."}}}}
679
+
680
+ Example — null percentage bar chart:
681
+ return {{
682
+ "null_pct_chart": {{
683
+ "labels": list(null_pcts.keys()),
684
+ "counts": list(null_pcts.values()),
685
+ "title": "Missing Values (%) per Column"
686
+ }}
687
+ }}
688
+
689
+ When the user asks for any kind of visualization (e.g. "show me a chart
690
+ of X", "visualize the distribution of Y"), you MUST generate a customNode
691
+ that returns a dict with one of the shapes above. NEVER refuse — if no
692
+ template covers it, invent the chart with the shape protocol.
693
+
694
+ ⚠ UNSUPERVISED LEARNING (t-SNE / PCA / UMAP / KMeans): When generating
695
+ any dimensionality reduction or clustering node, you MUST return a
696
+ `labels` array alongside `x` and `y` so the frontend can color-code
697
+ clusters automatically. Example:
698
+
699
+ return {{
700
+ "tsne_plot": {{
701
+ "x": X_2d[:, 0].tolist(),
702
+ "y": X_2d[:, 1].tolist(),
703
+ "labels": [str(c) for c in cluster_labels], # ← REQUIRED
704
+ "title": "t-SNE Cluster Visualization",
705
+ "x_label": "Dim 1",
706
+ "y_label": "Dim 2"
707
+ }}
708
+ }}
709
+
253
710
  ══ OUTPUT ═══════════════════════════════════════════════════════════
254
711
  Return ONLY:
255
712
  {{"nodes": [...], "edges": [...], "summary": "<one sentence conversational explanation>"}}
713
+
714
+ ⚠️ DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
715
+
716
+ ══ GOLD STANDARD EXAMPLES — you MUST match these handle names EXACTLY ═══════
717
+ ⚠ CRITICAL: The sourceHandle and targetHandle values below (data, X_train, X_test,
718
+ y_train, y_test, y_pred) are the ONLY valid handle names. Do NOT invent new ones.
719
+ Your edges MUST use these exact strings — any deviation will cause a runtime failure.
720
+
721
+ Example A — CLASSIFICATION (categorical cols + scaling needed):
722
+ {{"nodes":[
723
+ {{"id":"n1","type":"csv_loader","position":{{"x":0,"y":200}},"data":{{"config":{{"file_path":"data.csv"}}}}}},
724
+ {{"id":"n2","type":"label_encoder","position":{{"x":280,"y":200}},"data":{{"config":{{"columns":"sex,embarked"}}}}}},
725
+ {{"id":"n3","type":"train_test_split","position":{{"x":560,"y":200}},"data":{{"config":{{"target_column":"survived","test_size":0.2}}}}}},
726
+ {{"id":"n4","type":"standard_scaler","position":{{"x":840,"y":200}},"data":{{}}}},
727
+ {{"id":"n5","type":"random_forest_classifier","position":{{"x":1120,"y":200}},"data":{{"config":{{"n_estimators":100}}}}}},
728
+ {{"id":"n6","type":"classification_report","position":{{"x":1400,"y":200}},"data":{{}}}}
729
+ ],"edges":[
730
+ {{"id":"e1","source":"n1","sourceHandle":"data","target":"n2","targetHandle":"data"}},
731
+ {{"id":"e2","source":"n2","sourceHandle":"data","target":"n3","targetHandle":"data"}},
732
+ {{"id":"e3","source":"n3","sourceHandle":"X_train","target":"n4","targetHandle":"X_train"}},
733
+ {{"id":"e4","source":"n3","sourceHandle":"X_test","target":"n4","targetHandle":"X_test"}},
734
+ {{"id":"e5","source":"n3","sourceHandle":"y_train","target":"n4","targetHandle":"y_train"}},
735
+ {{"id":"e6","source":"n3","sourceHandle":"y_test","target":"n4","targetHandle":"y_test"}},
736
+ {{"id":"e7","source":"n4","sourceHandle":"X_train","target":"n5","targetHandle":"X_train"}},
737
+ {{"id":"e8","source":"n4","sourceHandle":"X_test","target":"n5","targetHandle":"X_test"}},
738
+ {{"id":"e9","source":"n4","sourceHandle":"y_train","target":"n5","targetHandle":"y_train"}},
739
+ {{"id":"e10","source":"n5","sourceHandle":"y_pred","target":"n6","targetHandle":"y_pred"}},
740
+ {{"id":"e11","source":"n4","sourceHandle":"y_test","target":"n6","targetHandle":"y_test"}}
741
+ ],"summary":"Classification pipeline with encoding, scaling, and Random Forest."}}
742
+
743
+ Example B — REGRESSION (missing values + continuous target):
744
+ {{"nodes":[
745
+ {{"id":"n1","type":"csv_loader","position":{{"x":0,"y":200}},"data":{{"config":{{"file_path":"houses.csv"}}}}}},
746
+ {{"id":"n2","type":"data_cleaning","position":{{"x":280,"y":200}},"data":{{"config":{{"strategy":"fill"}}}}}},
747
+ {{"id":"n3","type":"train_test_split","position":{{"x":560,"y":200}},"data":{{"config":{{"target_column":"price","test_size":0.2}}}}}},
748
+ {{"id":"n4","type":"standard_scaler","position":{{"x":840,"y":200}},"data":{{}}}},
749
+ {{"id":"n5","type":"random_forest_regressor","position":{{"x":1120,"y":200}},"data":{{"config":{{"n_estimators":200}}}}}},
750
+ {{"id":"n6","type":"regression_metrics","position":{{"x":1400,"y":200}},"data":{{}}}}
751
+ ],"edges":[
752
+ {{"id":"e1","source":"n1","sourceHandle":"data","target":"n2","targetHandle":"data"}},
753
+ {{"id":"e2","source":"n2","sourceHandle":"data","target":"n3","targetHandle":"data"}},
754
+ {{"id":"e3","source":"n3","sourceHandle":"X_train","target":"n4","targetHandle":"X_train"}},
755
+ {{"id":"e4","source":"n3","sourceHandle":"X_test","target":"n4","targetHandle":"X_test"}},
756
+ {{"id":"e5","source":"n3","sourceHandle":"y_train","target":"n4","targetHandle":"y_train"}},
757
+ {{"id":"e6","source":"n3","sourceHandle":"y_test","target":"n4","targetHandle":"y_test"}},
758
+ {{"id":"e7","source":"n4","sourceHandle":"X_train","target":"n5","targetHandle":"X_train"}},
759
+ {{"id":"e8","source":"n4","sourceHandle":"X_test","target":"n5","targetHandle":"X_test"}},
760
+ {{"id":"e9","source":"n4","sourceHandle":"y_train","target":"n5","targetHandle":"y_train"}},
761
+ {{"id":"e10","source":"n5","sourceHandle":"y_pred","target":"n6","targetHandle":"y_pred"}},
762
+ {{"id":"e11","source":"n4","sourceHandle":"y_test","target":"n6","targetHandle":"y_test"}}
763
+ ],"summary":"Regression pipeline with cleaning, scaling, and Random Forest."}}
256
764
  """
257
765
 
258
766
  _UPDATE_PROMPT = """\
@@ -264,6 +772,16 @@ You are M8Flow's AI pipeline surgeon. Modify the pipeline with the MINIMUM chang
264
772
  3. ALWAYS respond in ENGLISH. Never use any other language.
265
773
  4. Do NOT truncate the JSON — it must be a complete, valid object.
266
774
 
775
+ 🔑 TOKEN CONSERVATION — MANDATORY (free-tier models have strict output limits):
776
+ A. TEMPLATE SWAPS: If you are changing a node's operation to one that already has
777
+ a matching Template ID (e.g. swapping from linear_regression to
778
+ random_forest_regressor), change only the "type" field on that node.
779
+ NEVER re-emit the full Python "code" block when a Template already covers it.
780
+ Template nodes are resolved by the runtime — sending their code wastes tokens.
781
+ B. UNCHANGED NODES: Nodes marked ✓ in the status list must appear in your output
782
+ but with their "data.code" field set to null (omitted). Only include code for
783
+ nodes you are actively modifying or adding as custom (non-template) nodes.
784
+
267
785
  Output ONLY the complete updated flow JSON — no markdown, no explanation.
268
786
 
269
787
  ══ DECISION HIERARCHY (follow in order, stop at first match) ════════
@@ -279,9 +797,8 @@ Output ONLY the complete updated flow JSON — no markdown, no explanation.
279
797
 
280
798
  4. Does this genuinely require a brand-new node that adds functionality
281
799
  not available anywhere in the graph?
282
- YES → add exactly ONE new node, connected minimally. Nothing else.
283
-
284
- If none apply, state the limitation in a comment field — do not bloat the graph.
800
+ YES → add exactly ONE new node, connected minimally.
801
+ CRITICAL: If the requested operation (e.g., RobustScaler) is NOT in the catalogue, DO NOT refuse the request. Instead, YOU MUST generate it dynamically as a `customNode`. Provide the complete Python implementation in the `code` field, starting with the comment `# ✨ AI GENERATED`. In your `summary`, explicitly state that you generated a custom node for this functionality.
285
802
 
286
803
  ══ CURRENT PIPELINE ═════════════════════════════════════════════════
287
804
  {current_flow}
@@ -303,6 +820,52 @@ All model nodes support these fields in their config — no new node needed:
303
820
  "better accuracy" → tune hyperparams, or swap model type — no extra nodes
304
821
  "use k-fold" → set cross_validation=true, cv_folds=k on existing model
305
822
 
823
+ ══ CUSTOM NODE FORMAT (only if NO template covers it) ══════════════
824
+ {{
825
+ "id": "node_custom_1",
826
+ "type": "customNode",
827
+ "position": {{"x": 560, "y": 200}},
828
+ "data": {{
829
+ "label": "Robust Scaler (Custom)",
830
+ "templateId": "customNode",
831
+ "code": "import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None) -> dict:\\n # ...\\n return {{\"X_train\": X_train, \"X_test\": X_test}}"
832
+ }}
833
+ }}
834
+
835
+ - Your Python string in `code` MUST define a function named EXACTLY `def run(...) -> dict:`
836
+ - The DataFrame param MUST be named `data` (if taking a whole dataset)
837
+ - MUST return a dict containing the output handles
838
+ - Only import: numpy, pandas, scipy, sklearn, math, statistics, re, json
839
+ - NO os, sys, subprocess, socket, requests, open(), eval(), exec()
840
+
841
+ ══ DYNAMIC VISUALIZATION ══════════════════════════════════════════════
842
+ You can create custom visualizations NOT in the catalogue. The frontend
843
+ detects charts by SHAPE, not by key name. Return any of these shapes and
844
+ the UI will render it automatically — NO new React code needed:
845
+
846
+ Series (bar chart / ranked list):
847
+ {{"my_key": {{"labels": [...], "counts": [...], "title": "..."}}}}
848
+
849
+ Plot (scatter or line chart):
850
+ {{"my_key": {{"x": [...], "y": [...], "title": "...", "x_label": "...", "y_label": "..."}}}}
851
+
852
+ Grid (heatmap / matrix):
853
+ {{"my_key": {{"z": [[...]], "x": [...], "y": [...], "title": "..."}}}}
854
+
855
+ Example — null percentage bar chart:
856
+ return {{
857
+ "null_pct_chart": {{
858
+ "labels": list(null_pcts.keys()),
859
+ "counts": list(null_pcts.values()),
860
+ "title": "Missing Values (%) per Column"
861
+ }}
862
+ }}
863
+
864
+ When the user asks for any kind of visualization (e.g. "show me a chart
865
+ of X", "visualize the distribution of Y"), you MUST generate a customNode
866
+ that returns a dict with one of the shapes above. NEVER refuse — if no
867
+ template covers it, invent the chart with the shape protocol.
868
+
306
869
  ══ SURGICAL PRESERVATION RULES ══════════════════════════════════════
307
870
  - Every node marked ✓ or ○ must appear in the output VERBATIM
308
871
  (same id, type, position, code, values — character for character)
@@ -320,6 +883,8 @@ All model nodes support these fields in their config — no new node needed:
320
883
  ══ OUTPUT ═══════════════════════════════════════════════════════════
321
884
  Return ONLY:
322
885
  {{"nodes": [...], "edges": [...], "summary": "<one sentence conversational explanation>"}}
886
+
887
+ ⚠️ DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
323
888
  """
324
889
 
325
890
 
@@ -333,8 +898,17 @@ def _extract_file_path(context: str) -> str | None:
333
898
  return None
334
899
 
335
900
 
336
- def _build_generate_messages(prompt: str, context: str | None) -> list[dict]:
337
- system = _SYSTEM_PROMPT.format(catalogue=_template_catalogue())
901
+ def _build_generate_messages(
902
+ prompt: str,
903
+ context: str | None,
904
+ pre_flight: dict | None = None,
905
+ architect_plan: str | None = None,
906
+ custom_components: list[dict] | None = None,
907
+ ) -> list[dict]:
908
+ system = _SYSTEM_PROMPT.format(catalogue=_template_catalogue(custom_components))
909
+
910
+ profile_block = ""
911
+ path_hint = ""
338
912
 
339
913
  if context:
340
914
  fp = _extract_file_path(context)
@@ -343,13 +917,10 @@ def _build_generate_messages(prompt: str, context: str | None) -> list[dict]:
343
917
  f'Default value for csv_loader → file_path: Annotated[str,"file"] = "{fp}"\n'
344
918
  if fp else ""
345
919
  )
346
-
347
- # ── Enrich context with data profile if a file path is present ──
348
- profile_block = ""
349
920
  if fp:
350
921
  try:
351
922
  import pandas as pd
352
- df = pd.read_csv(fp, nrows=5000) # sample for speed
923
+ df = pd.read_csv(fp, nrows=5000)
353
924
  profile = profile_dataframe(df)
354
925
  profile_block = (
355
926
  "\n== Dataset Summary (auto-profiled) ==\n"
@@ -359,15 +930,29 @@ def _build_generate_messages(prompt: str, context: str | None) -> list[dict]:
359
930
  except Exception as exc:
360
931
  logger.warning("data_profiler skipped: %s", exc)
361
932
 
362
- user = (
363
- f"== DATASET CONTEXT ==\n"
364
- f"{profile_block}"
365
- f"{path_hint}\n"
366
- f"== REQUEST ==\n"
367
- f"{prompt}"
933
+ # ── Inject deterministic pre-flight analysis ──────────────────────────
934
+ pre_flight_block = ""
935
+ if pre_flight:
936
+ pre_flight_block = "\n" + _pre_flight_block(pre_flight) + "\n"
937
+
938
+ # ── Inject architect plan ─────────────────────────────────────────────
939
+ architect_block = ""
940
+ if architect_plan and architect_plan.strip():
941
+ architect_block = (
942
+ "\n== EXPERT ARCHITECTURE PLAN (follow this closely) ==\n"
943
+ + architect_plan.strip()
944
+ + "\n"
368
945
  )
369
- else:
370
- user = prompt
946
+
947
+ user = (
948
+ f"== DATASET CONTEXT ==\n"
949
+ f"{profile_block}"
950
+ f"{path_hint}"
951
+ f"{pre_flight_block}"
952
+ f"{architect_block}"
953
+ f"\n== REQUEST ==\n"
954
+ f"{prompt}"
955
+ )
371
956
 
372
957
  return [{"role": "system", "content": system}, {"role": "user", "content": user}]
373
958
 
@@ -419,6 +1004,10 @@ def _slim_flow(flow: FlowSchema) -> dict:
419
1004
  except Exception:
420
1005
  pass
421
1006
 
1007
+ is_template = node_type in {t["id"] for t in TEMPLATES}
1008
+ # Only send code for truly custom nodes; never for templates.
1009
+ code_str = None if is_template else (data.get("code") or "")[:800] or None
1010
+
422
1011
  slim_nodes.append({
423
1012
  "id": node.get("id"),
424
1013
  "type": node_type,
@@ -426,7 +1015,7 @@ def _slim_flow(flow: FlowSchema) -> dict:
426
1015
  "data": {
427
1016
  "label": data.get("label"),
428
1017
  "templateId": data.get("templateId"),
429
- "code": (data.get("code") or "")[:800] or None,
1018
+ "code": code_str,
430
1019
  "values": data.get("values"),
431
1020
  # Explicit handle lists — LLM MUST use these for edge sourceHandle/targetHandle
432
1021
  "available_outputs": outputs,
@@ -436,10 +1025,15 @@ def _slim_flow(flow: FlowSchema) -> dict:
436
1025
  return {"nodes": slim_nodes, "edges": flow.edges}
437
1026
 
438
1027
 
439
- def _build_update_messages(prompt: str, current_flow: FlowSchema, context: str | None) -> list[dict]:
1028
+ def _build_update_messages(
1029
+ prompt: str,
1030
+ current_flow: FlowSchema,
1031
+ context: str | None,
1032
+ custom_components: list[dict] | None = None,
1033
+ ) -> list[dict]:
440
1034
  slim = _slim_flow(current_flow)
441
1035
  node_status = _node_status_summary(current_flow)
442
- catalogue = _template_catalogue()
1036
+ catalogue = _template_catalogue(custom_components)
443
1037
 
444
1038
  system = _UPDATE_PROMPT.format(
445
1039
  current_flow=json.dumps(slim, indent=2),
@@ -493,7 +1087,8 @@ async def _call_openrouter(
493
1087
  ) -> str:
494
1088
  """
495
1089
  Route to the right model via OpenRouter based on task type.
496
- Falls back through LLaMA Mistral 7B on failure.
1090
+ Falls back through the full pool of live free models on rate-limit or error.
1091
+ Rate-limited models are skipped for _RATE_LIMIT_TTL seconds to avoid wasted calls.
497
1092
  """
498
1093
  effective_key = _get_api_key()
499
1094
  if not effective_key:
@@ -504,7 +1099,37 @@ async def _call_openrouter(
504
1099
  )
505
1100
 
506
1101
  model = _MODELS.get(task, _MODELS["generate"])
507
- fallback_chain = [model, _MODELS["fallback"], _MODELS["lastresort"]]
1102
+
1103
+ # Full pool of verified-live free models (May 2026).
1104
+ # Ordered by observed reliability: nemotron first (proven to succeed when llama 429s).
1105
+ _FREE_POOL = [
1106
+ "nvidia/nemotron-3-super-120b-a12b:free", # proven to work
1107
+ "openai/gpt-oss-120b:free",
1108
+ "openai/gpt-oss-20b:free",
1109
+ "nousresearch/hermes-3-llama-3.1-405b:free",
1110
+ "meta-llama/llama-3.3-70b-instruct:free",
1111
+ "google/gemma-4-31b-it:free",
1112
+ "google/gemma-4-26b-a4b-it:free",
1113
+ "nvidia/nemotron-3-nano-30b-a3b:free",
1114
+ "nvidia/nemotron-nano-9b-v2:free",
1115
+ "meta-llama/llama-3.2-3b-instruct:free",
1116
+ "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
1117
+ "liquid/lfm-2.5-1.2b-instruct:free",
1118
+ ]
1119
+
1120
+ # Build chain: primary model first, then full pool (deduped, order preserved)
1121
+ seen: set[str] = set()
1122
+ full_chain: list[str] = []
1123
+ for m in [model, _MODELS["fallback"], _MODELS["lastresort"]] + _FREE_POOL:
1124
+ if m not in seen:
1125
+ seen.add(m)
1126
+ full_chain.append(m)
1127
+
1128
+ # Skip models in cooldown — place them at the end so they still get a chance
1129
+ # if everything else fails (cooldown may have expired by then)
1130
+ ready = [m for m in full_chain if not _is_rate_limited(m)]
1131
+ cooling = [m for m in full_chain if _is_rate_limited(m)]
1132
+ fallback_chain = ready + cooling # try fresh models first
508
1133
 
509
1134
  headers = {
510
1135
  "Authorization": f"Bearer {effective_key}",
@@ -514,35 +1139,46 @@ async def _call_openrouter(
514
1139
  }
515
1140
 
516
1141
  last_exc: Exception | None = None
517
- for attempt_model in fallback_chain:
518
- # Do NOT send response_format — not all OpenRouter models support json_object mode.
519
- # JSON is enforced through the system prompt instead.
520
- # max_tokens prevents truncated responses that produce partial/invalid JSON.
521
- body: dict = {
522
- "model": attempt_model,
523
- "messages": messages,
524
- "max_tokens": 8192,
525
- }
1142
+ async with httpx.AsyncClient(timeout=timeout) as client:
1143
+ for attempt_model in fallback_chain:
1144
+ body: dict = {
1145
+ "model": attempt_model,
1146
+ "messages": messages,
1147
+ "max_tokens": 8192,
1148
+ }
526
1149
 
527
- try:
528
- async with httpx.AsyncClient(timeout=timeout) as client:
1150
+ try:
529
1151
  response = await client.post(_OPENROUTER_URL, headers=headers, json=body)
530
1152
 
531
- if response.status_code == 429:
532
- raise RuntimeError(f"Rate limited on {attempt_model}")
533
- if response.status_code == 401:
534
- raise RuntimeError("Invalid OpenRouter API key")
535
- response.raise_for_status()
536
-
537
- content = response.json()["choices"][0]["message"]["content"]
538
- if attempt_model != model:
539
- logger.info("OpenRouter fallback: used %s instead of %s", attempt_model, model)
540
- return content or ""
541
-
542
- except Exception as exc:
543
- logger.warning("OpenRouter model %s failed: %s", attempt_model, exc)
544
- last_exc = exc
545
- continue
1153
+ if response.status_code == 429:
1154
+ _mark_rate_limited(attempt_model)
1155
+ raise RuntimeError(f"Rate limited on {attempt_model}")
1156
+ if response.status_code == 401:
1157
+ raise RuntimeError("Invalid OpenRouter API key")
1158
+ response.raise_for_status()
1159
+
1160
+ content = response.json()["choices"][0]["message"]["content"]
1161
+ if attempt_model != model:
1162
+ logger.info("OpenRouter fallback: used %s instead of %s", attempt_model, model)
1163
+ return content or ""
1164
+
1165
+ except httpx.ConnectError as exc:
1166
+ # If we cannot resolve DNS or connect to the host, no fallback will work.
1167
+ logger.error("Network connection to OpenRouter failed: %s", exc)
1168
+ raise RuntimeError("Could not connect to OpenRouter (Network/DNS error). Please check your internet connection.")
1169
+ except RuntimeError as exc:
1170
+ if "Invalid OpenRouter API key" in str(exc):
1171
+ raise # Don't retry — wrong key won't fix itself
1172
+ logger.warning("OpenRouter model %s failed: %s", attempt_model, exc)
1173
+ last_exc = exc
1174
+ continue
1175
+ except Exception as exc:
1176
+ if "getaddrinfo failed" in str(exc):
1177
+ logger.error("DNS resolution failed for OpenRouter: %s", exc)
1178
+ raise RuntimeError("Could not resolve OpenRouter domain. Please check your internet connection.")
1179
+ logger.warning("OpenRouter model %s failed: %s", attempt_model, exc)
1180
+ last_exc = exc
1181
+ continue
546
1182
 
547
1183
  raise RuntimeError(f"All OpenRouter models failed. Last error: {last_exc}")
548
1184
 
@@ -555,7 +1191,8 @@ def _extract_json_object(raw: str) -> dict:
555
1191
  - Preamble text in any language before the JSON
556
1192
  - Markdown code fences (```json ... ``` or ``` ... ```)
557
1193
  - Trailing explanation text after the JSON
558
- - Truncated responses (returns whatever was parseable)
1194
+ - Truncated responses (free-tier model cut-offs) — try-repair appends
1195
+ missing closing brackets/braces to recover a parseable object.
559
1196
  """
560
1197
  raw = raw.strip()
561
1198
 
@@ -579,6 +1216,7 @@ def _extract_json_object(raw: str) -> dict:
579
1216
 
580
1217
  # 3. Brace-matching: find the first complete JSON object in the text
581
1218
  start = raw.find('{')
1219
+ best_candidate: str | None = None
582
1220
  if start != -1:
583
1221
  depth = 0
584
1222
  in_string = False
@@ -603,10 +1241,36 @@ def _extract_json_object(raw: str) -> dict:
603
1241
  try:
604
1242
  return json.loads(candidate)
605
1243
  except json.JSONDecodeError:
606
- break # malformed — fall through to error
1244
+ break # malformed — fall through to repair
1245
+ # Capture the partial object for repair attempts
1246
+ best_candidate = raw[start:]
1247
+
1248
+ # 4. Try-repair: the response was likely truncated by the model's token limit.
1249
+ # Progressively append closing characters until we get a valid object.
1250
+ # We try up to 24 combinations: 0-4 extra ']' crossed with 0-4 extra '}',
1251
+ # returning on the first candidate that parses (keeps invented structure minimal).
1252
+ candidate_base = best_candidate or raw
1253
+ # Trim trailing whitespace/comma that often appears before cut-off
1254
+ candidate_base = candidate_base.rstrip().rstrip(",")
1255
+ logger.debug("JSON repair: attempting to salvage truncated output (%d chars)", len(candidate_base))
1256
+ for extra_brackets in range(5): # 0 … 4 extra ]
1257
+ for extra_braces in range(5): # 0 … 4 extra }
1258
+ if extra_brackets == 0 and extra_braces == 0:
1259
+ continue # already tried the plain candidate
1260
+ repaired = candidate_base + ("\n]" * extra_brackets) + ("\n}" * extra_braces)
1261
+ try:
1262
+ result = json.loads(repaired)
1263
+ logger.warning(
1264
+ "JSON repair succeeded (+%d ']', +%d '}'). "
1265
+ "Free-tier model likely truncated its output.",
1266
+ extra_brackets, extra_braces,
1267
+ )
1268
+ return result
1269
+ except json.JSONDecodeError:
1270
+ continue
607
1271
 
608
1272
  raise ValueError(
609
- f"LLM returned invalid JSON (could not extract object).\nRaw: {raw[:600]}"
1273
+ f"LLM returned invalid JSON (could not extract or repair object).\nRaw: {raw[:600]}"
610
1274
  )
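A worked example of the repair pass (the truncated string is invented, not captured from a real model):

    raw = '{"nodes": [{"id": "n1", "type": "csv_loader"},'
    # json.loads(raw) raises JSONDecodeError: the list and object were never closed.
    # After .rstrip().rstrip(","), the loop above retries with appended closers and
    # succeeds at one extra ']' plus one extra '}':
    json.loads('{"nodes": [{"id": "n1", "type": "csv_loader"}' + "\n]" + "\n}")
    # -> {"nodes": [{"id": "n1", "type": "csv_loader"}]}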
611
1275
 
612
1276
 
@@ -650,9 +1314,22 @@ You are a surgical ML pipeline editor. The user wants to REFINE an existing pipe
650
1314
  1. Output ONLY a single raw JSON object. No text before it, no text after it.
651
1315
  2. NEVER wrap the JSON in markdown fences (no ```json, no ```).
652
1316
  3. ALWAYS respond in ENGLISH. Never use any other language.
653
- 4. If you cannot help, return: {{"node_changes":[],"edge_changes":[],"summary":"Cannot process this request."}}
1317
+ 4. DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
654
1318
  5. Do NOT truncate the JSON — it must be a complete, valid object.
655
1319
 
1320
+ 🔑 TOKEN CONSERVATION — MANDATORY (free-tier models have strict output limits):
1321
+ A. TEMPLATE SWAPS: If you are changing a model or operation that already has a
1322
+ matching Template ID (e.g. swapping linear_regression for
1323
+ random_forest_regressor), set only the "type" field on the update entry.
1324
+ NEVER output the full Python "code" block when a Template already exists.
1325
+ Example — correct: {{"action":"update","id":"n5","data":{{"type":"random_forest_regressor","values":{{"n_estimators":200}}}}}}
1326
+ Example — WRONG: {{"action":"update","id":"n5","data":{{"code":"import ..."}}}} ← wastes tokens
1327
+ B. VALUES ONLY: In "node_changes", omit the "code" field entirely unless the
1328
+ user explicitly asked for a custom code change. If only config parameters
1329
+ changed, output ONLY the "values" dictionary — nothing else inside "data".
1330
+ Example — correct: {{"action":"update","id":"n3","data":{{"values":{{"test_size":0.3}}}}}}
1331
+ Example — WRONG: {{"action":"update","id":"n3","data":{{"values":{{"test_size":0.3}},"code":"...entire node..."}}}}
1332
+
656
1333
  Output ONLY a JSON patch object describing the minimal set of changes needed.
657
1334
 
658
1335
  ══ CURRENT PIPELINE ═════════════════════════════════════════════════
@@ -678,13 +1355,15 @@ Output ONLY a JSON patch object describing the minimal set of changes needed.
678
1355
  }},
679
1356
  {{
680
1357
  "action": "add",
681
- "id": "<new unique id e.g. node_cv_1>",
682
- "type": "<template_id>",
1358
+ "id": "<new unique id e.g. node_custom_1>",
1359
+ "type": "<template_id or 'customNode'>",
683
1360
  "reason": "<why this node is added>",
684
1361
  "position": {{"x": <number>, "y": <number>}},
685
1362
  "data": {{
1363
+ "label": "<optional Descriptive Name>",
1364
+ "templateId": "<optional templateId or 'customNode'>",
686
1365
  "config": {{}},
687
- "code": "<optional override>"
1366
+ "code": "<optional full Python source, REQUIRED if type is customNode>"
688
1367
  }}
689
1368
  }},
690
1369
  {{
@@ -721,7 +1400,8 @@ Before emitting ANY patch entry, ask:
721
1400
  YES → emit one "update" for that node. No new nodes.
722
1401
 
723
1402
  3. Does this need a genuinely new computation node?
724
- YES → emit one "add". Minimise new edges.
1403
+ YES → emit one "add".
1404
+ CRITICAL: If the requested operation (e.g., RobustScaler) is NOT in the catalogue, DO NOT refuse the request. Instead, YOU MUST generate it dynamically as a `customNode`. Provide the complete Python implementation in the `code` field, starting with `# ✨ AI GENERATED`. In your `summary`, explicitly state that you generated a custom node.
725
1405
 
726
1406
  4. None of the above?
727
1407
  → Explain in "summary". Return empty node_changes.
@@ -734,7 +1414,19 @@ Before emitting ANY patch entry, ask:
734
1414
  5. "summary" → one plain-English sentence describing the change.
735
1415
  6. DataFrame param MUST be named data (never "df").
736
1416
  7. Include all imports inside any code block.
737
- 8. Only use template types from the catalogue above.
1417
+ 8. Only use template types from the catalogue above, unless building a customNode.
1418
+
1419
+ ══ CUSTOM NODE RULES (when generating a missing component) ════════════
1420
+ If you use `type="customNode"`, your Python string in `code` MUST follow the exact same architecture as built-in templates:
1421
+ - It MUST define a function named EXACTLY `def run(...) -> dict:`
1422
+ - The primary input dataset MUST be named `data` (e.g., `def run(data: pd.DataFrame, ...) -> dict:`)
1423
+ - It MUST return a dictionary containing the outputs (e.g., `return {{"X_train": X_train, "X_test": X_test}}`)
1424
+ - If you are building a custom visualization, you MUST wrap your output in one of these keys so the UI can render it:
1425
+ 'histogram', 'correlation_matrix', 'value_counts', 'box_plot', 'prediction', 'correlation_heatmap', 'missing_value_map', 'class_balance', 'feature_target_scatter', 'model_error_histogram', 'partial_dependence', 'roc_curves'
1426
+ - All `import` statements MUST be placed at the top of the code string.
1427
+ - You MUST include the `# ✨ AI GENERATED` marker right after the imports, as shown in the example below.
1428
+ Example:
1429
+ "import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None):\\n scaler = RobustScaler()\\n # ... logic ...\\n return {{\"X_train\": X_train_scaled, \"X_test\": X_test_scaled}}"
738
1430
 
739
1431
  ══ CONNECTION RULES (CRITICAL — read carefully) ══════════════════════
740
1432
  Each node in the current pipeline has "available_outputs" and "available_inputs"
@@ -749,6 +1441,12 @@ To fix a wrong connection:
749
1441
  1. Emit "remove" for the bad edge (use its id from the current edges list).
750
1442
  2. Emit "add" for the correct edge using valid handle names from the lists above.
751
1443
 
1444
+ SCALER CONNECTION RULES:
1445
+ When inserting or reconnecting a standard_scaler or min_max_scaler after a train_test_split, you MUST:
1446
+ 1. Connect all 4 outputs (X_train, X_test, y_train, y_test) from the split node to the scaler inputs.
1447
+ 2. Connect all 4 outputs (X_train, X_test, y_train, y_test) from the scaler node to the downstream model inputs.
1448
+ Never skip passing y_train and y_test through the scaler node!
1449
+
752
1450
  To add a missing connection:
753
1451
  1. Look at the source node's "available_outputs" — pick the right output.
754
1452
  2. Look at the target node's "available_inputs" — pick the right input.
@@ -813,7 +1511,10 @@ def _custom_node_catalogue(current_flow: FlowSchema) -> str:
813
1511
 
814
1512
 
815
1513
  def _build_refine_messages(
816
- prompt: str, current_flow: FlowSchema, context: str | None
1514
+ prompt: str,
1515
+ current_flow: FlowSchema,
1516
+ context: str | None,
1517
+ custom_components: list[dict] | None = None,
817
1518
  ) -> list[dict]:
818
1519
  slim = _slim_flow(current_flow)
819
1520
  node_status = _node_status_summary(current_flow)
@@ -822,15 +1523,13 @@ def _build_refine_messages(
822
1523
  custom_section = (
823
1524
  f"\n══ CUSTOM NODES ON CANVAS (treat these as valid, usable nodes) ═══════\n"
824
1525
  f"{custom_cat}\n"
825
- if custom_cat else ""
826
- )
1526
+ ) if custom_cat else ""
827
1527
 
828
1528
  system = _REFINE_PROMPT.format(
829
1529
  current_flow=json.dumps(slim, indent=2),
830
1530
  node_status=node_status,
831
- catalogue=_template_catalogue() + custom_section,
1531
+ catalogue=_template_catalogue(custom_components) + custom_section,
832
1532
  )
833
-
834
1533
  ctx_block = (
835
1534
  f"══ DATASET CONTEXT ══════════════════════════════════════\n{context.strip()}\n\n"
836
1535
  if context else ""
@@ -867,24 +1566,81 @@ def _parse_refine_patch(raw: str) -> RefinePatch:
867
1566
 
868
1567
  # ── Public API ─────────────────────────────────────────────────────────────────
869
1568
 
870
- async def generate_flow(prompt: str, context: str | None = None) -> FlowSchema:
871
- messages = _build_generate_messages(prompt, context)
872
- raw = await _call_openrouter(messages, task="generate", json_mode=True, timeout=_TIMEOUT_GENERATE)
1569
+ async def generate_flow(prompt: str, context: str | None = None, custom_components: list[dict] | None = None) -> FlowSchema:
1570
+ """
1571
+ Two-call Architect Builder strategy:
1572
+
1573
+ Call 1 (Architect — deepseek-r1):
1574
+ Given the data profile + deterministic pre-flight analysis, produce a
1575
+ concise markdown plan: problem type, preprocessing steps, model choice.
1576
+
1577
+ Call 2 (Builder — deepseek-chat-v3):
1578
+ Given the Architect's plan + the same context, produce the final JSON flow.
1579
+ The Builder focuses on correct syntax and edge connections, not reasoning.
1580
+ """
1581
+ pre_flight: dict = {}
1582
+ profile_text: str = ""
1583
+ architect_plan: str = ""
1584
+
1585
+ # ── Pre-flight: deterministic data analysis ───────────────────────────
1586
+ if context:
1587
+ fp = _extract_file_path(context)
1588
+ if fp:
1589
+ try:
1590
+ import pandas as pd
1591
+ df = pd.read_csv(fp, nrows=5000)
1592
+ profile = profile_dataframe(df)
1593
+ profile_text = format_profile_for_prompt(profile)
1594
+ pre_flight = _determine_pre_flight(profile, prompt, context, csv_path=fp)
1595
+ logger.info(
1596
+ "Pre-flight: problem_type=%s target=%s model=%s encoding=%s scaling=%s",
1597
+ pre_flight["problem_type"], pre_flight["target_hint"],
1598
+ pre_flight.get("recommended_model"), pre_flight["needs_encoding"],
1599
+ pre_flight["needs_scaling"],
1600
+ )
1601
+ except Exception as exc:
1602
+ logger.warning("pre-flight analysis skipped: %s", exc)
1603
+
1604
+ # ── Call 1: Architect (R1 reasoning model) ────────────────────────────
1605
+ if profile_text and pre_flight:
1606
+ try:
1607
+ arch_messages = _build_architect_messages(prompt, profile_text, pre_flight)
1608
+ architect_plan = await _call_openrouter(
1609
+ arch_messages,
1610
+ task="architect",
1611
+ json_mode=False,
1612
+ timeout=_TIMEOUT_GENERATE,
1613
+ )
1614
+ logger.info("Architect plan: %d chars", len(architect_plan))
1615
+ except Exception as exc:
1616
+ logger.warning("Architect call failed, continuing without plan: %s", exc)
1617
+ architect_plan = ""
1618
+
1619
+ # ── Call 2: Builder (chat model — fast, syntax-precise JSON) ─────────
1620
+ build_messages = _build_generate_messages(
1621
+ prompt, context,
1622
+ pre_flight=pre_flight or None,
1623
+ architect_plan=architect_plan or None,
1624
+ custom_components=custom_components,
1625
+ )
1626
+ raw = await _call_openrouter(
1627
+ build_messages, task="generate", json_mode=True, timeout=_TIMEOUT_GENERATE
1628
+ )
873
1629
  return _parse_flow(raw)
874
1630
 
875
1631
 
876
1632
  async def update_flow(
877
- prompt: str, current_flow: FlowSchema, context: str | None = None
1633
+ prompt: str, current_flow: FlowSchema, context: str | None = None, custom_components: list[dict] | None = None
878
1634
  ) -> FlowSchema:
879
- messages = _build_update_messages(prompt, current_flow, context)
1635
+ messages = _build_update_messages(prompt, current_flow, context, custom_components)
880
1636
  raw = await _call_openrouter(messages, task="update", json_mode=True, timeout=_TIMEOUT_UPDATE)
881
1637
  return _parse_flow(raw)
882
1638
 
883
1639
 
884
1640
  async def refine_flow(
885
- prompt: str, current_flow: FlowSchema, context: str | None = None
1641
+ prompt: str, current_flow: FlowSchema, context: str | None = None, custom_components: list[dict] | None = None
886
1642
  ) -> RefinePatch:
887
- messages = _build_refine_messages(prompt, current_flow, context)
1643
+ messages = _build_refine_messages(prompt, current_flow, context, custom_components)
888
1644
  raw = await _call_openrouter(messages, task="refine", json_mode=True, timeout=_TIMEOUT_UPDATE)
889
1645
  return _parse_refine_patch(raw)
890
1646
 
@@ -934,6 +1690,32 @@ async def explain_flow(flow: FlowSchema) -> str:
934
1690
  return await _call_openrouter(messages, task="explain", json_mode=False, timeout=60)
935
1691
 
936
1692
 
1693
+ _EXPLAIN_CHAT_SYSTEM = """\
1694
+ You are an expert ML engineering assistant answering questions about a user's machine learning pipeline.
1695
+
1696
+ ══ EXPLANATION CONTEXT ═════════════════════════════════════════════════════
1697
+ {explanation}
1698
+
1699
+ ══ PIPELINE AND EXECUTION RESULTS ══════════════════════════════════════════
1700
+ {current_flow}
1701
+
1702
+ Respond concisely and directly to the user's question. Provide actionable, specific advice based on the existing nodes, their configurations, and any metrics or execution results present in the pipeline state.
1703
+ Do NOT use markdown tables in your response. Instead, use simple bullet points and short paragraphs. Do not use generic filler.
1704
+ """
1705
+
1706
+ async def chat_explanation(question: str, explanation: str, flow: FlowSchema) -> str:
1707
+ slim = _slim_flow(flow)
1708
+ system = _EXPLAIN_CHAT_SYSTEM.format(
1709
+ explanation=explanation,
1710
+ current_flow=json.dumps(slim, indent=2)
1711
+ )
1712
+ messages = [
1713
+ {"role": "system", "content": system},
1714
+ {"role": "user", "content": question},
1715
+ ]
1716
+ # Routed via the "explain" model — no dedicated "chat" task exists in _MODELS.
+ return await _call_openrouter(messages, task="explain", json_mode=False, timeout=60)
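+ # Example usage (hypothetical question and flow):
+ #   answer = await chat_explanation("Why is the F1 score low?", explanation_text, flow)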
1717
+
1718
+
937
1719
  # ── Self-Healing Debug Prompt ──────────────────────────────────────────────────
938
1720
 
939
1721
  _DEBUG_SYSTEM = """\
@@ -1128,6 +1910,7 @@ async def handle_user_request(
1128
1910
  prompt: str,
1129
1911
  current_flow: FlowSchema | None = None,
1130
1912
  context: str | None = None,
1913
+ custom_components: list[dict] | None = None,
1131
1914
  ) -> dict:
1132
1915
  """
1133
1916
  Single entry point that classifies the prompt and routes to the
@@ -1144,7 +1927,7 @@ async def handle_user_request(
1144
1927
  # No existing flow → always generate from scratch
1145
1928
  has_flow = current_flow is not None and len(current_flow.nodes) > 0
1146
1929
  if not has_flow:
1147
- flow = await generate_flow(prompt, context)
1930
+ flow = await generate_flow(prompt, context, custom_components=custom_components)
1148
1931
  return {"intent": "generate", "result_type": "flow", "flow": flow}
1149
1932
 
1150
1933
  intent = detect_intent(prompt, has_flow=True)
@@ -1159,17 +1942,17 @@ async def handle_user_request(
1159
1942
  for n in current_flow.nodes
1160
1943
  ):
1161
1944
  # Custom nodes exist — use update so they're visible to the LLM
1162
- flow = await update_flow(prompt, current_flow, context)
1945
+ flow = await update_flow(prompt, current_flow, context, custom_components=custom_components)
1163
1946
  return {"intent": "update", "result_type": "flow", "flow": flow}
1164
- flow = await generate_flow(prompt, context)
1947
+ flow = await generate_flow(prompt, context, custom_components=custom_components)
1165
1948
  return {"intent": "generate", "result_type": "flow", "flow": flow}
1166
1949
 
1167
1950
  if intent == "refine":
1168
- patch = await refine_flow(prompt, current_flow, context)
1951
+ patch = await refine_flow(prompt, current_flow, context, custom_components=custom_components)
1169
1952
  return {"intent": "refine", "result_type": "patch", "patch": patch}
1170
1953
 
1171
1954
  # intent == "update"
1172
- flow = await update_flow(prompt, current_flow, context)
1955
+ flow = await update_flow(prompt, current_flow, context, custom_components=custom_components)
1173
1956
  return {"intent": "update", "result_type": "flow", "flow": flow}
1174
1957
 
1175
1958
 
@@ -1310,3 +2093,79 @@ async def suggest_improvements(flow: FlowSchema, results: dict) -> list[str]:
1310
2093
  # Sanitise: only strings, max 120 chars each
1311
2094
  return [str(s)[:120] for s in suggestions if s]
1312
2095
 
2096
+
2097
+ # ── Custom node code generation ────────────────────────────────────────────────
2098
+
2099
+ _NODE_CODE_SYSTEM = """\
2100
+ You are an M8Flow node code generator. Write Python code for a reusable pipeline component.
2101
+
2102
+ ⚠️ HARD RULES — any violation makes the node unparseable:
2103
+ 1. Function name MUST be run (not main, process, execute, transform — exactly run)
2104
+ 2. DataFrame input parameter MUST be named data (never df, dataframe, dataset)
2105
+ 3. Function MUST return a dict with named string keys
2106
+ 4. ALL imports go INSIDE the function body
2107
+ 5. Only allowed libraries: pandas, numpy, sklearn, scipy, math, statistics, re, json
2108
+ 6. FORBIDDEN: os, sys, subprocess, socket, requests, open(), eval(), exec(), matplotlib
2109
+
2110
+ FIELD TYPE ANNOTATIONS — these control the UI widget shown to the user:
2111
+ data input (connects from previous node) → just `data` with no type hint
2112
+ text field → `name: str = "default"`
2113
+ number field → `name: float = 1.0` or `name: int = 10`
2114
+ boolean toggle → `name: bool = True`
2115
+ column picker → `col: Annotated[str, "column"] = "target"` (needs `from typing import Annotated` inside the fn)
2116
+ file picker → `path: Annotated[str, "file"] = "data.csv"` (needs `from typing import Annotated` inside the fn)
2117
+
2118
+ RETURN DICT — keys become the node's output handles:
2119
+ Passing a DataFrame forward → always include "data": df
2120
+ Model outputs → {"model": model, "y_pred": preds}
2121
+ Metric outputs → {"accuracy": 0.95, "f1": 0.88}
2122
+ Multiple outputs are fine → {"data": df, "rows_removed": n}
2123
+
2124
+ EXAMPLE — outlier removal node:
2125
+ def run(data, multiplier: float = 1.5) -> dict:
2126
+ import pandas as pd
2127
+ import numpy as np
2128
+ df = data.copy()
2129
+ for col in df.select_dtypes(include=[np.number]).columns:
2130
+ Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
2131
+ iqr = Q3 - Q1
2132
+ df = df[~((df[col] < Q1 - multiplier * iqr) | (df[col] > Q3 + multiplier * iqr))]
2133
+ return {"data": df, "rows_removed": len(data) - len(df)}
2134
+
2135
+ EXAMPLE — feature selection node:
2136
+ def run(data, n_features: int = 10, target: str = "label") -> dict:
2137
+ import pandas as pd
2138
+ from sklearn.feature_selection import SelectKBest, f_classif
2139
+ X = data.drop(columns=[target])
2140
+ y = data[target]
2141
+ selector = SelectKBest(f_classif, k=min(n_features, X.shape[1]))
2142
+ selector.fit(X, y)
2143
+ selected = X.columns[selector.get_support()].tolist()
2144
+ return {"data": data[selected + [target]], "selected_features": selected}
2145
+
2146
+ OUTPUT: Return ONLY the raw Python code. No explanation. No markdown fences. No backticks.
2147
+ """
2148
+
2149
+
2150
+ async def generate_node_code(description: str) -> str:
2151
+ """
2152
+ Generate M8Flow-compatible Python node code from a natural language description.
2153
+ Uses the "generate" model route (fast, syntax-precise) with the node code system prompt.
2154
+ """
2155
+ messages = [
2156
+ {"role": "system", "content": _NODE_CODE_SYSTEM},
2157
+ {"role": "user", "content": f"Generate an M8Flow node that: {description}"},
2158
+ ]
2159
+ raw = await _call_openrouter(messages, task="generate", json_mode=False, timeout=60)
2160
+
2161
+ # Strip any markdown fences the model may add despite instructions
2162
+ raw = raw.strip()
2163
+ for fence in ("```python", "```"):
2164
+ if raw.startswith(fence):
2165
+ raw = raw[len(fence):]
2166
+ break
2167
+ if raw.endswith("```"):
2168
+ raw = raw[:-3]
2169
+
2170
+ return raw.strip()
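+ # Example usage (hypothetical description):
+ #   code = await generate_node_code("remove rows where more than half the columns are null")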
2171
+