m8flow 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  """LLM service — OpenRouter-powered flow generation with per-task model routing."""
2
2
  import json
3
3
  import logging
4
+ import time
4
5
  import httpx
5
6
  from contextvars import ContextVar
6
7
  from config import config
@@ -23,38 +24,102 @@ _OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
23
24
  _TIMEOUT_GENERATE = 120
24
25
  _TIMEOUT_UPDATE = 180
25
26
 
26
- # Per-task model routing (verified OpenRouter slugs no :free on paid-tier models)
27
+ # Per-task model routing — slugs verified live against OpenRouter API (May 2026)
27
28
  _MODELS = {
28
- "generate": "deepseek/deepseek-chat-v3-0324",
29
- "refine": "deepseek/deepseek-chat-v3-0324",
30
- "update": "deepseek/deepseek-chat-v3-0324",
31
- "debug": "deepseek/deepseek-r1",
32
- "heal": "deepseek/deepseek-r1",
33
- "explain": "google/gemini-2.0-flash-001",
34
- "suggest": "google/gemini-2.0-flash-001",
35
- "fallback": "meta-llama/llama-3.3-70b-instruct:free",
36
- "lastresort": "qwen/qwen3-coder:free",
29
+ # Reasoning/Architecting — best available free reasoning model
30
+ "architect": "nvidia/nemotron-3-super-120b-a12b:free",
31
+
32
+ # Core Generation/Updating — large, instruction-tuned free model
33
+ "generate": "meta-llama/llama-3.3-70b-instruct:free",
34
+ "refine": "meta-llama/llama-3.3-70b-instruct:free",
35
+ "update": "meta-llama/llama-3.3-70b-instruct:free",
36
+
37
+ # Debugging/Healing — strong reasoning for bug analysis
38
+ "debug": "nvidia/nemotron-3-super-120b-a12b:free",
39
+ "heal": "nvidia/nemotron-3-super-120b-a12b:free",
40
+
41
+ # Explaining/Suggesting — fast free model
42
+ "explain": "openai/gpt-oss-20b:free",
43
+ "suggest": "openai/gpt-oss-20b:free",
44
+
45
+ # Safety Nets — verified live fallbacks
46
+ "fallback": "google/gemma-4-31b-it:free",
47
+ "lastresort": "meta-llama/llama-3.2-3b-instruct:free",
37
48
  }
38
49
 
50
+ # ── Rate-limit cooldown cache ─────────────────────────────────────────────────
51
+ # Maps model_slug -> timestamp of last 429. Models in cooldown are skipped for
52
+ # _RATE_LIMIT_TTL seconds so we jump straight to a working model instead of
53
+ # burning time on a known-rate-limited one.
54
+ _RATE_LIMIT_CACHE: dict[str, float] = {}
55
+ _RATE_LIMIT_TTL = 90 # seconds
56
+
57
+
58
+ def _is_rate_limited(model: str) -> bool:
59
+ """Return True if this model returned 429 within the last _RATE_LIMIT_TTL seconds."""
60
+ ts = _RATE_LIMIT_CACHE.get(model)
61
+ if ts is None:
62
+ return False
63
+ if time.time() - ts < _RATE_LIMIT_TTL:
64
+ return True
65
+ del _RATE_LIMIT_CACHE[model] # TTL expired — clear and allow retry
66
+ return False
67
+
68
+
69
+ def _mark_rate_limited(model: str) -> None:
70
+ """Record that this model returned 429 right now."""
71
+ _RATE_LIMIT_CACHE[model] = time.time()
72
+ logger.debug("Rate-limit cooldown started for %s (%ds)", model, _RATE_LIMIT_TTL)
73
+
39
74
 
40
75
  # ── Catalogue helpers ──────────────────────────────────────────────────────────
41
76
 
42
- def _template_catalogue() -> str:
43
- """Detailed catalogue: id, category, inputs, outputs."""
44
- from core.parser import parse_node_code
45
- lines: list[str] = []
46
- for t in TEMPLATES:
47
- schema = parse_node_code(t["code"])
48
- data_ins = [i.name for i in schema.inputs if i.kind == "data"]
49
- field_ins = [f"{i.name}:{i.kind}={i.default}" for i in schema.inputs if i.kind != "data"]
50
- outs = [o.name for o in schema.outputs]
51
- lines.append(
52
- f" {t['id']} [{t['category']}] \"{t['label']}\"\n"
53
- f" data-inputs : {data_ins or '(none)'}\n"
54
- f" fields : {field_ins or '(none)'}\n"
55
- f" outputs : {outs or '(none)'}"
77
+ _CATALOGUE_CACHE: str | None = None # built-in templates, parsed once
78
+
79
+ def _template_catalogue(custom_components: list[dict] | None = None) -> str:
80
+ """Detailed catalogue: id, category, inputs, outputs.
81
+
82
+ Built-in templates are parsed ONCE and cached — they never change at runtime.
83
+ Custom components are appended fresh each call because they can vary per session.
84
+ """
85
+ global _CATALOGUE_CACHE
86
+
87
+ # ── Built-ins: parse once, then serve from cache ──────────────────────────
88
+ if _CATALOGUE_CACHE is None:
89
+ from core.parser import parse_node_code
90
+ lines: list[str] = []
91
+ for t in TEMPLATES:
92
+ try:
93
+ schema = parse_node_code(t["code"])
94
+ data_ins = [i.name for i in schema.inputs if i.kind == "data"]
95
+ field_ins = [f"{i.name}:{i.kind}={i.default}" for i in schema.inputs if i.kind != "data"]
96
+ outs = [o.name for o in schema.outputs]
97
+ except Exception:
98
+ data_ins, field_ins, outs = [], [], []
99
+ lines.append(
100
+ f"{t['id']} [{t['category']}]\n"
101
+ f" inputs : {data_ins or '(none)'} fields: {field_ins or '(none)'}\n"
102
+ f" outputs: {outs or '(none)'}"
103
+ )
104
+ _CATALOGUE_CACHE = "\n".join(lines)
105
+ logger.debug("Template catalogue cached (%d templates)", len(TEMPLATES))
106
+
107
+ if not custom_components:
108
+ return _CATALOGUE_CACHE
109
+
110
+ # ── Custom nodes: always fresh ────────────────────────────────────────────
111
+ custom_lines = ["\n=== USER CUSTOM COMPONENTS (Preferred if applicable) ==="]
112
+ for c in custom_components:
113
+ schema = c.get("schema", {})
114
+ data_ins = [i["name"] for i in schema.get("inputs", []) if i.get("kind") == "data"]
115
+ field_ins = [f"{i['name']}:{i.get('kind')}={i.get('default')}" for i in schema.get("inputs", []) if i.get("kind") != "data"]
116
+ outs = [o["name"] for o in schema.get("outputs", [])]
117
+ custom_lines.append(
118
+ f"{c.get('id')} [Custom] \"{c.get('label')}\"\n"
119
+ f" inputs : {data_ins or '(none)'} fields: {field_ins or '(none)'}\n"
120
+ f" outputs: {outs or '(none)'}"
56
121
  )
57
- return "\n".join(lines)
122
+ return _CATALOGUE_CACHE + "\n".join(custom_lines)
58
123
 
59
124
 
60
125
  def _allowed_type_ids() -> set[str]:
@@ -65,6 +130,366 @@ def _allowed_type_ids() -> set[str]:
65
130
  return ids
66
131
 
67
132
 
133
+ # ── Pre-flight analysis ────────────────────────────────────────────────────────
134
+
135
+ def _determine_pre_flight(profile: dict, prompt: str, context: str | None, csv_path: str | None = None) -> dict:
136
+ """
137
+ Deterministic data-driven analysis computed BEFORE any LLM call.
138
+
139
+ Uses pandas to analyse the ACTUAL data (not just regex on the prompt), so the
140
+ LLM receives ground-truth facts — not guesses — about the task type and
141
+ preprocessing requirements.
142
+ """
143
+ import re
144
+ import pandas as pd
145
+ import numpy as np
146
+
147
+ dtypes = profile.get("dtypes", {})
148
+ missing = profile.get("missing", {})
149
+ numeric_summary = profile.get("numeric_summary", {})
150
+ categorical_summary = profile.get("categorical_summary", {})
151
+ shape = profile.get("shape", [0, 0])
152
+
153
+ # ── Step 1: Find the target column ────────────────────────────────────────
154
+ # Priority: explicit mention in prompt/context > heuristic column names.
155
+
156
+ target_hint: str | None = None
157
+ search_text = (prompt + " " + (context or "")).lower()
158
+
159
+ # (a) Regex extraction from user text
160
+ for pattern in [
161
+ r"predict\s+(?:the\s+)?['\"]?(\w+)['\"]?",
162
+ r"target\s+(?:(?:column|variable|col)\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
163
+ r"classif(?:y|ication)\s+(?:the\s+)?['\"]?(\w+)['\"]?",
164
+ r"label\s+(?:column\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
165
+ r"output\s+(?:column\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
166
+ r"y\s*=\s*['\"]?(\w+)['\"]?",
167
+ ]:
168
+ m = re.search(pattern, search_text)
169
+ if m:
170
+ candidate = m.group(1)
171
+ # Validate the candidate actually exists in the data
172
+ if candidate in dtypes or candidate in numeric_summary or candidate in categorical_summary:
173
+ target_hint = candidate
174
+ break
175
+
176
+ # (b) If still unknown, use heuristic column-name scoring on the real columns
177
+ if target_hint is None:
178
+ TARGET_KEYWORDS = [
179
+ "target", "label", "class", "output", "y", "result",
180
+ "outcome", "diagnosis", "status", "type", "category",
181
+ "survived", "churn", "default", "fraud", "price",
182
+ "salary", "value", "score", "sales", "revenue", "cost",
183
+ "medv", "charges", "fare",
184
+ ]
185
+ all_columns = list(dtypes.keys())
186
+ best_col: str | None = None
187
+ best_score = -1
188
+
189
+ for col in all_columns:
190
+ col_lower = col.lower().replace("_", " ").replace("-", " ")
191
+ score = 0
192
+
193
+ # Keyword match against column name
194
+ for kw in TARGET_KEYWORDS:
195
+ if kw in col_lower:
196
+ score += 3
197
+ break
198
+ # Last column is commonly the target in many datasets
199
+ if col == all_columns[-1]:
200
+ score += 2
201
+ # Column mentioned in prompt text
202
+ if col_lower in search_text or col.lower() in search_text:
203
+ score += 4
204
+
205
+ if score > best_score:
206
+ best_score = score
207
+ best_col = col
208
+
209
+ if best_col and best_score >= 2:
210
+ target_hint = best_col
211
+
212
+ # ── Step 2: Determine problem type from ACTUAL DATA ───────────────────────
213
+ # Initialize all new keys upfront to avoid KeyErrors downstream.
214
+ problem_type = "unknown"
215
+ target_analysis: dict = {}
216
+ is_imbalanced = False
217
+ needs_outlier_removal = False
218
+
219
+ if target_hint and csv_path:
220
+ try:
221
+ df = pd.read_csv(csv_path, nrows=5000)
222
+
223
+ # ── Outlier Detection: scan all numeric feature columns ──────────
224
+ for col in df.select_dtypes(include=[np.number]).columns:
225
+ if col == target_hint:
226
+ continue
227
+ col_series = df[col].dropna()
228
+ if len(col_series) == 0:
229
+ continue
230
+ col_mean = col_series.mean()
231
+ col_max = col_series.max()
232
+ # Flag if max is >10x the mean AND mean is non-trivially positive
233
+ if col_mean > 0 and col_max > 10 * col_mean:
234
+ needs_outlier_removal = True
235
+ break # one outlier column is enough to flag the dataset
236
+
237
+ if target_hint in df.columns:
238
+ col_data = df[target_hint].dropna()
239
+ dtype = col_data.dtype
240
+
241
+ if dtype == object or str(dtype) == "category":
242
+ # String/category column → always classification
243
+ n_unique = col_data.nunique()
244
+ problem_type = "classification"
245
+ target_analysis = {
246
+ "dtype": str(dtype),
247
+ "unique_values": int(n_unique),
248
+ "sample_values": col_data.unique()[:5].tolist(),
249
+ "reasoning": f"Categorical dtype with {n_unique} unique string values → classification",
250
+ }
251
+
252
+ elif dtype == bool or (dtype == int and col_data.nunique() <= 2):
253
+ # Boolean or binary integer → classification
254
+ problem_type = "classification"
255
+ target_analysis = {
256
+ "dtype": str(dtype),
257
+ "unique_values": int(col_data.nunique()),
258
+ "sample_values": col_data.unique()[:5].tolist(),
259
+ "reasoning": "Binary (0/1 or True/False) target → classification",
260
+ }
261
+
262
+ elif np.issubdtype(dtype, np.integer):
263
+ n_unique = col_data.nunique()
264
+ n_total = len(col_data)
265
+ unique_ratio = n_unique / max(n_total, 1)
266
+ if n_unique <= 20 or unique_ratio < 0.05:
267
+ problem_type = "classification"
268
+ target_analysis = {
269
+ "dtype": str(dtype),
270
+ "unique_values": int(n_unique),
271
+ "sample_values": sorted(col_data.unique().tolist())[:10],
272
+ "reasoning": f"Integer with only {n_unique} unique values ({unique_ratio:.1%} of rows) → likely class labels → classification",
273
+ }
274
+ else:
275
+ problem_type = "regression"
276
+ target_analysis = {
277
+ "dtype": str(dtype),
278
+ "unique_values": int(n_unique),
279
+ "min": float(col_data.min()),
280
+ "max": float(col_data.max()),
281
+ "mean": float(col_data.mean()),
282
+ "reasoning": f"Integer with {n_unique} unique values (high cardinality) → continuous → regression",
283
+ }
284
+
285
+ elif np.issubdtype(dtype, np.floating):
286
+ n_unique = col_data.nunique()
287
+ problem_type = "regression"
288
+ target_analysis = {
289
+ "dtype": str(dtype),
290
+ "unique_values": int(n_unique),
291
+ "min": float(col_data.min()),
292
+ "max": float(col_data.max()),
293
+ "mean": float(col_data.mean()),
294
+ "std": float(col_data.std()),
295
+ "reasoning": f"Floating-point target with {n_unique} unique values → continuous → regression",
296
+ }
297
+
298
+ else:
299
+ # Fallback: try to convert and check cardinality
300
+ try:
301
+ as_numeric = pd.to_numeric(col_data, errors="coerce")
302
+ if as_numeric.isna().mean() < 0.1:
303
+ n_unique = as_numeric.nunique()
304
+ problem_type = "regression" if n_unique > 20 else "classification"
305
+ target_analysis = {
306
+ "dtype": str(dtype),
307
+ "unique_values": int(n_unique),
308
+ "reasoning": f"Converted to numeric; {n_unique} unique values → {'regression' if n_unique > 20 else 'classification'}",
309
+ }
310
+ else:
311
+ problem_type = "classification"
312
+ target_analysis = {
313
+ "dtype": str(dtype),
314
+ "reasoning": "Could not convert to numeric → treating as classification",
315
+ }
316
+ except Exception:
317
+ problem_type = "classification"
318
+
319
+ # ── Class Imbalance Check (classification only) ──────────────
320
+ if problem_type == "classification":
321
+ try:
322
+ class_freqs = col_data.value_counts(normalize=True)
323
+ if class_freqs.min() < 0.10:
324
+ is_imbalanced = True
325
+ except Exception:
326
+ pass
327
+
328
+ except Exception as exc:
329
+ logger.warning("Pre-flight target analysis failed: %s", exc)
330
+ # Fall back to dtype-only heuristic using profile data
331
+ if target_hint:
332
+ dtype_str = str(dtypes.get(target_hint, "")).lower()
333
+ if any(t in dtype_str for t in ("object", "category", "bool", "str")):
334
+ problem_type = "classification"
335
+ elif target_hint in categorical_summary:
336
+ problem_type = "classification" if categorical_summary[target_hint].get("unique", 99) < 15 else "regression"
337
+ elif target_hint in numeric_summary:
338
+ problem_type = "regression"
339
+
340
+ elif target_hint:
341
+ # No CSV path — fall back to profile-based heuristic
342
+ dtype_str = str(dtypes.get(target_hint, "")).lower()
343
+ if any(t in dtype_str for t in ("object", "category", "bool", "str")):
344
+ problem_type = "classification"
345
+ elif target_hint in categorical_summary:
346
+ problem_type = "classification" if categorical_summary.get(target_hint, {}).get("unique", 99) < 15 else "regression"
347
+ elif target_hint in numeric_summary:
348
+ problem_type = "regression"
349
+
350
+ # ── Step 3: Preprocessing flags ───────────────────────────────────────────
351
+ # Detect categorical columns that need encoding (exclude the target itself)
352
+ cat_cols = {c for c in categorical_summary if c != target_hint}
353
+ num_cols = {c for c in numeric_summary if c != target_hint}
354
+
355
+ missing_cols: dict[str, float] = {
356
+ col: round(info.get("pct", 0), 1)
357
+ for col, info in missing.items()
358
+ if info.get("pct", 0) > 0
359
+ }
360
+ cardinality: dict[str, int] = {
361
+ col: info.get("unique", 0)
362
+ for col, info in categorical_summary.items()
363
+ }
364
+
365
+ needs_encoding = len(cat_cols) > 0
366
+ needs_scaling = len(num_cols) >= 2
367
+ needs_imputation = len(missing_cols) > 0
368
+
369
+ # ── Step 4: Recommend specific model based on problem type + data size ────
370
+ n_rows = shape[0] if shape else 0
371
+ n_cols = shape[1] if len(shape) > 1 else 0
372
+
373
+ if problem_type == "classification":
374
+ recommended_model = "random_forest_classifier" if n_rows >= 1000 else "logistic_regression"
375
+ recommended_metric_node = "classification_report"
376
+ elif problem_type == "regression":
377
+ recommended_model = "random_forest_regressor" if n_rows >= 1000 else "linear_regression"
378
+ recommended_metric_node = "regression_metrics"
379
+ else:
380
+ recommended_model = "random_forest_classifier"
381
+ recommended_metric_node = "classification_report"
382
+
383
+ return {
384
+ "target_hint": target_hint,
385
+ "problem_type": problem_type,
386
+ "target_analysis": target_analysis,
387
+ "missing_cols": missing_cols,
388
+ "cardinality": cardinality,
389
+ "needs_encoding": needs_encoding,
390
+ "needs_scaling": needs_scaling,
391
+ "needs_imputation": needs_imputation,
392
+ "needs_outlier_removal": needs_outlier_removal,
393
+ "is_imbalanced": is_imbalanced,
394
+ "n_rows": n_rows,
395
+ "n_cols": n_cols,
396
+ "categorical_cols": sorted(cat_cols),
397
+ "numeric_cols": sorted(num_cols),
398
+ "recommended_model": recommended_model,
399
+ "recommended_metric": recommended_metric_node,
400
+ }
401
+
402
+
403
+ def _pre_flight_block(pf: dict) -> str:
404
+ """Format the pre-flight analysis as an authoritative, directive prompt block."""
405
+ analysis = pf.get("target_analysis", {})
406
+ reasoning = analysis.get("reasoning", "")
407
+ sample_vals = analysis.get("sample_values", [])
408
+
409
+ lines = [
410
+ "╔══ [PYTHON-DETERMINED ANALYSIS — treat as ABSOLUTE GROUND TRUTH] ════╗",
411
+ f" ⚠ Problem type : {pf['problem_type'].upper()} (MANDATORY)",
412
+ f" Reasoning : {reasoning or 'heuristic from dtype/cardinality'}",
413
+ f" Target column : {pf['target_hint'] or 'not specified — infer from context'}",
414
+ ]
415
+ if sample_vals:
416
+ lines.append(f" Target sample vals : {sample_vals}")
417
+ lines += [
418
+ f" Dataset size : {pf['n_rows']} rows × {pf['n_cols']} columns",
419
+ f" Missing values : {pf['missing_cols'] or 'none'}",
420
+ f" Categorical cols : {pf.get('categorical_cols') or 'none'}",
421
+ f" Numeric cols : {pf.get('numeric_cols') or 'none'}",
422
+ f" Needs encoding : {'YES — add label_encoder BEFORE train_test_split' if pf['needs_encoding'] else 'no'}",
423
+ f" Needs scaling : {'YES — add standard_scaler AFTER train_test_split' if pf['needs_scaling'] else 'no'}",
424
+ f" Needs imputation : {'YES — add data_cleaning BEFORE split' if pf['needs_imputation'] else 'no'}",
425
+ ]
426
+ # Conditional directives for outlier removal
427
+ if pf.get("needs_outlier_removal"):
428
+ lines.append(
429
+ " ⚠ Outlier columns : YES — a numeric feature has max > 10× its mean. "
430
+ "Add an outlier_removal node BEFORE train_test_split."
431
+ )
432
+ # Conditional directive for class imbalance
433
+ if pf.get("is_imbalanced"):
434
+ lines.append(
435
+ " ⚠ Class imbalance : YES — minority class < 10% of data. "
436
+ "Set class_weight='balanced' on the model node config."
437
+ )
438
+ lines += [
439
+ f" ⚠ Model Selection : USE {pf.get('recommended_model', 'unknown').upper()} ONLY",
440
+ f" ✅ Metric node : {pf.get('recommended_metric', 'unknown')}",
441
+ "╚════════════════════════════════════════════════════════════════════════╝",
442
+ ]
443
+ return "\n".join(lines)
444
+
445
+
446
+ # ── Architect (R1 planning) prompt ────────────────────────────────────────────
447
+
448
+ _ARCHITECT_PROMPT = """\
449
+ You are a senior ML engineer performing pre-build technical architecture planning.
450
+ You will receive a dataset profile, a deterministic pre-flight analysis, and the user's request.
451
+
452
+ OUTPUT: Concise markdown only. No JSON. No code blocks. Under 250 words.
453
+
454
+ Structure your response as:
455
+
456
+ ## Problem Type
457
+ State classification or regression with one-sentence justification.
458
+
459
+ ## Data Quality Plan
460
+ List each issue (missing values, categorical columns, dtype mismatches) and the exact
461
+ preprocessing step needed for it. Reference actual column names.
462
+
463
+ ## Pipeline Sequence
464
+ Ordered list of node types (e.g. csv_loader → label_encoder → train_test_split →
465
+ standard_scaler → random_forest_classifier → classification_report).
466
+
467
+ ## Model Rationale
468
+ Why this model fits the problem. If the dataset is large (>10k rows), prefer tree-based
469
+ models. If many numeric features, recommend scaling. If class imbalance suspected, note it.
470
+
471
+ ## Critical Warnings
472
+ Any data issues the pipeline MUST handle. Be blunt about failure modes.
473
+
474
+ Do NOT output JSON. Do NOT write code. Be specific — use actual column names from the profile.
475
+ """
476
+
477
+
478
+ def _build_architect_messages(
479
+ prompt: str, profile_text: str, pf: dict
480
+ ) -> list[dict]:
481
+ user = (
482
+ f"== DATASET PROFILE ==\n{profile_text}\n\n"
483
+ f"== DETERMINISTIC PRE-FLIGHT ==\n{_pre_flight_block(pf)}\n\n"
484
+ f"== USER REQUEST ==\n{prompt}\n\n"
485
+ "Provide your technical pipeline architecture plan."
486
+ )
487
+ return [
488
+ {"role": "system", "content": _ARCHITECT_PROMPT},
489
+ {"role": "user", "content": user},
490
+ ]
491
+
492
+
68
493
  # ── System prompts ─────────────────────────────────────────────────────────────
69
494
 
70
495
  _SYSTEM_PROMPT = """\
@@ -125,9 +550,13 @@ Use this structure to decide WHERE to modify or improve.
125
550
  ═══════════════════════════════════════
126
551
  STRICT RULES:
127
552
 
128
- 1. MINIMALISM FIRST
129
- - Fewer nodes = better
130
- - Do NOT duplicate functionality
553
+ 1. DATA INTEGRITY & ACCURACY FIRST — MINIMALISM SECOND
554
+ - A complete, correct pipeline beats a minimal, broken one.
555
+ - REQUIRED: label_encoder for any object/category column BEFORE train_test_split.
556
+ - REQUIRED: standard_scaler for distance-based models (SVM, KNN, LogisticRegression).
557
+ - REQUIRED: data_cleaning node when ANY column has missing values.
558
+ - THEN minimize: never add a step the data does not require.
559
+ - A pipeline that skips necessary preprocessing is a FAILURE regardless of node count.
131
560
 
132
561
  2. USE TEMPLATES FIRST
133
562
  - Only use customNode if NO template exists
@@ -141,6 +570,7 @@ STRICT RULES:
141
570
  - sourceHandle MUST exist in source outputs
142
571
  - targetHandle MUST match input param
143
572
  - metric nodes MUST receive: y_pred + y_test
573
+ - SCALER RULE: If using standard_scaler or min_max_scaler after train_test_split, you MUST connect all 4 split outputs (X_train, X_test, y_train, y_test) to the scaler, and then connect all 4 scaler outputs to the model. Do not skip y_train/y_test.
144
574
 
145
575
  5. DATA RULES
146
576
  - If categorical columns exist → include label_encoder BEFORE split
@@ -208,7 +638,7 @@ If a node has an error:
208
638
  - Include all imports inside every code block
209
639
  - Return dict keys MUST match sourceHandles on outgoing edges
210
640
  - NEVER import matplotlib or seaborn
211
- - Custom nodes only when no template covers the operation
641
+ - CRITICAL: If the user requests an operation/model NOT in the catalogue (e.g. RobustScaler), DO NOT refuse. Generate it dynamically as a `customNode` with the full Python `code` starting with `# ✨ AI GENERATED`. Explicitly mention this custom generation in the `summary`.
212
642
 
213
643
  ══ AVAILABLE COMPONENTS ════════════════════════════════════════════
214
644
  {catalogue}
@@ -237,22 +667,117 @@ Model nodes support these config fields — no new node needed:
237
667
  "type": "customNode",
238
668
  "position": {{"x": 560, "y": 200}},
239
669
  "data": {{
240
- "label": "Descriptive Name",
670
+ "label": "Robust Scaler (Custom)",
241
671
  "templateId": "customNode",
242
- "code": "import pandas as pd\ndef run(data: pd.DataFrame) -> dict:\n return {{\"result\": data}}"
672
+ "code": "import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None) -> dict:\\n # ...\\n return {{\"X_train\": X_train, \"X_test\": X_test}}"
243
673
  }}
244
674
  }}
245
675
 
246
- - Function name MUST be `run`
247
- - DataFrame param MUST be named `data`
248
- - MUST return a dict
676
+ - Your Python string in `code` MUST define a function named EXACTLY `def run(...) -> dict:`
677
+ - The DataFrame param MUST be named `data` (if taking a whole dataset)
678
+ - MUST return a dict containing the output handles
249
679
  - Only import: numpy, pandas, scipy, sklearn, math, statistics, re, json
250
680
  - NO os, sys, subprocess, socket, requests, open(), eval(), exec()
251
681
  - Prefer templates first — custom nodes are last resort only
252
682
 
683
+ ══ DYNAMIC VISUALIZATION ══════════════════════════════════════════════
684
+ You can create custom visualizations NOT in the catalogue. The frontend
685
+ detects charts by SHAPE, not by key name. Return any of these shapes and
686
+ the UI will render it automatically — NO new React code needed:
687
+
688
+ Series (bar chart / ranked list):
689
+ {{"my_key": {{"labels": [...], "counts": [...], "title": "..."}}}}
690
+
691
+ Plot (scatter or line chart):
692
+ {{"my_key": {{"x": [...], "y": [...], "title": "...", "x_label": "...", "y_label": "..."}}}}
693
+
694
+ Grid (heatmap / matrix):
695
+ {{"my_key": {{"z": [[...]], "x": [...], "y": [...], "title": "..."}}}}
696
+
697
+ Example — null percentage bar chart:
698
+ return {{
699
+ "null_pct_chart": {{
700
+ "labels": list(null_pcts.keys()),
701
+ "counts": list(null_pcts.values()),
702
+ "title": "Missing Values (%) per Column"
703
+ }}
704
+ }}
705
+
706
+ When the user asks for any kind of visualization (e.g. "show me a chart
707
+ of X", "visualize the distribution of Y"), you MUST generate a customNode
708
+ that returns a dict with one of the shapes above. NEVER refuse — if no
709
+ template covers it, invent the chart with the shape protocol.
710
+
711
+ ⚠ UNSUPERVISED LEARNING (t-SNE / PCA / UMAP / KMeans): When generating
712
+ any dimensionality reduction or clustering node, you MUST return a
713
+ `labels` array alongside `x` and `y` so the frontend can color-code
714
+ clusters automatically. Example:
715
+
716
+ return {{
717
+ "tsne_plot": {{
718
+ "x": X_2d[:, 0].tolist(),
719
+ "y": X_2d[:, 1].tolist(),
720
+ "labels": [str(c) for c in cluster_labels], # ← REQUIRED
721
+ "title": "t-SNE Cluster Visualization",
722
+ "x_label": "Dim 1",
723
+ "y_label": "Dim 2"
724
+ }}
725
+ }}
726
+
253
727
  ══ OUTPUT ═══════════════════════════════════════════════════════════
254
728
  Return ONLY:
255
729
  {{"nodes": [...], "edges": [...], "summary": "<one sentence conversational explanation>"}}
730
+
731
+ ⚠️ DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
732
+
733
+ ══ GOLD STANDARD EXAMPLES — you MUST match these handle names EXACTLY ═══════
734
+ ⚠ CRITICAL: The sourceHandle and targetHandle values below (data, X_train, X_test,
735
+ y_train, y_test, y_pred) are the ONLY valid handle names. Do NOT invent new ones.
736
+ Your edges MUST use these exact strings — any deviation will cause a runtime failure.
737
+
738
+ Example A — CLASSIFICATION (categorical cols + scaling needed):
739
+ {{"nodes":[
740
+ {{"id":"n1","type":"csv_loader","position":{{"x":0,"y":200}},"data":{{"config":{{"file_path":"data.csv"}}}}}},
741
+ {{"id":"n2","type":"label_encoder","position":{{"x":280,"y":200}},"data":{{"config":{{"columns":"sex,embarked"}}}}}},
742
+ {{"id":"n3","type":"train_test_split","position":{{"x":560,"y":200}},"data":{{"config":{{"target_column":"survived","test_size":0.2}}}}}},
743
+ {{"id":"n4","type":"standard_scaler","position":{{"x":840,"y":200}},"data":{{}}}},
744
+ {{"id":"n5","type":"random_forest_classifier","position":{{"x":1120,"y":200}},"data":{{"config":{{"n_estimators":100}}}}}},
745
+ {{"id":"n6","type":"classification_report","position":{{"x":1400,"y":200}},"data":{{}}}}
746
+ ],"edges":[
747
+ {{"id":"e1","source":"n1","sourceHandle":"data","target":"n2","targetHandle":"data"}},
748
+ {{"id":"e2","source":"n2","sourceHandle":"data","target":"n3","targetHandle":"data"}},
749
+ {{"id":"e3","source":"n3","sourceHandle":"X_train","target":"n4","targetHandle":"X_train"}},
750
+ {{"id":"e4","source":"n3","sourceHandle":"X_test","target":"n4","targetHandle":"X_test"}},
751
+ {{"id":"e5","source":"n3","sourceHandle":"y_train","target":"n4","targetHandle":"y_train"}},
752
+ {{"id":"e6","source":"n3","sourceHandle":"y_test","target":"n4","targetHandle":"y_test"}},
753
+ {{"id":"e7","source":"n4","sourceHandle":"X_train","target":"n5","targetHandle":"X_train"}},
754
+ {{"id":"e8","source":"n4","sourceHandle":"X_test","target":"n5","targetHandle":"X_test"}},
755
+ {{"id":"e9","source":"n4","sourceHandle":"y_train","target":"n5","targetHandle":"y_train"}},
756
+ {{"id":"e10","source":"n5","sourceHandle":"y_pred","target":"n6","targetHandle":"y_pred"}},
757
+ {{"id":"e11","source":"n4","sourceHandle":"y_test","target":"n6","targetHandle":"y_test"}}
758
+ ],"summary":"Classification pipeline with encoding, scaling, and Random Forest."}}
759
+
760
+ Example B — REGRESSION (missing values + continuous target):
761
+ {{"nodes":[
762
+ {{"id":"n1","type":"csv_loader","position":{{"x":0,"y":200}},"data":{{"config":{{"file_path":"houses.csv"}}}}}},
763
+ {{"id":"n2","type":"data_cleaning","position":{{"x":280,"y":200}},"data":{{"config":{{"strategy":"fill"}}}}}},
764
+ {{"id":"n3","type":"train_test_split","position":{{"x":560,"y":200}},"data":{{"config":{{"target_column":"price","test_size":0.2}}}}}},
765
+ {{"id":"n4","type":"standard_scaler","position":{{"x":840,"y":200}},"data":{{}}}},
766
+ {{"id":"n5","type":"random_forest_regressor","position":{{"x":1120,"y":200}},"data":{{"config":{{"n_estimators":200}}}}}},
767
+ {{"id":"n6","type":"regression_metrics","position":{{"x":1400,"y":200}},"data":{{}}}}
768
+ ],"edges":[
769
+ {{"id":"e1","source":"n1","sourceHandle":"data","target":"n2","targetHandle":"data"}},
770
+ {{"id":"e2","source":"n2","sourceHandle":"data","target":"n3","targetHandle":"data"}},
771
+ {{"id":"e3","source":"n3","sourceHandle":"X_train","target":"n4","targetHandle":"X_train"}},
772
+ {{"id":"e4","source":"n3","sourceHandle":"X_test","target":"n4","targetHandle":"X_test"}},
773
+ {{"id":"e5","source":"n3","sourceHandle":"y_train","target":"n4","targetHandle":"y_train"}},
774
+ {{"id":"e6","source":"n3","sourceHandle":"y_test","target":"n4","targetHandle":"y_test"}},
775
+ {{"id":"e7","source":"n4","sourceHandle":"X_train","target":"n5","targetHandle":"X_train"}},
776
+ {{"id":"e8","source":"n4","sourceHandle":"X_test","target":"n5","targetHandle":"X_test"}},
777
+ {{"id":"e9","source":"n4","sourceHandle":"y_train","target":"n5","targetHandle":"y_train"}},
778
+ {{"id":"e10","source":"n5","sourceHandle":"y_pred","target":"n6","targetHandle":"y_pred"}},
779
+ {{"id":"e11","source":"n4","sourceHandle":"y_test","target":"n6","targetHandle":"y_test"}}
780
+ ],"summary":"Regression pipeline with cleaning, scaling, and Random Forest."}}
256
781
  """
257
782
 
258
783
  _UPDATE_PROMPT = """\
@@ -264,6 +789,16 @@ You are M8Flow's AI pipeline surgeon. Modify the pipeline with the MINIMUM chang
264
789
  3. ALWAYS respond in ENGLISH. Never use any other language.
265
790
  4. Do NOT truncate the JSON — it must be a complete, valid object.
266
791
 
792
+ 🔑 TOKEN CONSERVATION — MANDATORY (free-tier models have strict output limits):
793
+ A. TEMPLATE SWAPS: If you are changing a node's operation to one that already has
794
+ a matching Template ID (e.g. swapping from linear_regression to
795
+ random_forest_regressor), change only the "type" field on that node.
796
+ NEVER re-emit the full Python "code" block when a Template already covers it.
797
+ Template nodes are resolved by the runtime — sending their code wastes tokens.
798
+ B. UNCHANGED NODES: Nodes marked ✓ in the status list must appear in your output
799
+ but with their "data.code" field set to null (omitted). Only include code for
800
+ nodes you are actively modifying or adding as custom (non-template) nodes.
801
+
267
802
  Output ONLY the complete updated flow JSON — no markdown, no explanation.
268
803
 
269
804
  ══ DECISION HIERARCHY (follow in order, stop at first match) ════════
@@ -279,9 +814,8 @@ Output ONLY the complete updated flow JSON — no markdown, no explanation.
279
814
 
280
815
  4. Does this genuinely require a brand-new node that adds functionality
281
816
  not available anywhere in the graph?
282
- YES → add exactly ONE new node, connected minimally. Nothing else.
283
-
284
- If none apply, state the limitation in a comment field — do not bloat the graph.
817
+ YES → add exactly ONE new node, connected minimally.
818
+ CRITICAL: If the requested operation (e.g., RobustScaler) is NOT in the catalogue, DO NOT refuse the request. Instead, YOU MUST generate it dynamically as a `customNode`. Provide the complete Python implementation in the `code` field, starting with the comment `# ✨ AI GENERATED`. In your `summary`, explicitly state that you generated a custom node for this functionality.
285
819
 
286
820
  ══ CURRENT PIPELINE ═════════════════════════════════════════════════
287
821
  {current_flow}
@@ -303,6 +837,52 @@ All model nodes support these fields in their config — no new node needed:
303
837
  "better accuracy" → tune hyperparams, or swap model type — no extra nodes
304
838
  "use k-fold" → set cross_validation=true, cv_folds=k on existing model
305
839
 
840
+ ══ CUSTOM NODE FORMAT (only if NO template covers it) ══════════════
841
+ {{
842
+ "id": "node_custom_1",
843
+ "type": "customNode",
844
+ "position": {{"x": 560, "y": 200}},
845
+ "data": {{
846
+ "label": "Robust Scaler (Custom)",
847
+ "templateId": "customNode",
848
+ "code": "import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None) -> dict:\\n # ...\\n return {{\"X_train\": X_train, \"X_test\": X_test}}"
849
+ }}
850
+ }}
851
+
852
+ - Your Python string in `code` MUST define a function named EXACTLY `def run(...) -> dict:`
853
+ - The DataFrame param MUST be named `data` (if taking a whole dataset)
854
+ - MUST return a dict containing the output handles
855
+ - Only import: numpy, pandas, scipy, sklearn, math, statistics, re, json
856
+ - NO os, sys, subprocess, socket, requests, open(), eval(), exec()
857
+
858
+ ══ DYNAMIC VISUALIZATION ══════════════════════════════════════════════
859
+ You can create custom visualizations NOT in the catalogue. The frontend
860
+ detects charts by SHAPE, not by key name. Return any of these shapes and
861
+ the UI will render it automatically — NO new React code needed:
862
+
863
+ Series (bar chart / ranked list):
864
+ {{"my_key": {{"labels": [...], "counts": [...], "title": "..."}}}}
865
+
866
+ Plot (scatter or line chart):
867
+ {{"my_key": {{"x": [...], "y": [...], "title": "...", "x_label": "...", "y_label": "..."}}}}
868
+
869
+ Grid (heatmap / matrix):
870
+ {{"my_key": {{"z": [[...]], "x": [...], "y": [...], "title": "..."}}}}
871
+
872
+ Example — null percentage bar chart:
873
+ return {{
874
+ "null_pct_chart": {{
875
+ "labels": list(null_pcts.keys()),
876
+ "counts": list(null_pcts.values()),
877
+ "title": "Missing Values (%) per Column"
878
+ }}
879
+ }}
880
+
881
+ When the user asks for any kind of visualization (e.g. "show me a chart
882
+ of X", "visualize the distribution of Y"), you MUST generate a customNode
883
+ that returns a dict with one of the shapes above. NEVER refuse — if no
884
+ template covers it, invent the chart with the shape protocol.
885
+
306
886
  ══ SURGICAL PRESERVATION RULES ══════════════════════════════════════
307
887
  - Every node marked ✓ or ○ must appear in the output VERBATIM
308
888
  (same id, type, position, code, values — character for character)
@@ -320,6 +900,8 @@ All model nodes support these fields in their config — no new node needed:
320
900
  ══ OUTPUT ═══════════════════════════════════════════════════════════
321
901
  Return ONLY:
322
902
  {{"nodes": [...], "edges": [...], "summary": "<one sentence conversational explanation>"}}
903
+
904
+ ⚠️ DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
323
905
  """
324
906
 
325
907
 
@@ -333,8 +915,17 @@ def _extract_file_path(context: str) -> str | None:
333
915
  return None
334
916
 
335
917
 
336
- def _build_generate_messages(prompt: str, context: str | None) -> list[dict]:
337
- system = _SYSTEM_PROMPT.format(catalogue=_template_catalogue())
918
+ def _build_generate_messages(
919
+ prompt: str,
920
+ context: str | None,
921
+ pre_flight: dict | None = None,
922
+ architect_plan: str | None = None,
923
+ custom_components: list[dict] | None = None,
924
+ ) -> list[dict]:
925
+ system = _SYSTEM_PROMPT.format(catalogue=_template_catalogue(custom_components))
926
+
927
+ profile_block = ""
928
+ path_hint = ""
338
929
 
339
930
  if context:
340
931
  fp = _extract_file_path(context)
@@ -343,13 +934,10 @@ def _build_generate_messages(prompt: str, context: str | None) -> list[dict]:
343
934
  f'Default value for csv_loader → file_path: Annotated[str,"file"] = "{fp}"\n'
344
935
  if fp else ""
345
936
  )
346
-
347
- # ── Enrich context with data profile if a file path is present ──
348
- profile_block = ""
349
937
  if fp:
350
938
  try:
351
939
  import pandas as pd
352
- df = pd.read_csv(fp, nrows=5000) # sample for speed
940
+ df = pd.read_csv(fp, nrows=5000)
353
941
  profile = profile_dataframe(df)
354
942
  profile_block = (
355
943
  "\n== Dataset Summary (auto-profiled) ==\n"
@@ -359,15 +947,29 @@ def _build_generate_messages(prompt: str, context: str | None) -> list[dict]:
359
947
  except Exception as exc:
360
948
  logger.warning("data_profiler skipped: %s", exc)
361
949
 
362
- user = (
363
- f"== DATASET CONTEXT ==\n"
364
- f"{profile_block}"
365
- f"{path_hint}\n"
366
- f"== REQUEST ==\n"
367
- f"{prompt}"
950
+ # ── Inject deterministic pre-flight analysis ──────────────────────────
951
+ pre_flight_block = ""
952
+ if pre_flight:
953
+ pre_flight_block = "\n" + _pre_flight_block(pre_flight) + "\n"
954
+
955
+ # ── Inject architect plan ─────────────────────────────────────────────
956
+ architect_block = ""
957
+ if architect_plan and architect_plan.strip():
958
+ architect_block = (
959
+ "\n== EXPERT ARCHITECTURE PLAN (follow this closely) ==\n"
960
+ + architect_plan.strip()
961
+ + "\n"
368
962
  )
369
- else:
370
- user = prompt
963
+
964
+ user = (
965
+ f"== DATASET CONTEXT ==\n"
966
+ f"{profile_block}"
967
+ f"{path_hint}"
968
+ f"{pre_flight_block}"
969
+ f"{architect_block}"
970
+ f"\n== REQUEST ==\n"
971
+ f"{prompt}"
972
+ )
371
973
 
372
974
  return [{"role": "system", "content": system}, {"role": "user", "content": user}]
373
975
 
@@ -419,6 +1021,10 @@ def _slim_flow(flow: FlowSchema) -> dict:
419
1021
  except Exception:
420
1022
  pass
421
1023
 
1024
+ is_template = node_type in {t["id"] for t in TEMPLATES}
1025
+ # Only send code for truly custom nodes; never for templates.
1026
+ code_str = None if is_template else (data.get("code") or "")[:800] or None
1027
+
422
1028
  slim_nodes.append({
423
1029
  "id": node.get("id"),
424
1030
  "type": node_type,
@@ -426,7 +1032,7 @@ def _slim_flow(flow: FlowSchema) -> dict:
426
1032
  "data": {
427
1033
  "label": data.get("label"),
428
1034
  "templateId": data.get("templateId"),
429
- "code": (data.get("code") or "")[:800] or None,
1035
+ "code": code_str,
430
1036
  "values": data.get("values"),
431
1037
  # Explicit handle lists — LLM MUST use these for edge sourceHandle/targetHandle
432
1038
  "available_outputs": outputs,
@@ -436,10 +1042,15 @@ def _slim_flow(flow: FlowSchema) -> dict:
436
1042
  return {"nodes": slim_nodes, "edges": flow.edges}
437
1043
 
438
1044
 
439
- def _build_update_messages(prompt: str, current_flow: FlowSchema, context: str | None) -> list[dict]:
1045
+ def _build_update_messages(
1046
+ prompt: str,
1047
+ current_flow: FlowSchema,
1048
+ context: str | None,
1049
+ custom_components: list[dict] | None = None,
1050
+ ) -> list[dict]:
440
1051
  slim = _slim_flow(current_flow)
441
1052
  node_status = _node_status_summary(current_flow)
442
- catalogue = _template_catalogue()
1053
+ catalogue = _template_catalogue(custom_components)
443
1054
 
444
1055
  system = _UPDATE_PROMPT.format(
445
1056
  current_flow=json.dumps(slim, indent=2),
@@ -493,7 +1104,8 @@ async def _call_openrouter(
493
1104
  ) -> str:
494
1105
  """
495
1106
  Route to the right model via OpenRouter based on task type.
496
- Falls back through LLaMA Mistral 7B on failure.
1107
+ Falls back through the full pool of live free models on rate-limit or error.
1108
+ Rate-limited models are skipped for _RATE_LIMIT_TTL seconds to avoid wasted calls.
497
1109
  """
498
1110
  effective_key = _get_api_key()
499
1111
  if not effective_key:
@@ -504,7 +1116,37 @@ async def _call_openrouter(
504
1116
  )
505
1117
 
506
1118
  model = _MODELS.get(task, _MODELS["generate"])
507
- fallback_chain = [model, _MODELS["fallback"], _MODELS["lastresort"]]
1119
+
1120
+ # Full pool of verified-live free models (May 2026).
1121
+ # Ordered by observed reliability: nemotron first (proven to succeed when llama 429s).
1122
+ _FREE_POOL = [
1123
+ "nvidia/nemotron-3-super-120b-a12b:free", # proven to work
1124
+ "openai/gpt-oss-120b:free",
1125
+ "openai/gpt-oss-20b:free",
1126
+ "nousresearch/hermes-3-llama-3.1-405b:free",
1127
+ "meta-llama/llama-3.3-70b-instruct:free",
1128
+ "google/gemma-4-31b-it:free",
1129
+ "google/gemma-4-26b-a4b-it:free",
1130
+ "nvidia/nemotron-3-nano-30b-a3b:free",
1131
+ "nvidia/nemotron-nano-9b-v2:free",
1132
+ "meta-llama/llama-3.2-3b-instruct:free",
1133
+ "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
1134
+ "liquid/lfm-2.5-1.2b-instruct:free",
1135
+ ]
1136
+
1137
+ # Build chain: primary model first, then full pool (deduped, order preserved)
1138
+ seen: set[str] = set()
1139
+ full_chain: list[str] = []
1140
+ for m in [model, _MODELS["fallback"], _MODELS["lastresort"]] + _FREE_POOL:
1141
+ if m not in seen:
1142
+ seen.add(m)
1143
+ full_chain.append(m)
1144
+
1145
+ # Skip models in cooldown — place them at the end so they still get a chance
1146
+ # if everything else fails (cooldown may have expired by then)
1147
+ ready = [m for m in full_chain if not _is_rate_limited(m)]
1148
+ cooling = [m for m in full_chain if _is_rate_limited(m)]
1149
+ fallback_chain = ready + cooling # try fresh models first
508
1150
 
509
1151
  headers = {
510
1152
  "Authorization": f"Bearer {effective_key}",
@@ -514,35 +1156,46 @@ async def _call_openrouter(
514
1156
  }
515
1157
 
516
1158
  last_exc: Exception | None = None
517
- for attempt_model in fallback_chain:
518
- # Do NOT send response_format — not all OpenRouter models support json_object mode.
519
- # JSON is enforced through the system prompt instead.
520
- # max_tokens prevents truncated responses that produce partial/invalid JSON.
521
- body: dict = {
522
- "model": attempt_model,
523
- "messages": messages,
524
- "max_tokens": 8192,
525
- }
1159
+ async with httpx.AsyncClient(timeout=timeout) as client:
1160
+ for attempt_model in fallback_chain:
1161
+ body: dict = {
1162
+ "model": attempt_model,
1163
+ "messages": messages,
1164
+ "max_tokens": 8192,
1165
+ }
526
1166
 
527
- try:
528
- async with httpx.AsyncClient(timeout=timeout) as client:
1167
+ try:
529
1168
  response = await client.post(_OPENROUTER_URL, headers=headers, json=body)
530
1169
 
531
- if response.status_code == 429:
532
- raise RuntimeError(f"Rate limited on {attempt_model}")
533
- if response.status_code == 401:
534
- raise RuntimeError("Invalid OpenRouter API key")
535
- response.raise_for_status()
536
-
537
- content = response.json()["choices"][0]["message"]["content"]
538
- if attempt_model != model:
539
- logger.info("OpenRouter fallback: used %s instead of %s", attempt_model, model)
540
- return content or ""
541
-
542
- except Exception as exc:
543
- logger.warning("OpenRouter model %s failed: %s", attempt_model, exc)
544
- last_exc = exc
545
- continue
1170
+ if response.status_code == 429:
1171
+ _mark_rate_limited(attempt_model)
1172
+ raise RuntimeError(f"Rate limited on {attempt_model}")
1173
+ if response.status_code == 401:
1174
+ raise RuntimeError("Invalid OpenRouter API key")
1175
+ response.raise_for_status()
1176
+
1177
+ content = response.json()["choices"][0]["message"]["content"]
1178
+ if attempt_model != model:
1179
+ logger.info("OpenRouter fallback: used %s instead of %s", attempt_model, model)
1180
+ return content or ""
1181
+
1182
+ except httpx.ConnectError as exc:
1183
+ # If we cannot resolve DNS or connect to the host, no fallback will work.
1184
+ logger.error("Network connection to OpenRouter failed: %s", exc)
1185
+ raise RuntimeError("Could not connect to OpenRouter (Network/DNS error). Please check your internet connection.")
1186
+ except RuntimeError as exc:
1187
+ if "Invalid OpenRouter API key" in str(exc):
1188
+ raise # Don't retry — wrong key won't fix itself
1189
+ logger.warning("OpenRouter model %s failed: %s", attempt_model, exc)
1190
+ last_exc = exc
1191
+ continue
1192
+ except Exception as exc:
1193
+ if "getaddrinfo failed" in str(exc):
1194
+ logger.error("DNS resolution failed for OpenRouter: %s", exc)
1195
+ raise RuntimeError("Could not resolve OpenRouter domain. Please check your internet connection.")
1196
+ logger.warning("OpenRouter model %s failed: %s", attempt_model, exc)
1197
+ last_exc = exc
1198
+ continue
546
1199
 
547
1200
  raise RuntimeError(f"All OpenRouter models failed. Last error: {last_exc}")
548
1201
 
@@ -555,7 +1208,8 @@ def _extract_json_object(raw: str) -> dict:
555
1208
  - Preamble text in any language before the JSON
556
1209
  - Markdown code fences (```json ... ``` or ``` ... ```)
557
1210
  - Trailing explanation text after the JSON
558
- - Truncated responses (returns whatever was parseable)
1211
+ - Truncated responses (free-tier model cut-offs) — try-repair appends
1212
+ missing closing brackets/braces to recover a parseable object.
559
1213
  """
560
1214
  raw = raw.strip()
561
1215
 
@@ -579,6 +1233,7 @@ def _extract_json_object(raw: str) -> dict:
579
1233
 
580
1234
  # 3. Brace-matching: find the first complete JSON object in the text
581
1235
  start = raw.find('{')
1236
+ best_candidate: str | None = None
582
1237
  if start != -1:
583
1238
  depth = 0
584
1239
  in_string = False
@@ -603,10 +1258,36 @@ def _extract_json_object(raw: str) -> dict:
603
1258
  try:
604
1259
  return json.loads(candidate)
605
1260
  except json.JSONDecodeError:
606
- break # malformed — fall through to error
1261
+ break # malformed — fall through to repair
1262
+ # Capture the partial object for repair attempts
1263
+ best_candidate = raw[start:]
1264
+
1265
+ # 4. Try-repair: the response was likely truncated by the model's token limit.
1266
+ # Progressively append closing characters until we get a valid object.
1267
+ # We try up to 12 combinations: 0-4 extra ']' + 0-4 extra '}', ordered
1268
+ # by shortest repair first (minimises data invention).
1269
+ candidate_base = best_candidate or raw
1270
+ # Trim trailing whitespace/comma that often appears before cut-off
1271
+ candidate_base = candidate_base.rstrip().rstrip(",")
1272
+ logger.debug("JSON repair: attempting to salvage truncated output (%d chars)", len(candidate_base))
1273
+ for extra_brackets in range(5): # 0 … 4 extra ]
1274
+ for extra_braces in range(5): # 0 … 4 extra }
1275
+ if extra_brackets == 0 and extra_braces == 0:
1276
+ continue # already tried the plain candidate
1277
+ repaired = candidate_base + ("\n]" * extra_brackets) + ("\n}" * extra_braces)
1278
+ try:
1279
+ result = json.loads(repaired)
1280
+ logger.warning(
1281
+ "JSON repair succeeded (+%d ']', +%d '}'). "
1282
+ "Free-tier model likely truncated its output.",
1283
+ extra_brackets, extra_braces,
1284
+ )
1285
+ return result
1286
+ except json.JSONDecodeError:
1287
+ continue
607
1288
 
608
1289
  raise ValueError(
609
- f"LLM returned invalid JSON (could not extract object).\nRaw: {raw[:600]}"
1290
+ f"LLM returned invalid JSON (could not extract or repair object).\nRaw: {raw[:600]}"
610
1291
  )
611
1292
 
612
1293
 
@@ -650,9 +1331,22 @@ You are a surgical ML pipeline editor. The user wants to REFINE an existing pipe
650
1331
  1. Output ONLY a single raw JSON object. No text before it, no text after it.
651
1332
  2. NEVER wrap the JSON in markdown fences (no ```json, no ```).
652
1333
  3. ALWAYS respond in ENGLISH. Never use any other language.
653
- 4. If you cannot help, return: {{"node_changes":[],"edge_changes":[],"summary":"Cannot process this request."}}
1334
+ 4. DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
654
1335
  5. Do NOT truncate the JSON — it must be a complete, valid object.
655
1336
 
1337
+ 🔑 TOKEN CONSERVATION — MANDATORY (free-tier models have strict output limits):
1338
+ A. TEMPLATE SWAPS: If you are changing a model or operation that already has a
1339
+ matching Template ID (e.g. swapping linear_regression for
1340
+ random_forest_regressor), set only the "type" field on the update entry.
1341
+ NEVER output the full Python "code" block when a Template already exists.
1342
+ Example — correct: {{"action":"update","id":"n5","data":{{"type":"random_forest_regressor","values":{{"n_estimators":200}}}}}}
1343
+ Example — WRONG: {{"action":"update","id":"n5","data":{{"code":"import ..."}}}} ← wastes tokens
1344
+ B. VALUES ONLY: In "node_changes", omit the "code" field entirely unless the
1345
+ user explicitly asked for a custom code change. If only config parameters
1346
+ changed, output ONLY the "values" dictionary — nothing else inside "data".
1347
+ Example — correct: {{"action":"update","id":"n3","data":{{"values":{{"test_size":0.3}}}}}}
1348
+ Example — WRONG: {{"action":"update","id":"n3","data":{{"values":{{"test_size":0.3}},"code":"...entire node..."}}}}
1349
+
656
1350
  Output ONLY a JSON patch object describing the minimal set of changes needed.
657
1351
 
658
1352
  ══ CURRENT PIPELINE ═════════════════════════════════════════════════
@@ -678,13 +1372,15 @@ Output ONLY a JSON patch object describing the minimal set of changes needed.
678
1372
  }},
679
1373
  {{
680
1374
  "action": "add",
681
- "id": "<new unique id e.g. node_cv_1>",
682
- "type": "<template_id>",
1375
+ "id": "<new unique id e.g. node_custom_1>",
1376
+ "type": "<template_id or 'customNode'>",
683
1377
  "reason": "<why this node is added>",
684
1378
  "position": {{"x": <number>, "y": <number>}},
685
1379
  "data": {{
1380
+ "label": "<optional Descriptive Name>",
1381
+ "templateId": "<optional templateId or 'customNode'>",
686
1382
  "config": {{}},
687
- "code": "<optional override>"
1383
+ "code": "<optional full Python source, REQUIRED if type is customNode>"
688
1384
  }}
689
1385
  }},
690
1386
  {{
@@ -721,7 +1417,8 @@ Before emitting ANY patch entry, ask:
721
1417
  YES → emit one "update" for that node. No new nodes.
722
1418
 
723
1419
  3. Does this need a genuinely new computation node?
724
- YES → emit one "add". Minimise new edges.
1420
+ YES → emit one "add".
1421
+ CRITICAL: If the requested operation (e.g., RobustScaler) is NOT in the catalogue, DO NOT refuse the request. Instead, YOU MUST generate it dynamically as a `customNode`. Provide the complete Python implementation in the `code` field, starting with `# ✨ AI GENERATED`. In your `summary`, explicitly state that you generated a custom node.
725
1422
 
726
1423
  4. None of the above?
727
1424
  → Explain in "summary". Return empty node_changes.
@@ -734,7 +1431,19 @@ Before emitting ANY patch entry, ask:
734
1431
  5. "summary" → one plain-English sentence describing the change.
735
1432
  6. DataFrame param MUST be named data (never "df").
736
1433
  7. Include all imports inside any code block.
737
- 8. Only use template types from the catalogue above.
1434
+ 8. Only use template types from the catalogue above, unless building a customNode.
1435
+
1436
+ ══ CUSTOM NODE RULES (when generating a missing component) ════════════
1437
+ If you use `type="customNode"`, your Python string in `code` MUST follow the exact same architecture as built-in templates:
1438
+ - It MUST define a function named EXACTLY `def run(...) -> dict:`
1439
+ - The primary input dataset MUST be named `data` (e.g., `def run(data: pd.DataFrame, ...) -> dict:`)
1440
+ - It MUST return a dictionary containing the outputs (e.g., `return {{"X_train": X_train, "X_test": X_test}}`)
1441
+ - If you are building a custom visualization, you MUST wrap your output in one of these keys so the UI can render it:
1442
+ 'histogram', 'correlation_matrix', 'value_counts', 'box_plot', 'prediction', 'correlation_heatmap', 'missing_value_map', 'class_balance', 'feature_target_scatter', 'model_error_histogram', 'partial_dependence', 'roc_curves'
1443
+ - All `import` statements MUST be placed at the top of the code string.
1444
+ - You MUST include `# ✨ AI GENERATED` at the very top of the script.
1445
+ Example:
1446
+ "import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None):\\n scaler = RobustScaler()\\n # ... logic ...\\n return {{\"X_train\": X_train_scaled, \"X_test\": X_test_scaled}}"
738
1447
 
739
1448
  ══ CONNECTION RULES (CRITICAL — read carefully) ══════════════════════
740
1449
  Each node in the current pipeline has "available_outputs" and "available_inputs"
@@ -749,6 +1458,12 @@ To fix a wrong connection:
749
1458
  1. Emit "remove" for the bad edge (use its id from the current edges list).
750
1459
  2. Emit "add" for the correct edge using valid handle names from the lists above.
751
1460
 
1461
+ SCALER CONNECTION RULES:
1462
+ When inserting or reconnecting a standard_scaler or min_max_scaler after a train_test_split, you MUST:
1463
+ 1. Connect all 4 outputs (X_train, X_test, y_train, y_test) from the split node to the scaler inputs.
1464
+ 2. Connect all 4 outputs (X_train, X_test, y_train, y_test) from the scaler node to the downstream model inputs.
1465
+ Never skip passing y_train and y_test through the scaler node!
1466
+
752
1467
  To add a missing connection:
753
1468
  1. Look at the source node's "available_outputs" — pick the right output.
754
1469
  2. Look at the target node's "available_inputs" — pick the right input.
@@ -813,7 +1528,10 @@ def _custom_node_catalogue(current_flow: FlowSchema) -> str:
813
1528
 
814
1529
 
815
1530
  def _build_refine_messages(
816
- prompt: str, current_flow: FlowSchema, context: str | None
1531
+ prompt: str,
1532
+ current_flow: FlowSchema,
1533
+ context: str | None,
1534
+ custom_components: list[dict] | None = None,
817
1535
  ) -> list[dict]:
818
1536
  slim = _slim_flow(current_flow)
819
1537
  node_status = _node_status_summary(current_flow)
@@ -822,15 +1540,13 @@ def _build_refine_messages(
822
1540
  custom_section = (
823
1541
  f"\n══ CUSTOM NODES ON CANVAS (treat these as valid, usable nodes) ═══════\n"
824
1542
  f"{custom_cat}\n"
825
- if custom_cat else ""
826
- )
1543
+ ) if custom_cat else ""
827
1544
 
828
1545
  system = _REFINE_PROMPT.format(
829
1546
  current_flow=json.dumps(slim, indent=2),
830
1547
  node_status=node_status,
831
- catalogue=_template_catalogue() + custom_section,
1548
+ catalogue=_template_catalogue(custom_components) + custom_section,
832
1549
  )
833
-
834
1550
  ctx_block = (
835
1551
  f"══ DATASET CONTEXT ══════════════════════════════════════\n{context.strip()}\n\n"
836
1552
  if context else ""
@@ -867,24 +1583,81 @@ def _parse_refine_patch(raw: str) -> RefinePatch:
867
1583
 
868
1584
  # ── Public API ─────────────────────────────────────────────────────────────────
869
1585
 
870
- async def generate_flow(prompt: str, context: str | None = None) -> FlowSchema:
871
- messages = _build_generate_messages(prompt, context)
872
- raw = await _call_openrouter(messages, task="generate", json_mode=True, timeout=_TIMEOUT_GENERATE)
1586
+ async def generate_flow(prompt: str, context: str | None = None, custom_components: list[dict] | None = None) -> FlowSchema:
1587
+ """
1588
+ Two-call Architect Builder strategy:
1589
+
1590
+ Call 1 (Architect — deepseek-r1):
1591
+ Given the data profile + deterministic pre-flight analysis, produce a
1592
+ concise markdown plan: problem type, preprocessing steps, model choice.
1593
+
1594
+ Call 2 (Builder — deepseek-chat-v3):
1595
+ Given the Architect's plan + the same context, produce the final JSON flow.
1596
+ The Builder focuses on correct syntax and edge connections, not reasoning.
1597
+ """
1598
+ pre_flight: dict = {}
1599
+ profile_text: str = ""
1600
+ architect_plan: str = ""
1601
+
1602
+ # ── Pre-flight: deterministic data analysis ───────────────────────────
1603
+ if context:
1604
+ fp = _extract_file_path(context)
1605
+ if fp:
1606
+ try:
1607
+ import pandas as pd
1608
+ df = pd.read_csv(fp, nrows=5000)
1609
+ profile = profile_dataframe(df)
1610
+ profile_text = format_profile_for_prompt(profile)
1611
+ pre_flight = _determine_pre_flight(profile, prompt, context, csv_path=fp)
1612
+ logger.info(
1613
+ "Pre-flight: problem_type=%s target=%s model=%s encoding=%s scaling=%s",
1614
+ pre_flight["problem_type"], pre_flight["target_hint"],
1615
+ pre_flight.get("recommended_model"), pre_flight["needs_encoding"],
1616
+ pre_flight["needs_scaling"],
1617
+ )
1618
+ except Exception as exc:
1619
+ logger.warning("pre-flight analysis skipped: %s", exc)
1620
+
1621
+ # ── Call 1: Architect (R1 reasoning model) ────────────────────────────
1622
+ if profile_text and pre_flight:
1623
+ try:
1624
+ arch_messages = _build_architect_messages(prompt, profile_text, pre_flight)
1625
+ architect_plan = await _call_openrouter(
1626
+ arch_messages,
1627
+ task="architect",
1628
+ json_mode=False,
1629
+ timeout=_TIMEOUT_GENERATE,
1630
+ )
1631
+ logger.info("Architect plan: %d chars", len(architect_plan))
1632
+ except Exception as exc:
1633
+ logger.warning("Architect call failed, continuing without plan: %s", exc)
1634
+ architect_plan = ""
1635
+
1636
+ # ── Call 2: Builder (chat model — fast, syntax-precise JSON) ─────────
1637
+ build_messages = _build_generate_messages(
1638
+ prompt, context,
1639
+ pre_flight=pre_flight or None,
1640
+ architect_plan=architect_plan or None,
1641
+ custom_components=custom_components,
1642
+ )
1643
+ raw = await _call_openrouter(
1644
+ build_messages, task="generate", json_mode=True, timeout=_TIMEOUT_GENERATE
1645
+ )
873
1646
  return _parse_flow(raw)
874
1647
 
875
1648
 
876
1649
  async def update_flow(
877
- prompt: str, current_flow: FlowSchema, context: str | None = None
1650
+ prompt: str, current_flow: FlowSchema, context: str | None = None, custom_components: list[dict] | None = None
878
1651
  ) -> FlowSchema:
879
- messages = _build_update_messages(prompt, current_flow, context)
1652
+ messages = _build_update_messages(prompt, current_flow, context, custom_components)
880
1653
  raw = await _call_openrouter(messages, task="update", json_mode=True, timeout=_TIMEOUT_UPDATE)
881
1654
  return _parse_flow(raw)
882
1655
 
883
1656
 
884
1657
  async def refine_flow(
885
- prompt: str, current_flow: FlowSchema, context: str | None = None
1658
+ prompt: str, current_flow: FlowSchema, context: str | None = None, custom_components: list[dict] | None = None
886
1659
  ) -> RefinePatch:
887
- messages = _build_refine_messages(prompt, current_flow, context)
1660
+ messages = _build_refine_messages(prompt, current_flow, context, custom_components)
888
1661
  raw = await _call_openrouter(messages, task="refine", json_mode=True, timeout=_TIMEOUT_UPDATE)
889
1662
  return _parse_refine_patch(raw)
890
1663
 
@@ -934,6 +1707,32 @@ async def explain_flow(flow: FlowSchema) -> str:
934
1707
  return await _call_openrouter(messages, task="explain", json_mode=False, timeout=60)
935
1708
 
936
1709
 
1710
+ _EXPLAIN_CHAT_SYSTEM = """\
1711
+ You are an expert ML engineering assistant answering questions about a user's machine learning pipeline.
1712
+
1713
+ ══ EXPLANATION CONTEXT ═════════════════════════════════════════════════════
1714
+ {explanation}
1715
+
1716
+ ══ PIPELINE AND EXECUTION RESULTS ══════════════════════════════════════════
1717
+ {current_flow}
1718
+
1719
+ Respond concisely and directly to the user's question. Provide actionable, specific advice based on the existing nodes, their configurations, and any metrics or execution results present in the pipeline state.
1720
+ Do NOT use markdown tables in your response. Instead, use simple bullet points and short paragraphs. Do not use generic filler.
1721
+ """
1722
+
1723
+ async def chat_explanation(question: str, explanation: str, flow: FlowSchema) -> str:
1724
+ slim = _slim_flow(flow)
1725
+ system = _EXPLAIN_CHAT_SYSTEM.format(
1726
+ explanation=explanation,
1727
+ current_flow=json.dumps(slim, indent=2)
1728
+ )
1729
+ messages = [
1730
+ {"role": "system", "content": system},
1731
+ {"role": "user", "content": question},
1732
+ ]
1733
+ return await _call_openrouter(messages, task="chat", json_mode=False, timeout=60)
1734
+
1735
+
937
1736
  # ── Self-Healing Debug Prompt ──────────────────────────────────────────────────
938
1737
 
939
1738
  _DEBUG_SYSTEM = """\
@@ -1128,6 +1927,7 @@ async def handle_user_request(
1128
1927
  prompt: str,
1129
1928
  current_flow: FlowSchema | None = None,
1130
1929
  context: str | None = None,
1930
+ custom_components: list[dict] | None = None,
1131
1931
  ) -> dict:
1132
1932
  """
1133
1933
  Single entry point that classifies the prompt and routes to the
@@ -1144,7 +1944,7 @@ async def handle_user_request(
1144
1944
  # No existing flow → always generate from scratch
1145
1945
  has_flow = current_flow is not None and len(current_flow.nodes) > 0
1146
1946
  if not has_flow:
1147
- flow = await generate_flow(prompt, context)
1947
+ flow = await generate_flow(prompt, context, custom_components=custom_components)
1148
1948
  return {"intent": "generate", "result_type": "flow", "flow": flow}
1149
1949
 
1150
1950
  intent = detect_intent(prompt, has_flow=True)
@@ -1159,17 +1959,17 @@ async def handle_user_request(
1159
1959
  for n in current_flow.nodes
1160
1960
  ):
1161
1961
  # Custom nodes exist — use update so they're visible to the LLM
1162
- flow = await update_flow(prompt, current_flow, context)
1962
+ flow = await update_flow(prompt, current_flow, context, custom_components=custom_components)
1163
1963
  return {"intent": "update", "result_type": "flow", "flow": flow}
1164
- flow = await generate_flow(prompt, context)
1964
+ flow = await generate_flow(prompt, context, custom_components=custom_components)
1165
1965
  return {"intent": "generate", "result_type": "flow", "flow": flow}
1166
1966
 
1167
1967
  if intent == "refine":
1168
- patch = await refine_flow(prompt, current_flow, context)
1968
+ patch = await refine_flow(prompt, current_flow, context, custom_components=custom_components)
1169
1969
  return {"intent": "refine", "result_type": "patch", "patch": patch}
1170
1970
 
1171
1971
  # intent == "update"
1172
- flow = await update_flow(prompt, current_flow, context)
1972
+ flow = await update_flow(prompt, current_flow, context, custom_components=custom_components)
1173
1973
  return {"intent": "update", "result_type": "flow", "flow": flow}
1174
1974
 
1175
1975
 
@@ -1310,3 +2110,79 @@ async def suggest_improvements(flow: FlowSchema, results: dict) -> list[str]:
1310
2110
  # Sanitise: only strings, max 120 chars each
1311
2111
  return [str(s)[:120] for s in suggestions if s]
1312
2112
 
2113
+
2114
# ── Custom node code generation ────────────────────────────────────────────────

# System prompt for generate_node_code(). Sent verbatim to the LLM, so the text
# below is runtime data, not documentation: it pins the required
# run(data, ...) -> dict signature, the library allow/deny lists, and the
# Annotated[...] parameter conventions that (presumably) drive the UI widget
# mapping — TODO confirm against the node parser. It also asks for raw Python
# with no markdown fences; the caller still strips fences defensively.
_NODE_CODE_SYSTEM = """\
You are an M8Flow node code generator. Write Python code for a reusable pipeline component.

⚠️ HARD RULES — any violation makes the node unparseable:
1. Function name MUST be run (not main, process, execute, transform — exactly run)
2. DataFrame input parameter MUST be named data (never df, dataframe, dataset)
3. Function MUST return a dict with named string keys
4. ALL imports go INSIDE the function body
5. Only allowed libraries: pandas, numpy, sklearn, scipy, math, statistics, re, json
6. FORBIDDEN: os, sys, subprocess, socket, requests, open(), eval(), exec(), matplotlib

FIELD TYPE ANNOTATIONS — these control the UI widget shown to the user:
data input (connects from previous node) → just `data` with no type hint
text field → `name: str = "default"`
number field → `name: float = 1.0` or `name: int = 10`
boolean toggle→ `name: bool = True`
column picker → `col: Annotated[str, "column"] = "target"` (needs `from typing import Annotated` inside the fn)
file picker → `path: Annotated[str, "file"] = "data.csv"` (needs `from typing import Annotated` inside the fn)

RETURN DICT — keys become the node's output handles:
Passing a DataFrame forward → always include "data": df
Model outputs → {"model": model, "y_pred": preds}
Metric outputs → {"accuracy": 0.95, "f1": 0.88}
Multiple outputs are fine → {"data": df, "rows_removed": n}

EXAMPLE — outlier removal node:
def run(data, multiplier: float = 1.5) -> dict:
    import pandas as pd
    import numpy as np
    df = data.copy()
    for col in df.select_dtypes(include=[np.number]).columns:
        Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
        iqr = Q3 - Q1
        df = df[~((df[col] < Q1 - multiplier * iqr) | (df[col] > Q3 + multiplier * iqr))]
    return {"data": df, "rows_removed": len(data) - len(df)}

EXAMPLE — feature selection node:
def run(data, n_features: int = 10, target: str = "label") -> dict:
    import pandas as pd
    from sklearn.feature_selection import SelectKBest, f_classif
    X = data.drop(columns=[target])
    y = data[target]
    selector = SelectKBest(f_classif, k=min(n_features, X.shape[1]))
    selector.fit(X, y)
    selected = X.columns[selector.get_support()].tolist()
    return {"data": data[selected + [target]], "selected_features": selected}

OUTPUT: Return ONLY the raw Python code. No explanation. No markdown fences. No backticks.
"""
2165
+
2166
+
2167
async def generate_node_code(description: str) -> str:
    """
    Generate M8Flow-compatible Python node code from a natural-language description.

    Sends the node-code system prompt plus the user's description to the
    "generate" task model via OpenRouter, then sanitises the reply by removing
    any markdown code fences the model may add despite the prompt's
    instructions.

    Args:
        description: Plain-English description of the node to generate.

    Returns:
        Raw Python source for the node, with fences stripped and surrounding
        whitespace trimmed.
    """
    messages = [
        {"role": "system", "content": _NODE_CODE_SYSTEM},
        {"role": "user", "content": f"Generate an M8Flow node that: {description}"},
    ]
    raw = await _call_openrouter(messages, task="generate", json_mode=False, timeout=60)

    # Strip any markdown fences the model may add despite instructions.
    # Drop the ENTIRE opening fence line: the previous fixed-prefix check only
    # handled "```python" and bare "```", so a reply fenced as ```py or ```json
    # kept its language tag glued to the first line of code.
    raw = raw.strip()
    if raw.startswith("```"):
        newline = raw.find("\n")
        raw = raw[newline + 1:] if newline != -1 else ""
    if raw.endswith("```"):
        raw = raw[:-3]

    return raw.strip()
2188
+