m8flow 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  """LLM service — OpenRouter-powered flow generation with per-task model routing."""
2
2
  import json
3
3
  import logging
4
+ import time
4
5
  import httpx
5
6
  from contextvars import ContextVar
6
7
  from config import config
@@ -23,23 +24,57 @@ _OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
23
24
  _TIMEOUT_GENERATE = 120
24
25
  _TIMEOUT_UPDATE = 180
25
26
 
26
- # Per-task model routing (verified OpenRouter slugs no :free on paid-tier models)
27
+ # Per-task model routing slugs verified live against OpenRouter API (May 2026)
27
28
  _MODELS = {
28
- "generate": "deepseek/deepseek-chat-v3-0324",
29
- "refine": "deepseek/deepseek-chat-v3-0324",
30
- "update": "deepseek/deepseek-chat-v3-0324",
31
- "debug": "deepseek/deepseek-r1",
32
- "heal": "deepseek/deepseek-r1",
33
- "explain": "google/gemini-2.0-flash-001",
34
- "suggest": "google/gemini-2.0-flash-001",
35
- "fallback": "meta-llama/llama-3.3-70b-instruct:free",
36
- "lastresort": "qwen/qwen3-coder:free",
29
+ # Reasoning/Architecting — best available free reasoning model
30
+ "architect": "nvidia/nemotron-3-super-120b-a12b:free",
31
+
32
+ # Core Generation/Updating — large, instruction-tuned free model
33
+ "generate": "meta-llama/llama-3.3-70b-instruct:free",
34
+ "refine": "meta-llama/llama-3.3-70b-instruct:free",
35
+ "update": "meta-llama/llama-3.3-70b-instruct:free",
36
+
37
+ # Debugging/Healing — strong reasoning for bug analysis
38
+ "debug": "nvidia/nemotron-3-super-120b-a12b:free",
39
+ "heal": "nvidia/nemotron-3-super-120b-a12b:free",
40
+
41
+ # Explaining/Suggesting — fast free model
42
+ "explain": "openai/gpt-oss-20b:free",
43
+ "suggest": "openai/gpt-oss-20b:free",
44
+
45
+ # Safety Nets — verified live fallbacks
46
+ "fallback": "google/gemma-4-31b-it:free",
47
+ "lastresort": "meta-llama/llama-3.2-3b-instruct:free",
37
48
  }
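Routing is resolved further down with `_MODELS.get(task, _MODELS["generate"])`, so an illustrative lookup against the table above behaves like this (the second line assumes a task with no entry):

    _MODELS.get("debug", _MODELS["generate"])   # -> "nvidia/nemotron-3-super-120b-a12b:free"
    _MODELS.get("chat", _MODELS["generate"])    # unknown task, falls back to the "generate" slug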
38
49
 
50
+ # ── Rate-limit cooldown cache ─────────────────────────────────────────────────
51
+ # Maps model_slug -> timestamp of last 429. Models in cooldown are skipped for
52
+ # _RATE_LIMIT_TTL seconds so we jump straight to a working model instead of
53
+ # burning time on a known-rate-limited one.
54
+ _RATE_LIMIT_CACHE: dict[str, float] = {}
55
+ _RATE_LIMIT_TTL = 90 # seconds
56
+
57
+
58
+ def _is_rate_limited(model: str) -> bool:
59
+ """Return True if this model returned 429 within the last _RATE_LIMIT_TTL seconds."""
60
+ ts = _RATE_LIMIT_CACHE.get(model)
61
+ if ts is None:
62
+ return False
63
+ if time.time() - ts < _RATE_LIMIT_TTL:
64
+ return True
65
+ del _RATE_LIMIT_CACHE[model] # TTL expired — clear and allow retry
66
+ return False
67
+
68
+
69
+ def _mark_rate_limited(model: str) -> None:
70
+ """Record that this model returned 429 right now."""
71
+ _RATE_LIMIT_CACHE[model] = time.time()
72
+ logger.debug("Rate-limit cooldown started for %s (%ds)", model, _RATE_LIMIT_TTL)
73
+
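A minimal sketch of how these two helpers are meant to work together; `call_model` and `RateLimitedError` are hypothetical stand-ins, and the real integration lives in `_call_openrouter` below, which reorders its fallback chain rather than skipping cooled-down models outright:

    def _first_available(candidates: list[str]) -> str:
        for m in candidates:
            if _is_rate_limited(m):
                continue                      # still inside the 90 s cooldown, skip without an HTTP call
            try:
                return call_model(m)          # hypothetical request helper
            except RateLimitedError:          # hypothetical wrapper around a 429 response
                _mark_rate_limited(m)         # start the cooldown for this slug
        raise RuntimeError("every candidate model is rate-limited or failing")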
39
74
 
40
75
  # ── Catalogue helpers ──────────────────────────────────────────────────────────
41
76
 
42
- def _template_catalogue() -> str:
77
+ def _template_catalogue(custom_components: list[dict] | None = None) -> str:
43
78
  """Detailed catalogue: id, category, inputs, outputs."""
44
79
  from core.parser import parse_node_code
45
80
  lines: list[str] = []
@@ -49,11 +84,24 @@ def _template_catalogue() -> str:
49
84
  field_ins = [f"{i.name}:{i.kind}={i.default}" for i in schema.inputs if i.kind != "data"]
50
85
  outs = [o.name for o in schema.outputs]
51
86
  lines.append(
52
- f" {t['id']} [{t['category']}] \"{t['label']}\"\n"
53
- f" data-inputs : {data_ins or '(none)'}\n"
54
- f" fields : {field_ins or '(none)'}\n"
55
- f" outputs : {outs or '(none)'}"
87
+ f"{t['id']} [{t['category']}]\n"
88
+ f" inputs : {data_ins or '(none)'} fields: {field_ins or '(none)'}\n"
89
+ f" outputs: {outs or '(none)'}"
56
90
  )
91
+
92
+ if custom_components:
93
+ lines.append("\n=== USER CUSTOM COMPONENTS (Preferred if applicable) ===")
94
+ for c in custom_components:
95
+ schema = c.get("schema", {})
96
+ data_ins = [i["name"] for i in schema.get("inputs", []) if i.get("kind") == "data"]
97
+ field_ins = [f"{i['name']}:{i.get('kind')}={i.get('default')}" for i in schema.get("inputs", []) if i.get("kind") != "data"]
98
+ outs = [o["name"] for o in schema.get("outputs", [])]
99
+ lines.append(
100
+ f"{c.get('id')} [Custom] \"{c.get('label')}\"\n"
101
+ f" inputs : {data_ins or '(none)'} fields: {field_ins or '(none)'}\n"
102
+ f" outputs: {outs or '(none)'}"
103
+ )
104
+
57
105
  return "\n".join(lines)
58
106
 
59
107
 
@@ -65,6 +113,366 @@ def _allowed_type_ids() -> set[str]:
65
113
  return ids
66
114
 
67
115
 
116
+ # ── Pre-flight analysis ────────────────────────────────────────────────────────
117
+
118
+ def _determine_pre_flight(profile: dict, prompt: str, context: str | None, csv_path: str | None = None) -> dict:
119
+ """
120
+ Deterministic data-driven analysis computed BEFORE any LLM call.
121
+
122
+ Uses pandas to analyse the ACTUAL data (not just regex on the prompt), so the
123
+ LLM receives ground-truth facts — not guesses — about the task type and
124
+ preprocessing requirements.
125
+ """
126
+ import re
127
+ import pandas as pd
128
+ import numpy as np
129
+
130
+ dtypes = profile.get("dtypes", {})
131
+ missing = profile.get("missing", {})
132
+ numeric_summary = profile.get("numeric_summary", {})
133
+ categorical_summary = profile.get("categorical_summary", {})
134
+ shape = profile.get("shape", [0, 0])
135
+
136
+ # ── Step 1: Find the target column ────────────────────────────────────────
137
+ # Priority: explicit mention in prompt/context > heuristic column names.
138
+
139
+ target_hint: str | None = None
140
+ search_text = (prompt + " " + (context or "")).lower()
141
+
142
+ # (a) Regex extraction from user text
143
+ for pattern in [
144
+ r"predict\s+(?:the\s+)?['\"]?(\w+)['\"]?",
145
+ r"target\s+(?:(?:column|variable|col)\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
146
+ r"classif(?:y|ication)\s+(?:the\s+)?['\"]?(\w+)['\"]?",
147
+ r"label\s+(?:column\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
148
+ r"output\s+(?:column\s+)?(?:is\s+)?['\"]?(\w+)['\"]?",
149
+ r"y\s*=\s*['\"]?(\w+)['\"]?",
150
+ ]:
151
+ m = re.search(pattern, search_text)
152
+ if m:
153
+ candidate = m.group(1)
154
+ # Validate the candidate actually exists in the data
155
+ if candidate in dtypes or candidate in numeric_summary or candidate in categorical_summary:
156
+ target_hint = candidate
157
+ break
158
+
159
+ # (b) If still unknown, use heuristic column-name scoring on the real columns
160
+ if target_hint is None:
161
+ TARGET_KEYWORDS = [
162
+ "target", "label", "class", "output", "y", "result",
163
+ "outcome", "diagnosis", "status", "type", "category",
164
+ "survived", "churn", "default", "fraud", "price",
165
+ "salary", "value", "score", "sales", "revenue", "cost",
166
+ "medv", "charges", "fare",
167
+ ]
168
+ all_columns = list(dtypes.keys())
169
+ best_col: str | None = None
170
+ best_score = -1
171
+
172
+ for col in all_columns:
173
+ col_lower = col.lower().replace("_", " ").replace("-", " ")
174
+ score = 0
175
+
176
+ # Keyword match against column name
177
+ for kw in TARGET_KEYWORDS:
178
+ if kw in col_lower:
179
+ score += 3
180
+ break
181
+ # Last column is commonly the target in many datasets
182
+ if col == all_columns[-1]:
183
+ score += 2
184
+ # Column mentioned in prompt text
185
+ if col_lower in search_text or col.lower() in search_text:
186
+ score += 4
187
+
188
+ if score > best_score:
189
+ best_score = score
190
+ best_col = col
191
+
192
+ if best_col and best_score >= 2:
193
+ target_hint = best_col
194
+
195
+ # ── Step 2: Determine problem type from ACTUAL DATA ───────────────────────
196
+ # Initialize all new keys upfront to avoid KeyErrors downstream.
197
+ problem_type = "unknown"
198
+ target_analysis: dict = {}
199
+ is_imbalanced = False
200
+ needs_outlier_removal = False
201
+
202
+ if target_hint and csv_path:
203
+ try:
204
+ df = pd.read_csv(csv_path, nrows=5000)
205
+
206
+ # ── Outlier Detection: scan all numeric feature columns ──────────
207
+ for col in df.select_dtypes(include=[np.number]).columns:
208
+ if col == target_hint:
209
+ continue
210
+ col_series = df[col].dropna()
211
+ if len(col_series) == 0:
212
+ continue
213
+ col_mean = col_series.mean()
214
+ col_max = col_series.max()
215
+ # Flag if the max is >10x the mean and the mean is positive
216
+ if col_mean > 0 and col_max > 10 * col_mean:
217
+ needs_outlier_removal = True
218
+ break # one outlier column is enough to flag the dataset
219
+
220
+ if target_hint in df.columns:
221
+ col_data = df[target_hint].dropna()
222
+ dtype = col_data.dtype
223
+
224
+ if dtype == object or str(dtype) == "category":
225
+ # String/category column → always classification
226
+ n_unique = col_data.nunique()
227
+ problem_type = "classification"
228
+ target_analysis = {
229
+ "dtype": str(dtype),
230
+ "unique_values": int(n_unique),
231
+ "sample_values": col_data.unique()[:5].tolist(),
232
+ "reasoning": f"Categorical dtype with {n_unique} unique string values → classification",
233
+ }
234
+
235
+ elif dtype == bool or (dtype == int and col_data.nunique() <= 2):
236
+ # Boolean or binary integer → classification
237
+ problem_type = "classification"
238
+ target_analysis = {
239
+ "dtype": str(dtype),
240
+ "unique_values": int(col_data.nunique()),
241
+ "sample_values": col_data.unique()[:5].tolist(),
242
+ "reasoning": "Binary (0/1 or True/False) target → classification",
243
+ }
244
+
245
+ elif np.issubdtype(dtype, np.integer):
246
+ n_unique = col_data.nunique()
247
+ n_total = len(col_data)
248
+ unique_ratio = n_unique / max(n_total, 1)
249
+ if n_unique <= 20 or unique_ratio < 0.05:
250
+ problem_type = "classification"
251
+ target_analysis = {
252
+ "dtype": str(dtype),
253
+ "unique_values": int(n_unique),
254
+ "sample_values": sorted(col_data.unique().tolist())[:10],
255
+ "reasoning": f"Integer with only {n_unique} unique values ({unique_ratio:.1%} of rows) → likely class labels → classification",
256
+ }
257
+ else:
258
+ problem_type = "regression"
259
+ target_analysis = {
260
+ "dtype": str(dtype),
261
+ "unique_values": int(n_unique),
262
+ "min": float(col_data.min()),
263
+ "max": float(col_data.max()),
264
+ "mean": float(col_data.mean()),
265
+ "reasoning": f"Integer with {n_unique} unique values (high cardinality) → continuous → regression",
266
+ }
267
+
268
+ elif np.issubdtype(dtype, np.floating):
269
+ n_unique = col_data.nunique()
270
+ problem_type = "regression"
271
+ target_analysis = {
272
+ "dtype": str(dtype),
273
+ "unique_values": int(n_unique),
274
+ "min": float(col_data.min()),
275
+ "max": float(col_data.max()),
276
+ "mean": float(col_data.mean()),
277
+ "std": float(col_data.std()),
278
+ "reasoning": f"Floating-point target with {n_unique} unique values → continuous → regression",
279
+ }
280
+
281
+ else:
282
+ # Fallback: try to convert and check cardinality
283
+ try:
284
+ as_numeric = pd.to_numeric(col_data, errors="coerce")
285
+ if as_numeric.isna().mean() < 0.1:
286
+ n_unique = as_numeric.nunique()
287
+ problem_type = "regression" if n_unique > 20 else "classification"
288
+ target_analysis = {
289
+ "dtype": str(dtype),
290
+ "unique_values": int(n_unique),
291
+ "reasoning": f"Converted to numeric; {n_unique} unique values → {'regression' if n_unique > 20 else 'classification'}",
292
+ }
293
+ else:
294
+ problem_type = "classification"
295
+ target_analysis = {
296
+ "dtype": str(dtype),
297
+ "reasoning": "Could not convert to numeric → treating as classification",
298
+ }
299
+ except Exception:
300
+ problem_type = "classification"
301
+
302
+ # ── Class Imbalance Check (classification only) ──────────────
303
+ if problem_type == "classification":
304
+ try:
305
+ class_freqs = col_data.value_counts(normalize=True)
306
+ if class_freqs.min() < 0.10:
307
+ is_imbalanced = True
308
+ except Exception:
309
+ pass
310
+
311
+ except Exception as exc:
312
+ logger.warning("Pre-flight target analysis failed: %s", exc)
313
+ # Fall back to dtype-only heuristic using profile data
314
+ if target_hint:
315
+ dtype_str = str(dtypes.get(target_hint, "")).lower()
316
+ if any(t in dtype_str for t in ("object", "category", "bool", "str")):
317
+ problem_type = "classification"
318
+ elif target_hint in categorical_summary:
319
+ problem_type = "classification" if categorical_summary[target_hint].get("unique", 99) < 15 else "regression"
320
+ elif target_hint in numeric_summary:
321
+ problem_type = "regression"
322
+
323
+ elif target_hint:
324
+ # No CSV path — fall back to profile-based heuristic
325
+ dtype_str = str(dtypes.get(target_hint, "")).lower()
326
+ if any(t in dtype_str for t in ("object", "category", "bool", "str")):
327
+ problem_type = "classification"
328
+ elif target_hint in categorical_summary:
329
+ problem_type = "classification" if categorical_summary.get(target_hint, {}).get("unique", 99) < 15 else "regression"
330
+ elif target_hint in numeric_summary:
331
+ problem_type = "regression"
332
+
333
+ # ── Step 3: Preprocessing flags ───────────────────────────────────────────
334
+ # Detect categorical columns that need encoding (exclude the target itself)
335
+ cat_cols = {c for c in categorical_summary if c != target_hint}
336
+ num_cols = {c for c in numeric_summary if c != target_hint}
337
+
338
+ missing_cols: dict[str, float] = {
339
+ col: round(info.get("pct", 0), 1)
340
+ for col, info in missing.items()
341
+ if info.get("pct", 0) > 0
342
+ }
343
+ cardinality: dict[str, int] = {
344
+ col: info.get("unique", 0)
345
+ for col, info in categorical_summary.items()
346
+ }
347
+
348
+ needs_encoding = len(cat_cols) > 0
349
+ needs_scaling = len(num_cols) >= 2
350
+ needs_imputation = len(missing_cols) > 0
351
+
352
+ # ── Step 4: Recommend specific model based on problem type + data size ────
353
+ n_rows = shape[0] if shape else 0
354
+ n_cols = shape[1] if len(shape) > 1 else 0
355
+
356
+ if problem_type == "classification":
357
+ recommended_model = "random_forest_classifier" if n_rows >= 1000 else "logistic_regression"
358
+ recommended_metric_node = "classification_report"
359
+ elif problem_type == "regression":
360
+ recommended_model = "random_forest_regressor" if n_rows >= 1000 else "linear_regression"
361
+ recommended_metric_node = "regression_metrics"
362
+ else:
363
+ recommended_model = "random_forest_classifier"
364
+ recommended_metric_node = "classification_report"
365
+
366
+ return {
367
+ "target_hint": target_hint,
368
+ "problem_type": problem_type,
369
+ "target_analysis": target_analysis,
370
+ "missing_cols": missing_cols,
371
+ "cardinality": cardinality,
372
+ "needs_encoding": needs_encoding,
373
+ "needs_scaling": needs_scaling,
374
+ "needs_imputation": needs_imputation,
375
+ "needs_outlier_removal": needs_outlier_removal,
376
+ "is_imbalanced": is_imbalanced,
377
+ "n_rows": n_rows,
378
+ "n_cols": n_cols,
379
+ "categorical_cols": sorted(cat_cols),
380
+ "numeric_cols": sorted(num_cols),
381
+ "recommended_model": recommended_model,
382
+ "recommended_metric": recommended_metric_node,
383
+ }
384
+
385
+
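As a concrete illustration, a Titanic-style CSV with a binary `survived` column would produce a pre-flight dict roughly like the following (values are invented for the example; the keys match the return statement above):

    {
        "target_hint": "survived",
        "problem_type": "classification",
        "target_analysis": {"dtype": "int64", "unique_values": 2, "sample_values": [0, 1],
                            "reasoning": "Binary (0/1 or True/False) target → classification"},
        "missing_cols": {"age": 19.9}, "cardinality": {"sex": 2, "embarked": 3},
        "needs_encoding": True, "needs_scaling": True, "needs_imputation": True,
        "needs_outlier_removal": False, "is_imbalanced": False,
        "n_rows": 891, "n_cols": 12,
        "categorical_cols": ["embarked", "sex"], "numeric_cols": ["age", "fare", "pclass"],
        "recommended_model": "logistic_regression",
        "recommended_metric": "classification_report",
    }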
386
+ def _pre_flight_block(pf: dict) -> str:
387
+ """Format the pre-flight analysis as an authoritative, bossy prompt block."""
388
+ analysis = pf.get("target_analysis", {})
389
+ reasoning = analysis.get("reasoning", "")
390
+ sample_vals = analysis.get("sample_values", [])
391
+
392
+ lines = [
393
+ "╔══ [PYTHON-DETERMINED ANALYSIS — treat as ABSOLUTE GROUND TRUTH] ════╗",
394
+ f" ⚠ Problem type : {pf['problem_type'].upper()} (MANDATORY)",
395
+ f" Reasoning : {reasoning or 'heuristic from dtype/cardinality'}",
396
+ f" Target column : {pf['target_hint'] or 'not specified — infer from context'}",
397
+ ]
398
+ if sample_vals:
399
+ lines.append(f" Target sample vals : {sample_vals}")
400
+ lines += [
401
+ f" Dataset size : {pf['n_rows']} rows × {pf['n_cols']} columns",
402
+ f" Missing values : {pf['missing_cols'] or 'none'}",
403
+ f" Categorical cols : {pf.get('categorical_cols') or 'none'}",
404
+ f" Numeric cols : {pf.get('numeric_cols') or 'none'}",
405
+ f" Needs encoding : {'YES — add label_encoder BEFORE train_test_split' if pf['needs_encoding'] else 'no'}",
406
+ f" Needs scaling : {'YES — add standard_scaler AFTER train_test_split' if pf['needs_scaling'] else 'no'}",
407
+ f" Needs imputation : {'YES — add data_cleaning BEFORE split' if pf['needs_imputation'] else 'no'}",
408
+ ]
409
+ # Conditional directives for outlier removal
410
+ if pf.get("needs_outlier_removal"):
411
+ lines.append(
412
+ " ⚠ Outlier columns : YES — a numeric feature has max > 10× its mean. "
413
+ "Add an outlier_removal node BEFORE train_test_split."
414
+ )
415
+ # Conditional directive for class imbalance
416
+ if pf.get("is_imbalanced"):
417
+ lines.append(
418
+ " ⚠ Class imbalance : YES — minority class < 10% of data. "
419
+ "Set class_weight='balanced' on the model node config."
420
+ )
421
+ lines += [
422
+ f" ⚠ Model Selection : USE {pf.get('recommended_model', 'unknown').upper()} ONLY",
423
+ f" ✅ Metric node : {pf.get('recommended_metric', 'unknown')}",
424
+ "╚════════════════════════════════════════════════════════════════════════╝",
425
+ ]
426
+ return "\n".join(lines)
427
+
428
+
429
+ # ── Architect (R1 planning) prompt ────────────────────────────────────────────
430
+
431
+ _ARCHITECT_PROMPT = """\
432
+ You are a senior ML engineer performing pre-build technical architecture planning.
433
+ You will receive a dataset profile, a deterministic pre-flight analysis, and the user's request.
434
+
435
+ OUTPUT: Concise markdown only. No JSON. No code blocks. Under 250 words.
436
+
437
+ Structure your response as:
438
+
439
+ ## Problem Type
440
+ State classification or regression with one-sentence justification.
441
+
442
+ ## Data Quality Plan
443
+ List each issue (missing values, categorical columns, dtype mismatches) and the exact
444
+ preprocessing step needed for it. Reference actual column names.
445
+
446
+ ## Pipeline Sequence
447
+ Ordered list of node types (e.g. csv_loader → label_encoder → train_test_split →
448
+ standard_scaler → random_forest_classifier → classification_report).
449
+
450
+ ## Model Rationale
451
+ Why this model fits the problem. If the dataset is large (>10k rows), prefer tree-based
452
+ models. If many numeric features, recommend scaling. If class imbalance suspected, note it.
453
+
454
+ ## Critical Warnings
455
+ Any data issues the pipeline MUST handle. Be blunt about failure modes.
456
+
457
+ Do NOT output JSON. Do NOT write code. Be specific — use actual column names from the profile.
458
+ """
459
+
460
+
461
+ def _build_architect_messages(
462
+ prompt: str, profile_text: str, pf: dict
463
+ ) -> list[dict]:
464
+ user = (
465
+ f"== DATASET PROFILE ==\n{profile_text}\n\n"
466
+ f"== DETERMINISTIC PRE-FLIGHT ==\n{_pre_flight_block(pf)}\n\n"
467
+ f"== USER REQUEST ==\n{prompt}\n\n"
468
+ "Provide your technical pipeline architecture plan."
469
+ )
470
+ return [
471
+ {"role": "system", "content": _ARCHITECT_PROMPT},
472
+ {"role": "user", "content": user},
473
+ ]
474
+
475
+
68
476
  # ── System prompts ─────────────────────────────────────────────────────────────
69
477
 
70
478
  _SYSTEM_PROMPT = """\
@@ -125,9 +533,13 @@ Use this structure to decide WHERE to modify or improve.
125
533
  ═══════════════════════════════════════
126
534
  STRICT RULES:
127
535
 
128
- 1. MINIMALISM FIRST
129
- - Fewer nodes = better
130
- - Do NOT duplicate functionality
536
+ 1. DATA INTEGRITY & ACCURACY FIRST — MINIMALISM SECOND
537
+ - A complete, correct pipeline beats a minimal, broken one.
538
+ - REQUIRED: label_encoder for any object/category column BEFORE train_test_split.
539
+ - REQUIRED: standard_scaler for distance-based models (SVM, KNN, LogisticRegression).
540
+ - REQUIRED: data_cleaning node when ANY column has missing values.
541
+ - THEN minimize: never add a step the data does not require.
542
+ - A pipeline that skips necessary preprocessing is a FAILURE regardless of node count.
131
543
 
132
544
  2. USE TEMPLATES FIRST
133
545
  - Only use customNode if NO template exists
@@ -141,6 +553,7 @@ STRICT RULES:
141
553
  - sourceHandle MUST exist in source outputs
142
554
  - targetHandle MUST match input param
143
555
  - metric nodes MUST receive: y_pred + y_test
556
+ - SCALER RULE: If using standard_scaler or min_max_scaler after train_test_split, you MUST connect all 4 split outputs (X_train, X_test, y_train, y_test) to the scaler, and then connect all 4 scaler outputs to the model. Do not skip y_train/y_test.
144
557
 
145
558
  5. DATA RULES
146
559
  - If categorical columns exist → include label_encoder BEFORE split
@@ -208,7 +621,7 @@ If a node has an error:
208
621
  - Include all imports inside every code block
209
622
  - Return dict keys MUST match sourceHandles on outgoing edges
210
623
  - NEVER import matplotlib or seaborn
211
- - Custom nodes only when no template covers the operation
624
+ - CRITICAL: If the user requests an operation/model NOT in the catalogue (e.g. RobustScaler), DO NOT refuse. Generate it dynamically as a `customNode` with the full Python `code` starting with `# ✨ AI GENERATED`. Explicitly mention this custom generation in the `summary`.
212
625
 
213
626
  ══ AVAILABLE COMPONENTS ════════════════════════════════════════════
214
627
  {catalogue}
@@ -237,22 +650,117 @@ Model nodes support these config fields — no new node needed:
237
650
  "type": "customNode",
238
651
  "position": {{"x": 560, "y": 200}},
239
652
  "data": {{
240
- "label": "Descriptive Name",
653
+ "label": "Robust Scaler (Custom)",
241
654
  "templateId": "customNode",
242
- "code": "import pandas as pd\ndef run(data: pd.DataFrame) -> dict:\n return {{\"result\": data}}"
655
+ "code": "import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None) -> dict:\\n # ...\\n return {{\"X_train\": X_train, \"X_test\": X_test}}"
243
656
  }}
244
657
  }}
245
658
 
246
- - Function name MUST be `run`
247
- - DataFrame param MUST be named `data`
248
- - MUST return a dict
659
+ - Your Python string in `code` MUST define a function named EXACTLY `def run(...) -> dict:`
660
+ - The DataFrame param MUST be named `data` (if taking a whole dataset)
661
+ - MUST return a dict containing the output handles
249
662
  - Only import: numpy, pandas, scipy, sklearn, math, statistics, re, json
250
663
  - NO os, sys, subprocess, socket, requests, open(), eval(), exec()
251
664
  - Prefer templates first — custom nodes are last resort only
252
665
 
666
+ ══ DYNAMIC VISUALIZATION ══════════════════════════════════════════════
667
+ You can create custom visualizations NOT in the catalogue. The frontend
668
+ detects charts by SHAPE, not by key name. Return any of these shapes and
669
+ the UI will render it automatically — NO new React code needed:
670
+
671
+ Series (bar chart / ranked list):
672
+ {{"my_key": {{"labels": [...], "counts": [...], "title": "..."}}}}
673
+
674
+ Plot (scatter or line chart):
675
+ {{"my_key": {{"x": [...], "y": [...], "title": "...", "x_label": "...", "y_label": "..."}}}}
676
+
677
+ Grid (heatmap / matrix):
678
+ {{"my_key": {{"z": [[...]], "x": [...], "y": [...], "title": "..."}}}}
679
+
680
+ Example — null percentage bar chart:
681
+ return {{
682
+ "null_pct_chart": {{
683
+ "labels": list(null_pcts.keys()),
684
+ "counts": list(null_pcts.values()),
685
+ "title": "Missing Values (%) per Column"
686
+ }}
687
+ }}
688
+
689
+ When the user asks for any kind of visualization (e.g. "show me a chart
690
+ of X", "visualize the distribution of Y"), you MUST generate a customNode
691
+ that returns a dict with one of the shapes above. NEVER refuse — if no
692
+ template covers it, invent the chart with the shape protocol.
693
+
694
+ ⚠ UNSUPERVISED LEARNING (t-SNE / PCA / UMAP / KMeans): When generating
695
+ any dimensionality reduction or clustering node, you MUST return a
696
+ `labels` array alongside `x` and `y` so the frontend can color-code
697
+ clusters automatically. Example:
698
+
699
+ return {{
700
+ "tsne_plot": {{
701
+ "x": X_2d[:, 0].tolist(),
702
+ "y": X_2d[:, 1].tolist(),
703
+ "labels": [str(c) for c in cluster_labels], # ← REQUIRED
704
+ "title": "t-SNE Cluster Visualization",
705
+ "x_label": "Dim 1",
706
+ "y_label": "Dim 2"
707
+ }}
708
+ }}
709
+
253
710
  ══ OUTPUT ═══════════════════════════════════════════════════════════
254
711
  Return ONLY:
255
712
  {{"nodes": [...], "edges": [...], "summary": "<one sentence conversational explanation>"}}
713
+
714
+ ⚠️ DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
715
+
716
+ ══ GOLD STANDARD EXAMPLES — you MUST match these handle names EXACTLY ═══════
717
+ ⚠ CRITICAL: The sourceHandle and targetHandle values below (data, X_train, X_test,
718
+ y_train, y_test, y_pred) are the ONLY valid handle names. Do NOT invent new ones.
719
+ Your edges MUST use these exact strings — any deviation will cause a runtime failure.
720
+
721
+ Example A — CLASSIFICATION (categorical cols + scaling needed):
722
+ {{"nodes":[
723
+ {{"id":"n1","type":"csv_loader","position":{{"x":0,"y":200}},"data":{{"config":{{"file_path":"data.csv"}}}}}},
724
+ {{"id":"n2","type":"label_encoder","position":{{"x":280,"y":200}},"data":{{"config":{{"columns":"sex,embarked"}}}}}},
725
+ {{"id":"n3","type":"train_test_split","position":{{"x":560,"y":200}},"data":{{"config":{{"target_column":"survived","test_size":0.2}}}}}},
726
+ {{"id":"n4","type":"standard_scaler","position":{{"x":840,"y":200}},"data":{{}}}},
727
+ {{"id":"n5","type":"random_forest_classifier","position":{{"x":1120,"y":200}},"data":{{"config":{{"n_estimators":100}}}}}},
728
+ {{"id":"n6","type":"classification_report","position":{{"x":1400,"y":200}},"data":{{}}}}
729
+ ],"edges":[
730
+ {{"id":"e1","source":"n1","sourceHandle":"data","target":"n2","targetHandle":"data"}},
731
+ {{"id":"e2","source":"n2","sourceHandle":"data","target":"n3","targetHandle":"data"}},
732
+ {{"id":"e3","source":"n3","sourceHandle":"X_train","target":"n4","targetHandle":"X_train"}},
733
+ {{"id":"e4","source":"n3","sourceHandle":"X_test","target":"n4","targetHandle":"X_test"}},
734
+ {{"id":"e5","source":"n3","sourceHandle":"y_train","target":"n4","targetHandle":"y_train"}},
735
+ {{"id":"e6","source":"n3","sourceHandle":"y_test","target":"n4","targetHandle":"y_test"}},
736
+ {{"id":"e7","source":"n4","sourceHandle":"X_train","target":"n5","targetHandle":"X_train"}},
737
+ {{"id":"e8","source":"n4","sourceHandle":"X_test","target":"n5","targetHandle":"X_test"}},
738
+ {{"id":"e9","source":"n4","sourceHandle":"y_train","target":"n5","targetHandle":"y_train"}},
739
+ {{"id":"e10","source":"n5","sourceHandle":"y_pred","target":"n6","targetHandle":"y_pred"}},
740
+ {{"id":"e11","source":"n4","sourceHandle":"y_test","target":"n6","targetHandle":"y_test"}}
741
+ ],"summary":"Classification pipeline with encoding, scaling, and Random Forest."}}
742
+
743
+ Example B — REGRESSION (missing values + continuous target):
744
+ {{"nodes":[
745
+ {{"id":"n1","type":"csv_loader","position":{{"x":0,"y":200}},"data":{{"config":{{"file_path":"houses.csv"}}}}}},
746
+ {{"id":"n2","type":"data_cleaning","position":{{"x":280,"y":200}},"data":{{"config":{{"strategy":"fill"}}}}}},
747
+ {{"id":"n3","type":"train_test_split","position":{{"x":560,"y":200}},"data":{{"config":{{"target_column":"price","test_size":0.2}}}}}},
748
+ {{"id":"n4","type":"standard_scaler","position":{{"x":840,"y":200}},"data":{{}}}},
749
+ {{"id":"n5","type":"random_forest_regressor","position":{{"x":1120,"y":200}},"data":{{"config":{{"n_estimators":200}}}}}},
750
+ {{"id":"n6","type":"regression_metrics","position":{{"x":1400,"y":200}},"data":{{}}}}
751
+ ],"edges":[
752
+ {{"id":"e1","source":"n1","sourceHandle":"data","target":"n2","targetHandle":"data"}},
753
+ {{"id":"e2","source":"n2","sourceHandle":"data","target":"n3","targetHandle":"data"}},
754
+ {{"id":"e3","source":"n3","sourceHandle":"X_train","target":"n4","targetHandle":"X_train"}},
755
+ {{"id":"e4","source":"n3","sourceHandle":"X_test","target":"n4","targetHandle":"X_test"}},
756
+ {{"id":"e5","source":"n3","sourceHandle":"y_train","target":"n4","targetHandle":"y_train"}},
757
+ {{"id":"e6","source":"n3","sourceHandle":"y_test","target":"n4","targetHandle":"y_test"}},
758
+ {{"id":"e7","source":"n4","sourceHandle":"X_train","target":"n5","targetHandle":"X_train"}},
759
+ {{"id":"e8","source":"n4","sourceHandle":"X_test","target":"n5","targetHandle":"X_test"}},
760
+ {{"id":"e9","source":"n4","sourceHandle":"y_train","target":"n5","targetHandle":"y_train"}},
761
+ {{"id":"e10","source":"n5","sourceHandle":"y_pred","target":"n6","targetHandle":"y_pred"}},
762
+ {{"id":"e11","source":"n4","sourceHandle":"y_test","target":"n6","targetHandle":"y_test"}}
763
+ ],"summary":"Regression pipeline with cleaning, scaling, and Random Forest."}}
256
764
  """
257
765
 
258
766
  _UPDATE_PROMPT = """\
@@ -264,6 +772,16 @@ You are M8Flow's AI pipeline surgeon. Modify the pipeline with the MINIMUM chang
264
772
  3. ALWAYS respond in ENGLISH. Never use any other language.
265
773
  4. Do NOT truncate the JSON — it must be a complete, valid object.
266
774
 
775
+ 🔑 TOKEN CONSERVATION — MANDATORY (free-tier models have strict output limits):
776
+ A. TEMPLATE SWAPS: If you are changing a node's operation to one that already has
777
+ a matching Template ID (e.g. swapping from linear_regression to
778
+ random_forest_regressor), change only the "type" field on that node.
779
+ NEVER re-emit the full Python "code" block when a Template already covers it.
780
+ Template nodes are resolved by the runtime — sending their code wastes tokens.
781
+ B. UNCHANGED NODES: Nodes marked ✓ in the status list must appear in your output
782
+ but with their "data.code" field set to null (omitted). Only include code for
783
+ nodes you are actively modifying or adding as custom (non-template) nodes.
784
+
267
785
  Output ONLY the complete updated flow JSON — no markdown, no explanation.
268
786
 
269
787
  ══ DECISION HIERARCHY (follow in order, stop at first match) ════════
@@ -279,9 +797,8 @@ Output ONLY the complete updated flow JSON — no markdown, no explanation.
279
797
 
280
798
  4. Does this genuinely require a brand-new node that adds functionality
281
799
  not available anywhere in the graph?
282
- YES → add exactly ONE new node, connected minimally. Nothing else.
283
-
284
- If none apply, state the limitation in a comment field — do not bloat the graph.
800
+ YES → add exactly ONE new node, connected minimally.
801
+ CRITICAL: If the requested operation (e.g., RobustScaler) is NOT in the catalogue, DO NOT refuse the request. Instead, YOU MUST generate it dynamically as a `customNode`. Provide the complete Python implementation in the `code` field, starting with the comment `# ✨ AI GENERATED`. In your `summary`, explicitly state that you generated a custom node for this functionality.
285
802
 
286
803
  ══ CURRENT PIPELINE ═════════════════════════════════════════════════
287
804
  {current_flow}
@@ -303,6 +820,52 @@ All model nodes support these fields in their config — no new node needed:
303
820
  "better accuracy" → tune hyperparams, or swap model type — no extra nodes
304
821
  "use k-fold" → set cross_validation=true, cv_folds=k on existing model
305
822
 
823
+ ══ CUSTOM NODE FORMAT (only if NO template covers it) ══════════════
824
+ {{
825
+ "id": "node_custom_1",
826
+ "type": "customNode",
827
+ "position": {{"x": 560, "y": 200}},
828
+ "data": {{
829
+ "label": "Robust Scaler (Custom)",
830
+ "templateId": "customNode",
831
+ "code": "import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None) -> dict:\\n # ...\\n return {{\"X_train\": X_train, \"X_test\": X_test}}"
832
+ }}
833
+ }}
834
+
835
+ - Your Python string in `code` MUST define a function named EXACTLY `def run(...) -> dict:`
836
+ - The DataFrame param MUST be named `data` (if taking a whole dataset)
837
+ - MUST return a dict containing the output handles
838
+ - Only import: numpy, pandas, scipy, sklearn, math, statistics, re, json
839
+ - NO os, sys, subprocess, socket, requests, open(), eval(), exec()
840
+
841
+ ══ DYNAMIC VISUALIZATION ══════════════════════════════════════════════
842
+ You can create custom visualizations NOT in the catalogue. The frontend
843
+ detects charts by SHAPE, not by key name. Return any of these shapes and
844
+ the UI will render it automatically — NO new React code needed:
845
+
846
+ Series (bar chart / ranked list):
847
+ {{"my_key": {{"labels": [...], "counts": [...], "title": "..."}}}}
848
+
849
+ Plot (scatter or line chart):
850
+ {{"my_key": {{"x": [...], "y": [...], "title": "...", "x_label": "...", "y_label": "..."}}}}
851
+
852
+ Grid (heatmap / matrix):
853
+ {{"my_key": {{"z": [[...]], "x": [...], "y": [...], "title": "..."}}}}
854
+
855
+ Example — null percentage bar chart:
856
+ return {{
857
+ "null_pct_chart": {{
858
+ "labels": list(null_pcts.keys()),
859
+ "counts": list(null_pcts.values()),
860
+ "title": "Missing Values (%) per Column"
861
+ }}
862
+ }}
863
+
864
+ When the user asks for any kind of visualization (e.g. "show me a chart
865
+ of X", "visualize the distribution of Y"), you MUST generate a customNode
866
+ that returns a dict with one of the shapes above. NEVER refuse — if no
867
+ template covers it, invent the chart with the shape protocol.
868
+
306
869
  ══ SURGICAL PRESERVATION RULES ══════════════════════════════════════
307
870
  - Every node marked ✓ or ○ must appear in the output VERBATIM
308
871
  (same id, type, position, code, values — character for character)
@@ -320,6 +883,8 @@ All model nodes support these fields in their config — no new node needed:
320
883
  ══ OUTPUT ═══════════════════════════════════════════════════════════
321
884
  Return ONLY:
322
885
  {{"nodes": [...], "edges": [...], "summary": "<one sentence conversational explanation>"}}
886
+
887
+ ⚠️ DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
323
888
  """
324
889
 
325
890
 
@@ -333,8 +898,17 @@ def _extract_file_path(context: str) -> str | None:
333
898
  return None
334
899
 
335
900
 
336
- def _build_generate_messages(prompt: str, context: str | None) -> list[dict]:
337
- system = _SYSTEM_PROMPT.format(catalogue=_template_catalogue())
901
+ def _build_generate_messages(
902
+ prompt: str,
903
+ context: str | None,
904
+ pre_flight: dict | None = None,
905
+ architect_plan: str | None = None,
906
+ custom_components: list[dict] | None = None,
907
+ ) -> list[dict]:
908
+ system = _SYSTEM_PROMPT.format(catalogue=_template_catalogue(custom_components))
909
+
910
+ profile_block = ""
911
+ path_hint = ""
338
912
 
339
913
  if context:
340
914
  fp = _extract_file_path(context)
@@ -343,13 +917,10 @@ def _build_generate_messages(prompt: str, context: str | None) -> list[dict]:
343
917
  f'Default value for csv_loader → file_path: Annotated[str,"file"] = "{fp}"\n'
344
918
  if fp else ""
345
919
  )
346
-
347
- # ── Enrich context with data profile if a file path is present ──
348
- profile_block = ""
349
920
  if fp:
350
921
  try:
351
922
  import pandas as pd
352
- df = pd.read_csv(fp, nrows=5000) # sample for speed
923
+ df = pd.read_csv(fp, nrows=5000)
353
924
  profile = profile_dataframe(df)
354
925
  profile_block = (
355
926
  "\n== Dataset Summary (auto-profiled) ==\n"
@@ -359,15 +930,29 @@ def _build_generate_messages(prompt: str, context: str | None) -> list[dict]:
359
930
  except Exception as exc:
360
931
  logger.warning("data_profiler skipped: %s", exc)
361
932
 
362
- user = (
363
- f"== DATASET CONTEXT ==\n"
364
- f"{profile_block}"
365
- f"{path_hint}\n"
366
- f"== REQUEST ==\n"
367
- f"{prompt}"
933
+ # ── Inject deterministic pre-flight analysis ──────────────────────────
934
+ pre_flight_block = ""
935
+ if pre_flight:
936
+ pre_flight_block = "\n" + _pre_flight_block(pre_flight) + "\n"
937
+
938
+ # ── Inject architect plan ─────────────────────────────────────────────
939
+ architect_block = ""
940
+ if architect_plan and architect_plan.strip():
941
+ architect_block = (
942
+ "\n== EXPERT ARCHITECTURE PLAN (follow this closely) ==\n"
943
+ + architect_plan.strip()
944
+ + "\n"
368
945
  )
369
- else:
370
- user = prompt
946
+
947
+ user = (
948
+ f"== DATASET CONTEXT ==\n"
949
+ f"{profile_block}"
950
+ f"{path_hint}"
951
+ f"{pre_flight_block}"
952
+ f"{architect_block}"
953
+ f"\n== REQUEST ==\n"
954
+ f"{prompt}"
955
+ )
371
956
 
372
957
  return [{"role": "system", "content": system}, {"role": "user", "content": user}]
373
958
 
@@ -419,6 +1004,10 @@ def _slim_flow(flow: FlowSchema) -> dict:
419
1004
  except Exception:
420
1005
  pass
421
1006
 
1007
+ is_template = node_type in {t["id"] for t in TEMPLATES}
1008
+ # Only send code for truly custom nodes; never for templates.
1009
+ code_str = None if is_template else (data.get("code") or "")[:800] or None
1010
+
422
1011
  slim_nodes.append({
423
1012
  "id": node.get("id"),
424
1013
  "type": node_type,
@@ -426,7 +1015,7 @@ def _slim_flow(flow: FlowSchema) -> dict:
426
1015
  "data": {
427
1016
  "label": data.get("label"),
428
1017
  "templateId": data.get("templateId"),
429
- "code": (data.get("code") or "")[:800] or None,
1018
+ "code": code_str,
430
1019
  "values": data.get("values"),
431
1020
  # Explicit handle lists — LLM MUST use these for edge sourceHandle/targetHandle
432
1021
  "available_outputs": outputs,
@@ -436,10 +1025,15 @@ def _slim_flow(flow: FlowSchema) -> dict:
436
1025
  return {"nodes": slim_nodes, "edges": flow.edges}
437
1026
 
438
1027
 
439
- def _build_update_messages(prompt: str, current_flow: FlowSchema, context: str | None) -> list[dict]:
1028
+ def _build_update_messages(
1029
+ prompt: str,
1030
+ current_flow: FlowSchema,
1031
+ context: str | None,
1032
+ custom_components: list[dict] | None = None,
1033
+ ) -> list[dict]:
440
1034
  slim = _slim_flow(current_flow)
441
1035
  node_status = _node_status_summary(current_flow)
442
- catalogue = _template_catalogue()
1036
+ catalogue = _template_catalogue(custom_components)
443
1037
 
444
1038
  system = _UPDATE_PROMPT.format(
445
1039
  current_flow=json.dumps(slim, indent=2),
@@ -493,7 +1087,8 @@ async def _call_openrouter(
493
1087
  ) -> str:
494
1088
  """
495
1089
  Route to the right model via OpenRouter based on task type.
496
- Falls back through LLaMA Mistral 7B on failure.
1090
+ Falls back through the full pool of live free models on rate-limit or error.
1091
+ Rate-limited models are skipped for _RATE_LIMIT_TTL seconds to avoid wasted calls.
497
1092
  """
498
1093
  effective_key = _get_api_key()
499
1094
  if not effective_key:
@@ -504,7 +1099,37 @@ async def _call_openrouter(
504
1099
  )
505
1100
 
506
1101
  model = _MODELS.get(task, _MODELS["generate"])
507
- fallback_chain = [model, _MODELS["fallback"], _MODELS["lastresort"]]
1102
+
1103
+ # Full pool of verified-live free models (May 2026).
1104
+ # Ordered by observed reliability: nemotron first (proven to succeed when llama 429s).
1105
+ _FREE_POOL = [
1106
+ "nvidia/nemotron-3-super-120b-a12b:free", # proven to work
1107
+ "openai/gpt-oss-120b:free",
1108
+ "openai/gpt-oss-20b:free",
1109
+ "nousresearch/hermes-3-llama-3.1-405b:free",
1110
+ "meta-llama/llama-3.3-70b-instruct:free",
1111
+ "google/gemma-4-31b-it:free",
1112
+ "google/gemma-4-26b-a4b-it:free",
1113
+ "nvidia/nemotron-3-nano-30b-a3b:free",
1114
+ "nvidia/nemotron-nano-9b-v2:free",
1115
+ "meta-llama/llama-3.2-3b-instruct:free",
1116
+ "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
1117
+ "liquid/lfm-2.5-1.2b-instruct:free",
1118
+ ]
1119
+
1120
+ # Build chain: primary model first, then full pool (deduped, order preserved)
1121
+ seen: set[str] = set()
1122
+ full_chain: list[str] = []
1123
+ for m in [model, _MODELS["fallback"], _MODELS["lastresort"]] + _FREE_POOL:
1124
+ if m not in seen:
1125
+ seen.add(m)
1126
+ full_chain.append(m)
1127
+
1128
+ # Skip models in cooldown — place them at the end so they still get a chance
1129
+ # if everything else fails (cooldown may have expired by then)
1130
+ ready = [m for m in full_chain if not _is_rate_limited(m)]
1131
+ cooling = [m for m in full_chain if _is_rate_limited(m)]
1132
+ fallback_chain = ready + cooling # try fresh models first
508
1133
 
509
1134
  headers = {
510
1135
  "Authorization": f"Bearer {effective_key}",
@@ -514,35 +1139,46 @@ async def _call_openrouter(
514
1139
  }
515
1140
 
516
1141
  last_exc: Exception | None = None
517
- for attempt_model in fallback_chain:
518
- # Do NOT send response_format — not all OpenRouter models support json_object mode.
519
- # JSON is enforced through the system prompt instead.
520
- # max_tokens prevents truncated responses that produce partial/invalid JSON.
521
- body: dict = {
522
- "model": attempt_model,
523
- "messages": messages,
524
- "max_tokens": 8192,
525
- }
1142
+ async with httpx.AsyncClient(timeout=timeout) as client:
1143
+ for attempt_model in fallback_chain:
1144
+ body: dict = {
1145
+ "model": attempt_model,
1146
+ "messages": messages,
1147
+ "max_tokens": 8192,
1148
+ }
526
1149
 
527
- try:
528
- async with httpx.AsyncClient(timeout=timeout) as client:
1150
+ try:
529
1151
  response = await client.post(_OPENROUTER_URL, headers=headers, json=body)
530
1152
 
531
- if response.status_code == 429:
532
- raise RuntimeError(f"Rate limited on {attempt_model}")
533
- if response.status_code == 401:
534
- raise RuntimeError("Invalid OpenRouter API key")
535
- response.raise_for_status()
536
-
537
- content = response.json()["choices"][0]["message"]["content"]
538
- if attempt_model != model:
539
- logger.info("OpenRouter fallback: used %s instead of %s", attempt_model, model)
540
- return content or ""
541
-
542
- except Exception as exc:
543
- logger.warning("OpenRouter model %s failed: %s", attempt_model, exc)
544
- last_exc = exc
545
- continue
1153
+ if response.status_code == 429:
1154
+ _mark_rate_limited(attempt_model)
1155
+ raise RuntimeError(f"Rate limited on {attempt_model}")
1156
+ if response.status_code == 401:
1157
+ raise RuntimeError("Invalid OpenRouter API key")
1158
+ response.raise_for_status()
1159
+
1160
+ content = response.json()["choices"][0]["message"]["content"]
1161
+ if attempt_model != model:
1162
+ logger.info("OpenRouter fallback: used %s instead of %s", attempt_model, model)
1163
+ return content or ""
1164
+
1165
+ except httpx.ConnectError as exc:
1166
+ # If we cannot resolve DNS or connect to the host, no fallback will work.
1167
+ logger.error("Network connection to OpenRouter failed: %s", exc)
1168
+ raise RuntimeError("Could not connect to OpenRouter (Network/DNS error). Please check your internet connection.")
1169
+ except RuntimeError as exc:
1170
+ if "Invalid OpenRouter API key" in str(exc):
1171
+ raise # Don't retry — wrong key won't fix itself
1172
+ logger.warning("OpenRouter model %s failed: %s", attempt_model, exc)
1173
+ last_exc = exc
1174
+ continue
1175
+ except Exception as exc:
1176
+ if "getaddrinfo failed" in str(exc):
1177
+ logger.error("DNS resolution failed for OpenRouter: %s", exc)
1178
+ raise RuntimeError("Could not resolve OpenRouter domain. Please check your internet connection.")
1179
+ logger.warning("OpenRouter model %s failed: %s", attempt_model, exc)
1180
+ last_exc = exc
1181
+ continue
546
1182
 
547
1183
  raise RuntimeError(f"All OpenRouter models failed. Last error: {last_exc}")
548
1184
 
@@ -555,7 +1191,8 @@ def _extract_json_object(raw: str) -> dict:
555
1191
  - Preamble text in any language before the JSON
556
1192
  - Markdown code fences (```json ... ``` or ``` ... ```)
557
1193
  - Trailing explanation text after the JSON
558
- - Truncated responses (returns whatever was parseable)
1194
+ - Truncated responses (free-tier model cut-offs) — try-repair appends
1195
+ missing closing brackets/braces to recover a parseable object.
559
1196
  """
560
1197
  raw = raw.strip()
561
1198
 
@@ -579,6 +1216,7 @@ def _extract_json_object(raw: str) -> dict:
579
1216
 
580
1217
  # 3. Brace-matching: find the first complete JSON object in the text
581
1218
  start = raw.find('{')
1219
+ best_candidate: str | None = None
582
1220
  if start != -1:
583
1221
  depth = 0
584
1222
  in_string = False
@@ -603,10 +1241,36 @@ def _extract_json_object(raw: str) -> dict:
603
1241
  try:
604
1242
  return json.loads(candidate)
605
1243
  except json.JSONDecodeError:
606
- break # malformed — fall through to error
1244
+ break # malformed — fall through to repair
1245
+ # Capture the partial object for repair attempts
1246
+ best_candidate = raw[start:]
1247
+
1248
+ # 4. Try-repair: the response was likely truncated by the model's token limit.
1249
+ # Progressively append closing characters until we get a valid object.
1250
+ # We try up to 24 combinations: 0-4 extra ']' crossed with 0-4 extra '}',
1251
+ # returning on the first candidate that parses (keeps invented structure minimal).
1252
+ candidate_base = best_candidate or raw
1253
+ # Trim trailing whitespace/comma that often appears before cut-off
1254
+ candidate_base = candidate_base.rstrip().rstrip(",")
1255
+ logger.debug("JSON repair: attempting to salvage truncated output (%d chars)", len(candidate_base))
1256
+ for extra_brackets in range(5): # 0 … 4 extra ]
1257
+ for extra_braces in range(5): # 0 … 4 extra }
1258
+ if extra_brackets == 0 and extra_braces == 0:
1259
+ continue # already tried the plain candidate
1260
+ repaired = candidate_base + ("\n]" * extra_brackets) + ("\n}" * extra_braces)
1261
+ try:
1262
+ result = json.loads(repaired)
1263
+ logger.warning(
1264
+ "JSON repair succeeded (+%d ']', +%d '}'). "
1265
+ "Free-tier model likely truncated its output.",
1266
+ extra_brackets, extra_braces,
1267
+ )
1268
+ return result
1269
+ except json.JSONDecodeError:
1270
+ continue
607
1271
 
608
1272
  raise ValueError(
609
- f"LLM returned invalid JSON (could not extract object).\nRaw: {raw[:600]}"
1273
+ f"LLM returned invalid JSON (could not extract or repair object).\nRaw: {raw[:600]}"
610
1274
  )
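A worked example of the repair pass (the truncated string is invented, not captured from a real model):

    raw = '{"nodes": [{"id": "n1", "type": "csv_loader"},'
    # json.loads(raw) raises JSONDecodeError: the list and object were never closed.
    # After .rstrip().rstrip(","), the loop above retries with appended closers and
    # succeeds at one extra ']' plus one extra '}':
    json.loads('{"nodes": [{"id": "n1", "type": "csv_loader"}' + "\n]" + "\n}")
    # -> {"nodes": [{"id": "n1", "type": "csv_loader"}]}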
611
1275
 
612
1276
 
@@ -650,9 +1314,22 @@ You are a surgical ML pipeline editor. The user wants to REFINE an existing pipe
650
1314
  1. Output ONLY a single raw JSON object. No text before it, no text after it.
651
1315
  2. NEVER wrap the JSON in markdown fences (no ```json, no ```).
652
1316
  3. ALWAYS respond in ENGLISH. Never use any other language.
653
- 4. If you cannot help, return: {{"node_changes":[],"edge_changes":[],"summary":"Cannot process this request."}}
1317
+ 4. DO NOT EVER output a summary like "Cannot process this request." If a template is missing, you MUST write the `code` for it as a `customNode` instead of giving up.
654
1318
  5. Do NOT truncate the JSON — it must be a complete, valid object.
655
1319
 
1320
+ 🔑 TOKEN CONSERVATION — MANDATORY (free-tier models have strict output limits):
1321
+ A. TEMPLATE SWAPS: If you are changing a model or operation that already has a
1322
+ matching Template ID (e.g. swapping linear_regression for
1323
+ random_forest_regressor), set only the "type" field on the update entry.
1324
+ NEVER output the full Python "code" block when a Template already exists.
1325
+ Example — correct: {{"action":"update","id":"n5","data":{{"type":"random_forest_regressor","values":{{"n_estimators":200}}}}}}
1326
+ Example — WRONG: {{"action":"update","id":"n5","data":{{"code":"import ..."}}}} ← wastes tokens
1327
+ B. VALUES ONLY: In "node_changes", omit the "code" field entirely unless the
1328
+ user explicitly asked for a custom code change. If only config parameters
1329
+ changed, output ONLY the "values" dictionary — nothing else inside "data".
1330
+ Example — correct: {{"action":"update","id":"n3","data":{{"values":{{"test_size":0.3}}}}}}
1331
+ Example — WRONG: {{"action":"update","id":"n3","data":{{"values":{{"test_size":0.3}},"code":"...entire node..."}}}}
1332
+
656
1333
  Output ONLY a JSON patch object describing the minimal set of changes needed.
657
1334
 
658
1335
  ══ CURRENT PIPELINE ═════════════════════════════════════════════════
@@ -678,13 +1355,15 @@ Output ONLY a JSON patch object describing the minimal set of changes needed.
678
1355
  }},
679
1356
  {{
680
1357
  "action": "add",
681
- "id": "<new unique id e.g. node_cv_1>",
682
- "type": "<template_id>",
1358
+ "id": "<new unique id e.g. node_custom_1>",
1359
+ "type": "<template_id or 'customNode'>",
683
1360
  "reason": "<why this node is added>",
684
1361
  "position": {{"x": <number>, "y": <number>}},
685
1362
  "data": {{
1363
+ "label": "<optional Descriptive Name>",
1364
+ "templateId": "<optional templateId or 'customNode'>",
686
1365
  "config": {{}},
687
- "code": "<optional override>"
1366
+ "code": "<optional full Python source, REQUIRED if type is customNode>"
688
1367
  }}
689
1368
  }},
690
1369
  {{
@@ -721,7 +1400,8 @@ Before emitting ANY patch entry, ask:
721
1400
  YES → emit one "update" for that node. No new nodes.
722
1401
 
723
1402
  3. Does this need a genuinely new computation node?
724
- YES → emit one "add". Minimise new edges.
1403
+ YES → emit one "add".
1404
+ CRITICAL: If the requested operation (e.g., RobustScaler) is NOT in the catalogue, DO NOT refuse the request. Instead, YOU MUST generate it dynamically as a `customNode`. Provide the complete Python implementation in the `code` field, starting with `# ✨ AI GENERATED`. In your `summary`, explicitly state that you generated a custom node.
725
1405
 
726
1406
  4. None of the above?
727
1407
  → Explain in "summary". Return empty node_changes.
@@ -734,7 +1414,19 @@ Before emitting ANY patch entry, ask:
734
1414
  5. "summary" → one plain-English sentence describing the change.
735
1415
  6. DataFrame param MUST be named data (never "df").
736
1416
  7. Include all imports inside any code block.
737
- 8. Only use template types from the catalogue above.
1417
+ 8. Only use template types from the catalogue above, unless building a customNode.
1418
+
1419
+ ══ CUSTOM NODE RULES (when generating a missing component) ════════════
1420
+ If you use `type="customNode"`, your Python string in `code` MUST follow the exact same architecture as built-in templates:
1421
+ - It MUST define a function named EXACTLY `def run(...) -> dict:`
1422
+ - The primary input dataset MUST be named `data` (e.g., `def run(data: pd.DataFrame, ...) -> dict:`)
1423
+ - It MUST return a dictionary containing the outputs (e.g., `return {{"X_train": X_train, "X_test": X_test}}`)
1424
+ - If you are building a custom visualization, you MUST wrap your output in one of these keys so the UI can render it:
1425
+ 'histogram', 'correlation_matrix', 'value_counts', 'box_plot', 'prediction', 'correlation_heatmap', 'missing_value_map', 'class_balance', 'feature_target_scatter', 'model_error_histogram', 'partial_dependence', 'roc_curves'
1426
+ - All `import` statements MUST be placed at the top of the code string.
1427
+ - You MUST include the `# ✨ AI GENERATED` marker right after the imports, as shown in the example below.
1428
+ Example:
1429
+ "import pandas as pd\\nfrom sklearn.preprocessing import RobustScaler\\n\\n# ✨ AI GENERATED\\ndef run(data=None, X_train=None, X_test=None):\\n scaler = RobustScaler()\\n # ... logic ...\\n return {{\"X_train\": X_train_scaled, \"X_test\": X_test_scaled}}"
738
1430
 
739
1431
  ══ CONNECTION RULES (CRITICAL — read carefully) ══════════════════════
740
1432
  Each node in the current pipeline has "available_outputs" and "available_inputs"
@@ -749,6 +1441,12 @@ To fix a wrong connection:
749
1441
  1. Emit "remove" for the bad edge (use its id from the current edges list).
750
1442
  2. Emit "add" for the correct edge using valid handle names from the lists above.
751
1443
 
1444
+ SCALER CONNECTION RULES:
1445
+ When inserting or reconnecting a standard_scaler or min_max_scaler after a train_test_split, you MUST:
1446
+ 1. Connect all 4 outputs (X_train, X_test, y_train, y_test) from the split node to the scaler inputs.
1447
+ 2. Connect all 4 outputs (X_train, X_test, y_train, y_test) from the scaler node to the downstream model inputs.
1448
+ Never skip passing y_train and y_test through the scaler node!
1449
+
752
1450
  To add a missing connection:
753
1451
  1. Look at the source node's "available_outputs" — pick the right output.
754
1452
  2. Look at the target node's "available_inputs" — pick the right input.
@@ -813,7 +1511,10 @@ def _custom_node_catalogue(current_flow: FlowSchema) -> str:
813
1511
 
814
1512
 
815
1513
  def _build_refine_messages(
816
- prompt: str, current_flow: FlowSchema, context: str | None
1514
+ prompt: str,
1515
+ current_flow: FlowSchema,
1516
+ context: str | None,
1517
+ custom_components: list[dict] | None = None,
817
1518
  ) -> list[dict]:
818
1519
  slim = _slim_flow(current_flow)
819
1520
  node_status = _node_status_summary(current_flow)
@@ -822,15 +1523,13 @@ def _build_refine_messages(
822
1523
  custom_section = (
823
1524
  f"\n══ CUSTOM NODES ON CANVAS (treat these as valid, usable nodes) ═══════\n"
824
1525
  f"{custom_cat}\n"
825
- if custom_cat else ""
826
- )
1526
+ ) if custom_cat else ""
827
1527
 
828
1528
  system = _REFINE_PROMPT.format(
829
1529
  current_flow=json.dumps(slim, indent=2),
830
1530
  node_status=node_status,
831
- catalogue=_template_catalogue() + custom_section,
1531
+ catalogue=_template_catalogue(custom_components) + custom_section,
832
1532
  )
833
-
834
1533
  ctx_block = (
835
1534
  f"══ DATASET CONTEXT ══════════════════════════════════════\n{context.strip()}\n\n"
836
1535
  if context else ""
@@ -867,24 +1566,81 @@ def _parse_refine_patch(raw: str) -> RefinePatch:
867
1566
 
868
1567
  # ── Public API ─────────────────────────────────────────────────────────────────
869
1568
 
870
- async def generate_flow(prompt: str, context: str | None = None) -> FlowSchema:
871
- messages = _build_generate_messages(prompt, context)
872
- raw = await _call_openrouter(messages, task="generate", json_mode=True, timeout=_TIMEOUT_GENERATE)
1569
+ async def generate_flow(prompt: str, context: str | None = None, custom_components: list[dict] | None = None) -> FlowSchema:
1570
+ """
1571
+ Two-call Architect Builder strategy:
1572
+
1573
+ Call 1 (Architect — deepseek-r1):
1574
+ Given the data profile + deterministic pre-flight analysis, produce a
1575
+ concise markdown plan: problem type, preprocessing steps, model choice.
1576
+
1577
+ Call 2 (Builder — deepseek-chat-v3):
1578
+ Given the Architect's plan + the same context, produce the final JSON flow.
1579
+ The Builder focuses on correct syntax and edge connections, not reasoning.
1580
+ """
1581
+ pre_flight: dict = {}
1582
+ profile_text: str = ""
1583
+ architect_plan: str = ""
1584
+
1585
+ # ── Pre-flight: deterministic data analysis ───────────────────────────
1586
+ if context:
1587
+ fp = _extract_file_path(context)
1588
+ if fp:
1589
+ try:
1590
+ import pandas as pd
1591
+ df = pd.read_csv(fp, nrows=5000)
1592
+ profile = profile_dataframe(df)
1593
+ profile_text = format_profile_for_prompt(profile)
1594
+ pre_flight = _determine_pre_flight(profile, prompt, context, csv_path=fp)
1595
+ logger.info(
1596
+ "Pre-flight: problem_type=%s target=%s model=%s encoding=%s scaling=%s",
1597
+ pre_flight["problem_type"], pre_flight["target_hint"],
1598
+ pre_flight.get("recommended_model"), pre_flight["needs_encoding"],
1599
+ pre_flight["needs_scaling"],
1600
+ )
1601
+ except Exception as exc:
1602
+ logger.warning("pre-flight analysis skipped: %s", exc)
1603
+
1604
+ # ── Call 1: Architect (R1 reasoning model) ────────────────────────────
1605
+ if profile_text and pre_flight:
1606
+ try:
1607
+ arch_messages = _build_architect_messages(prompt, profile_text, pre_flight)
1608
+ architect_plan = await _call_openrouter(
1609
+ arch_messages,
1610
+ task="architect",
1611
+ json_mode=False,
1612
+ timeout=_TIMEOUT_GENERATE,
1613
+ )
1614
+ logger.info("Architect plan: %d chars", len(architect_plan))
1615
+ except Exception as exc:
1616
+ logger.warning("Architect call failed, continuing without plan: %s", exc)
1617
+ architect_plan = ""
1618
+
1619
+ # ── Call 2: Builder (chat model — fast, syntax-precise JSON) ─────────
1620
+ build_messages = _build_generate_messages(
1621
+ prompt, context,
1622
+ pre_flight=pre_flight or None,
1623
+ architect_plan=architect_plan or None,
1624
+ custom_components=custom_components,
1625
+ )
1626
+ raw = await _call_openrouter(
1627
+ build_messages, task="generate", json_mode=True, timeout=_TIMEOUT_GENERATE
1628
+ )
873
1629
  return _parse_flow(raw)
874
1630
 
875
1631
 
876
1632
  async def update_flow(
877
- prompt: str, current_flow: FlowSchema, context: str | None = None
1633
+ prompt: str, current_flow: FlowSchema, context: str | None = None, custom_components: list[dict] | None = None
878
1634
  ) -> FlowSchema:
879
- messages = _build_update_messages(prompt, current_flow, context)
1635
+ messages = _build_update_messages(prompt, current_flow, context, custom_components)
880
1636
  raw = await _call_openrouter(messages, task="update", json_mode=True, timeout=_TIMEOUT_UPDATE)
881
1637
  return _parse_flow(raw)
882
1638
 
883
1639
 
884
1640
  async def refine_flow(
885
- prompt: str, current_flow: FlowSchema, context: str | None = None
1641
+ prompt: str, current_flow: FlowSchema, context: str | None = None, custom_components: list[dict] | None = None
886
1642
  ) -> RefinePatch:
887
- messages = _build_refine_messages(prompt, current_flow, context)
1643
+ messages = _build_refine_messages(prompt, current_flow, context, custom_components)
888
1644
  raw = await _call_openrouter(messages, task="refine", json_mode=True, timeout=_TIMEOUT_UPDATE)
889
1645
  return _parse_refine_patch(raw)
890
1646
 
@@ -934,6 +1690,32 @@ async def explain_flow(flow: FlowSchema) -> str:
934
1690
  return await _call_openrouter(messages, task="explain", json_mode=False, timeout=60)
935
1691
 
936
1692
 
1693
+ _EXPLAIN_CHAT_SYSTEM = """\
1694
+ You are an expert ML engineering assistant answering questions about a user's machine learning pipeline.
1695
+
1696
+ ══ EXPLANATION CONTEXT ═════════════════════════════════════════════════════
1697
+ {explanation}
1698
+
1699
+ ══ PIPELINE AND EXECUTION RESULTS ══════════════════════════════════════════
1700
+ {current_flow}
1701
+
1702
+ Respond concisely and directly to the user's question. Provide actionable, specific advice based on the existing nodes, their configurations, and any metrics or execution results present in the pipeline state.
1703
+ Do NOT use markdown tables in your response. Instead, use simple bullet points and short paragraphs. Do not use generic filler.
1704
+ """
1705
+
1706
+ async def chat_explanation(question: str, explanation: str, flow: FlowSchema) -> str:
1707
+ slim = _slim_flow(flow)
1708
+ system = _EXPLAIN_CHAT_SYSTEM.format(
1709
+ explanation=explanation,
1710
+ current_flow=json.dumps(slim, indent=2)
1711
+ )
1712
+ messages = [
1713
+ {"role": "system", "content": system},
1714
+ {"role": "user", "content": question},
1715
+ ]
1716
+ # Routed via the "explain" model — no dedicated "chat" task exists in _MODELS.
+ return await _call_openrouter(messages, task="explain", json_mode=False, timeout=60)
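+ # Example usage (hypothetical question and flow):
+ #   answer = await chat_explanation("Why is the F1 score low?", explanation_text, flow)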
1717
+
1718
+
937
1719
  # ── Self-Healing Debug Prompt ──────────────────────────────────────────────────
938
1720
 
939
1721
  _DEBUG_SYSTEM = """\
@@ -1128,6 +1910,7 @@ async def handle_user_request(
1128
1910
  prompt: str,
1129
1911
  current_flow: FlowSchema | None = None,
1130
1912
  context: str | None = None,
1913
+ custom_components: list[dict] | None = None,
1131
1914
  ) -> dict:
1132
1915
  """
1133
1916
  Single entry point that classifies the prompt and routes to the
@@ -1144,7 +1927,7 @@ async def handle_user_request(
1144
1927
  # No existing flow → always generate from scratch
1145
1928
  has_flow = current_flow is not None and len(current_flow.nodes) > 0
1146
1929
  if not has_flow:
1147
- flow = await generate_flow(prompt, context)
1930
+ flow = await generate_flow(prompt, context, custom_components=custom_components)
1148
1931
  return {"intent": "generate", "result_type": "flow", "flow": flow}
1149
1932
 
1150
1933
  intent = detect_intent(prompt, has_flow=True)
@@ -1159,17 +1942,17 @@ async def handle_user_request(
1159
1942
  for n in current_flow.nodes
1160
1943
  ):
1161
1944
  # Custom nodes exist — use update so they're visible to the LLM
1162
- flow = await update_flow(prompt, current_flow, context)
1945
+ flow = await update_flow(prompt, current_flow, context, custom_components=custom_components)
1163
1946
  return {"intent": "update", "result_type": "flow", "flow": flow}
1164
- flow = await generate_flow(prompt, context)
1947
+ flow = await generate_flow(prompt, context, custom_components=custom_components)
1165
1948
  return {"intent": "generate", "result_type": "flow", "flow": flow}
1166
1949
 
1167
1950
  if intent == "refine":
1168
- patch = await refine_flow(prompt, current_flow, context)
1951
+ patch = await refine_flow(prompt, current_flow, context, custom_components=custom_components)
1169
1952
  return {"intent": "refine", "result_type": "patch", "patch": patch}
1170
1953
 
1171
1954
  # intent == "update"
1172
- flow = await update_flow(prompt, current_flow, context)
1955
+ flow = await update_flow(prompt, current_flow, context, custom_components=custom_components)
1173
1956
  return {"intent": "update", "result_type": "flow", "flow": flow}
1174
1957
 
1175
1958
 
@@ -1310,3 +2093,79 @@ async def suggest_improvements(flow: FlowSchema, results: dict) -> list[str]:
1310
2093
  # Sanitise: only strings, max 120 chars each
1311
2094
  return [str(s)[:120] for s in suggestions if s]
1312
2095
 
2096
+
2097
+ # ── Custom node code generation ────────────────────────────────────────────────
2098
+
2099
+ _NODE_CODE_SYSTEM = """\
2100
+ You are an M8Flow node code generator. Write Python code for a reusable pipeline component.
2101
+
2102
+ ⚠️ HARD RULES — any violation makes the node unparseable:
2103
+ 1. Function name MUST be run (not main, process, execute, transform — exactly run)
2104
+ 2. DataFrame input parameter MUST be named data (never df, dataframe, dataset)
2105
+ 3. Function MUST return a dict with named string keys
2106
+ 4. ALL imports go INSIDE the function body
2107
+ 5. Only allowed libraries: pandas, numpy, sklearn, scipy, math, statistics, re, json
2108
+ 6. FORBIDDEN: os, sys, subprocess, socket, requests, open(), eval(), exec(), matplotlib
2109
+
2110
+ FIELD TYPE ANNOTATIONS — these control the UI widget shown to the user:
2111
+ data input (connects from previous node) → just `data` with no type hint
2112
+ text field → `name: str = "default"`
2113
+ number field → `name: float = 1.0` or `name: int = 10`
2114
+ boolean toggle → `name: bool = True`
2115
+ column picker → `col: Annotated[str, "column"] = "target"` (needs `from typing import Annotated` inside the fn)
2116
+ file picker → `path: Annotated[str, "file"] = "data.csv"` (needs `from typing import Annotated` inside the fn)
2117
+
2118
+ RETURN DICT — keys become the node's output handles:
2119
+ Passing a DataFrame forward → always include "data": df
2120
+ Model outputs → {"model": model, "y_pred": preds}
2121
+ Metric outputs → {"accuracy": 0.95, "f1": 0.88}
2122
+ Multiple outputs are fine → {"data": df, "rows_removed": n}
2123
+
2124
+ EXAMPLE — outlier removal node:
2125
+ def run(data, multiplier: float = 1.5) -> dict:
2126
+ import pandas as pd
2127
+ import numpy as np
2128
+ df = data.copy()
2129
+ for col in df.select_dtypes(include=[np.number]).columns:
2130
+ Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
2131
+ iqr = Q3 - Q1
2132
+ df = df[~((df[col] < Q1 - multiplier * iqr) | (df[col] > Q3 + multiplier * iqr))]
2133
+ return {"data": df, "rows_removed": len(data) - len(df)}
2134
+
2135
+ EXAMPLE — feature selection node:
2136
+ def run(data, n_features: int = 10, target: str = "label") -> dict:
2137
+ import pandas as pd
2138
+ from sklearn.feature_selection import SelectKBest, f_classif
2139
+ X = data.drop(columns=[target])
2140
+ y = data[target]
2141
+ selector = SelectKBest(f_classif, k=min(n_features, X.shape[1]))
2142
+ selector.fit(X, y)
2143
+ selected = X.columns[selector.get_support()].tolist()
2144
+ return {"data": data[selected + [target]], "selected_features": selected}
2145
+
2146
+ OUTPUT: Return ONLY the raw Python code. No explanation. No markdown fences. No backticks.
2147
+ """
2148
+
2149
+
2150
+ async def generate_node_code(description: str) -> str:
2151
+ """
2152
+ Generate M8Flow-compatible Python node code from a natural language description.
2153
+ Uses the "generate" model route (fast, syntax-precise) with the node code system prompt.
2154
+ """
2155
+ messages = [
2156
+ {"role": "system", "content": _NODE_CODE_SYSTEM},
2157
+ {"role": "user", "content": f"Generate an M8Flow node that: {description}"},
2158
+ ]
2159
+ raw = await _call_openrouter(messages, task="generate", json_mode=False, timeout=60)
2160
+
2161
+ # Strip any markdown fences the model may add despite instructions
2162
+ raw = raw.strip()
2163
+ for fence in ("```python", "```"):
2164
+ if raw.startswith(fence):
2165
+ raw = raw[len(fence):]
2166
+ break
2167
+ if raw.endswith("```"):
2168
+ raw = raw[:-3]
2169
+
2170
+ return raw.strip()
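+ # Example usage (hypothetical description):
+ #   code = await generate_node_code("remove rows where more than half the columns are null")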
2171
+