npm - m8flow - Versions diffs - 1.1.1 → 1.1.3 - Mend

m8flow 1.1.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/bundled/backend/api/routes/flows.py +43 -3
package/bundled/backend/api/routes/nodes.py +3 -3
package/bundled/backend/config.py +4 -2
package/bundled/backend/core/code_validator.py +1 -1
package/bundled/backend/core/executor.py +67 -0
package/bundled/backend/core/runtime.py +36 -1
package/bundled/backend/main.py +21 -0
package/bundled/backend/services/llm_service.py +611 -67
package/bundled/backend/services/pipeline_executor.py +41 -0
package/bundled/backend/templates.py +8 -13
package/bundled/frontend-dist/assets/index-BI1hb_gi.js +45 -0
package/bundled/frontend-dist/assets/index-D9h1Krrv.css +1 -0
package/bundled/frontend-dist/index.html +2 -2
package/package.json +1 -1
package/bundled/frontend-dist/assets/index-CKUZ27n8.css +0 -1
package/bundled/frontend-dist/assets/index-DNaB6zf0.js +0 -46

package/bundled/backend/services/llm_service.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""LLM service — OpenRouter-powered flow generation with per-task model routing."""
+"""LLM service — OpenRouter or Google Gemini flow generation with per-task model routing."""
 import json
 import logging
 import time
@@ -9,14 +9,30 @@ from domain.models import FlowSchema, RefinePatch, NodePatch, EdgePatch
 from templates import TEMPLATES
 from core.data_profiler import profile_dataframe, format_profile_for_prompt
-# Per-request API key override (set by the route handler from X-OpenRouter-Key header)
-_request_api_key: ContextVar[str | None] = ContextVar('request_api_key', default=None)
+# ── Per-request overrides (set by route handler from request headers) ─────────
+_request_api_key:       ContextVar[str | None] = ContextVar('request_api_key',       default=None)
+_request_gemini_key:    ContextVar[str | None] = ContextVar('request_gemini_key',    default=None)
+_request_mistral_key:   ContextVar[str | None] = ContextVar('request_mistral_key',   default=None)
+_request_model_override:ContextVar[str | None] = ContextVar('request_model_override',default=None)
+_request_agent_models:  ContextVar[dict | None] = ContextVar('request_agent_models', default=None)
 def _get_api_key() -> str:
-    """Return the request-scoped key if provided, otherwise fall back to env/config."""
     return _request_api_key.get() or config.OPENROUTER_API_KEY
+def _get_gemini_key() -> str | None:
+    """Return the request-scoped Gemini key, else the value from config."""
+    override = _request_gemini_key.get()
+    return override if override else config.GEMINI_API_KEY
+def _get_mistral_key() -> str | None:
+    """Return the request-scoped Mistral key, else the value from config."""
+    override = _request_mistral_key.get()
+    return override if override else config.MISTRAL_API_KEY
+def _use_gemini() -> bool:
+    """Report whether a non-empty Gemini key is available for this request."""
+    return True if _get_gemini_key() else False
+def _use_mistral() -> bool:
+    """Report whether a non-empty Mistral key is available for this request."""
+    return True if _get_mistral_key() else False
 logger = logging.getLogger(__name__)
 # ── OpenRouter config ─────────────────────────────────────────────────────────
@@ -24,6 +40,43 @@ _OPENROUTER_URL  = "https://openrouter.ai/api/v1/chat/completions"
 _TIMEOUT_GENERATE = 120
 _TIMEOUT_UPDATE   = 180
+# ── Google Gemini config ──────────────────────────────────────────────────────
+# Endpoint template: the Gemini call helpers fill `{model}` via str.format()
+# before each request.
+_GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
+# Task name → Gemini model ID. _call_gemini() looks the task up here and falls
+# back to the "generate" entry when the task name is not a key.
+_GEMINI_MODELS = {
+    # Fast, accurate structural blueprinting
+    "architect":  "gemini-2.5-flash",
+    # Core generation — handles multi-node creation flawlessly
+    "generate":   "gemini-2.5-flash",
+    "refine":     "gemini-2.5-flash",
+    "update":     "gemini-2.5-flash",
+    # Debugging / healing — intercepts errors and applies patches
+    "debug":      "gemini-2.5-flash",
+    "heal":       "gemini-2.5-flash",
+    # Explaining / suggesting — fast response for UI copy
+    "explain":    "gemini-2.5-flash-lite",
+    "suggest":    "gemini-2.5-flash-lite",
+    # Safety nets
+    "fallback":   "gemini-2.5-flash",
+    "lastresort": "gemini-2.5-flash-lite",
+}
+# ── Mistral config ────────────────────────────────────────────────────────────
+# Chat-completions endpoint posted to by the Mistral call helpers.
+_MISTRAL_URL = "https://api.mistral.ai/v1/chat/completions"
+# Task name → Mistral model ID, using the same task keys as _GEMINI_MODELS.
+# _call_mistral() falls back to the "generate" entry for unknown task names.
+# NOTE(review): "mistral-tiny" looks like a legacy alias — confirm it is still served.
+_MISTRAL_MODELS = {
+    "architect":  "codestral-latest",
+    "generate":   "mistral-small-latest",
+    "refine":     "mistral-small-latest",
+    "update":     "mistral-small-latest",
+    "debug":      "codestral-latest",
+    "heal":       "codestral-latest",
+    "explain":    "mistral-small-latest",
+    "suggest":    "mistral-small-latest",
+    "fallback":   "mistral-tiny",
+    "lastresort": "mistral-tiny",
+}
 # Per-task model routing — slugs verified live against OpenRouter API (May 2026)
 _MODELS = {
     # Reasoning/Architecting — best available free reasoning model
@@ -144,11 +197,30 @@ def _determine_pre_flight(profile: dict, prompt: str, context: str | None, csv_p
     import pandas as pd
     import numpy as np
-    dtypes              = profile.get("dtypes", {})
-    missing             = profile.get("missing", {})
-    numeric_summary     = profile.get("numeric_summary", {})
-    categorical_summary = profile.get("categorical_summary", {})
-    shape               = profile.get("shape", [0, 0])
+    df: pd.DataFrame | None = None
+    if csv_path:
+        try:
+            df = pd.read_csv(csv_path, nrows=5000)
+        except Exception:
+            pass
+    # Extract columns and properties reliably from the DataFrame if available
+    if df is not None:
+        all_columns = list(df.columns)
+        numeric_cols = set(df.select_dtypes(include=[np.number]).columns)
+        cat_cols = set(df.select_dtypes(exclude=[np.number]).columns)
+    else:
+        # Fallback to profile keys
+        numeric_cols = set(profile.get("numeric_features", []))
+        cat_cols = set(profile.get("categorical_features", []))
+        all_columns = list(numeric_cols | cat_cols)
+        # If still empty, try to parse from context string
+        if not all_columns and context:
+            m = re.search(r"Columns:\s*(.*)", context)
+            if m:
+                all_columns = [c.strip() for c in m.group(1).split(",")]
+    shape = profile.get("shape", [len(df) if df is not None else 0, len(all_columns)])
     # ── Step 1: Find the target column ────────────────────────────────────────
     # Priority: explicit mention in prompt/context  > heuristic column names.
@@ -169,37 +241,57 @@ def _determine_pre_flight(profile: dict, prompt: str, context: str | None, csv_p
         if m:
             candidate = m.group(1)
             # Validate the candidate actually exists in the data
-            if candidate in dtypes or candidate in numeric_summary or candidate in categorical_summary:
-                target_hint = candidate
+            # Use case-insensitive check
+            matched = next((c for c in all_columns if c.lower() == candidate), None)
+            if matched:
+                target_hint = matched
                 break
     # (b) If still unknown, use heuristic column-name scoring on the real columns
     if target_hint is None:
-        TARGET_KEYWORDS = [
-            "target", "label", "class", "output", "y", "result",
-            "outcome", "diagnosis", "status", "type", "category",
-            "survived", "churn", "default", "fraud", "price",
-            "salary", "value", "score", "sales", "revenue", "cost",
-            "medv", "charges", "fare",
-        ]
-        all_columns = list(dtypes.keys())
+        # ── Two tiers of keywords ─────────────────────────────────────────────
+        # STRONG: column IS the target in the vast majority of real datasets
+        # WEAK  : only a rough signal — requires additional confirmation
+        STRONG_KEYWORDS = {
+            "target", "label", "output", "y", "result",
+            "survived", "churn", "default", "fraud",
+            "price", "salary", "value", "sales", "revenue",
+            "cost", "medv", "charges", "fare", "income",
+            "score", "rate", "amount", "demand",
+        }
+        WEAK_KEYWORDS = {
+            "class", "outcome", "diagnosis", "status",
+            "type", "category", "flag", "ind", "indicator",
+        }
+        # all_columns is already defined above
         best_col: str | None = None
         best_score = -1
         for col in all_columns:
-            col_lower = col.lower().replace("_", " ").replace("-", " ")
+            # Tokenise: split on _ / - / space so "furnishingstatus" → ["furnishingstatus"]
+            # and "loan_status" → ["loan", "status"] — only whole tokens are matched.
+            col_lower  = col.lower()
+            col_tokens = set(re.split(r"[_\-\s]+", col_lower))
             score = 0
-            # Keyword match against column name
-            for kw in TARGET_KEYWORDS:
-                if kw in col_lower:
-                    score += 3
-                    break
-            # Last column is commonly the target in many datasets
-            if col == all_columns[-1]:
+            # Exact token match (e.g. col="price" → token="price" ∈ STRONG)
+            if col_tokens & STRONG_KEYWORDS:
+                score += 6
+            elif col_tokens & WEAK_KEYWORDS:
+                # Only a weak signal — avoids picking "furnishingstatus" over "price"
                 score += 2
-            # Column mentioned in prompt text
-            if col_lower in search_text or col.lower() in search_text:
+            # Full name contained in strong keywords (catches single-word col names)
+            if col_lower in STRONG_KEYWORDS:
+                score += 2   # bonus on top of token score
+            # Last column bonus (weaker than before)
+            if col == all_columns[-1]:
+                score += 1
+            # Column explicitly mentioned in prompt/context text
+            if col_lower in search_text or col in search_text.split():
                 score += 4
             if score > best_score:
@@ -327,43 +419,38 @@ def _determine_pre_flight(profile: dict, prompt: str, context: str | None, csv_p
         except Exception as exc:
             logger.warning("Pre-flight target analysis failed: %s", exc)
-            # Fall back to dtype-only heuristic using profile data
             if target_hint:
-                dtype_str = str(dtypes.get(target_hint, "")).lower()
-                if any(t in dtype_str for t in ("object", "category", "bool", "str")):
+                if target_hint in cat_cols:
                     problem_type = "classification"
-                elif target_hint in categorical_summary:
-                    problem_type = "classification" if categorical_summary[target_hint].get("unique", 99) < 15 else "regression"
-                elif target_hint in numeric_summary:
+                elif target_hint in numeric_cols:
                     problem_type = "regression"
     elif target_hint:
         # No CSV path — fall back to profile-based heuristic
-        dtype_str = str(dtypes.get(target_hint, "")).lower()
-        if any(t in dtype_str for t in ("object", "category", "bool", "str")):
+        if target_hint in cat_cols:
             problem_type = "classification"
-        elif target_hint in categorical_summary:
-            problem_type = "classification" if categorical_summary.get(target_hint, {}).get("unique", 99) < 15 else "regression"
-        elif target_hint in numeric_summary:
+        elif target_hint in numeric_cols:
             problem_type = "regression"
     # ── Step 3: Preprocessing flags ───────────────────────────────────────────
     # Detect categorical columns that need encoding (exclude the target itself)
-    cat_cols = {c for c in categorical_summary if c != target_hint}
-    num_cols = {c for c in numeric_summary if c != target_hint}
+    cat_cols_filtered = {c for c in cat_cols if c != target_hint}
+    num_cols_filtered = {c for c in numeric_cols if c != target_hint}
+    # Profile from data_profiler uses 'missing_values'
+    missing_dict = profile.get("missing_values", {})
     missing_cols: dict[str, float] = {
-        col: round(info.get("pct", 0), 1)
-        for col, info in missing.items()
-        if info.get("pct", 0) > 0
-    }
-    cardinality: dict[str, int] = {
-        col: info.get("unique", 0)
-        for col, info in categorical_summary.items()
+        col: float(pct) for col, pct in missing_dict.items() if pct > 0
     }
-    needs_encoding   = len(cat_cols) > 0
-    needs_scaling    = len(num_cols) >= 2
+    # We no longer have cardinality dict readily available without df
+    cardinality = {}
+    if df is not None:
+        for c in cat_cols_filtered:
+            cardinality[c] = df[c].nunique()
+    needs_encoding   = len(cat_cols_filtered) > 0
+    needs_scaling    = len(num_cols_filtered) >= 2
     needs_imputation = len(missing_cols) > 0
     # ── Step 4: Recommend specific model based on problem type + data size ────
@@ -393,8 +480,8 @@ def _determine_pre_flight(profile: dict, prompt: str, context: str | None, csv_p
         "is_imbalanced":           is_imbalanced,
         "n_rows":                  n_rows,
         "n_cols":                  n_cols,
-        "categorical_cols":        sorted(cat_cols),
-        "numeric_cols":            sorted(num_cols),
+        "categorical_cols":        sorted(cat_cols_filtered),
+        "numeric_cols":            sorted(num_cols_filtered),
         "recommended_model":       recommended_model,
         "recommended_metric":      recommended_metric_node,
     }
@@ -490,6 +577,227 @@ def _build_architect_messages(
     ]
+# ── Interactive Interview Prompt ──────────────────────────────────────────────
+#
+# This prompt is used when the user FIRST uploads a dataset.
+# Instead of immediately building a pipeline, the agent analyses the data,
+# makes its best guess about the target column + ML task, and PAUSES to
+# confirm with the user before generating any graph nodes.
+#
+# It is sent as the system message by _build_interview_messages().
+# NOTE(review): interview_dataset() as shown in this diff sends
+# _ANALYSIS_ONLY_PROMPT instead and hard-codes the other sections — confirm
+# this full prompt is still reachable from a caller.
+#
+_INTERVIEW_PROMPT = """\
+You are the M8Flow AI assistant — an expert, friendly ML engineer with terminal flair.
+A user has just uploaded a dataset. Your job is to perform Phase 1 of a STRICT
+interactive interview before building anything. Do NOT generate any pipeline nodes.
+Do NOT output JSON. Output ONLY conversational plain text.
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+PHASE 1 — ANALYSIS & CONFIRMATION PAUSE
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+⚠️ CRITICAL INSTRUCTION — READ BEFORE ANYTHING ELSE:
+The "== DETERMINISTIC PRE-FLIGHT ANALYSIS ==" block below was computed by
+RUNNING ACTUAL PYTHON CODE on the dataset. It is ground truth — it is NOT a
+suggestion. You MUST use the values it reports:
+  - "Target column" → this IS the target, use it verbatim
+  - "Problem type"  → this IS the task (CLASSIFICATION or REGRESSION), use it verbatim
+  - "Model Selection" → this IS the recommended model, use it verbatim
+DO NOT override these values based on your own assumptions.
+Follow this EXACT structure in your response:
+[PLANNING] Start with this label on its own line.
+Write 1 sentence describing what you are about to analyse.
+[ANALYSIS] Start with this label on its own line.
+Summarise what you see using ONLY real values from the profile:
+  • Dataset: <rows> rows × <cols> columns
+  • Key columns noticed (list 4–6 of the most interesting ones)
+  • Data quality: missing values, categorical columns, numeric features
+[DEDUCTION] Start with this label on its own line.
+State the pre-flight conclusions clearly (take them from the DETERMINISTIC PRE-FLIGHT block):
+  • 🎯 Target column: `<exact column name from pre-flight>` — <one-sentence reasoning>
+  • 📊 ML Task: <CLASSIFICATION or REGRESSION from pre-flight> — <one-sentence justification>
+  • 🤖 Recommended model: <model name from pre-flight> — <one-sentence rationale>
+  • ⚙️ Pipeline plan (STRICT ORDER — never deviate):
+      csv_loader
+      → [data_cleaning, if missing values]
+      → [label_encoder, if categorical columns exist — MUST be BEFORE train_test_split]
+      → train_test_split (target_column = `<target>`)
+      → [standard_scaler, if scaling needed — MUST be AFTER train_test_split]
+      → <recommended model>
+      → <metric node: classification_report for classification | regression_metrics for regression>
+[AWAITING CONFIRMATION]
+Ask ONE clear, direct question: confirm target column, task type, and whether to proceed.
+Example: "Does `price` as the regression target look right? Say yes to build, or tell me what to change."
+RULES:
+- Use REAL column names from the dataset profile. NEVER hallucinate or invent names.
+- The pipeline order above is MANDATORY. Never put standard_scaler before train_test_split.
+- Keep each section brief — this is a conversation, not a report.
+- Do NOT build any nodes. Do NOT output JSON.
+"""
+def _build_interview_messages(profile_text: str, pf: dict) -> list[dict]:
+    """Assemble the system + user chat messages for the Phase 1 interview.
+    The user message carries the dataset profile, then the rendered pre-flight
+    block, then the instruction line; the system message is _INTERVIEW_PROMPT.
+    """
+    sections = [
+        f"== DATASET PROFILE ==\n{profile_text}",
+        f"== DETERMINISTIC PRE-FLIGHT ANALYSIS ==\n{_pre_flight_block(pf)}",
+        "Perform the Phase 1 interview analysis now.",
+    ]
+    system_msg = {"role": "system", "content": _INTERVIEW_PROMPT}
+    user_msg   = {"role": "user",   "content": "\n\n".join(sections)}
+    return [system_msg, user_msg]
+# System prompt for the only LLM-written part of the interview: the three-bullet
+# [ANALYSIS] body. interview_dataset() sends it together with the dataset profile.
+_ANALYSIS_ONLY_PROMPT = """\
+You are an ML dataset analyst. Write the [ANALYSIS] section of an interview message.
+Output ONLY the bullet-point body — no heading, no JSON, no other sections.
+Use EXACTLY this format (3 bullets):
+  • Dataset: <rows> rows × <cols> columns
+  • Key columns: <list 5-7 notable column names from the profile>
+  • Data quality: <brief note on missing values, categorical columns, numeric features>
+Rules:
+- Use REAL values from the profile only. No guessing.
+- Maximum 3 lines. No extra commentary.
+"""
+def _build_pipeline_plan(pf: dict) -> str:
+    """Build the canonical pipeline sequence string from pre-flight flags.
+    Args:
+        pf: Pre-flight dict. Reads needs_imputation, needs_encoding,
+            categorical_cols, target_hint, needs_scaling, problem_type,
+            recommended_model and recommended_metric. Every key is optional
+            and may be present with a None value.
+    Returns:
+        The step names joined by an indented arrow separator, one per line.
+    """
+    steps = ["csv_loader"]
+    if pf.get("needs_imputation"):
+        steps.append("data_cleaning (fill/drop missing values)")
+    if pf.get("needs_encoding"):
+        cat_cols = pf.get("categorical_cols") or []
+        if cat_cols:
+            col_str = ", ".join(cat_cols[:4]) + ("…" if len(cat_cols) > 4 else "")
+            steps.append(f"label_encoder (for: {col_str})")
+        else:
+            # Flag set but no column names supplied — avoid rendering "(for: )"
+            steps.append("label_encoder")
+    # `or` instead of a .get() default: a key that is present with a None value
+    # must still fall back, otherwise the plan shows the literal text "None".
+    target = pf.get("target_hint") or "?"
+    steps.append(f"train_test_split  ← target_column = `{target}`")
+    if pf.get("needs_scaling"):
+        steps.append("standard_scaler")
+    steps.append(pf.get("recommended_model") or "random_forest_classifier")
+    # Pick the fallback metric node that matches the task type
+    if pf.get("problem_type") == "regression":
+        default_metric = "regression_metrics"
+    else:
+        default_metric = "classification_report"
+    steps.append(pf.get("recommended_metric") or default_metric)
+    return "\n      → ".join(steps)
+async def interview_dataset(context: str) -> str:
+    """
+    Phase 1 of the interactive pipeline-building flow.
+    Strategy:
+      - [PLANNING]              → static string
+      - [ANALYSIS]              → LLM-generated (describe the data shape/quality)
+      - [DEDUCTION]             → HARD-CODED from deterministic pre-flight dict
+      - [AWAITING CONFIRMATION] → HARD-CODED template from pre-flight dict
+    This ensures the target column and task type are NEVER overridden by
+    LLM hallucination — they come directly from Python pandas analysis.
+    Args:
+        context: Dataset context string; the CSV path is pulled out of it with
+            _extract_file_path().
+    Returns:
+        The four-section interview message, or a short two-section fallback
+        asking the user to describe the dataset when the file could not be
+        read or profiled.
+    """
+    fp = _extract_file_path(context)
+    profile_text = ""
+    pf: dict = {}
+    if fp:
+        try:
+            import pandas as pd
+            df = pd.read_csv(fp, nrows=5000)
+            profile = profile_dataframe(df)
+            profile_text = format_profile_for_prompt(profile)
+            pf = _determine_pre_flight(profile, "", context, csv_path=fp)
+            logger.info(
+                "Interview pre-flight: target=%s type=%s model=%s",
+                pf.get("target_hint"), pf.get("problem_type"), pf.get("recommended_model"),
+            )
+        except Exception as exc:
+            # Best-effort: any read/profile failure degrades to the fallback message
+            logger.warning("interview_dataset: pre-flight skipped: %s", exc)
+    if not profile_text or not pf:
+        return (
+            "[PLANNING] Dataset context received — reading file directly wasn't possible.\n\n"
+            "[AWAITING CONFIRMATION] Could you describe what this dataset is about? "
+            "Which column is the prediction target, and is this a classification or regression problem?"
+        )
+    # ── [ANALYSIS] — only this section is LLM-generated ─────────────────────
+    analysis_body = ""
+    try:
+        messages = [
+            {"role": "system", "content": _ANALYSIS_ONLY_PROMPT},
+            {"role": "user",   "content": f"== DATASET PROFILE ==\n{profile_text}\n\nWrite the 3-bullet [ANALYSIS] body now."},
+        ]
+        analysis_body = await _call_openrouter(
+            messages, task="explain", json_mode=False, timeout=30
+        )
+        # Strip any accidental heading the LLM adds
+        analysis_body = analysis_body.strip()
+        for prefix in ("[ANALYSIS]", "[analysis]", "ANALYSIS:", "Analysis:"):
+            if analysis_body.startswith(prefix):
+                analysis_body = analysis_body[len(prefix):].strip()
+    except Exception as exc:
+        logger.warning("interview_dataset: analysis LLM call failed: %s", exc)
+        analysis_body = ""
+    if not analysis_body:
+        # Deterministic summary, used when the LLM call fails OR returns nothing,
+        # so the [ANALYSIS] section is never left blank.
+        n_rows   = pf.get("n_rows", "?")
+        n_cols   = pf.get("n_cols", "?")
+        cat_cols = pf.get("categorical_cols") or []
+        num_cols = pf.get("numeric_cols") or []
+        analysis_body = (
+            f"  • Dataset: {n_rows} rows × {n_cols} columns\n"
+            f"  • Categorical cols: {', '.join(cat_cols[:6]) or 'none'}\n"
+            f"  • Numeric cols: {', '.join(num_cols[:6]) or 'none'}"
+        )
+    # ── [DEDUCTION] — 100% hard-coded from pf, no LLM ───────────────────────
+    # `or` fallbacks (not .get() defaults) so keys present with None are handled too
+    target      = pf.get("target_hint") or "unknown"
+    task        = str(pf.get("problem_type") or "unknown").upper()
+    model       = pf.get("recommended_model") or "random_forest_classifier"
+    analysis    = pf.get("target_analysis") or {}
+    reasoning   = analysis.get("reasoning", f"column `{target}` identified as target by Python analysis")
+    model_label = model.replace("_", " ").title()
+    # None when the task is neither value — it must not be presented as "Regression"
+    task_label  = {"CLASSIFICATION": "Classification", "REGRESSION": "Regression"}.get(task)
+    pipeline    = _build_pipeline_plan(pf)
+    if task_label:
+        task_line = f"  • 📊 ML Task: {task_label} — {task} determined from actual data values\n"
+    else:
+        task_line = "  • 📊 ML Task: undetermined — the data did not settle classification vs. regression\n"
+    # Extra notes
+    notes: list[str] = []
+    if pf.get("is_imbalanced"):
+        notes.append("⚠️ Class imbalance detected — the model will use `class_weight=balanced`.")
+    if pf.get("needs_outlier_removal"):
+        notes.append("⚠️ Outlier columns detected — an outlier removal step will be added.")
+    deduction_block = (
+        f"  • 🎯 Target column: `{target}` — {reasoning}\n"
+        f"{task_line}"
+        f"  • 🤖 Recommended model: {model_label}\n"
+        f"  • ⚙️ Pipeline plan:\n"
+        f"      {pipeline}"
+    )
+    if notes:
+        deduction_block += "\n" + "\n".join(f"      {n}" for n in notes)
+    # ── [AWAITING CONFIRMATION] — hard-coded template ────────────────────────
+    if task_label:
+        confirm_q = (
+            f"Does **`{target}`** as the {task_label.lower()} target look right?\n"
+            f"Reply **yes** to build the pipeline, or tell me the correct target column / task type."
+        )
+    else:
+        confirm_q = (
+            f"Is **`{target}`** the right target column, and is this a classification or regression problem?\n"
+            f"Reply with the task type to build the pipeline, or tell me the correct target column."
+        )
+    return (
+        "[PLANNING]\n"
+        "Scanning the uploaded dataset to confirm the ML task, target column, and pipeline plan.\n\n"
+        "[ANALYSIS]\n"
+        f"{analysis_body}\n\n"
+        "[DEDUCTION]\n"
+        f"{deduction_block}\n\n"
+        "[AWAITING CONFIRMATION]\n"
+        f"{confirm_q}"
+    )
 # ── System prompts ─────────────────────────────────────────────────────────────
 _SYSTEM_PROMPT = """\
@@ -908,10 +1216,42 @@ Return ONLY:
 # ── Message builders ───────────────────────────────────────────────────────────
 def _extract_file_path(context: str) -> str | None:
+    """Extract the absolute CSV file path from a dataset context string.
+    The frontend sends context like:
+        File: Housing.csv
+        Path: D:\\ISSM\\M8Flow\\...\\uploads\\Housing.csv
+        Columns: ...
+    We also fall back to a regex scan for any absolute .csv path in the string.
+    """
+    import re as _re
+    # Primary: look for "Path:" line — split on first ": " to preserve Windows drive letter
     for line in context.splitlines():
-        ll = line.lower()
-        if ll.startswith("path:") or ll.startswith("full path:"):
-            return line.split(":", 1)[1].strip()
+        stripped = line.strip()
+        if stripped.lower().startswith("path:"):
+            # Use ": " as the delimiter (not ":" alone) to keep "C:\" intact
+            if ": " in stripped:
+                candidate = stripped.split(": ", 1)[1].strip()
+            else:
+                # No space after colon — try taking everything after "Path:"
+                candidate = stripped[5:].strip()
+            if candidate:
+                logger.debug("_extract_file_path: found via Path: line → %r", candidate)
+                return candidate
+    # Fallback: regex scan for any absolute path ending in .csv
+    m = _re.search(
+        r'([A-Za-z]:[/\\][^\s\n\r"\']+\.csv|/[^\s\n\r"\']+\.csv)',
+        context,
+        _re.IGNORECASE,
+    )
+    if m:
+        candidate = m.group(1)
+        logger.debug("_extract_file_path: found via regex → %r", candidate)
+        return candidate
+    logger.debug("_extract_file_path: no path found in context")
     return None
@@ -1096,6 +1436,175 @@ def _build_update_messages(
 # ── OpenRouter unified call ───────────────────────────────────────────────────
+def _convert_to_gemini(messages: list[dict]) -> tuple[list[dict], str | None]:
+    """Convert OpenAI-style messages to Gemini format.
+    Args:
+        messages: Chat messages as dicts with "role" and "content" keys.
+    Returns:
+        (contents, system_instruction_text). "assistant"/"model" messages map
+        to the Gemini "model" role and every other non-system role to "user".
+        system_instruction_text is None when there is no system message; when
+        there are several, their non-empty texts are joined with a blank line
+        instead of keeping only the last one.
+    """
+    contents: list[dict] = []
+    system_parts: list[str] = []
+    for msg in messages:
+        role    = msg.get("role", "user")
+        # A key that is present with a None value must not become a None text part
+        content = msg.get("content") or ""
+        if role == "system":
+            system_parts.append(content)
+        elif role in ("assistant", "model"):
+            contents.append({"role": "model", "parts": [{"text": content}]})
+        else:
+            contents.append({"role": "user",  "parts": [{"text": content}]})
+    system_text: str | None = None
+    if system_parts:
+        system_text = "\n\n".join(part for part in system_parts if part)
+    return contents, system_text
+async def _call_gemini(
+    messages: list[dict],
+    task: str = "generate",
+    timeout: int = _TIMEOUT_GENERATE,
+) -> str:
+    """Call Google Gemini API directly using the user's AI Studio key.
+    Args:
+        messages: OpenAI-style chat messages; converted with _convert_to_gemini().
+        task: Key into _GEMINI_MODELS; unknown tasks use the "generate" model.
+        timeout: httpx client timeout in seconds.
+    Returns:
+        The text of the first part of the first candidate ("" if it is empty).
+    Raises:
+        RuntimeError: On HTTP 401, HTTP 429, or an unexpected response shape.
+        httpx.HTTPStatusError: On any other non-success status.
+    """
+    key   = _get_gemini_key()
+    model = _GEMINI_MODELS.get(task, _GEMINI_MODELS["generate"])
+    url   = _GEMINI_URL.format(model=model)
+    contents, system_text = _convert_to_gemini(messages)
+    body: dict = {
+        "contents": contents,
+        "generationConfig": {"maxOutputTokens": 8192, "temperature": 0.7},
+    }
+    if system_text:
+        body["systemInstruction"] = {"parts": [{"text": system_text}]}
+    # Send the key as a header rather than a `?key=` query parameter: the request
+    # URL is embedded in httpx error messages (raise_for_status) and in access
+    # logs, so a key in the query string would leak there.
+    headers = {"Content-Type": "application/json", "x-goog-api-key": key or ""}
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        response = await client.post(url, headers=headers, json=body)
+    if response.status_code == 401:
+        raise RuntimeError("Invalid Gemini API key — check AI Studio at https://aistudio.google.com/app/apikey")
+    if response.status_code == 429:
+        raise RuntimeError("Gemini rate limit reached — wait a moment and try again")
+    response.raise_for_status()
+    data = response.json()
+    try:
+        return data["candidates"][0]["content"]["parts"][0]["text"] or ""
+    except (KeyError, IndexError, TypeError) as exc:
+        # TypeError covers a null where a list/dict was expected; keep the cause chained
+        raise RuntimeError(f"Unexpected Gemini response shape: {exc}. Raw: {str(data)[:300]}") from exc
+async def _call_gemini_with_model(
+    messages: list[dict],
+    model: str,
+    timeout: int = _TIMEOUT_GENERATE,
+) -> str:
+    """Call Gemini with a specific model ID chosen by the user in the agent matrix.
+    Args:
+        messages: OpenAI-style chat messages; converted with _convert_to_gemini().
+        model: Gemini model ID substituted into the endpoint URL.
+        timeout: httpx client timeout in seconds.
+    Returns:
+        The text of the first part of the first candidate ("" if it is empty).
+    Raises:
+        RuntimeError: On HTTP 401, HTTP 429, or an unexpected response shape.
+        httpx.HTTPStatusError: On any other non-success status.
+    """
+    key = _get_gemini_key()
+    url = _GEMINI_URL.format(model=model)
+    contents, system_text = _convert_to_gemini(messages)
+    body: dict = {
+        "contents": contents,
+        "generationConfig": {"maxOutputTokens": 8192, "temperature": 0.7},
+    }
+    if system_text:
+        body["systemInstruction"] = {"parts": [{"text": system_text}]}
+    # Key goes in a header, not the query string, so it cannot leak through the
+    # URL that httpx embeds in its error messages or through access logs.
+    headers = {"Content-Type": "application/json", "x-goog-api-key": key or ""}
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        response = await client.post(url, headers=headers, json=body)
+    if response.status_code == 401:
+        raise RuntimeError(f"Invalid Gemini key for model {model}")
+    if response.status_code == 429:
+        # Same friendly rate-limit error that _call_gemini raises
+        raise RuntimeError(f"Gemini rate limit reached on {model} — wait a moment and try again")
+    response.raise_for_status()
+    try:
+        return response.json()["candidates"][0]["content"]["parts"][0]["text"] or ""
+    except (KeyError, IndexError, TypeError) as exc:
+        raise RuntimeError(f"Unexpected Gemini response: {exc}") from exc
+async def _call_openrouter_with_model(
+    messages: list[dict],
+    model: str,
+    api_key: str,
+    timeout: int = _TIMEOUT_GENERATE,
+) -> str:
+    """Call OpenRouter with a specific model ID chosen by the user in the agent matrix.
+    No fallback — if the chosen model fails, the error surfaces immediately.
+    Args:
+        messages: OpenAI-style chat messages, sent unchanged.
+        model: OpenRouter model slug.
+        api_key: Bearer token for the Authorization header.
+        timeout: httpx client timeout in seconds.
+    Returns:
+        The first choice's message content ("" if it is empty).
+    Raises:
+        RuntimeError: On HTTP 401, HTTP 429, or an unexpected response shape.
+        httpx.HTTPStatusError: On any other non-success status.
+    """
+    body: dict = {"model": model, "messages": messages, "max_tokens": 8192}
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type":  "application/json",
+        "HTTP-Referer":  "https://m8flow.app",
+        "X-Title":       "M8Flow",
+    }
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        response = await client.post(_OPENROUTER_URL, headers=headers, json=body)
+    if response.status_code == 401:
+        raise RuntimeError("Invalid OpenRouter API key")
+    if response.status_code == 429:
+        raise RuntimeError(f"Rate limit hit on {model} — choose a different model or wait a moment")
+    response.raise_for_status()
+    try:
+        return response.json()["choices"][0]["message"]["content"] or ""
+    except (KeyError, IndexError, TypeError) as exc:
+        # TypeError: "choices" or "message" came back as null; keep the cause chained
+        raise RuntimeError(f"Unexpected OpenRouter response: {exc}") from exc
+async def _call_mistral(
+    messages: list[dict],
+    task: str = "generate",
+    timeout: int = _TIMEOUT_GENERATE,
+) -> str:
+    """Send a chat-completions request to Mistral La Plateforme with the user's key.
+    The payload has the same OpenAI-compatible shape used for OpenRouter; the
+    model is picked from _MISTRAL_MODELS by task, defaulting to "generate".
+    """
+    api_key = _get_mistral_key()
+    chosen  = _MISTRAL_MODELS.get(task, _MISTRAL_MODELS["generate"])
+    request_headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type":  "application/json",
+    }
+    payload = {"model": chosen, "messages": messages, "max_tokens": 8192}
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        response = await client.post(_MISTRAL_URL, headers=request_headers, json=payload)
+    status = response.status_code
+    if status == 401:
+        raise RuntimeError("Invalid Mistral API key — check la Plateforme at https://console.mistral.ai")
+    if status == 429:
+        raise RuntimeError("Mistral rate limit reached — wait a moment and try again")
+    response.raise_for_status()
+    try:
+        text = response.json()["choices"][0]["message"]["content"]
+    except (KeyError, IndexError) as exc:
+        raise RuntimeError(f"Unexpected Mistral response: {exc}")
+    return text or ""
+async def _call_mistral_with_model(
+    messages: list[dict],
+    model: str,
+    timeout: int = _TIMEOUT_GENERATE,
+) -> str:
+    """Call Mistral with a user-selected model from the agent matrix.
+    Args:
+        messages: OpenAI-style chat messages, sent unchanged.
+        model: Mistral model ID to request.
+        timeout: httpx client timeout in seconds.
+    Returns:
+        The first choice's message content ("" if it is empty).
+    Raises:
+        RuntimeError: On HTTP 401, HTTP 429, or an unexpected response shape.
+        httpx.HTTPStatusError: On any other non-success status.
+    """
+    key = _get_mistral_key()
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        response = await client.post(
+            _MISTRAL_URL,
+            headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
+            json={"model": model, "messages": messages, "max_tokens": 8192},
+        )
+    if response.status_code == 401:
+        raise RuntimeError(f"Invalid Mistral key for model {model}")
+    if response.status_code == 429:
+        # Same friendly rate-limit error that _call_mistral raises
+        raise RuntimeError(f"Mistral rate limit reached on {model} — wait a moment and try again")
+    response.raise_for_status()
+    try:
+        return response.json()["choices"][0]["message"]["content"] or ""
+    except (KeyError, IndexError, TypeError) as exc:
+        raise RuntimeError(f"Unexpected Mistral response: {exc}") from exc
 async def _call_openrouter(
     messages: list[dict],
     task: str = "generate",
@@ -1103,18 +1612,52 @@ async def _call_openrouter(
     timeout: int = _TIMEOUT_GENERATE,
 ) -> str:
     """
-    Route to the right model via OpenRouter based on task type.
-    Falls back through the full pool of live free models on rate-limit or error.
-    Rate-limited models are skipped for _RATE_LIMIT_TTL seconds to avoid wasted calls.
+    Unified LLM entry point.
+    • If a Gemini key is available → calls Gemini 2.5 Flash directly (priority).
+    • Otherwise → routes through OpenRouter free models with rate-limit fallback.
     """
-    effective_key = _get_api_key()
-    if not effective_key:
+    # ── Demo-mode enforcement ─────────────────────────────────────────────────
+    or_key      = _get_api_key()
+    gemini_key  = _get_gemini_key()
+    mistral_key = _get_mistral_key()
+    if not or_key and not gemini_key and not mistral_key:
         raise RuntimeError(
-            "No OpenRouter API key found. "
-            "Enter your key in the AI Assistant panel (sk-or-…) or "
-            "set OPENROUTER_API_KEY in backend/.env."
+            "No API key configured. "
+            "Add an OpenRouter, Gemini, or Mistral key in Settings → API Keys."
         )
+    # ── Per-agent model routing ───────────────────────────────────────────────
+    agent_config = _request_agent_models.get()
+    agent_model  = (agent_config or {}).get(task)
+    def _is_gemini(mid: str) -> bool:
+        return mid.startswith("gemini-") or "gemini" in mid.lower()
+    def _is_mistral(mid: str) -> bool:
+        return any(mid.startswith(p) for p in (
+            "codestral", "mistral-", "open-mistral", "open-mixtral", "pixtral",
+        ))
+    if agent_model and agent_model != "auto":
+        if _is_gemini(agent_model) and gemini_key:
+            return await _call_gemini_with_model(messages, agent_model, timeout)
+        elif _is_mistral(agent_model) and mistral_key:
+            return await _call_mistral_with_model(messages, agent_model, timeout)
+        elif not _is_gemini(agent_model) and not _is_mistral(agent_model) and or_key:
+            return await _call_openrouter_with_model(messages, agent_model, or_key, timeout)
+        # Key unavailable for chosen model — fall through to auto routing
+    # ── Auto routing: priority Gemini > Mistral > OpenRouter ─────────────────
+    if gemini_key:
+        return await _call_gemini(messages, task=task, timeout=timeout)
+    if mistral_key:
+        return await _call_mistral(messages, task=task, timeout=timeout)
+    # ── OpenRouter path (with rate-limit fallback chain) ─────────────────────
+    effective_key = or_key
+    if not effective_key:
+        raise RuntimeError("No API key available. Add one in Settings → API Keys.")
     model = _MODELS.get(task, _MODELS["generate"])
     # Full pool of verified-live free models (May 2026).
@@ -2121,8 +2664,9 @@ You are an M8Flow node code generator. Write Python code for a reusable pipeline
   2. DataFrame input parameter MUST be named  data  (never df, dataframe, dataset)
   3. Function MUST return a dict with named string keys
   4. ALL imports go INSIDE the function body
-  5. Only allowed libraries: pandas, numpy, sklearn, scipy, math, statistics, re, json
-  6. FORBIDDEN: os, sys, subprocess, socket, requests, open(), eval(), exec(), matplotlib
+  5. Scientific libraries: pandas, numpy, sklearn, scipy, statsmodels, imblearn
+  6. Visualisation: ALWAYS prefer `plotly` (px or go) for interactive charts. `matplotlib` and `seaborn` are also allowed for static plots.
+  7. FORBIDDEN: os, sys, subprocess, socket, requests, open(), eval(), exec()
 FIELD TYPE ANNOTATIONS — these control the UI widget shown to the user:
   data input    (connects from previous node)  → just `data` with no type hint