m8flow 1.1.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- """LLM service — OpenRouter-powered flow generation with per-task model routing."""
1
+ """LLM service — OpenRouter or Google Gemini flow generation with per-task model routing."""
2
2
  import json
3
3
  import logging
4
4
  import time
@@ -9,14 +9,30 @@ from domain.models import FlowSchema, RefinePatch, NodePatch, EdgePatch
9
9
  from templates import TEMPLATES
10
10
  from core.data_profiler import profile_dataframe, format_profile_for_prompt
11
11
 
12
- # Per-request API key override (set by the route handler from X-OpenRouter-Key header)
13
- _request_api_key: ContextVar[str | None] = ContextVar('request_api_key', default=None)
12
+ # ── Per-request overrides (set by route handler from request headers) ─────────
13
+ _request_api_key: ContextVar[str | None] = ContextVar('request_api_key', default=None)
14
+ _request_gemini_key: ContextVar[str | None] = ContextVar('request_gemini_key', default=None)
15
+ _request_mistral_key: ContextVar[str | None] = ContextVar('request_mistral_key', default=None)
16
+ _request_model_override:ContextVar[str | None] = ContextVar('request_model_override',default=None)
17
+ _request_agent_models: ContextVar[dict | None] = ContextVar('request_agent_models', default=None)
14
18
 
15
19
 
16
20
def _get_api_key() -> str:
    """Return the OpenRouter key: request-scoped override first, else config."""
    override = _request_api_key.get()
    if override:
        return override
    return config.OPENROUTER_API_KEY
19
22
 
23
def _get_gemini_key() -> str | None:
    """Return the Gemini key: request-scoped override first, else config."""
    override = _request_gemini_key.get()
    if override:
        return override
    return config.GEMINI_API_KEY
25
+
26
def _get_mistral_key() -> str | None:
    """Return the Mistral key: request-scoped override first, else config."""
    override = _request_mistral_key.get()
    if override:
        return override
    return config.MISTRAL_API_KEY
28
+
29
def _use_gemini() -> bool:
    """Report whether a non-empty Gemini key is available for this request."""
    return True if _get_gemini_key() else False
31
+
32
def _use_mistral() -> bool:
    """Report whether a non-empty Mistral key is available for this request."""
    return True if _get_mistral_key() else False
34
+
35
+
20
36
  logger = logging.getLogger(__name__)
21
37
 
22
38
  # ── OpenRouter config ─────────────────────────────────────────────────────────
@@ -24,6 +40,43 @@ _OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
24
40
  _TIMEOUT_GENERATE = 120
25
41
  _TIMEOUT_UPDATE = 180
26
42
 
43
+ # ── Google Gemini config ──────────────────────────────────────────────────────
44
+ _GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
45
+
46
+ _GEMINI_MODELS = {
47
+ # Fast, accurate structural blueprinting
48
+ "architect": "gemini-2.5-flash",
49
+ # Core generation — handles multi-node creation flawlessly
50
+ "generate": "gemini-2.5-flash",
51
+ "refine": "gemini-2.5-flash",
52
+ "update": "gemini-2.5-flash",
53
+ # Debugging / healing — intercepts errors and applies patches
54
+ "debug": "gemini-2.5-flash",
55
+ "heal": "gemini-2.5-flash",
56
+ # Explaining / suggesting — fast response for UI copy
57
+ "explain": "gemini-2.5-flash-lite",
58
+ "suggest": "gemini-2.5-flash-lite",
59
+ # Safety nets
60
+ "fallback": "gemini-2.5-flash",
61
+ "lastresort": "gemini-2.5-flash-lite",
62
+ }
63
+
64
+ # ── Mistral config ────────────────────────────────────────────────────────────
65
+ _MISTRAL_URL = "https://api.mistral.ai/v1/chat/completions"
66
+
67
+ _MISTRAL_MODELS = {
68
+ "architect": "codestral-latest",
69
+ "generate": "mistral-small-latest",
70
+ "refine": "mistral-small-latest",
71
+ "update": "mistral-small-latest",
72
+ "debug": "codestral-latest",
73
+ "heal": "codestral-latest",
74
+ "explain": "mistral-small-latest",
75
+ "suggest": "mistral-small-latest",
76
+ "fallback": "mistral-tiny",
77
+ "lastresort": "mistral-tiny",
78
+ }
79
+
27
80
  # Per-task model routing — slugs verified live against OpenRouter API (May 2026)
28
81
  _MODELS = {
29
82
  # Reasoning/Architecting — best available free reasoning model
@@ -144,11 +197,30 @@ def _determine_pre_flight(profile: dict, prompt: str, context: str | None, csv_p
144
197
  import pandas as pd
145
198
  import numpy as np
146
199
 
147
- dtypes = profile.get("dtypes", {})
148
- missing = profile.get("missing", {})
149
- numeric_summary = profile.get("numeric_summary", {})
150
- categorical_summary = profile.get("categorical_summary", {})
151
- shape = profile.get("shape", [0, 0])
200
+ df: pd.DataFrame | None = None
201
+ if csv_path:
202
+ try:
203
+ df = pd.read_csv(csv_path, nrows=5000)
204
+ except Exception:
205
+ pass
206
+
207
+ # Extract columns and properties reliably from the DataFrame if available
208
+ if df is not None:
209
+ all_columns = list(df.columns)
210
+ numeric_cols = set(df.select_dtypes(include=[np.number]).columns)
211
+ cat_cols = set(df.select_dtypes(exclude=[np.number]).columns)
212
+ else:
213
+ # Fallback to profile keys
214
+ numeric_cols = set(profile.get("numeric_features", []))
215
+ cat_cols = set(profile.get("categorical_features", []))
216
+ all_columns = list(numeric_cols | cat_cols)
217
+ # If still empty, try to parse from context string
218
+ if not all_columns and context:
219
+ m = re.search(r"Columns:\s*(.*)", context)
220
+ if m:
221
+ all_columns = [c.strip() for c in m.group(1).split(",")]
222
+
223
+ shape = profile.get("shape", [len(df) if df is not None else 0, len(all_columns)])
152
224
 
153
225
  # ── Step 1: Find the target column ────────────────────────────────────────
154
226
  # Priority: explicit mention in prompt/context > heuristic column names.
@@ -169,37 +241,57 @@ def _determine_pre_flight(profile: dict, prompt: str, context: str | None, csv_p
169
241
  if m:
170
242
  candidate = m.group(1)
171
243
  # Validate the candidate actually exists in the data
172
- if candidate in dtypes or candidate in numeric_summary or candidate in categorical_summary:
173
- target_hint = candidate
244
+ # Use case-insensitive check
245
+ matched = next((c for c in all_columns if c.lower() == candidate), None)
246
+ if matched:
247
+ target_hint = matched
174
248
  break
175
249
 
176
250
  # (b) If still unknown, use heuristic column-name scoring on the real columns
177
251
  if target_hint is None:
178
- TARGET_KEYWORDS = [
179
- "target", "label", "class", "output", "y", "result",
180
- "outcome", "diagnosis", "status", "type", "category",
181
- "survived", "churn", "default", "fraud", "price",
182
- "salary", "value", "score", "sales", "revenue", "cost",
183
- "medv", "charges", "fare",
184
- ]
185
- all_columns = list(dtypes.keys())
252
+ # ── Two tiers of keywords ─────────────────────────────────────────────
253
+ # STRONG: column IS the target in the vast majority of real datasets
254
+ # WEAK : only a rough signal — requires additional confirmation
255
+ STRONG_KEYWORDS = {
256
+ "target", "label", "output", "y", "result",
257
+ "survived", "churn", "default", "fraud",
258
+ "price", "salary", "value", "sales", "revenue",
259
+ "cost", "medv", "charges", "fare", "income",
260
+ "score", "rate", "amount", "demand",
261
+ }
262
+ WEAK_KEYWORDS = {
263
+ "class", "outcome", "diagnosis", "status",
264
+ "type", "category", "flag", "ind", "indicator",
265
+ }
266
+
267
+ # all_columns is already defined above
186
268
  best_col: str | None = None
187
269
  best_score = -1
188
270
 
189
271
  for col in all_columns:
190
- col_lower = col.lower().replace("_", " ").replace("-", " ")
272
+ # Tokenise: split on _ / - / space so "furnishingstatus" → ["furnishingstatus"]
273
+ # and "loan_status" → ["loan", "status"] — only whole tokens are matched.
274
+ col_lower = col.lower()
275
+ col_tokens = set(re.split(r"[_\-\s]+", col_lower))
191
276
  score = 0
192
277
 
193
- # Keyword match against column name
194
- for kw in TARGET_KEYWORDS:
195
- if kw in col_lower:
196
- score += 3
197
- break
198
- # Last column is commonly the target in many datasets
199
- if col == all_columns[-1]:
278
+ # Exact token match (e.g. col="price" → token="price" ∈ STRONG)
279
+ if col_tokens & STRONG_KEYWORDS:
280
+ score += 6
281
+ elif col_tokens & WEAK_KEYWORDS:
282
+ # Only a weak signal — avoids picking "furnishingstatus" over "price"
200
283
  score += 2
201
- # Column mentioned in prompt text
202
- if col_lower in search_text or col.lower() in search_text:
284
+
285
+ # Full name contained in strong keywords (catches single-word col names)
286
+ if col_lower in STRONG_KEYWORDS:
287
+ score += 2 # bonus on top of token score
288
+
289
+ # Last column bonus (weaker than before)
290
+ if col == all_columns[-1]:
291
+ score += 1
292
+
293
+ # Column explicitly mentioned in prompt/context text
294
+ if col_lower in search_text or col in search_text.split():
203
295
  score += 4
204
296
 
205
297
  if score > best_score:
@@ -327,43 +419,38 @@ def _determine_pre_flight(profile: dict, prompt: str, context: str | None, csv_p
327
419
 
328
420
  except Exception as exc:
329
421
  logger.warning("Pre-flight target analysis failed: %s", exc)
330
- # Fall back to dtype-only heuristic using profile data
331
422
  if target_hint:
332
- dtype_str = str(dtypes.get(target_hint, "")).lower()
333
- if any(t in dtype_str for t in ("object", "category", "bool", "str")):
423
+ if target_hint in cat_cols:
334
424
  problem_type = "classification"
335
- elif target_hint in categorical_summary:
336
- problem_type = "classification" if categorical_summary[target_hint].get("unique", 99) < 15 else "regression"
337
- elif target_hint in numeric_summary:
425
+ elif target_hint in numeric_cols:
338
426
  problem_type = "regression"
339
427
 
340
428
  elif target_hint:
341
429
  # No CSV path — fall back to profile-based heuristic
342
- dtype_str = str(dtypes.get(target_hint, "")).lower()
343
- if any(t in dtype_str for t in ("object", "category", "bool", "str")):
430
+ if target_hint in cat_cols:
344
431
  problem_type = "classification"
345
- elif target_hint in categorical_summary:
346
- problem_type = "classification" if categorical_summary.get(target_hint, {}).get("unique", 99) < 15 else "regression"
347
- elif target_hint in numeric_summary:
432
+ elif target_hint in numeric_cols:
348
433
  problem_type = "regression"
349
434
 
350
435
  # ── Step 3: Preprocessing flags ───────────────────────────────────────────
351
436
  # Detect categorical columns that need encoding (exclude the target itself)
352
- cat_cols = {c for c in categorical_summary if c != target_hint}
353
- num_cols = {c for c in numeric_summary if c != target_hint}
437
+ cat_cols_filtered = {c for c in cat_cols if c != target_hint}
438
+ num_cols_filtered = {c for c in numeric_cols if c != target_hint}
354
439
 
440
+ # Profile from data_profiler uses 'missing_values'
441
+ missing_dict = profile.get("missing_values", {})
355
442
  missing_cols: dict[str, float] = {
356
- col: round(info.get("pct", 0), 1)
357
- for col, info in missing.items()
358
- if info.get("pct", 0) > 0
359
- }
360
- cardinality: dict[str, int] = {
361
- col: info.get("unique", 0)
362
- for col, info in categorical_summary.items()
443
+ col: float(pct) for col, pct in missing_dict.items() if pct > 0
363
444
  }
364
-
365
- needs_encoding = len(cat_cols) > 0
366
- needs_scaling = len(num_cols) >= 2
445
+
446
+ # We no longer have cardinality dict readily available without df
447
+ cardinality = {}
448
+ if df is not None:
449
+ for c in cat_cols_filtered:
450
+ cardinality[c] = df[c].nunique()
451
+
452
+ needs_encoding = len(cat_cols_filtered) > 0
453
+ needs_scaling = len(num_cols_filtered) >= 2
367
454
  needs_imputation = len(missing_cols) > 0
368
455
 
369
456
  # ── Step 4: Recommend specific model based on problem type + data size ────
@@ -393,8 +480,8 @@ def _determine_pre_flight(profile: dict, prompt: str, context: str | None, csv_p
393
480
  "is_imbalanced": is_imbalanced,
394
481
  "n_rows": n_rows,
395
482
  "n_cols": n_cols,
396
- "categorical_cols": sorted(cat_cols),
397
- "numeric_cols": sorted(num_cols),
483
+ "categorical_cols": sorted(cat_cols_filtered),
484
+ "numeric_cols": sorted(num_cols_filtered),
398
485
  "recommended_model": recommended_model,
399
486
  "recommended_metric": recommended_metric_node,
400
487
  }
@@ -490,6 +577,227 @@ def _build_architect_messages(
490
577
  ]
491
578
 
492
579
 
580
+ # ── Interactive Interview Prompt ──────────────────────────────────────────────
581
+ #
582
+ # This prompt is used when the user FIRST uploads a dataset.
583
+ # Instead of immediately building a pipeline, the agent analyses the data,
584
+ # makes its best guess about the target column + ML task, and PAUSES to
585
+ # confirm with the user before generating any graph nodes.
586
+ #
587
+
588
+ _INTERVIEW_PROMPT = """\
589
+ You are the M8Flow AI assistant — an expert, friendly ML engineer with terminal flair.
590
+
591
+ A user has just uploaded a dataset. Your job is to perform Phase 1 of a STRICT
592
+ interactive interview before building anything. Do NOT generate any pipeline nodes.
593
+ Do NOT output JSON. Output ONLY conversational plain text.
594
+
595
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
596
+ PHASE 1 — ANALYSIS & CONFIRMATION PAUSE
597
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
598
+
599
+ ⚠️ CRITICAL INSTRUCTION — READ BEFORE ANYTHING ELSE:
600
+ The "== DETERMINISTIC PRE-FLIGHT ANALYSIS ==" block below was computed by
601
+ RUNNING ACTUAL PYTHON CODE on the dataset. It is ground truth — it is NOT a
602
+ suggestion. You MUST use the values it reports:
603
+ - "Target column" → this IS the target, use it verbatim
604
+ - "Problem type" → this IS the task (CLASSIFICATION or REGRESSION), use it verbatim
605
+ - "Model Selection" → this IS the recommended model, use it verbatim
606
+ DO NOT override these values based on your own assumptions.
607
+
608
+ Follow this EXACT structure in your response:
609
+
610
+ [PLANNING] Start with this label on its own line.
611
+ Write 1 sentence describing what you are about to analyse.
612
+
613
+ [ANALYSIS] Start with this label on its own line.
614
+ Summarise what you see using ONLY real values from the profile:
615
+ • Dataset: <rows> rows × <cols> columns
616
+ • Key columns noticed (list 4–6 of the most interesting ones)
617
+ • Data quality: missing values, categorical columns, numeric features
618
+
619
+ [DEDUCTION] Start with this label on its own line.
620
+ State the pre-flight conclusions clearly (take them from the DETERMINISTIC PRE-FLIGHT block):
621
+ • 🎯 Target column: `<exact column name from pre-flight>` — <one-sentence reasoning>
622
+ • 📊 ML Task: <CLASSIFICATION or REGRESSION from pre-flight> — <one-sentence justification>
623
+ • 🤖 Recommended model: <model name from pre-flight> — <one-sentence rationale>
624
+ • ⚙️ Pipeline plan (STRICT ORDER — never deviate):
625
+ csv_loader
626
+ → [data_cleaning, if missing values]
627
+ → [label_encoder, if categorical columns exist — MUST be BEFORE train_test_split]
628
+ → train_test_split (target_column = `<target>`)
629
+ → [standard_scaler, if scaling needed — MUST be AFTER train_test_split]
630
+ → <recommended model>
631
+ → <metric node: classification_report for classification | regression_metrics for regression>
632
+
633
+ [AWAITING CONFIRMATION]
634
+ Ask ONE clear, direct question: confirm target column, task type, and whether to proceed.
635
+ Example: "Does `price` as the regression target look right? Say yes to build, or tell me what to change."
636
+
637
+ RULES:
638
+ - Use REAL column names from the dataset profile. NEVER hallucinate or invent names.
639
+ - The pipeline order above is MANDATORY. Never put standard_scaler before train_test_split.
640
+ - Keep each section brief — this is a conversation, not a report.
641
+ - Do NOT build any nodes. Do NOT output JSON.
642
+ """
643
+
644
+
645
def _build_interview_messages(profile_text: str, pf: dict) -> list[dict]:
    """Assemble the system + user chat messages for the Phase 1 interview.

    The user message carries the dataset profile, the rendered pre-flight
    block for ``pf``, and the closing instruction, separated by blank lines.
    """
    sections = [
        f"== DATASET PROFILE ==\n{profile_text}",
        f"== DETERMINISTIC PRE-FLIGHT ANALYSIS ==\n{_pre_flight_block(pf)}",
        "Perform the Phase 1 interview analysis now.",
    ]
    system_message = {"role": "system", "content": _INTERVIEW_PROMPT}
    user_message = {"role": "user", "content": "\n\n".join(sections)}
    return [system_message, user_message]
655
+
656
+
657
+ _ANALYSIS_ONLY_PROMPT = """\
658
+ You are an ML dataset analyst. Write the [ANALYSIS] section of an interview message.
659
+ Output ONLY the bullet-point body — no heading, no JSON, no other sections.
660
+
661
+ Use EXACTLY this format (3 bullets):
662
+ • Dataset: <rows> rows × <cols> columns
663
+ • Key columns: <list 5-7 notable column names from the profile>
664
+ • Data quality: <brief note on missing values, categorical columns, numeric features>
665
+
666
+ Rules:
667
+ - Use REAL values from the profile only. No guessing.
668
+ - Maximum 3 lines. No extra commentary.
669
+ """
670
+
671
+
672
+ def _build_pipeline_plan(pf: dict) -> str:
673
+ """Build the canonical pipeline sequence string from pre-flight flags."""
674
+ steps = ["csv_loader"]
675
+ if pf.get("needs_imputation"):
676
+ steps.append("data_cleaning (fill/drop missing values)")
677
+ if pf.get("needs_encoding"):
678
+ cat_cols = pf.get("categorical_cols", [])
679
+ col_str = ", ".join(cat_cols[:4]) + ("…" if len(cat_cols) > 4 else "")
680
+ steps.append(f"label_encoder (for: {col_str})")
681
+ steps.append(f"train_test_split ← target_column = `{pf.get('target_hint', '?')}`")
682
+ if pf.get("needs_scaling"):
683
+ steps.append("standard_scaler")
684
+ steps.append(pf.get("recommended_model", "random_forest_classifier"))
685
+ metric = pf.get("recommended_metric", "classification_report")
686
+ steps.append(metric)
687
+ return "\n → ".join(steps)
688
+
689
+
690
async def interview_dataset(context: str) -> str:
    """Produce the Phase 1 "interview" message for a newly uploaded dataset.

    Section sources:
      - [PLANNING]              → static string
      - [ANALYSIS]              → LLM-generated; falls back to a deterministic
                                  summary when the call fails or returns nothing
      - [DEDUCTION]             → built only from the pre-flight dict
      - [AWAITING CONFIRMATION] → template filled from the pre-flight dict

    Target column, task type and model therefore never come from LLM output.

    Args:
        context: Dataset context string; the CSV path is extracted from it.

    Returns:
        The formatted interview message, or a short prompt asking the user to
        describe the dataset when the file could not be profiled.
    """
    fp = _extract_file_path(context)
    profile_text = ""
    pf: dict = {}

    if fp:
        try:
            import pandas as pd
            # NOTE(review): this is a synchronous read inside a coroutine;
            # only the first 5000 rows are loaded.
            df = pd.read_csv(fp, nrows=5000)
            profile = profile_dataframe(df)
            profile_text = format_profile_for_prompt(profile)
            pf = _determine_pre_flight(profile, "", context, csv_path=fp)
            logger.info(
                "Interview pre-flight: target=%s type=%s model=%s",
                pf.get("target_hint"), pf.get("problem_type"), pf.get("recommended_model"),
            )
        except Exception as exc:
            # Best-effort: any failure here leads to the "describe it" reply below.
            logger.warning("interview_dataset: pre-flight skipped: %s", exc)

    if not profile_text or not pf:
        return (
            "[PLANNING] Dataset context received — reading file directly wasn't possible.\n\n"
            "[AWAITING CONFIRMATION] Could you describe what this dataset is about? "
            "Which column is the prediction target, and is this a classification or regression problem?"
        )

    # ── [ANALYSIS] — only this section is LLM-generated ─────────────────────
    analysis_body = ""
    try:
        messages = [
            {"role": "system", "content": _ANALYSIS_ONLY_PROMPT},
            {"role": "user", "content": f"== DATASET PROFILE ==\n{profile_text}\n\nWrite the 3-bullet [ANALYSIS] body now."},
        ]
        raw = await _call_openrouter(
            messages, task="explain", json_mode=False, timeout=30
        )
        analysis_body = (raw or "").strip()
        # Strip any accidental heading the LLM adds, regardless of letter case.
        for prefix in ("[ANALYSIS]", "ANALYSIS:"):
            if analysis_body[:len(prefix)].upper() == prefix:
                analysis_body = analysis_body[len(prefix):].strip()
    except Exception as exc:
        logger.warning("interview_dataset: analysis LLM call failed: %s", exc)
        analysis_body = ""

    if not analysis_body:
        # Deterministic fallback — also used when the LLM replied with an
        # empty string, which would otherwise leave the section blank.
        n_rows = pf.get("n_rows", "?")
        n_cols = pf.get("n_cols", "?")
        cat_cols = [str(c) for c in (pf.get("categorical_cols") or [])]
        num_cols = [str(c) for c in (pf.get("numeric_cols") or [])]
        analysis_body = (
            f" • Dataset: {n_rows} rows × {n_cols} columns\n"
            f" • Categorical cols: {', '.join(cat_cols[:6]) or 'none'}\n"
            f" • Numeric cols: {', '.join(num_cols[:6]) or 'none'}"
        )

    # ── [DEDUCTION] — 100% hard-coded from pf, no LLM ───────────────────────
    # `or` fallbacks (instead of .get defaults) also cover keys present as None.
    target = pf.get("target_hint") or "unknown"
    task = str(pf.get("problem_type") or "unknown").upper()
    model = pf.get("recommended_model") or "random_forest_classifier"
    target_analysis = pf.get("target_analysis") or {}
    reasoning = (
        target_analysis.get("reasoning")
        or f"column `{target}` identified as target by Python analysis"
    )
    model_label = model.replace("_", " ").title()
    pipeline = _build_pipeline_plan(pf)

    if task == "UNKNOWN":
        # Do not present an undetermined task as "Regression".
        task_label = "Undetermined"
        task_line = f" • 📊 ML Task: {task_label} — could not be determined from the data\n"
        target_kind = "prediction"
    else:
        # "CLASSIFICATION" → "Classification", "REGRESSION" → "Regression".
        task_label = task.replace("_", " ").title()
        task_line = f" • 📊 ML Task: {task_label} — {task} determined from actual data values\n"
        target_kind = task_label.lower()

    # Extra notes
    notes: list[str] = []
    if pf.get("is_imbalanced"):
        notes.append("⚠️ Class imbalance detected — the model will use `class_weight=balanced`.")
    if pf.get("needs_outlier_removal"):
        notes.append("⚠️ Outlier columns detected — an outlier removal step will be added.")

    deduction_block = (
        f" • 🎯 Target column: `{target}` — {reasoning}\n"
        f"{task_line}"
        f" • 🤖 Recommended model: {model_label}\n"
        f" • ⚙️ Pipeline plan:\n"
        f" {pipeline}"
    )
    if notes:
        deduction_block += "\n" + "\n".join(f" {n}" for n in notes)

    # ── [AWAITING CONFIRMATION] — hard-coded template ────────────────────────
    confirm_q = (
        f"Does **`{target}`** as the {target_kind} target look right?\n"
        f"Reply **yes** to build the pipeline, or tell me the correct target column / task type."
    )

    return (
        "[PLANNING]\n"
        "Scanning the uploaded dataset to confirm the ML task, target column, and pipeline plan.\n\n"
        "[ANALYSIS]\n"
        f"{analysis_body}\n\n"
        "[DEDUCTION]\n"
        f"{deduction_block}\n\n"
        "[AWAITING CONFIRMATION]\n"
        f"{confirm_q}"
    )
799
+
800
+
493
801
  # ── System prompts ─────────────────────────────────────────────────────────────
494
802
 
495
803
  _SYSTEM_PROMPT = """\
@@ -908,10 +1216,42 @@ Return ONLY:
908
1216
  # ── Message builders ───────────────────────────────────────────────────────────
909
1217
 
910
1218
  def _extract_file_path(context: str) -> str | None:
1219
+ """Extract the absolute CSV file path from a dataset context string.
1220
+
1221
+ The frontend sends context like:
1222
+ File: Housing.csv
1223
+ Path: D:\\ISSM\\M8Flow\\...\\uploads\\Housing.csv
1224
+ Columns: ...
1225
+
1226
+ We also fall back to a regex scan for any absolute .csv path in the string.
1227
+ """
1228
+ import re as _re
1229
+ # Primary: look for "Path:" line — split on first ": " to preserve Windows drive letter
911
1230
  for line in context.splitlines():
912
- ll = line.lower()
913
- if ll.startswith("path:") or ll.startswith("full path:"):
914
- return line.split(":", 1)[1].strip()
1231
+ stripped = line.strip()
1232
+ if stripped.lower().startswith("path:"):
1233
+ # Use ": " as the delimiter (not ":" alone) to keep "C:\" intact
1234
+ if ": " in stripped:
1235
+ candidate = stripped.split(": ", 1)[1].strip()
1236
+ else:
1237
+ # No space after colon — try taking everything after "Path:"
1238
+ candidate = stripped[5:].strip()
1239
+ if candidate:
1240
+ logger.debug("_extract_file_path: found via Path: line → %r", candidate)
1241
+ return candidate
1242
+
1243
+ # Fallback: regex scan for any absolute path ending in .csv
1244
+ m = _re.search(
1245
+ r'([A-Za-z]:[/\\][^\s\n\r"\']+\.csv|/[^\s\n\r"\']+\.csv)',
1246
+ context,
1247
+ _re.IGNORECASE,
1248
+ )
1249
+ if m:
1250
+ candidate = m.group(1)
1251
+ logger.debug("_extract_file_path: found via regex → %r", candidate)
1252
+ return candidate
1253
+
1254
+ logger.debug("_extract_file_path: no path found in context")
915
1255
  return None
916
1256
 
917
1257
 
@@ -1096,6 +1436,175 @@ def _build_update_messages(
1096
1436
 
1097
1437
  # ── OpenRouter unified call ───────────────────────────────────────────────────
1098
1438
 
1439
+ def _convert_to_gemini(messages: list[dict]) -> tuple[list[dict], str | None]:
1440
+ """Convert OpenAI-style messages to Gemini format.
1441
+ Returns (contents, system_instruction_text).
1442
+ """
1443
+ contents: list[dict] = []
1444
+ system_text: str | None = None
1445
+ for msg in messages:
1446
+ role = msg.get("role", "user")
1447
+ content = msg.get("content", "")
1448
+ if role == "system":
1449
+ system_text = content
1450
+ elif role in ("assistant", "model"):
1451
+ contents.append({"role": "model", "parts": [{"text": content}]})
1452
+ else:
1453
+ contents.append({"role": "user", "parts": [{"text": content}]})
1454
+ return contents, system_text
1455
+
1456
+
1457
async def _call_gemini(
    messages: list[dict],
    task: str = "generate",
    timeout: int = _TIMEOUT_GENERATE,
) -> str:
    """Call the Google Gemini API directly with the task's default model.

    Args:
        messages: OpenAI-style chat messages; converted via ``_convert_to_gemini``.
        task: Key into ``_GEMINI_MODELS``; unknown tasks use the "generate" model.
        timeout: HTTP client timeout in seconds.

    Returns:
        The concatenated text of the first candidate's parts ("" if empty).

    Raises:
        RuntimeError: no key configured, HTTP 401/429, or an unexpected
            response shape.
        httpx.HTTPStatusError: any other non-success status.
    """
    key = _get_gemini_key()
    if not key:
        # Fail fast instead of issuing a request that cannot authenticate.
        raise RuntimeError("No Gemini API key available. Add one in Settings → API Keys.")
    model = _GEMINI_MODELS.get(task, _GEMINI_MODELS["generate"])
    url = _GEMINI_URL.format(model=model)

    contents, system_text = _convert_to_gemini(messages)

    body: dict = {
        "contents": contents,
        "generationConfig": {"maxOutputTokens": 8192, "temperature": 0.7},
    }
    if system_text:
        body["systemInstruction"] = {"parts": [{"text": system_text}]}

    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.post(
            url,
            # Key goes in a header, not the query string: the request URL is
            # embedded in httpx error messages and commonly ends up in logs.
            headers={"Content-Type": "application/json", "x-goog-api-key": key},
            json=body,
        )

    if response.status_code == 401:
        raise RuntimeError("Invalid Gemini API key — check AI Studio at https://aistudio.google.com/app/apikey")
    if response.status_code == 429:
        raise RuntimeError("Gemini rate limit reached — wait a moment and try again")
    response.raise_for_status()

    data = response.json()
    try:
        parts = data["candidates"][0]["content"]["parts"]
        if not parts:
            raise IndexError("empty parts list")
        # Join every text part so multi-part replies are not truncated.
        return "".join(part.get("text") or "" for part in parts)
    except (KeyError, IndexError, TypeError, AttributeError) as exc:
        raise RuntimeError(
            f"Unexpected Gemini response shape: {exc}. Raw: {str(data)[:300]}"
        ) from exc
1495
+
1496
+
1497
async def _call_gemini_with_model(
    messages: list[dict],
    model: str,
    timeout: int = _TIMEOUT_GENERATE,
) -> str:
    """Call Gemini with a specific model ID chosen by the user in the agent matrix.

    Args:
        messages: OpenAI-style chat messages; converted via ``_convert_to_gemini``.
        model: Gemini model id substituted into ``_GEMINI_URL``.
        timeout: HTTP client timeout in seconds.

    Returns:
        The concatenated text of the first candidate's parts ("" if empty).

    Raises:
        RuntimeError: no key configured, HTTP 401/429, or an unexpected
            response shape.
        httpx.HTTPStatusError: any other non-success status.
    """
    key = _get_gemini_key()
    if not key:
        # Fail fast instead of issuing a request that cannot authenticate.
        raise RuntimeError("No Gemini API key available. Add one in Settings → API Keys.")
    url = _GEMINI_URL.format(model=model)
    contents, system_text = _convert_to_gemini(messages)
    body: dict = {
        "contents": contents,
        "generationConfig": {"maxOutputTokens": 8192, "temperature": 0.7},
    }
    if system_text:
        body["systemInstruction"] = {"parts": [{"text": system_text}]}

    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.post(
            url,
            # Key goes in a header, not the query string: the request URL is
            # embedded in httpx error messages and commonly ends up in logs.
            headers={"Content-Type": "application/json", "x-goog-api-key": key},
            json=body,
        )

    if response.status_code == 401:
        raise RuntimeError(f"Invalid Gemini key for model {model}")
    if response.status_code == 429:
        # Mirrors the rate-limit handling of the other provider helpers.
        raise RuntimeError(f"Rate limit hit on {model} — choose a different model or wait a moment")
    response.raise_for_status()

    data = response.json()
    try:
        parts = data["candidates"][0]["content"]["parts"]
        if not parts:
            raise IndexError("empty parts list")
        # Join every text part so multi-part replies are not truncated.
        return "".join(part.get("text") or "" for part in parts)
    except (KeyError, IndexError, TypeError, AttributeError) as exc:
        raise RuntimeError(f"Unexpected Gemini response: {exc}") from exc
1524
+
1525
+
1526
async def _call_openrouter_with_model(
    messages: list[dict],
    model: str,
    api_key: str,
    timeout: int = _TIMEOUT_GENERATE,
) -> str:
    """Call OpenRouter with a specific model ID chosen by the user in the agent matrix.

    No fallback — if the chosen model fails, the error surfaces immediately.

    Args:
        messages: OpenAI-style chat messages, sent unchanged.
        model: OpenRouter model id.
        api_key: Bearer token for the Authorization header.
        timeout: HTTP client timeout in seconds.

    Returns:
        The first choice's message content ("" when empty or null).

    Raises:
        RuntimeError: HTTP 401/429, or an unexpected response shape.
        httpx.HTTPStatusError: any other non-success status.
    """
    body: dict = {"model": model, "messages": messages, "max_tokens": 8192}
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://m8flow.app",
        "X-Title": "M8Flow",
    }
    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.post(_OPENROUTER_URL, headers=headers, json=body)

    if response.status_code == 401:
        raise RuntimeError("Invalid OpenRouter API key")
    if response.status_code == 429:
        raise RuntimeError(f"Rate limit hit on {model} — choose a different model or wait a moment")
    response.raise_for_status()

    data = response.json()
    try:
        return data["choices"][0]["message"]["content"] or ""
    except (KeyError, IndexError, TypeError) as exc:
        # TypeError covers null fields (e.g. "choices": null). The raw snippet
        # is included because the bare exception text alone says little.
        raise RuntimeError(
            f"Unexpected OpenRouter response: {exc}. Raw: {str(data)[:300]}"
        ) from exc
1552
+
1553
+
1554
async def _call_mistral(
    messages: list[dict],
    task: str = "generate",
    timeout: int = _TIMEOUT_GENERATE,
) -> str:
    """Call Mistral La Plateforme with the task's default model.

    Uses the same chat/completions request shape as the OpenRouter helper.

    Args:
        messages: OpenAI-style chat messages, sent unchanged.
        task: Key into ``_MISTRAL_MODELS``; unknown tasks use the "generate" model.
        timeout: HTTP client timeout in seconds.

    Returns:
        The first choice's message content ("" when empty or null).

    Raises:
        RuntimeError: no key configured, HTTP 401/429, or an unexpected
            response shape.
        httpx.HTTPStatusError: any other non-success status.
    """
    key = _get_mistral_key()
    if not key:
        # Otherwise the request would be sent with "Bearer None".
        raise RuntimeError("No Mistral API key available. Add one in Settings → API Keys.")
    model = _MISTRAL_MODELS.get(task, _MISTRAL_MODELS["generate"])

    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.post(
            _MISTRAL_URL,
            headers={
                "Authorization": f"Bearer {key}",
                "Content-Type": "application/json",
            },
            json={"model": model, "messages": messages, "max_tokens": 8192},
        )

    if response.status_code == 401:
        raise RuntimeError("Invalid Mistral API key — check la Plateforme at https://console.mistral.ai")
    if response.status_code == 429:
        raise RuntimeError("Mistral rate limit reached — wait a moment and try again")
    response.raise_for_status()

    data = response.json()
    try:
        return data["choices"][0]["message"]["content"] or ""
    except (KeyError, IndexError, TypeError) as exc:
        # TypeError covers null fields (e.g. "choices": null).
        raise RuntimeError(f"Unexpected Mistral response: {exc}") from exc
1584
+
1585
+
1586
async def _call_mistral_with_model(
    messages: list[dict],
    model: str,
    timeout: int = _TIMEOUT_GENERATE,
) -> str:
    """Call Mistral with a user-selected model from the agent matrix.

    Args:
        messages: OpenAI-style chat messages, sent unchanged.
        model: Mistral model id.
        timeout: HTTP client timeout in seconds.

    Returns:
        The first choice's message content ("" when empty or null).

    Raises:
        RuntimeError: no key configured, HTTP 401/429, or an unexpected
            response shape.
        httpx.HTTPStatusError: any other non-success status.
    """
    key = _get_mistral_key()
    if not key:
        # Otherwise the request would be sent with "Bearer None".
        raise RuntimeError("No Mistral API key available. Add one in Settings → API Keys.")

    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.post(
            _MISTRAL_URL,
            headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
            json={"model": model, "messages": messages, "max_tokens": 8192},
        )

    if response.status_code == 401:
        raise RuntimeError(f"Invalid Mistral key for model {model}")
    if response.status_code == 429:
        # Mirrors the rate-limit handling of the other provider helpers.
        raise RuntimeError(f"Rate limit hit on {model} — choose a different model or wait a moment")
    response.raise_for_status()

    data = response.json()
    try:
        return data["choices"][0]["message"]["content"] or ""
    except (KeyError, IndexError, TypeError) as exc:
        # TypeError covers null fields (e.g. "choices": null).
        raise RuntimeError(f"Unexpected Mistral response: {exc}") from exc
1606
+
1607
+
1099
1608
  async def _call_openrouter(
1100
1609
  messages: list[dict],
1101
1610
  task: str = "generate",
@@ -1103,18 +1612,52 @@ async def _call_openrouter(
1103
1612
  timeout: int = _TIMEOUT_GENERATE,
1104
1613
  ) -> str:
1105
1614
  """
1106
- Route to the right model via OpenRouter based on task type.
1107
- Falls back through the full pool of live free models on rate-limit or error.
1108
- Rate-limited models are skipped for _RATE_LIMIT_TTL seconds to avoid wasted calls.
1615
+ Unified LLM entry point.
1616
+ If a Gemini key is available calls Gemini 2.5 Flash directly (priority).
1617
+ Otherwise routes through OpenRouter free models with rate-limit fallback.
1109
1618
  """
1110
- effective_key = _get_api_key()
1111
- if not effective_key:
1619
+ # ── Demo-mode enforcement ─────────────────────────────────────────────────
1620
+ or_key = _get_api_key()
1621
+ gemini_key = _get_gemini_key()
1622
+ mistral_key = _get_mistral_key()
1623
+ if not or_key and not gemini_key and not mistral_key:
1112
1624
  raise RuntimeError(
1113
- "No OpenRouter API key found. "
1114
- "Enter your key in the AI Assistant panel (sk-or-…) or "
1115
- "set OPENROUTER_API_KEY in backend/.env."
1625
+ "No API key configured. "
1626
+ "Add an OpenRouter, Gemini, or Mistral key in Settings API Keys."
1116
1627
  )
1117
1628
 
1629
+ # ── Per-agent model routing ───────────────────────────────────────────────
1630
+ agent_config = _request_agent_models.get()
1631
+ agent_model = (agent_config or {}).get(task)
1632
+
1633
+ def _is_gemini(mid: str) -> bool:
1634
+ return mid.startswith("gemini-") or "gemini" in mid.lower()
1635
+
1636
+ def _is_mistral(mid: str) -> bool:
1637
+ return any(mid.startswith(p) for p in (
1638
+ "codestral", "mistral-", "open-mistral", "open-mixtral", "pixtral",
1639
+ ))
1640
+
1641
+ if agent_model and agent_model != "auto":
1642
+ if _is_gemini(agent_model) and gemini_key:
1643
+ return await _call_gemini_with_model(messages, agent_model, timeout)
1644
+ elif _is_mistral(agent_model) and mistral_key:
1645
+ return await _call_mistral_with_model(messages, agent_model, timeout)
1646
+ elif not _is_gemini(agent_model) and not _is_mistral(agent_model) and or_key:
1647
+ return await _call_openrouter_with_model(messages, agent_model, or_key, timeout)
1648
+ # Key unavailable for chosen model — fall through to auto routing
1649
+
1650
+ # ── Auto routing: priority Gemini > Mistral > OpenRouter ─────────────────
1651
+ if gemini_key:
1652
+ return await _call_gemini(messages, task=task, timeout=timeout)
1653
+ if mistral_key:
1654
+ return await _call_mistral(messages, task=task, timeout=timeout)
1655
+
1656
+ # ── OpenRouter path (with rate-limit fallback chain) ─────────────────────
1657
+ effective_key = or_key
1658
+ if not effective_key:
1659
+ raise RuntimeError("No API key available. Add one in Settings → API Keys.")
1660
+
1118
1661
  model = _MODELS.get(task, _MODELS["generate"])
1119
1662
 
1120
1663
  # Full pool of verified-live free models (May 2026).
@@ -2121,8 +2664,9 @@ You are an M8Flow node code generator. Write Python code for a reusable pipeline
2121
2664
  2. DataFrame input parameter MUST be named data (never df, dataframe, dataset)
2122
2665
  3. Function MUST return a dict with named string keys
2123
2666
  4. ALL imports go INSIDE the function body
2124
- 5. Only allowed libraries: pandas, numpy, sklearn, scipy, math, statistics, re, json
2125
- 6. FORBIDDEN: os, sys, subprocess, socket, requests, open(), eval(), exec(), matplotlib
2667
+ 5. Scientific libraries: pandas, numpy, sklearn, scipy, statsmodels, imblearn
2668
+ 6. Visualisation: ALWAYS prefer `plotly` (px or go) for interactive charts. `matplotlib` and `seaborn` are also allowed for static plots.
2669
+ 7. FORBIDDEN: os, sys, subprocess, socket, requests, open(), eval(), exec()
2126
2670
 
2127
2671
  FIELD TYPE ANNOTATIONS — these control the UI widget shown to the user:
2128
2672
  data input (connects from previous node) → just `data` with no type hint