npm - @thispointon/kondi-chat - Versions diffs - 0.1.2 - Mend

@thispointon/kondi-chat 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108) hide show

package/LICENSE +21 -0
package/README.md +556 -0
package/bin/kondi-chat +56 -0
package/bin/kondi-chat.js +72 -0
package/package.json +55 -0
package/scripts/demo.tape +49 -0
package/scripts/postinstall.cjs +103 -0
package/src/audit/analytics.ts +261 -0
package/src/audit/ledger.ts +253 -0
package/src/audit/telemetry.ts +165 -0
package/src/cli/backend.ts +675 -0
package/src/cli/commands.ts +419 -0
package/src/cli/help.ts +182 -0
package/src/cli/submit-helpers.ts +159 -0
package/src/cli/submit.ts +539 -0
package/src/cli/wizard.ts +121 -0
package/src/context/bootstrap.ts +138 -0
package/src/context/budget.ts +100 -0
package/src/context/manager.ts +666 -0
package/src/context/memory.ts +160 -0
package/src/context/preflight.ts +176 -0
package/src/context/project-brain.ts +101 -0
package/src/context/receipts.ts +108 -0
package/src/context/skills.ts +154 -0
package/src/context/symbol-index.ts +240 -0
package/src/council/profiles.ts +137 -0
package/src/council/tool.ts +138 -0
package/src/council-engine/cli/council-artifacts.ts +230 -0
package/src/council-engine/cli/council-config.ts +178 -0
package/src/council-engine/cli/council-session-export.ts +116 -0
package/src/council-engine/cli/kondi.ts +98 -0
package/src/council-engine/cli/llm-caller.ts +229 -0
package/src/council-engine/cli/localStorage-shim.ts +119 -0
package/src/council-engine/cli/node-platform.ts +68 -0
package/src/council-engine/cli/run-council.ts +481 -0
package/src/council-engine/cli/run-pipeline.ts +772 -0
package/src/council-engine/cli/session-export.ts +153 -0
package/src/council-engine/configs/councils/analysis.json +101 -0
package/src/council-engine/configs/councils/code-planning.json +86 -0
package/src/council-engine/configs/councils/coding.json +89 -0
package/src/council-engine/configs/councils/debate.json +97 -0
package/src/council-engine/configs/councils/solo-claude.json +34 -0
package/src/council-engine/configs/councils/solo-gpt.json +34 -0
package/src/council-engine/council/coding-orchestrator.ts +1205 -0
package/src/council-engine/council/context-bootstrap.ts +147 -0
package/src/council-engine/council/context-inspection.ts +42 -0
package/src/council-engine/council/context-store.ts +763 -0
package/src/council-engine/council/deliberation-orchestrator.ts +2762 -0
package/src/council-engine/council/factory.ts +164 -0
package/src/council-engine/council/index.ts +201 -0
package/src/council-engine/council/ledger-store.ts +438 -0
package/src/council-engine/council/prompts.ts +1689 -0
package/src/council-engine/council/storage-cleanup.ts +164 -0
package/src/council-engine/council/store.ts +1110 -0
package/src/council-engine/council/synthesis.ts +291 -0
package/src/council-engine/council/types.ts +845 -0
package/src/council-engine/council/validation.ts +613 -0
package/src/council-engine/pipeline/build-detect.ts +73 -0
package/src/council-engine/pipeline/executor.ts +1048 -0
package/src/council-engine/pipeline/index.ts +9 -0
package/src/council-engine/pipeline/install-detect.ts +84 -0
package/src/council-engine/pipeline/memory-store.ts +182 -0
package/src/council-engine/pipeline/output-parsers.ts +146 -0
package/src/council-engine/pipeline/run-output.ts +149 -0
package/src/council-engine/pipeline/session-import.ts +177 -0
package/src/council-engine/pipeline/store.ts +753 -0
package/src/council-engine/pipeline/test-detect.ts +82 -0
package/src/council-engine/pipeline/types.ts +401 -0
package/src/council-engine/services/deliberationSummary.ts +114 -0
package/src/council-engine/tsconfig.json +16 -0
package/src/council-engine/types/mcp.ts +122 -0
package/src/council-engine/utils/filterTools.ts +73 -0
package/src/engine/apply.ts +238 -0
package/src/engine/checkpoints.ts +237 -0
package/src/engine/consultants.ts +347 -0
package/src/engine/diff.ts +171 -0
package/src/engine/errors.ts +102 -0
package/src/engine/git-tools.ts +246 -0
package/src/engine/hooks.ts +181 -0
package/src/engine/loop-guard.ts +155 -0
package/src/engine/permissions.ts +293 -0
package/src/engine/pipeline.ts +376 -0
package/src/engine/sub-agents.ts +133 -0
package/src/engine/task-card.ts +185 -0
package/src/engine/task-router.ts +256 -0
package/src/engine/task-store.ts +86 -0
package/src/engine/tools.ts +783 -0
package/src/engine/verify.ts +111 -0
package/src/mcp/client.ts +225 -0
package/src/mcp/config.ts +120 -0
package/src/mcp/tool-manager.ts +192 -0
package/src/mcp/types.ts +61 -0
package/src/providers/llm-caller.ts +943 -0
package/src/providers/rate-limiter.ts +238 -0
package/src/router/NOTES.md +28 -0
package/src/router/collector.ts +474 -0
package/src/router/embeddings.ts +286 -0
package/src/router/index.ts +299 -0
package/src/router/intent-router.ts +225 -0
package/src/router/nn-router.ts +205 -0
package/src/router/profiles.ts +309 -0
package/src/router/registry.ts +565 -0
package/src/router/rules.ts +274 -0
package/src/router/train.py +408 -0
package/src/session/store.ts +211 -0
package/src/test-utils/mock-llm.ts +39 -0
package/src/types.ts +322 -0
package/src/web/manager.ts +311 -0

package/src/router/rules.ts ADDED Viewed

@@ -0,0 +1,274 @@
+/**
+ * Rule-Based Router — the "teacher" that makes routing decisions.
+ *
+ * Maps (phase, task_kind) to the best model from the registry.
+ * This is the initial routing strategy. Every decision it makes
+ * gets logged by the collector, which eventually trains an NN
+ * to replace it.
+ *
+ * Strategy:
+ *   - discuss/dispatch → best reasoning model
+ *   - reflect → the profile's review preference, else best reasoning model
+ *   - execute → cheapest coding model (promote on failure)
+ *   - compress/state_update → cheapest summarization model
+ *   - verify → no LLM call (local tools)
+ */
+import type { LedgerPhase, ProviderId, TaskKind } from '../types.ts';
+import { ModelRegistry, type ModelCapability, type ModelEntry } from './registry.ts';
+import type { BudgetProfile } from './profiles.ts';
/**
 * Read-only slice of the ModelRegistry API that the routing strategy helpers
 * depend on. Any object exposing these four lookups can stand in for the
 * full registry; scopedRegistry() builds one structurally rather than by
 * subclassing ModelRegistry.
 */
interface RegistryView {
  /** Top-ranked entry for `capability`, or undefined when none qualifies. */
  getBest(capability: ModelCapability): ModelEntry | undefined;
  /** Lowest-cost entry for `capability`, or undefined when none qualifies. */
  getCheapest(capability: ModelCapability): ModelEntry | undefined;
  /** Every entry advertising `capability`. */
  getByCapability(capability: ModelCapability): Array<ModelEntry>;
  /** Every entry that is currently enabled. */
  getEnabled(): Array<ModelEntry>;
}
/**
 * Wrap `registry` in a RegistryView that only exposes models whose provider
 * is listed in `providers`.
 *
 * getCheapest/getBest return the first/last element of the filtered
 * getByCapability() result, so they depend on the order in which the
 * underlying registry returns that list.
 */
function scopedRegistry(registry: ModelRegistry, providers: ProviderId[]): RegistryView {
  const permitted = new Set(providers);
  const fromPermittedProvider = (entry: ModelEntry) => permitted.has(entry.provider);
  // Shared lookup: capability matches restricted to the permitted providers.
  const capable = (capability: ModelCapability): ModelEntry[] =>
    registry.getByCapability(capability).filter(fromPermittedProvider);

  return {
    getEnabled() {
      return registry.getEnabled().filter(fromPermittedProvider);
    },
    getByCapability(capability) {
      return capable(capability);
    },
    getCheapest(capability) {
      return capable(capability)[0];
    },
    getBest(capability) {
      const matches = capable(capability);
      return matches[matches.length - 1];
    },
  };
}
+// ---------------------------------------------------------------------------
+// Route decision
+// ---------------------------------------------------------------------------
/** Outcome of a routing call: the chosen model plus why it was chosen. */
export interface RouteDecision {
  /** Registry entry the caller should send the request to. */
  model: ModelEntry;
  /** True when the model was picked as a promotion (retry after failure). */
  promoted: boolean;
  /** Human-readable explanation of the rule that produced this choice. */
  reason: string;
}
+// ---------------------------------------------------------------------------
+// Rule-based router
+// ---------------------------------------------------------------------------
/**
 * Rule-based model selector.
 *
 * Given a pipeline phase (and optionally a task kind and failure count) it
 * returns a RouteDecision. Selection order inside select():
 *   1. a manual override, if one is set;
 *   2. promotion to the best coding model once an execute task has failed
 *      `promotionThreshold` times;
 *   3. the per-phase strategy helpers, which consult the active budget
 *      profile first and fall back to built-in defaults.
 */
export class RuleRouter {
  private registry: ModelRegistry;
  private profile?: BudgetProfile;
  private override?: ModelEntry;

  constructor(registry: ModelRegistry) {
    this.registry = registry;
  }

  /** Set the active budget profile — changes model selection priorities */
  setProfile(profile: BudgetProfile): void {
    this.profile = profile;
  }

  /**
   * Registry view used by the strategy helpers.
   *
   * When the profile has a rolePinning map, every pinned model id that
   * resolves through getById() contributes its *provider*, and the view is
   * restricted to models from those providers (not just the pinned models
   * themselves). Without rolePinning, or when none of the pinned ids
   * resolve, the full registry is returned.
   */
  private reg(): RegistryView {
    if (!this.profile?.rolePinning) return this.registry;
    const providers = new Set<ProviderId>();
    for (const modelId of Object.values(this.profile.rolePinning)) {
      const m = this.registry.getById(modelId);
      if (m) providers.add(m.provider);
    }
    return providers.size > 0
      ? scopedRegistry(this.registry, [...providers])
      : this.registry;
  }

  /** Force all routing to a specific model. Pass undefined to clear. */
  setOverride(model: ModelEntry | undefined): void {
    this.override = model;
  }

  /** Get the current override, if any */
  getOverride(): ModelEntry | undefined {
    return this.override;
  }

  /**
   * Select the best model for a given phase and optional task context.
   *
   * @param phase      Pipeline phase (discuss, dispatch, execute, etc.)
   * @param taskKind   Type of task being executed (if in a task context)
   * @param failures   Number of prior failures for this task (triggers promotion)
   * @param promotionThreshold  Failures before promoting to best model
   * @throws Error when a strategy has to fall back and no model is enabled.
   */
  select(
    phase: LedgerPhase,
    taskKind?: TaskKind,
    failures = 0,
    promotionThreshold = 2,
  ): RouteDecision {
    // Manual override — user forced a specific model with /use
    if (this.override) {
      return { model: this.override, reason: `override: ${this.override.alias || this.override.id}`, promoted: false };
    }

    // Promotion only applies to the execute phase; if no coding model is
    // available we fall through to the normal execute strategy below.
    const promoted = failures >= promotionThreshold;
    if (promoted && phase === 'execute') {
      const best = this.reg().getBest('coding');
      if (best) {
        return { model: best, reason: `promoted after ${failures} failures`, promoted: true };
      }
    }

    // Phase-based routing
    switch (phase) {
      case 'discuss':
      case 'dispatch':
        return this.selectForReasoning();
      case 'reflect':
        return this.selectForReview();
      case 'execute':
        return this.selectForExecution(taskKind);
      case 'compress':
      case 'state_update':
        return this.selectForCheap();
      default:
        // Unlisted phases are treated like reasoning phases.
        return this.selectForReasoning();
    }
  }

  // -------------------------------------------------------------------------
  // Strategy helpers
  // -------------------------------------------------------------------------

  /** Planner selection: profile planning preferences first, then defaults. */
  private selectForReasoning(): RouteDecision {
    // Resolve the (possibly scoped) view once per selection instead of
    // rebuilding it for every lookup.
    const reg = this.reg();

    if (this.profile) {
      const pick = this.profile.preferLocal
        ? (cap: string) => reg.getCheapest(cap)
        : (cap: string) => reg.getBest(cap);
      for (const cap of this.profile.planningPreference) {
        const model = pick(cap);
        if (model) return { model, reason: `${this.profile.name}: ${cap}`, promoted: false };
      }
    }

    // Default: best planning model
    const model = reg.getBest('planning')
      || reg.getBest('reasoning')
      || reg.getBest('coding')
      || this.fallback();
    return { model, reason: 'reasoning phase — best planner', promoted: false };
  }

  /**
   * Executor selection. Order: profile direct task-kind match, profile
   * execution preferences, direct capability match on the task kind, then
   * the built-in task-kind → capability table.
   */
  private selectForExecution(taskKind?: TaskKind): RouteDecision {
    const reg = this.reg();

    if (this.profile) {
      // Try direct task kind match first
      if (taskKind) {
        const directMatch = this.profile.preferLocal
          ? reg.getCheapest(taskKind)
          : reg.getByCapability(taskKind)[0];
        if (directMatch) {
          return { model: directMatch, reason: `${this.profile.name}: ${taskKind} match`, promoted: false };
        }
      }
      // Then profile's execution preferences
      for (const cap of this.profile.executionPreference) {
        const model = reg.getCheapest(cap);
        if (model) return { model, reason: `${this.profile.name}: ${cap}`, promoted: false };
      }
    }

    // Default: try to match task kind directly to a capability
    if (taskKind) {
      const directMatch = reg.getCheapest(taskKind);
      if (directMatch) {
        return { model: directMatch, reason: `${taskKind} task — direct capability match`, promoted: false };
      }
    }

    // Known task kind → capability mapping. Each clause gets its own block
    // so its `const` is scoped to that clause only.
    switch (taskKind) {
      case 'analysis':
      case 'code-review': {
        const reviewer = reg.getBest('code-review')
          || reg.getBest('analysis')
          || reg.getBest('reasoning')
          || this.fallback();
        return { model: reviewer, reason: `${taskKind} task — best reviewer`, promoted: false };
      }
      case 'marketing':
      case 'writing': {
        const writer = reg.getCheapest('marketing')
          || reg.getCheapest('writing')
          || reg.getCheapest('general')
          || this.fallback();
        return { model: writer, reason: `${taskKind} task — best writer`, promoted: false };
      }
      case 'test':
      case 'fix': {
        const fixer = reg.getCheapest('fast-coding')
          || reg.getCheapest('coding')
          || this.fallback();
        return { model: fixer, reason: `${taskKind} task — cheapest coder`, promoted: false };
      }
      case 'implementation':
      case 'refactor':
      case 'refactoring': {
        const coder = reg.getCheapest('coding')
          || this.fallback();
        return { model: coder, reason: `${taskKind} task — cheapest coder`, promoted: false };
      }
      default: {
        // Unknown kind — use cheapest coding model as default for execution
        const defaultModel = reg.getCheapest('coding')
          || reg.getCheapest('general')
          || this.fallback();
        return { model: defaultModel, reason: `${taskKind || 'unknown'} task — default`, promoted: false };
      }
    }
  }

  /**
   * Reviewer selection. Honors the profile's reviewPreference capability
   * list when it is non-empty; otherwise (no profile, an empty list, or no
   * capability matched) it defers to selectForReasoning() so reflect still
   * gets a strong model.
   */
  private selectForReview(): RouteDecision {
    if (this.profile && this.profile.reviewPreference.length > 0) {
      const reg = this.reg();
      const pick = this.profile.preferLocal
        ? (cap: string) => reg.getCheapest(cap)
        : (cap: string) => reg.getBest(cap);
      for (const cap of this.profile.reviewPreference) {
        const model = pick(cap);
        if (model) return { model, reason: `${this.profile.name}: review ${cap}`, promoted: false };
      }
    }
    return this.selectForReasoning();
  }

  /** Cheapest summarization-capable model, else cheapest general model. */
  private selectForCheap(): RouteDecision {
    const reg = this.reg();
    const model = reg.getCheapest('summarization')
      || reg.getCheapest('general')
      || this.fallback();
    return { model, reason: 'cheap phase — summarization', promoted: false };
  }

  /**
   * Last resort: the first enabled model in the current view.
   * @throws Error when the view has no enabled models at all.
   */
  private fallback(): ModelEntry {
    const enabled = this.reg().getEnabled();
    if (enabled.length === 0) {
      throw new Error('No models enabled in registry. Run /models to configure.');
    }
    return enabled[0];
  }
}

package/src/router/train.py ADDED Viewed

@@ -0,0 +1,408 @@
+#!/usr/bin/env python3
+"""
+Train a lightweight neural network router from kondi-chat routing data.
+Consumes the JSONL training data collected by the orchestrator's rule-based
+router and trains an NN that predicts which model will succeed for a given
+task. The orchestrator is the teacher; this NN is the student.
+Usage:
+  python src/router/train.py [--data-dir .kondi-chat] [--out .kondi-chat/router-model.json]
+The trained model is exported as JSON (weights + config) so it can be
+loaded in TypeScript without a Python runtime.
+"""
+import json
+import sys
+import argparse
+from pathlib import Path
+import numpy as np
+# ---------------------------------------------------------------------------
+# Data loading
+# ---------------------------------------------------------------------------
def load_samples(data_dir: str) -> list[dict]:
    """Load routing samples from the collector's JSONL file.

    Reads ``<data_dir>/routing-data.jsonl`` as UTF-8 and parses one JSON
    document per non-blank line.

    Args:
        data_dir: Directory that contains ``routing-data.jsonl``.

    Returns:
        The parsed samples, in file order.

    Exits the process with status 1 (after printing a message) when the
    file does not exist. Lines that are not valid JSON — e.g. a record that
    was only partially written — are reported on stderr and skipped rather
    than aborting the whole load.
    """
    path = Path(data_dir) / "routing-data.jsonl"
    if not path.exists():
        print(f"No routing data found at {path}")
        sys.exit(1)

    samples = []
    skipped = 0
    # Iterate the file object instead of str.splitlines(): splitlines() also
    # breaks on U+2028/U+2029/VT/FF, which may legally appear unescaped
    # inside a JSON string and would cut a record in half.
    with path.open(encoding="utf-8") as handle:
        for line_no, line in enumerate(handle, start=1):
            if not line.strip():
                continue
            try:
                samples.append(json.loads(line))
            except json.JSONDecodeError as err:
                skipped += 1
                print(f"Skipping malformed line {line_no} in {path}: {err}", file=sys.stderr)

    if skipped:
        print(f"Skipped {skipped} malformed line(s)", file=sys.stderr)
    print(f"Loaded {len(samples)} routing samples")
    return samples
def encode_features(samples: list[dict]) -> tuple[np.ndarray, dict]:
    """Encode samples into a float32 feature matrix.

    Categories are discovered from the data — nothing is hard-coded. Each
    row is laid out as::

        [embedding | phase one-hot | task-kind one-hot | scalars]

    The embedding section is present only when more than half of the
    samples carry an ``embedding``; its width is taken from the first such
    sample. Samples without an embedding, or with one of a different
    length, get zeros in that section. The three scalars are
    ``promptLength / 10_000``, ``contextTokens / 100_000`` and
    ``failures / 5``, each capped at 1.0; a missing or ``null`` value
    counts as 0.

    Args:
        samples: Routing samples; every sample must have a ``phase`` key.

    Returns:
        ``(feature_matrix, feature_info)``. The matrix always has shape
        ``(len(samples), inputDim)`` — including for empty input — and
        ``feature_info`` holds the encoding schema needed at inference.
    """
    # Discover categories from data
    phases = sorted({s["phase"] for s in samples})
    task_kinds = sorted({s.get("taskKind") or "none" for s in samples})

    # Embeddings are used only when a majority of samples have one.
    samples_with_embeddings = [s for s in samples if s.get("embedding")]
    has_embeddings = len(samples_with_embeddings) > len(samples) * 0.5  # Need >50%
    embedding_dim = 0
    if has_embeddings:
        embedding_dim = len(samples_with_embeddings[0]["embedding"])
        print(f"Using embeddings: {embedding_dim}D ({len(samples_with_embeddings)}/{len(samples)} samples)")
    elif samples_with_embeddings:
        print(f"Too few embeddings ({len(samples_with_embeddings)}/{len(samples)}), using structured features only")
    else:
        print("No embeddings found, using structured features only")

    structured_names = (
        [f"phase:{p}" for p in phases] +
        [f"kind:{k}" for k in task_kinds] +
        ["prompt_length", "context_tokens", "failures"]
    )
    # embedding_dim stays 0 without embeddings, so this prefix is then empty.
    feature_names = [f"emb_{i}" for i in range(embedding_dim)] + structured_names

    def clamped(value, scale):
        # `or 0` maps both a missing key and an explicit JSON null to zero.
        return min((value or 0) / scale, 1.0)

    # Column lookups replace the per-sample scan over every category.
    phase_col = {p: embedding_dim + i for i, p in enumerate(phases)}
    kind_col = {k: embedding_dim + len(phases) + i for i, k in enumerate(task_kinds)}
    scalar_col = embedding_dim + len(phases) + len(task_kinds)

    # Pre-allocating fixes the shape up front, so empty input or a
    # wrong-length embedding can never produce a ragged / 1-D array.
    features = np.zeros((len(samples), len(feature_names)), dtype=np.float32)
    mismatched = 0
    for row, s in zip(features, samples):
        if has_embeddings:
            emb = s.get("embedding")
            if emb:
                if len(emb) == embedding_dim:
                    row[:embedding_dim] = emb
                else:
                    mismatched += 1  # leave zeros, same as a missing embedding
        row[phase_col[s["phase"]]] = 1.0
        row[kind_col[s.get("taskKind") or "none"]] = 1.0
        row[scalar_col:] = (
            clamped(s.get("promptLength"), 10_000),
            clamped(s.get("contextTokens"), 100_000),
            clamped(s.get("failures"), 5.0),
        )
    if mismatched:
        print(f"Zero-filled {mismatched} embedding(s) whose length was not {embedding_dim}")

    feature_info = {
        "phases": phases,
        "taskKinds": task_kinds,
        "featureNames": feature_names,
        "inputDim": len(feature_names),
        "embeddingDim": embedding_dim,
        "hasEmbeddings": has_embeddings,
    }
    return features, feature_info
def encode_labels(samples: list[dict], model_names: list[str]) -> np.ndarray:
    """Encode per-model outcome labels.

    For each sample the column of the model that handled it (``modelId``)
    is 1.0 if the sample's ``succeeded`` flag is truthy and 0.0 otherwise
    (a missing flag counts as failure). Every other column is -1.0,
    meaning "unknown — this model was not tried on this sample".

    Args:
        samples: Routing samples; every sample must have a ``modelId`` key.
        model_names: Column order of the label matrix.

    Returns:
        float32 array of shape ``(len(samples), len(model_names))``; the
        shape holds for empty input too, so column slicing stays valid.
    """
    # name -> column indices (a list, so a repeated name fills every
    # matching column exactly as a full scan would).
    columns: dict[str, list[int]] = {}
    for col, name in enumerate(model_names):
        columns.setdefault(name, []).append(col)

    # Start from "unknown" everywhere; only the tried model is overwritten.
    labels = np.full((len(samples), len(model_names)), -1.0, dtype=np.float32)
    for row, sample in enumerate(samples):
        cols = columns.get(sample["modelId"])
        if cols:
            labels[row, cols] = 1.0 if sample.get("succeeded", False) else 0.0
    return labels
+# ---------------------------------------------------------------------------
+# Neural Network (numpy only — no PyTorch dependency)
+# ---------------------------------------------------------------------------
def relu(x: np.ndarray) -> np.ndarray:
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)
def sigmoid(x: np.ndarray) -> np.ndarray:
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    Evaluated through ``exp(-|x|)``, whose argument is never positive, so
    the exponential cannot overflow for any input dtype (a fixed ±500 clip
    does not protect float32, whose ``exp`` overflows just below 89).
    """
    decay = np.exp(-np.abs(x))  # always in [0, 1]
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) — the same value,
    # written so each branch only uses the bounded `decay` term.
    return np.where(np.asarray(x) >= 0, 1 / (1 + decay), decay / (1 + decay))
def relu_derivative(x: np.ndarray) -> np.ndarray:
    """Gradient of ReLU: 1.0 where ``x`` is strictly positive, else 0.0 (float32)."""
    is_positive = np.greater(x, 0)
    return is_positive.astype(np.float32)
class SimpleNN:
    """
    Fully-connected network trained with full-batch gradient descent.

    Hidden layers use ReLU and the output layer uses a sigmoid, so every
    output is an independent probability. Pure numpy — no PyTorch needed —
    to keep dependencies minimal. Weights and biases are float32.
    """

    def __init__(self, layer_dims: list[int]):
        """Create randomly initialised layers.

        Args:
            layer_dims: ``[input_dim, hidden1, hidden2, ..., output_dim]``.

        Raises:
            ValueError: if fewer than two dimensions are given (a network
                needs at least an input and an output size).
        """
        if len(layer_dims) < 2:
            raise ValueError("layer_dims needs at least an input and an output dimension")
        self.weights: list[np.ndarray] = []
        self.biases: list[np.ndarray] = []
        # Caches written by forward() and consumed by backward().
        self._activations: list[np.ndarray] = []
        self._pre_activations: list[np.ndarray] = []
        for fan_in, fan_out in zip(layer_dims[:-1], layer_dims[1:]):
            # He initialisation: std = sqrt(2 / fan_in), the usual choice
            # in front of ReLU units.
            scale = np.sqrt(2.0 / fan_in)
            # Scale first, cast last: multiplying a float32 array by a
            # float64 numpy scalar may upcast, and weights must stay float32.
            self.weights.append((np.random.randn(fan_in, fan_out) * scale).astype(np.float32))
            self.biases.append(np.zeros(fan_out, dtype=np.float32))

    def _run(self, x: np.ndarray, remember: bool) -> np.ndarray:
        """Propagate ``x`` through all layers, optionally caching for backprop."""
        activations = [x]
        pre_activations = []
        last = len(self.weights) - 1
        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            z = x @ w + b
            pre_activations.append(z)
            x = relu(z) if i < last else sigmoid(z)
            activations.append(x)
        if remember:
            self._activations = activations
            self._pre_activations = pre_activations
        return x

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Forward pass. Hidden layers use ReLU, output uses sigmoid.

        Caches the per-layer activations so a following backward() call can
        compute gradients for this batch.
        """
        return self._run(x, remember=True)

    def backward(self, y_true: np.ndarray, mask: np.ndarray, lr: float = 0.001):
        """
        One gradient-descent step on the masked binary cross-entropy.

        Must be called after forward() on the same batch.

        Args:
            y_true: Targets, same shape as the network output.
            mask: Same shape as y_true; 1 where a label exists, 0 where it
                is unknown. Masked-out entries contribute no gradient.
            lr: Learning rate.

        Raises:
            RuntimeError: if forward() has not been called yet.
        """
        if not self._activations:
            raise RuntimeError("backward() called before forward()")
        n = max(mask.sum(), 1)  # average over labelled entries only
        y_pred = self._activations[-1]
        # Sigmoid + BCE collapses to (prediction - target) at the output.
        delta = (y_pred - y_true) * mask / n
        for i in range(len(self.weights) - 1, -1, -1):
            a_prev = self._activations[i]
            dw = a_prev.T @ delta
            db = delta.sum(axis=0)
            if i > 0:
                # Propagate the error BEFORE touching weights[i]: the
                # gradient for earlier layers must be taken at the weights
                # that produced this forward pass, not the updated ones.
                delta = (delta @ self.weights[i].T) * relu_derivative(self._pre_activations[i - 1])
            self.weights[i] -= lr * dw
            self.biases[i] -= lr * db

    def predict(self, x: np.ndarray) -> np.ndarray:
        """Inference-only forward pass; leaves the training cache untouched."""
        return self._run(x, remember=False)

    def to_json(self) -> dict:
        """Export weights as JSON-serializable dict."""
        return {
            "weights": [w.tolist() for w in self.weights],
            "biases": [b.tolist() for b in self.biases],
            "layerDims": [self.weights[0].shape[0]] + [w.shape[1] for w in self.weights],
        }

    @classmethod
    def from_json(cls, data: dict) -> "SimpleNN":
        """Rebuild a network from the dict produced by to_json()."""
        dims = data["layerDims"]
        nn = cls(dims)
        # The random initial parameters are replaced by the stored ones.
        nn.weights = [np.array(w, dtype=np.float32) for w in data["weights"]]
        nn.biases = [np.array(b, dtype=np.float32) for b in data["biases"]]
        return nn
+# ---------------------------------------------------------------------------
+# Training
+# ---------------------------------------------------------------------------
def train(
    X: np.ndarray,
    Y: np.ndarray,
    model_names: list[str],
    hidden_dims: "list[int] | None" = None,
    epochs: int = 200,
    lr: float = 0.005,
    val_split: float = 0.15,
) -> tuple[SimpleNN, dict]:
    """Train the router NN and return (model, metrics).

    Args:
        X: Feature matrix, one row per sample.
        Y: Label matrix with one column per model: 1 success, 0 failure,
            -1 unknown (excluded from the loss).
        model_names: Column names of ``Y``; used for logging and metrics.
        hidden_dims: Hidden layer sizes; ``None`` means ``[64, 32]``.
        epochs: Maximum number of full-batch gradient steps.
        lr: Learning rate.
        val_split: Fraction of samples held out for validation.

    Returns:
        The trained network (restored to its best validation loss when a
        labelled validation set exists) and the dict from ``evaluate`` on
        the validation split.

    Validation loss is computed every epoch and training stops after 20
    consecutive epochs without improvement. When the validation split has
    no labelled entries there is nothing to compare against, so early
    stopping is disabled and the final weights are kept.
    """
    hidden = [64, 32] if hidden_dims is None else list(hidden_dims)

    # Train/val split
    n = len(X)
    idx = np.random.permutation(n)
    val_n = int(n * val_split)
    val_idx, train_idx = idx[:val_n], idx[val_n:]
    X_train, X_val = X[train_idx], X[val_idx]
    Y_train, Y_val = Y[train_idx], Y[val_idx]

    # Mask: 1 where we have labels, 0 where unknown (-1)
    mask_train = (Y_train >= 0).astype(np.float32)
    mask_val = (Y_val >= 0).astype(np.float32)
    # Replace -1 with 0 for computation (masked out anyway)
    Y_train_clean = np.maximum(Y_train, 0)
    Y_val_clean = np.maximum(Y_val, 0)

    layer_dims = [X.shape[1]] + hidden + [Y.shape[1]]
    nn = SimpleNN(layer_dims)

    val_labels = mask_val.sum()
    best_val_loss = float("inf")
    best_weights = None
    patience = 20  # epochs without improvement before stopping
    stale_epochs = 0
    eps = 1e-8  # keeps log() finite at saturated predictions

    print(f"\nTraining: {len(X_train)} train, {len(X_val)} val")
    print(f"Architecture: {layer_dims}")
    print(f"Models: {model_names}\n")
    if val_labels == 0:
        # A masked loss over zero labels is identically 0; treating that as
        # "best" would restore the first epoch's weights after training.
        print("  No labelled validation samples — early stopping disabled")

    for epoch in range(epochs):
        # Forward + backward on train
        nn.forward(X_train)
        nn.backward(Y_train_clean, mask_train, lr=lr)
        if val_labels == 0:
            continue

        # Masked BCE loss on validation
        val_pred = nn.predict(X_val)
        bce = -(Y_val_clean * np.log(val_pred + eps) + (1 - Y_val_clean) * np.log(1 - val_pred + eps))
        val_loss = float((bce * mask_val).sum() / val_labels)
        if (epoch + 1) % 20 == 0 or epoch == 0:
            print(f"  Epoch {epoch + 1:4d}/{epochs}: val_loss={val_loss:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_weights = ([w.copy() for w in nn.weights], [b.copy() for b in nn.biases])
            stale_epochs = 0
        else:
            stale_epochs += 1
            if stale_epochs >= patience:
                print(f"  Early stopping at epoch {epoch + 1}")
                break

    # Restore best weights
    if best_weights is not None:
        nn.weights, nn.biases = best_weights

    # Evaluate
    val_pred = nn.predict(X_val)
    metrics = evaluate(val_pred, Y_val, mask_val, model_names)
    return nn, metrics
def evaluate(
    pred: np.ndarray,
    y_true: np.ndarray,
    mask: np.ndarray,
    model_names: list[str],
) -> dict:
    """Score predictions per model and for the router as a whole.

    Per model (skipped when it has no labelled rows): accuracy of the 0.5
    threshold, the number of labelled rows, and the share of positives.
    The ``"_system"`` entry reports what happens when the highest-scoring
    model is picked for each row, counting only rows where that model's
    outcome is actually known.
    """
    report = {}
    for col, name in enumerate(model_names):
        labelled = mask[:, col] > 0
        n_labelled = labelled.sum()
        if n_labelled == 0:
            continue
        truth = y_true[labelled, col]
        guesses = (pred[labelled, col] >= 0.5).astype(float)
        report[name] = {
            "accuracy": float((guesses == truth).mean()),
            "samples": int(n_labelled),
            "positive_rate": float(truth.mean()),
        }

    # System accuracy: route every row to its highest-probability model,
    # then keep only the rows where that model has a label.
    chosen = np.argmax(pred, axis=1)
    rows = np.arange(len(pred))
    usable = mask[rows, chosen] > 0
    counted = int(usable.sum())
    correct = y_true[rows, chosen][usable].sum() if counted else 0

    report["_system"] = {
        "accuracy": float(correct / max(counted, 1)),
        "evaluated": int(counted),
    }
    return report
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
def main(argv=None):
    """Command-line entry point: load data, train the router NN, export JSON.

    Args:
        argv: Argument list to parse; ``None`` (the default) means the
            process arguments, so ``main()`` behaves as before while tests
            and other scripts can pass their own list.

    Exits with status 1 when the data contains fewer than two distinct
    models, and with an argparse usage error when ``--hidden`` is not a
    comma-separated list of integers.
    """
    # Local import: only this function needs it.
    from datetime import datetime, timezone

    parser = argparse.ArgumentParser(description="Train kondi-chat routing NN")
    parser.add_argument("--data-dir", default=".kondi-chat", help="Directory with routing-data.jsonl")
    parser.add_argument("--out", default=".kondi-chat/router-model.json", help="Output model path")
    parser.add_argument("--hidden", default="auto", help="Hidden layer dimensions (comma-separated, or 'auto')")
    parser.add_argument("--epochs", type=int, default=200)
    parser.add_argument("--lr", type=float, default=0.005)
    args = parser.parse_args(argv)

    # Load and encode data
    samples = load_samples(args.data_dir)
    model_names = sorted({s["modelId"] for s in samples})
    if len(model_names) < 2:
        print(f"Need samples from at least 2 models to train. Found: {model_names}")
        sys.exit(1)
    X, feature_info = encode_features(samples)
    Y = encode_labels(samples, model_names)
    print(f"Features: {X.shape[1]} dimensions")
    print(f"Models to route between: {model_names}")
    for i, name in enumerate(model_names):
        known = (Y[:, i] >= 0).sum()
        positive = (Y[:, i] == 1).sum()
        print(f"  {name}: {known} samples, {positive} successes ({positive/max(known,1)*100:.0f}%)")

    # Train
    if args.hidden == "auto":
        # Auto-size: larger hidden layers when embeddings are present
        if feature_info.get("hasEmbeddings"):
            hidden_dims = [256, 128]
        else:
            hidden_dims = [64, 32]
        print(f"Auto-selected hidden dims: {hidden_dims}")
    else:
        try:
            # Tolerate stray whitespace and trailing commas ("64, 32,").
            hidden_dims = [int(part) for part in args.hidden.split(",") if part.strip()]
        except ValueError:
            parser.error(f"--hidden must be comma-separated integers or 'auto', got {args.hidden!r}")
        if not hidden_dims:
            parser.error("--hidden must list at least one layer size")
    nn, metrics = train(X, Y, model_names, hidden_dims=hidden_dims, epochs=args.epochs, lr=args.lr)

    # Print results
    print("\nResults:")
    print("=" * 60)
    for name, m in metrics.items():
        if name == "_system":
            print(f"  System accuracy: {m['accuracy']:.3f} ({m['evaluated']} samples)")
        else:
            print(f"  {name:35s}: acc={m['accuracy']:.3f} (n={m['samples']}, pos_rate={m['positive_rate']:.2f})")

    # Export
    out_path = Path(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    model_data = {
        "nn": nn.to_json(),
        "featureInfo": feature_info,
        "modelNames": model_names,
        "metrics": metrics,
        # Explicit UTC offset: an offset-less timestamp is read as local
        # time by JavaScript's Date parser.
        "trainedAt": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "sampleCount": len(samples),
    }
    out_path.write_text(json.dumps(model_data, indent=2), encoding="utf-8")
    print(f"\nModel saved to {out_path}")
    print(f"Load in TypeScript with: JSON.parse(readFileSync('{out_path}', 'utf-8'))")


if __name__ == "__main__":
    main()