npm - polymath-agent - Versions diffs - 0.2.0 → 0.3.1 - Mend

polymath-agent 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/README.md CHANGED Viewed

@@ -93,13 +93,38 @@ poly usage                                # cost by date + model
 | `poly recommend <goal>` | Pre-run recommendation: cheapest / best-value / best-quality model combos + savings. |
 | `poly models` | Browse the catalog with pricing, tier, tool support. Filters: `--tier`, `--tools`, `--search`. |
 | `poly usage` | Recorded usage & cost grouped by **date + model**. `--today`, `--since`, `--sync`. |
-| `poly analyze` | **Which approach reaches the goal with the fewest tokens** — best model per task type, objective × achievement, usage per command. |
-| `poly sync` | Push the analytics ledger to Firebase ([Data Connect SQL](dataconnect/) and/or Firestore). |
-| `poly config show\|set\|firestore\|dataconnect` | View/change settings. |
+| `poly analyze` | **Which approach reaches the goal with the fewest tokens** — efficiency playbook, best model per task type, objective × achievement, usage per command. |
+| `poly sync` | Push **distilled efficiency insights** to Firebase ([Data Connect SQL](dataconnect/) / Firestore). Raw logs stay local unless `--raw`. |
+| `poly config show\|set\|firestore\|dataconnect\|local` | View/change settings. |
 After each `poly run`, rate the result 0–9 (one keypress) — your goal-achievement
 rating joins the auto score (completed/planned steps) to power `poly analyze`.
+### The efficiency playbook (learned routing)
+Everything is captured locally (SQLite). `poly analyze` distills it into a **playbook**
+of *notably* efficient approaches — a (task, model) pair qualifies only with ≥3
+successful runs, ≥70% success, and **≥20% fewer tokens than the median** of its
+competitors. The playbook then **boosts routing**: proven-efficient models get
+preferred under the `value` objective (`reason: proven 54% fewer tokens on edit`).
+`poly sync` uploads *only* the playbook by default — your goals and raw logs never
+leave the machine unless you pass `--raw`.
+### Local LLMs (Ollama / LM Studio) — $0 routing
+```bash
+ollama serve                                # or LM Studio's local server
+poly config local on                        # default base: http://localhost:11434/v1
+poly config local on --base http://localhost:1234/v1   # LM Studio
+poly models -s local/                       # local models join the catalog at $0
+poly run "..."                              # cheapest objective → local wins what it can
+```
+Local models appear as `local/<name>`, cost $0, and need **no API key** — with
+`local on` and no OpenRouter key, Polymath runs fully offline on your machine.
+Tokens are still tracked, so the playbook learns when your local model is the
+most efficient approach.
 ### Routing objectives
 Routing is **skill-aware**: each task type maps to a skill (coding / reasoning /

package/dist/cli.js CHANGED Viewed

@@ -49,6 +49,11 @@ var DEFAULT_CONFIG = {
     enabled: false,
     location: "us-east4",
     serviceId: "polymath"
+  },
+  local: {
+    enabled: false,
+    baseUrl: "http://localhost:11434/v1"
+    // Ollama default; LM Studio: http://localhost:1234/v1
   }
 };
 function loadConfig() {
@@ -60,7 +65,8 @@ function loadConfig() {
       ...DEFAULT_CONFIG,
       ...raw,
       firestore: { ...DEFAULT_CONFIG.firestore, ...raw.firestore ?? {} },
-      dataconnect: { ...DEFAULT_CONFIG.dataconnect, ...raw.dataconnect ?? {} }
+      dataconnect: { ...DEFAULT_CONFIG.dataconnect, ...raw.dataconnect ?? {} },
+      local: { ...DEFAULT_CONFIG.local, ...raw.local ?? {} }
     };
   } catch {
     return { ...DEFAULT_CONFIG };
@@ -81,6 +87,7 @@ function resolveApiKey(config) {
 // src/providers/openrouter.ts
 var BASE = globalThis.process?.env?.OPENROUTER_BASE_URL?.replace(/\/$/, "") || "https://openrouter.ai/api/v1";
+var LOCAL_PREFIX = "local/";
 var OpenRouterError = class extends Error {
   status;
   constructor(message, status) {
@@ -93,10 +100,12 @@ var OpenRouterClient = class {
   apiKey;
   referer;
   title;
+  localBaseUrl;
   constructor(opts = {}) {
     this.apiKey = opts.apiKey;
     this.referer = opts.referer ?? "https://github.com/polymath-agent";
     this.title = opts.title ?? "Polymath";
+    this.localBaseUrl = opts.localBaseUrl?.replace(/\/$/, "");
   }
   headers(json = true) {
     const h = {
@@ -107,6 +116,24 @@ var OpenRouterClient = class {
     if (json) h["Content-Type"] = "application/json";
     return h;
   }
+  /** Resolve where a model's request goes: the local server for `local/*`, else OpenRouter. */
+  target(modelId) {
+    if (this.localBaseUrl && modelId.startsWith(LOCAL_PREFIX)) { // local routing requires both a configured local base URL and the LOCAL_PREFIX on the id
+      return { base: this.localBaseUrl, model: modelId.slice(LOCAL_PREFIX.length), isLocal: true }; // model is the id with LOCAL_PREFIX removed
+    }
+    return { base: BASE, model: modelId, isLocal: false }; // fallback: BASE with the id unchanged (also taken for prefixed ids when no local base URL is set)
+  }
+  requireKeyFor(isLocal) { // guard: non-local targets need an API key, local targets never do
+    if (!isLocal && !this.apiKey) throw new OpenRouterError("No API key set. Run `poly login`.");
+  }
+  /** List models from the local OpenAI-compatible server (Ollama / LM Studio). */
+  async listLocalRawModels() {
+    if (!this.localBaseUrl) return []; // no local base URL configured, so there is nothing to query
+    const res = await fetch(`${this.localBaseUrl}/models`); // called without options, so none of this.headers() is sent
+    if (!res.ok) throw new OpenRouterError(`Local server: failed to list models (${res.status})`, res.status);
+    const json = await res.json();
+    return json.data ?? []; // a response body without a data field yields an empty list
+  }
   /** Raw /models payload (no auth required). */
   async listRawModels() {
     const res = await fetch(`${BASE}/models`, { headers: this.headers(false) });
@@ -126,24 +153,28 @@ var OpenRouterClient = class {
     const d = json.data ?? {};
     return { label: d.label, usage: d.usage, limit: d.limit };
   }
-  buildBody(req, stream) {
+  buildBody(req, stream, modelOverride, isLocal) { // modelOverride is the model id placed in the body; isLocal toggles the provider-specific fields below
     return {
-      model: req.model,
+      model: modelOverride,
       messages: req.messages.map(serializeMessage),
       ...req.tools && req.tools.length ? { tools: req.tools, tool_choice: "auto" } : {}, // tool fields are only added when at least one tool is supplied
       temperature: req.temperature ?? 0.2,
       ...req.maxTokens ? { max_tokens: req.maxTokens } : {}, // max_tokens is omitted when maxTokens is unset or 0
       stream,
-      usage: { include: true }
+      // OpenRouter-specific accounting param; local servers may reject unknown fields.
+      ...isLocal ? {} : { usage: { include: true } },
+      // OpenAI-compat way to get token usage in the final stream chunk (Ollama/LM Studio).
+      ...isLocal && stream ? { stream_options: { include_usage: true } } : {}
     };
   }
   /** Non-streaming completion. costUsd is computed from `pricing` (deterministic). */
   async complete(req, pricing) {
-    if (!this.apiKey) throw new OpenRouterError("No API key set. Run `poly login`.");
-    const res = await fetch(`${BASE}/chat/completions`, {
+    const t = this.target(req.model);
+    this.requireKeyFor(t.isLocal);
+    const res = await fetch(`${t.base}/chat/completions`, {
       method: "POST",
       headers: this.headers(),
-      body: JSON.stringify(this.buildBody(req, false))
+      body: JSON.stringify(this.buildBody(req, false, t.model, t.isLocal))
     });
     if (!res.ok) {
       const text = await res.text().catch(() => "");
@@ -164,8 +195,10 @@ var OpenRouterClient = class {
       content: typeof msg.content === "string" ? msg.content : "",
       toolCalls: parseToolCalls(msg.tool_calls),
       usage,
-      model: json.model ?? req.model,
-      costUsd: computeCost(usage, pricing, json.usage?.cost),
+      // Keep the prefixed id for local models so the ledger stays consistent.
+      model: t.isLocal ? req.model : json.model ?? req.model,
+      // Local inference is free regardless of what the server claims to report.
+      costUsd: computeCost(usage, pricing, t.isLocal ? void 0 : json.usage?.cost),
       finishReason: choice.finish_reason ?? null
     };
   }
@@ -174,11 +207,12 @@ var OpenRouterClient = class {
    * Tool-call deltas are accumulated and surfaced in the final result.
    */
   async *stream(req, pricing) {
-    if (!this.apiKey) throw new OpenRouterError("No API key set. Run `poly login`.");
-    const res = await fetch(`${BASE}/chat/completions`, {
+    const t = this.target(req.model);
+    this.requireKeyFor(t.isLocal);
+    const res = await fetch(`${t.base}/chat/completions`, {
       method: "POST",
       headers: this.headers(),
-      body: JSON.stringify(this.buildBody(req, true))
+      body: JSON.stringify(this.buildBody(req, true, t.model, t.isLocal))
     });
     if (!res.ok || !res.body) {
       const text = await res.text().catch(() => "");
@@ -212,7 +246,7 @@ var OpenRouterClient = class {
         if (evt?.error) {
           throw new OpenRouterError(evt.error.message ?? "Stream provider error", evt.error.code);
         }
-        if (evt.model) model = evt.model;
+        if (evt.model && !t.isLocal) model = evt.model;
         if (evt.usage) usageJson = evt.usage;
         const choice = evt.choices?.[0];
         if (!choice) continue;
@@ -239,17 +273,17 @@ var OpenRouterClient = class {
       completionTokens: usageJson?.completion_tokens ?? 0,
       totalTokens: usageJson?.total_tokens ?? 0
     };
-    const toolCalls = [...toolAcc.values()].filter((t) => t.name).map((t) => ({
-      id: t.id || `call_${t.name}`,
+    const toolCalls = [...toolAcc.values()].filter((t2) => t2.name).map((t2) => ({
+      id: t2.id || `call_${t2.name}`,
       type: "function",
-      function: { name: t.name, arguments: t.args || "{}" }
+      function: { name: t2.name, arguments: t2.args || "{}" }
     }));
     return {
       content,
       toolCalls,
       usage,
       model,
-      costUsd: computeCost(usage, pricing, usageJson?.cost),
+      costUsd: computeCost(usage, pricing, t.isLocal ? void 0 : usageJson?.cost),
       finishReason
     };
   }
@@ -387,6 +421,38 @@ async function getModels(client2, opts = {}) {
   return models;
 }
+// src/models/local.ts
+function parseLocalModels(raw) { // maps raw model entries to catalog entries with LOCAL_PREFIX ids and zero pricing
+  const out = [];
+  for (const m of raw) {
+    if (!m?.id) continue; // entries without an id are skipped
+    const name = String(m.id);
+    out.push({
+      id: LOCAL_PREFIX + name,
+      name: `${name} (local)`,
+      provider: "local",
+      contextLength: m.context_length ?? 8192, // falls back to 8192 when the entry carries no context_length
+      pricing: { promptUsdPerMTok: 0, completionUsdPerMTok: 0 },
+      tier: classifyTier(name, 0),
+      capabilities: {
+        // OpenAI-compatible local servers pass tool schemas through; models that
+        // can't call tools simply reply with text, which the agent loop handles.
+        tools: true,
+        vision: /llava|vision|vl\b|moondream/i.test(name) // vision support is guessed from the model name only
+      }
+    });
+  }
+  return out;
+}
+async function getLocalModels(client2) { // best-effort listing: any failure is reported as an empty list
+  try {
+    const raw = await client2.listLocalRawModels();
+    return parseLocalModels(raw);
+  } catch { // deliberately swallowed; NOTE(review): callers cannot tell a failed request from a server with no models
+    return [];
+  }
+}
 // src/auth/onboarding.ts
 import readline from "node:readline";
@@ -723,8 +789,11 @@ var HEADLINE_SKILLS = ["coding", "reasoning", "retrieval", "speed"];
 function projectCost(m, est) {
   return est.promptTokens / 1e6 * m.pricing.promptUsdPerMTok + est.completionTokens / 1e6 * m.pricing.completionUsdPerMTok;
 }
-function taskValue(m, taskType) {
-  return taskStrength(m, taskType) / Math.max(blendedPrice(m), 0.01);
+function taskValue(m, taskType, empirical) { // empirical is an optional map keyed by taskType:modelId holding a savings percentage
+  const base = taskStrength(m, taskType) / Math.max(blendedPrice(m), 0.01); // price is floored at 0.01 so zero-priced models do not divide by zero
+  const savings = empirical?.[`${taskType}:${m.id}`];
+  const boost = savings ? 1 + Math.min(savings, 100) / 100 : 1; // multiplier of 1 when there is no entry; savings is capped at 100 before scaling
+  return base * boost;
 }
 function candidatesFor(taskType, models, policy, est) {
   const spec = TASK_SPECS[taskType];
@@ -753,7 +822,9 @@ function rank(models, policy, taskType) {
       break;
     case "value":
     default:
-      sorted.sort((a, b) => taskValue(b, taskType) - taskValue(a, taskType));
+      sorted.sort(
+        (a, b) => taskValue(b, taskType, policy.empirical) - taskValue(a, taskType, policy.empirical)
+      );
       break;
   }
   return sorted;
@@ -771,7 +842,8 @@ function route(taskType, models, policy, est = { promptTokens: 4e3, completionTo
   const ranked = rank(cands, policy, taskType);
   const chosen = ranked[0];
   const skill = TASK_SKILL[taskType];
-  const reason = policy.objective === "cheapest" ? `cheapest model that covers ${skill}` : policy.objective === "quality" ? `strongest at ${skill}` : `best ${skill}-per-dollar`;
+  const proven = policy.empirical?.[`${taskType}:${chosen.id}`];
+  const reason = policy.objective === "cheapest" ? `cheapest model that covers ${skill}` : policy.objective === "quality" ? `strongest at ${skill}` : proven ? `proven ${Math.round(proven)}% fewer tokens on ${taskType} (playbook)` : `best ${skill}-per-dollar`;
   return { model: chosen, reason, estCostUsd: projectCost(chosen, est) };
 }
@@ -1002,6 +1074,23 @@ function getDb() {
       synced INTEGER NOT NULL DEFAULT 0
     );
     CREATE INDEX IF NOT EXISTS idx_cmd_date ON command_runs(date);
+    -- Distilled efficiency insights: ONLY the notably cost-efficient approaches.
+    -- This is what syncs to the cloud by default (raw logs stay local).
+    CREATE TABLE IF NOT EXISTS insights (
+      id TEXT PRIMARY KEY,            -- "<task_type>__<model>"
+      computed_at INTEGER NOT NULL,
+      task_type TEXT NOT NULL,
+      model TEXT NOT NULL,
+      provider TEXT NOT NULL,
+      samples INTEGER NOT NULL,       -- successful steps observed
+      success_rate REAL NOT NULL,
+      avg_tokens REAL NOT NULL,       -- per successful step
+      baseline_tokens REAL NOT NULL,  -- median across qualified competitors
+      savings_pct REAL NOT NULL,      -- vs baseline (the "\uC720\uB3C5" margin)
+      avg_cost_usd REAL NOT NULL,
+      synced INTEGER NOT NULL DEFAULT 0
+    );
   `);
   const cols = db.prepare(`PRAGMA table_info(usage_log)`).all();
   if (!cols.some((c2) => c2.name === "command")) {
@@ -1304,9 +1393,62 @@ function unsyncedCommandRuns() {
 }
 function markTableSynced(table2, ids) { // sets synced=1 on every row of table2 whose id is in ids
   if (!ids.length) return;
-  const stmt = getDb().prepare(`UPDATE ${table2} SET synced=1 WHERE ${table2 === "sessions" ? "id" : "id"}=?`);
+  const stmt = getDb().prepare(`UPDATE ${table2} SET synced=1 WHERE id=?`); // table2 is interpolated into the SQL text (only id is a bound parameter), so it must be a trusted table name
   for (const id of ids) stmt.run(id);
 }
+function upsertInsight(i) { // inserts one insight row, or refreshes the stored metrics when a row with the same id exists
+  getDb().prepare(
+    `INSERT INTO insights (id, computed_at, task_type, model, provider, samples, success_rate,
+         avg_tokens, baseline_tokens, savings_pct, avg_cost_usd, synced)
+       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0)
+       ON CONFLICT(id) DO UPDATE SET
+         computed_at=excluded.computed_at, samples=excluded.samples,
+         success_rate=excluded.success_rate, avg_tokens=excluded.avg_tokens,
+         baseline_tokens=excluded.baseline_tokens, savings_pct=excluded.savings_pct,
+         avg_cost_usd=excluded.avg_cost_usd, synced=0`
+  ).run( // values are bound in the column order of the INSERT list; synced is reset to 0 on insert and on update, so a refreshed row counts as unsynced again
+    i.id,
+    i.computedAt,
+    i.taskType,
+    i.model,
+    i.provider,
+    i.samples,
+    i.successRate,
+    i.avgTokens,
+    i.baselineTokens,
+    i.savingsPct,
+    i.avgCostUsd
+  );
+}
+function deleteInsightsExcept(validIds) { // removes every stored insight whose id is not listed in validIds
+  const all = getDb().prepare(`SELECT id FROM insights`).all();
+  const keep = new Set(validIds);
+  const del = getDb().prepare(`DELETE FROM insights WHERE id=?`);
+  for (const r of all) if (!keep.has(String(r.id))) del.run(String(r.id)); // an empty validIds therefore clears the whole table
+}
+function listInsights() { // all stored insights, largest savings_pct first
+  const rows = getDb().prepare(`SELECT * FROM insights ORDER BY savings_pct DESC`).all();
+  return rows.map(mapInsight);
+}
+function unsyncedInsights() { // stored insights whose synced flag is still 0; no ORDER BY is applied
+  const rows = getDb().prepare(`SELECT * FROM insights WHERE synced=0`).all();
+  return rows.map(mapInsight);
+}
+function mapInsight(r) { // converts a snake_case insights row into the camelCase insight object; the synced column is not carried over
+  return {
+    id: String(r.id),
+    computedAt: Number(r.computed_at),
+    taskType: String(r.task_type),
+    model: String(r.model),
+    provider: String(r.provider),
+    samples: Number(r.samples),
+    successRate: Number(r.success_rate),
+    avgTokens: Number(r.avg_tokens),
+    baselineTokens: Number(r.baseline_tokens),
+    savingsPct: Number(r.savings_pct),
+    avgCostUsd: Number(r.avg_cost_usd)
+  };
+}
 // src/usage/report.ts
 function renderUsageReport(filter = {}) {
@@ -1349,6 +1491,80 @@ function renderUsageReport(filter = {}) {
   ].join("\n");
 }
+// src/usage/insights.ts
+var MIN_SAMPLES = 3; // minimum r.steps for a (task, model) row to qualify in distillInsights
+var MIN_SUCCESS = 0.7; // minimum successRate for a row to qualify
+var MIN_MARGIN = 0.2; // minimum fractional saving versus the median baseline for a row to become an insight
+function median(xs) { // median of a numeric array, computed on a sorted copy so xs is left untouched
+  const s = [...xs].sort((a, b) => a - b);
+  const mid = Math.floor(s.length / 2);
+  return s.length % 2 ? s[mid] : (s[mid - 1] + s[mid]) / 2; // even length averages the two middle values; an empty array yields NaN
+}
+function distillInsights(now = Date.now()) { // recomputes the insights table from modelTaskEfficiency() and returns the stored insights
+  const eff = modelTaskEfficiency();
+  const byTask = /* @__PURE__ */ new Map();
+  for (const r of eff) { // group efficiency rows by task type
+    const list = byTask.get(r.taskType) ?? [];
+    list.push(r);
+    byTask.set(r.taskType, list);
+  }
+  const valid = []; // ids upserted in this pass; every other stored insight is deleted at the end
+  for (const [taskType, list] of byTask) {
+    const qualified = list.filter(
+      (r) => r.steps >= MIN_SAMPLES && r.successRate >= MIN_SUCCESS && r.avgTokensPerSuccess > 0
+    );
+    if (qualified.length < 2) continue; // a baseline needs at least two qualified models for the task
+    const baseline = median(qualified.map((r) => r.avgTokensPerSuccess)); // the median is taken over all qualified rows, the candidate included
+    for (const r of qualified) {
+      const savings = 1 - r.avgTokensPerSuccess / baseline; // fraction of tokens saved versus the baseline
+      if (savings >= MIN_MARGIN) {
+        const id = `${taskType}__${r.model}`;
+        valid.push(id);
+        upsertInsight({
+          id,
+          computedAt: now,
+          taskType,
+          model: r.model,
+          provider: r.model.split("/")[0] ?? "unknown", // NOTE(review): split always returns at least one element, so the fallback never applies and a model id without a slash becomes its own provider
+          samples: r.steps,
+          successRate: r.successRate,
+          avgTokens: r.avgTokensPerSuccess,
+          baselineTokens: baseline,
+          savingsPct: savings * 100, // stored as a percentage
+          avgCostUsd: r.avgCostPerSuccess
+        });
+      }
+    }
+  }
+  deleteInsightsExcept(valid); // drop insights that no longer qualify
+  return listInsights();
+}
+function insightBoostMap(insights) { // builds a plain-object lookup: key is taskType:model, value is savingsPct
+  const map = {};
+  for (const i of insights) map[`${i.taskType}:${i.model}`] = i.savingsPct;
+  return map;
+}
+function renderPlaybook(insights) { // renders the playbook section as text: a hint when there are no insights, otherwise a table
+  if (!insights.length) {
+    return c.bold("Efficiency playbook") + "\n" + c.dim(
+      `Nothing distilled yet \u2014 needs \u22652 models with \u2265${MIN_SAMPLES} successful steps on the same task type,
+where one beats the median by \u2265${MIN_MARGIN * 100}% tokens. Keep running tasks (vary models with -o / pins).`
+    );
+  }
+  return c.bold("Efficiency playbook") + c.dim("  (the notably efficient approaches \u2014 this is what `poly sync` uploads)") + "\n" + table(
+    ["Task", "Model", "Avg tok", "Baseline", "Savings", "Success", "n"],
+    insights.map((i) => [ // one row per insight, in the order given
+      i.taskType,
+      c.green(i.model),
+      tokens(Math.round(i.avgTokens)),
+      tokens(Math.round(i.baselineTokens)),
+      c.green(`-${i.savingsPct.toFixed(0)}%`),
+      `${Math.round(i.successRate * 100)}%`,
+      String(i.samples)
+    ])
+  );
+}
 // src/usage/analyze.ts
 var MIN_SUCCESS_RATE = 0.5;
 function renderAnalysis(filter = {}) {
@@ -1359,6 +1575,9 @@ function renderAnalysis(filter = {}) {
   if (!byModelTask.length && !byObjective.length && !byCommand.length) {
     return c.dim('No analytics yet. Run `poly run "<task>"` a few times (and rate the result) first.');
   }
+  const insights = distillInsights();
+  out.push(renderPlaybook(insights));
+  out.push("");
   if (byModelTask.length) {
     const byTask = /* @__PURE__ */ new Map();
     for (const r of byModelTask) {
@@ -1445,7 +1664,7 @@ function renderAnalysis(filter = {}) {
 }
 // src/usage/firestoreSync.ts
-async function syncUsage(config) {
+async function syncUsage(config, opts = {}) {
   if (!config.firestore.enabled) {
     return { synced: 0, message: "Firestore sync is disabled (enable with `poly config firestore on`)." };
   }
@@ -1473,28 +1692,62 @@ async function syncUsage(config) {
     }
   }
   const fdb = fsMod.getFirestore();
+  distillInsights();
+  const insights = unsyncedInsights();
+  if (insights.length) {
+    const batch = fdb.batch();
+    const col = fdb.collection("polymath_insights");
+    for (const i of insights) {
+      batch.set(col.doc(i.id), {
+        computedAt: i.computedAt,
+        taskType: i.taskType,
+        model: i.model,
+        provider: i.provider,
+        samples: i.samples,
+        successRate: i.successRate,
+        avgTokens: i.avgTokens,
+        baselineTokens: i.baselineTokens,
+        savingsPct: i.savingsPct,
+        avgCostUsd: i.avgCostUsd
+      });
+    }
+    await batch.commit();
+    markTableSynced("insights", insights.map((i) => i.id));
+  }
+  if (!opts.raw) {
+    return {
+      synced: insights.length,
+      message: insights.length ? `Synced ${insights.length} efficiency insight(s) to polymath_insights. Raw logs stayed local (use --raw to push).` : "No new insights to sync \u2014 raw logs stay local by default (use --raw to push them)."
+    };
+  }
   const rows = unsyncedRows();
-  if (!rows.length) return { synced: 0, message: "Nothing to sync \u2014 all rows already pushed." };
-  const batch = fdb.batch();
-  const col = fdb.collection(config.firestore.collection);
-  for (const r of rows) {
-    const ref = col.doc(`${r.date}__${r.id}`);
-    batch.set(ref, {
-      ts: r.ts,
-      date: r.date,
-      provider: r.provider,
-      model: r.model,
-      taskType: r.taskType,
-      promptTokens: r.promptTokens,
-      completionTokens: r.completionTokens,
-      totalTokens: r.totalTokens,
-      costUsd: r.costUsd,
-      sessionId: r.sessionId ?? null
-    });
+  if (!rows.length && !insights.length) return { synced: 0, message: "Nothing to sync \u2014 all rows already pushed." };
+  if (rows.length) {
+    const batch = fdb.batch();
+    const col = fdb.collection(config.firestore.collection);
+    for (const r of rows) {
+      const ref = col.doc(`${r.date}__${r.id}`);
+      batch.set(ref, {
+        ts: r.ts,
+        date: r.date,
+        provider: r.provider,
+        model: r.model,
+        taskType: r.taskType,
+        command: r.command ?? "run",
+        promptTokens: r.promptTokens,
+        completionTokens: r.completionTokens,
+        totalTokens: r.totalTokens,
+        costUsd: r.costUsd,
+        sessionId: r.sessionId ?? null
+      });
+    }
+    await batch.commit();
+    markSynced(rows.map((r) => r.id));
   }
-  await batch.commit();
-  markSynced(rows.map((r) => r.id));
-  return { synced: rows.length, message: `Synced ${rows.length} rows to ${config.firestore.collection}.` };
+  return {
+    synced: insights.length + rows.length,
+    message: `Synced ${insights.length} insights + ${rows.length} raw rows to Firestore.`
+  };
 }
 // src/usage/dataconnect.ts
@@ -1539,14 +1792,56 @@ async function executeGraphql(cfg2, token, query, variables) {
   }
 }
 var iso = (ms) => new Date(ms).toISOString();
-async function syncDataConnect(config) {
+async function syncDataConnect(config, opts = {}) {
   const dc = config.dataconnect;
   if (!dc?.enabled) {
-    return { sessions: 0, steps: 0, commands: 0, calls: 0, message: "Data Connect sync is disabled (enable with `poly config dataconnect on`)." };
+    return { insights: 0, sessions: 0, steps: 0, commands: 0, calls: 0, message: "Data Connect sync is disabled (enable with `poly config dataconnect on`)." };
   }
   const projectId = config.firestore.projectId;
   const token = await adminAccessToken(projectId);
   const cfg2 = { projectId, location: dc.location, serviceId: dc.serviceId };
+  distillInsights();
+  const insights = unsyncedInsights();
+  for (const i of insights) {
+    await executeGraphql(
+      cfg2,
+      token,
+      `mutation UpsertInsight($id: String!, $computedAt: Timestamp!, $taskType: String!,
+         $model: String!, $provider: String!, $samples: Int!, $successRate: Float!,
+         $avgTokens: Float!, $baselineTokens: Float!, $savingsPct: Float!, $avgCostUsd: Float!) {
+         insight_upsert(data: {
+           id: $id, computedAt: $computedAt, taskType: $taskType, model: $model,
+           provider: $provider, samples: $samples, successRate: $successRate,
+           avgTokens: $avgTokens, baselineTokens: $baselineTokens,
+           savingsPct: $savingsPct, avgCostUsd: $avgCostUsd
+         })
+       }`,
+      {
+        id: i.id,
+        computedAt: iso(i.computedAt),
+        taskType: i.taskType,
+        model: i.model,
+        provider: i.provider,
+        samples: i.samples,
+        successRate: i.successRate,
+        avgTokens: i.avgTokens,
+        baselineTokens: i.baselineTokens,
+        savingsPct: i.savingsPct,
+        avgCostUsd: i.avgCostUsd
+      }
+    );
+  }
+  markTableSynced("insights", insights.map((i) => i.id));
+  if (!opts.raw) {
+    return {
+      insights: insights.length,
+      sessions: 0,
+      steps: 0,
+      commands: 0,
+      calls: 0,
+      message: `Synced ${insights.length} efficiency insight(s) to Data Connect (${cfg2.serviceId}@${cfg2.location}). Raw logs stayed local \u2014 use \`poly sync --raw\` to push everything.`
+    };
+  }
   const sessions = unsyncedSessions();
   for (const s of sessions) {
     await executeGraphql(
@@ -1679,11 +1974,12 @@ async function syncDataConnect(config) {
   }
   markSynced(calls.map((c2) => c2.id));
   return {
+    insights: insights.length,
     sessions: sessions.length,
     steps: steps.length,
     commands: commands.length,
     calls: calls.length,
-    message: `Synced ${sessions.length} sessions, ${steps.length} steps, ${commands.length} commands, ${calls.length} calls to Data Connect (${cfg2.serviceId}@${cfg2.location}).`
+    message: `Synced ${insights.length} insights + raw: ${sessions.length} sessions, ${steps.length} steps, ${commands.length} commands, ${calls.length} calls (${cfg2.serviceId}@${cfg2.location}).`
   };
 }
@@ -1873,6 +2169,25 @@ ${stderr}`)) };
 // src/agent/loop.ts
 var MAX_ITERS_PER_STEP = 6;
+var KNOWN_TOOLS = new Set(TOOL_SCHEMAS.map((t) => t.function.name)); // names of every tool declared in TOOL_SCHEMAS; used to validate text-encoded tool calls
+function parseTextToolCall(content) { // tries to read a tool call written as JSON in plain text; returns a tool-call object or null
+  if (!content) return null;
+  const json = extractJson(content);
+  if (!json) return null;
+  try {
+    const obj = JSON.parse(json);
+    const name = obj?.name ?? obj?.tool ?? obj?.function?.name; // accepts three spellings of the tool name
+    if (typeof name !== "string" || !KNOWN_TOOLS.has(name)) return null; // names outside KNOWN_TOOLS are not treated as tool calls
+    const args = obj.arguments ?? obj.parameters ?? obj.function?.arguments ?? {};
+    return {
+      id: `textcall_${name}`, // synthetic id derived from the tool name only
+      type: "function",
+      function: { name, arguments: typeof args === "string" ? args : JSON.stringify(args) } // arguments always end up as a string
+    };
+  } catch { // malformed JSON is not an error here, it just means no tool call
+    return null;
+  }
+}
 async function runAgent(goal, deps, emit) {
   const { client: client2, models, policy, sessionId, cwd } = deps;
   let totalCostUsd = 0;
@@ -1992,6 +2307,26 @@ async function runAgent(goal, deps, emit) {
           }
           continue;
         }
+        const textCall = useTools ? parseTextToolCall(result.content) : null;
+        if (textCall) {
+          stepToolCalls++;
+          emit({ type: "tool-call", name: textCall.function.name, args: textCall.function.arguments });
+          const outcome = executeTool(textCall.function.name, textCall.function.arguments, toolCtx);
+          emit({ type: "tool-result", name: textCall.function.name, result: outcome.result });
+          if (outcome.finishSummary != null) {
+            summary = outcome.finishSummary;
+            finishedBy = "finish-tool";
+            break;
+          }
+          messages.push({ role: "assistant", content: result.content });
+          messages.push({
+            role: "user",
+            content: `Tool ${textCall.function.name} returned:
+${outcome.result}
+Continue with this step. When the objective is met, reply with ONLY {"name":"finish","arguments":{"summary":"<one line>"}}.`
+          });
+          continue;
+        }
         summary = result.content || summary;
         if (summary) finishedBy = "text";
         break;
@@ -2048,7 +2383,8 @@ function stepSystemPrompt(goal, step, priorSummaries, useTools) {
 What previous steps accomplished:
 ${priorSummaries.join("\n")}` : "";
   const toolNote = useTools ? `
-You may use the provided tools (read_file, write_file, list_dir, run_command). Call the \`finish\` tool with a one-line summary when this step's objective is met.` : `
+You may use the provided tools (read_file, write_file, list_dir, run_command). Call the \`finish\` tool with a one-line summary when this step's objective is met.
+If you cannot call tools natively, reply with ONLY one JSON object per turn, no prose: {"name":"<tool>","arguments":{...}}` : `
 Return a concise result for this step. Do not ask the user questions.`;
   return `You are the "${step.type}" stage of an autonomous coding agent.
 Overall goal: ${goal}
@@ -2274,21 +2610,30 @@ function truncate2(s, n) {
 // src/index.ts
 var program = new Command();
-program.name("poly").description("Polymath \u2014 cost-optimized, multi-model TUI coding agent").version("0.2.0");
+program.name("poly").description("Polymath \u2014 cost-optimized, multi-model TUI coding agent").version("0.3.1");
 function client(config) { // builds an OpenRouterClient from the loaded config
   return new OpenRouterClient({
     apiKey: resolveApiKey(config),
     referer: config.referer,
-    title: config.title
+    title: config.title,
+    localBaseUrl: config.local.enabled ? config.local.baseUrl : void 0 // only handed to the client while local mode is enabled
   });
 }
 function buildPolicy(config, opts) { // combines CLI options, config defaults and stored insights into a routing policy
   const objective = opts.objective || config.defaultObjective;
   const maxCost = opts.maxCost != null ? parseFloat(opts.maxCost) : config.maxCostPerCallUsd;
+  let empirical;
+  try {
+    empirical = insightBoostMap(listInsights()); // boosts are read from the stored insights
+    if (!Object.keys(empirical).length) empirical = void 0; // an empty map is normalised to undefined
+  } catch { // any failure while reading insights just disables the boosts
+    empirical = void 0;
+  }
   return {
     objective,
     maxCostPerCallUsd: Number.isFinite(maxCost) ? maxCost : void 0,
-    pinned: config.pinned
+    pinned: config.pinned,
+    empirical
   };
 }
 function localDate3(d = /* @__PURE__ */ new Date()) {
@@ -2315,9 +2660,23 @@ function trackCommand(opts) {
   }
 }
 async function loadCatalog(config, refresh = false) { // returns the usable model list, or prints an error and exits when it is empty
-  const models = await getModels(client(config), { refresh });
+  const cl = client(config);
+  const hasKey = !!resolveApiKey(config);
+  let models = [];
+  try {
+    models = await getModels(cl, { refresh });
+  } catch (e) {
+    if (!config.local.enabled) throw e; // with local mode on, a failed getModels call is tolerated
+  }
+  if (config.local.enabled) {
+    const local = await getLocalModels(cl);
+    if (!local.length) {
+      console.error(c.yellow(`Local server (${config.local.baseUrl}) returned no models \u2014 is it running?`));
+    }
+    models = hasKey ? [...local, ...models] : local; // without an API key only local models are kept
+  }
   if (!models.length) {
-    console.error(c.red("Could not load the model catalog. Check your connection."));
+    console.error(c.red("No models available. Check your connection, or `poly config local on` with a running Ollama/LM Studio."));
     process.exit(1);
   }
   return models;
 }
 program.command("run", { isDefault: true }).description("Launch the interactive agent (TUI)").argument("[goal...]", "what to do (optional; prompts if omitted)").option("-o, --objective <name>", "routing objective: cheapest | value | quality").option("--max-cost <usd>", "exclude models whose projected per-call cost exceeds this").option("-w, --write", "allow the agent to write files (confined to --cwd)", false).option("-x, --commands", "DANGER: let the model run arbitrary shell commands in --cwd", false).option("-C, --cwd <dir>", "working directory", process.cwd()).action(async (goalParts, opts) => {
   const startedAt = Date.now();
   const config = loadConfig();
-  const key = await ensureApiKey(config);
-  if (!key) {
-    console.error(c.red("No API key \u2014 cannot run. Try `poly login`."));
-    process.exit(1);
+  if (!config.local.enabled || resolveApiKey(config)) {
+    const key = await ensureApiKey(config);
+    if (!key && !config.local.enabled) {
+      console.error(c.red("No API key \u2014 cannot run. Try `poly login`, or `poly config local on` for a local LLM."));
+      process.exit(1);
+    }
   }
   const reloaded = loadConfig();
   const models = await loadCatalog(reloaded);
@@ -2434,21 +2795,22 @@ program.command("usage").description("Show recorded usage & cost by date + model
 program.command("analyze").description("Which approach reaches the goal with the FEWEST tokens \u2014 per model, task, objective, command").option("--since <date>", "YYYY-MM-DD inclusive").option("--until <date>", "YYYY-MM-DD inclusive").action(async (opts) => {
   console.log(renderAnalysis({ since: opts.since, until: opts.until })); // prints the analysis for the optional since/until window
 });
-program.command("sync").description("Push the local analytics ledger to Firebase (Data Connect SQL and/or Firestore)").action(async () => {
+program.command("sync").description("Push DISTILLED efficiency insights to Firebase (raw logs stay local unless --raw)").option("--raw", "also push the full raw ledger (sessions/steps/calls/commands)", false).action(async (opts) => {
   const config = loadConfig();
   let pushed = false;
   if (config.dataconnect.enabled) {
     pushed = true;
     try {
-      const res = await syncDataConnect(config);
-      console.log(res.sessions + res.steps + res.commands + res.calls > 0 ? c.green(res.message) : c.dim(res.message));
+      const res = await syncDataConnect(config, { raw: !!opts.raw });
+      const n = res.insights + res.sessions + res.steps + res.commands + res.calls;
+      console.log(n > 0 ? c.green(res.message) : c.dim(res.message));
     } catch (e) {
       console.error(c.red(`Data Connect sync failed: ${e?.message ?? e}`));
     }
   }
   if (config.firestore.enabled) {
     pushed = true;
-    const res = await syncUsage(config);
+    const res = await syncUsage(config, { raw: !!opts.raw });
     console.log(res.synced > 0 ? c.green(res.message) : c.dim(res.message));
   }
   if (!pushed) {
@@ -2499,6 +2861,17 @@ cfg.command("firestore").description("Enable/disable Firestore sync: on | off").
   saveConfig(config);
   console.log(c.green(`Firestore sync ${config.firestore.enabled ? "enabled" : "disabled"}.`));
 });
+cfg.command("local").description("Enable/disable a local LLM server (Ollama/LM Studio): on | off [--base <url>]").argument("<state>").option("--base <url>", "OpenAI-compatible base URL (default http://localhost:11434/v1)").action((state, opts) => {
+  const config = loadConfig();
+  config.local.enabled = /^on|true|1$/i.test(state);
+  if (opts.base) config.local.baseUrl = String(opts.base).replace(/\/$/, "");
+  saveConfig(config);
+  console.log(
+    c.green(
+      `Local LLM ${config.local.enabled ? "enabled" : "disabled"} (${config.local.baseUrl}). Models appear as local/<name> with $0 cost.`
+    )
+  );
+});
 cfg.command("dataconnect").description("Enable/disable Firebase Data Connect (SQL) sync: on | off [--location <loc>] [--service <id>]").argument("<state>").option("--location <loc>", "Data Connect location (default us-east4)").option("--service <id>", "Data Connect service id (default polymath)").action((state, opts) => {
   const config = loadConfig();
   config.dataconnect.enabled = /^on|true|1$/i.test(state);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "polymath-agent",
-  "version": "0.2.0",
+  "version": "0.3.1",
   "description": "Polymath — a cost-optimized, multi-model TUI coding agent. Decomposes work into typed tasks, routes each task to the cheapest capable model via OpenRouter, and logs real usage/cost by date + model.",
   "type": "module",
   "bin": {