polymath-agent 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +28 -3
  2. package/dist/cli.js +433 -60
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -93,13 +93,38 @@ poly usage # cost by date + model
93
93
  | `poly recommend <goal>` | Pre-run recommendation: cheapest / best-value / best-quality model combos + savings. |
94
94
  | `poly models` | Browse the catalog with pricing, tier, tool support. Filters: `--tier`, `--tools`, `--search`. |
95
95
  | `poly usage` | Recorded usage & cost grouped by **date + model**. `--today`, `--since`, `--sync`. |
96
- | `poly analyze` | **Which approach reaches the goal with the fewest tokens** — best model per task type, objective × achievement, usage per command. |
97
- | `poly sync` | Push the analytics ledger to Firebase ([Data Connect SQL](dataconnect/) and/or Firestore). |
98
- | `poly config show\|set\|firestore\|dataconnect` | View/change settings. |
96
+ | `poly analyze` | **Which approach reaches the goal with the fewest tokens** — efficiency playbook, best model per task type, objective × achievement, usage per command. |
97
+ | `poly sync` | Push **distilled efficiency insights** to Firebase ([Data Connect SQL](dataconnect/) / Firestore). Raw logs stay local unless `--raw`. |
98
+ | `poly config show\|set\|firestore\|dataconnect\|local` | View/change settings. |
99
99
 
100
100
  After each `poly run`, rate the result 0–9 (one keypress) — your goal-achievement
101
101
  rating joins the auto score (completed/planned steps) to power `poly analyze`.
102
102
 
103
+ ### The efficiency playbook (learned routing)
104
+
105
+ Everything is captured locally (SQLite). `poly analyze` distills it into a **playbook**
106
+ of *notably* efficient approaches — a (task, model) pair qualifies only with ≥3
107
+ successful runs, ≥70% success, and **≥20% fewer tokens than the median** of its
108
+ competitors. The playbook then **boosts routing**: proven-efficient models get
109
+ preferred under the `value` objective (`reason: proven 54% fewer tokens on edit`).
110
+ `poly sync` uploads *only* the playbook by default — your goals and raw logs never
111
+ leave the machine unless you pass `--raw`.
112
+
113
+ ### Local LLMs (Ollama / LM Studio) — $0 routing
114
+
115
+ ```bash
116
+ ollama serve # or LM Studio's local server
117
+ poly config local on # default base: http://localhost:11434/v1
118
+ poly config local on --base http://localhost:1234/v1 # LM Studio
119
+ poly models -s local/ # local models join the catalog at $0
120
+ poly run "..." # cheapest objective → local wins what it can
121
+ ```
122
+
123
+ Local models appear as `local/<name>`, cost $0, and need **no API key** — with
124
+ `local on` and no OpenRouter key, Polymath runs fully offline on your machine.
125
+ Tokens are still tracked, so the playbook learns when your local model is the
126
+ most efficient approach.
127
+
103
128
  ### Routing objectives
104
129
 
105
130
  Routing is **skill-aware**: each task type maps to a skill (coding / reasoning /
package/dist/cli.js CHANGED
@@ -49,6 +49,11 @@ var DEFAULT_CONFIG = {
49
49
  enabled: false,
50
50
  location: "us-east4",
51
51
  serviceId: "polymath"
52
+ },
53
+ local: {
54
+ enabled: false,
55
+ baseUrl: "http://localhost:11434/v1"
56
+ // Ollama default; LM Studio: http://localhost:1234/v1
52
57
  }
53
58
  };
54
59
  function loadConfig() {
@@ -60,7 +65,8 @@ function loadConfig() {
60
65
  ...DEFAULT_CONFIG,
61
66
  ...raw,
62
67
  firestore: { ...DEFAULT_CONFIG.firestore, ...raw.firestore ?? {} },
63
- dataconnect: { ...DEFAULT_CONFIG.dataconnect, ...raw.dataconnect ?? {} }
68
+ dataconnect: { ...DEFAULT_CONFIG.dataconnect, ...raw.dataconnect ?? {} },
69
+ local: { ...DEFAULT_CONFIG.local, ...raw.local ?? {} }
64
70
  };
65
71
  } catch {
66
72
  return { ...DEFAULT_CONFIG };
@@ -81,6 +87,7 @@ function resolveApiKey(config) {
81
87
 
82
88
  // src/providers/openrouter.ts
83
89
  var BASE = globalThis.process?.env?.OPENROUTER_BASE_URL?.replace(/\/$/, "") || "https://openrouter.ai/api/v1";
90
+ var LOCAL_PREFIX = "local/";
84
91
  var OpenRouterError = class extends Error {
85
92
  status;
86
93
  constructor(message, status) {
@@ -93,10 +100,12 @@ var OpenRouterClient = class {
93
100
  apiKey;
94
101
  referer;
95
102
  title;
103
+ localBaseUrl;
96
104
  constructor(opts = {}) {
97
105
  this.apiKey = opts.apiKey;
98
106
  this.referer = opts.referer ?? "https://github.com/polymath-agent";
99
107
  this.title = opts.title ?? "Polymath";
108
+ this.localBaseUrl = opts.localBaseUrl?.replace(/\/$/, "");
100
109
  }
101
110
  headers(json = true) {
102
111
  const h = {
@@ -107,6 +116,24 @@ var OpenRouterClient = class {
107
116
  if (json) h["Content-Type"] = "application/json";
108
117
  return h;
109
118
  }
119
+ /** Resolve where a model's request goes: the local server for `local/*`, else OpenRouter. */
120
+ target(modelId) {
121
+ if (this.localBaseUrl && modelId.startsWith(LOCAL_PREFIX)) {
122
+ return { base: this.localBaseUrl, model: modelId.slice(LOCAL_PREFIX.length), isLocal: true };
123
+ }
124
+ return { base: BASE, model: modelId, isLocal: false };
125
+ }
126
+ requireKeyFor(isLocal) {
127
+ if (!isLocal && !this.apiKey) throw new OpenRouterError("No API key set. Run `poly login`.");
128
+ }
129
+ /** List models from the local OpenAI-compatible server (Ollama / LM Studio). */
130
+ async listLocalRawModels() {
131
+ if (!this.localBaseUrl) return [];
132
+ const res = await fetch(`${this.localBaseUrl}/models`);
133
+ if (!res.ok) throw new OpenRouterError(`Local server: failed to list models (${res.status})`, res.status);
134
+ const json = await res.json();
135
+ return json.data ?? [];
136
+ }
110
137
  /** Raw /models payload (no auth required). */
111
138
  async listRawModels() {
112
139
  const res = await fetch(`${BASE}/models`, { headers: this.headers(false) });
@@ -126,24 +153,28 @@ var OpenRouterClient = class {
126
153
  const d = json.data ?? {};
127
154
  return { label: d.label, usage: d.usage, limit: d.limit };
128
155
  }
129
- buildBody(req, stream) {
156
+ buildBody(req, stream, modelOverride, isLocal) {
130
157
  return {
131
- model: req.model,
158
+ model: modelOverride,
132
159
  messages: req.messages.map(serializeMessage),
133
160
  ...req.tools && req.tools.length ? { tools: req.tools, tool_choice: "auto" } : {},
134
161
  temperature: req.temperature ?? 0.2,
135
162
  ...req.maxTokens ? { max_tokens: req.maxTokens } : {},
136
163
  stream,
137
- usage: { include: true }
164
+ // OpenRouter-specific accounting param; local servers may reject unknown fields.
165
+ ...isLocal ? {} : { usage: { include: true } },
166
+ // OpenAI-compat way to get token usage in the final stream chunk (Ollama/LM Studio).
167
+ ...isLocal && stream ? { stream_options: { include_usage: true } } : {}
138
168
  };
139
169
  }
140
170
  /** Non-streaming completion. costUsd is computed from `pricing` (deterministic). */
141
171
  async complete(req, pricing) {
142
- if (!this.apiKey) throw new OpenRouterError("No API key set. Run `poly login`.");
143
- const res = await fetch(`${BASE}/chat/completions`, {
172
+ const t = this.target(req.model);
173
+ this.requireKeyFor(t.isLocal);
174
+ const res = await fetch(`${t.base}/chat/completions`, {
144
175
  method: "POST",
145
176
  headers: this.headers(),
146
- body: JSON.stringify(this.buildBody(req, false))
177
+ body: JSON.stringify(this.buildBody(req, false, t.model, t.isLocal))
147
178
  });
148
179
  if (!res.ok) {
149
180
  const text = await res.text().catch(() => "");
@@ -164,8 +195,10 @@ var OpenRouterClient = class {
164
195
  content: typeof msg.content === "string" ? msg.content : "",
165
196
  toolCalls: parseToolCalls(msg.tool_calls),
166
197
  usage,
167
- model: json.model ?? req.model,
168
- costUsd: computeCost(usage, pricing, json.usage?.cost),
198
+ // Keep the prefixed id for local models so the ledger stays consistent.
199
+ model: t.isLocal ? req.model : json.model ?? req.model,
200
+ // Local inference is free regardless of what the server claims to report.
201
+ costUsd: computeCost(usage, pricing, t.isLocal ? void 0 : json.usage?.cost),
169
202
  finishReason: choice.finish_reason ?? null
170
203
  };
171
204
  }
@@ -174,11 +207,12 @@ var OpenRouterClient = class {
174
207
  * Tool-call deltas are accumulated and surfaced in the final result.
175
208
  */
176
209
  async *stream(req, pricing) {
177
- if (!this.apiKey) throw new OpenRouterError("No API key set. Run `poly login`.");
178
- const res = await fetch(`${BASE}/chat/completions`, {
210
+ const t = this.target(req.model);
211
+ this.requireKeyFor(t.isLocal);
212
+ const res = await fetch(`${t.base}/chat/completions`, {
179
213
  method: "POST",
180
214
  headers: this.headers(),
181
- body: JSON.stringify(this.buildBody(req, true))
215
+ body: JSON.stringify(this.buildBody(req, true, t.model, t.isLocal))
182
216
  });
183
217
  if (!res.ok || !res.body) {
184
218
  const text = await res.text().catch(() => "");
@@ -212,7 +246,7 @@ var OpenRouterClient = class {
212
246
  if (evt?.error) {
213
247
  throw new OpenRouterError(evt.error.message ?? "Stream provider error", evt.error.code);
214
248
  }
215
- if (evt.model) model = evt.model;
249
+ if (evt.model && !t.isLocal) model = evt.model;
216
250
  if (evt.usage) usageJson = evt.usage;
217
251
  const choice = evt.choices?.[0];
218
252
  if (!choice) continue;
@@ -239,17 +273,17 @@ var OpenRouterClient = class {
239
273
  completionTokens: usageJson?.completion_tokens ?? 0,
240
274
  totalTokens: usageJson?.total_tokens ?? 0
241
275
  };
242
- const toolCalls = [...toolAcc.values()].filter((t) => t.name).map((t) => ({
243
- id: t.id || `call_${t.name}`,
276
+ const toolCalls = [...toolAcc.values()].filter((t2) => t2.name).map((t2) => ({
277
+ id: t2.id || `call_${t2.name}`,
244
278
  type: "function",
245
- function: { name: t.name, arguments: t.args || "{}" }
279
+ function: { name: t2.name, arguments: t2.args || "{}" }
246
280
  }));
247
281
  return {
248
282
  content,
249
283
  toolCalls,
250
284
  usage,
251
285
  model,
252
- costUsd: computeCost(usage, pricing, usageJson?.cost),
286
+ costUsd: computeCost(usage, pricing, t.isLocal ? void 0 : usageJson?.cost),
253
287
  finishReason
254
288
  };
255
289
  }
@@ -387,6 +421,38 @@ async function getModels(client2, opts = {}) {
387
421
  return models;
388
422
  }
389
423
 
424
+ // src/models/local.ts
425
+ function parseLocalModels(raw) {
426
+ const out = [];
427
+ for (const m of raw) {
428
+ if (!m?.id) continue;
429
+ const name = String(m.id);
430
+ out.push({
431
+ id: LOCAL_PREFIX + name,
432
+ name: `${name} (local)`,
433
+ provider: "local",
434
+ contextLength: m.context_length ?? 8192,
435
+ pricing: { promptUsdPerMTok: 0, completionUsdPerMTok: 0 },
436
+ tier: classifyTier(name, 0),
437
+ capabilities: {
438
+ // OpenAI-compatible local servers pass tool schemas through; models that
439
+ // can't call tools simply reply with text, which the agent loop handles.
440
+ tools: true,
441
+ vision: /llava|vision|vl\b|moondream/i.test(name)
442
+ }
443
+ });
444
+ }
445
+ return out;
446
+ }
447
+ async function getLocalModels(client2) {
448
+ try {
449
+ const raw = await client2.listLocalRawModels();
450
+ return parseLocalModels(raw);
451
+ } catch {
452
+ return [];
453
+ }
454
+ }
455
+
390
456
  // src/auth/onboarding.ts
391
457
  import readline from "node:readline";
392
458
 
@@ -723,8 +789,11 @@ var HEADLINE_SKILLS = ["coding", "reasoning", "retrieval", "speed"];
723
789
  function projectCost(m, est) {
724
790
  return est.promptTokens / 1e6 * m.pricing.promptUsdPerMTok + est.completionTokens / 1e6 * m.pricing.completionUsdPerMTok;
725
791
  }
726
- function taskValue(m, taskType) {
727
- return taskStrength(m, taskType) / Math.max(blendedPrice(m), 0.01);
792
+ function taskValue(m, taskType, empirical) {
793
+ const base = taskStrength(m, taskType) / Math.max(blendedPrice(m), 0.01);
794
+ const savings = empirical?.[`${taskType}:${m.id}`];
795
+ const boost = savings ? 1 + Math.min(savings, 100) / 100 : 1;
796
+ return base * boost;
728
797
  }
729
798
  function candidatesFor(taskType, models, policy, est) {
730
799
  const spec = TASK_SPECS[taskType];
@@ -753,7 +822,9 @@ function rank(models, policy, taskType) {
753
822
  break;
754
823
  case "value":
755
824
  default:
756
- sorted.sort((a, b) => taskValue(b, taskType) - taskValue(a, taskType));
825
+ sorted.sort(
826
+ (a, b) => taskValue(b, taskType, policy.empirical) - taskValue(a, taskType, policy.empirical)
827
+ );
757
828
  break;
758
829
  }
759
830
  return sorted;
@@ -771,7 +842,8 @@ function route(taskType, models, policy, est = { promptTokens: 4e3, completionTo
771
842
  const ranked = rank(cands, policy, taskType);
772
843
  const chosen = ranked[0];
773
844
  const skill = TASK_SKILL[taskType];
774
- const reason = policy.objective === "cheapest" ? `cheapest model that covers ${skill}` : policy.objective === "quality" ? `strongest at ${skill}` : `best ${skill}-per-dollar`;
845
+ const proven = policy.empirical?.[`${taskType}:${chosen.id}`];
846
+ const reason = policy.objective === "cheapest" ? `cheapest model that covers ${skill}` : policy.objective === "quality" ? `strongest at ${skill}` : proven ? `proven ${Math.round(proven)}% fewer tokens on ${taskType} (playbook)` : `best ${skill}-per-dollar`;
775
847
  return { model: chosen, reason, estCostUsd: projectCost(chosen, est) };
776
848
  }
777
849
 
@@ -1002,6 +1074,23 @@ function getDb() {
1002
1074
  synced INTEGER NOT NULL DEFAULT 0
1003
1075
  );
1004
1076
  CREATE INDEX IF NOT EXISTS idx_cmd_date ON command_runs(date);
1077
+
1078
+ -- Distilled efficiency insights: ONLY the notably cost-efficient approaches.
1079
+ -- This is what syncs to the cloud by default (raw logs stay local).
1080
+ CREATE TABLE IF NOT EXISTS insights (
1081
+ id TEXT PRIMARY KEY, -- "<task_type>__<model>"
1082
+ computed_at INTEGER NOT NULL,
1083
+ task_type TEXT NOT NULL,
1084
+ model TEXT NOT NULL,
1085
+ provider TEXT NOT NULL,
1086
+ samples INTEGER NOT NULL, -- successful steps observed
1087
+ success_rate REAL NOT NULL,
1088
+ avg_tokens REAL NOT NULL, -- per successful step
1089
+ baseline_tokens REAL NOT NULL, -- median across qualified competitors
1090
+ savings_pct REAL NOT NULL, -- vs baseline (the "\uC720\uB3C5" margin)
1091
+ avg_cost_usd REAL NOT NULL,
1092
+ synced INTEGER NOT NULL DEFAULT 0
1093
+ );
1005
1094
  `);
1006
1095
  const cols = db.prepare(`PRAGMA table_info(usage_log)`).all();
1007
1096
  if (!cols.some((c2) => c2.name === "command")) {
@@ -1304,9 +1393,62 @@ function unsyncedCommandRuns() {
1304
1393
  }
1305
1394
  function markTableSynced(table2, ids) {
1306
1395
  if (!ids.length) return;
1307
- const stmt = getDb().prepare(`UPDATE ${table2} SET synced=1 WHERE ${table2 === "sessions" ? "id" : "id"}=?`);
1396
+ const stmt = getDb().prepare(`UPDATE ${table2} SET synced=1 WHERE id=?`);
1308
1397
  for (const id of ids) stmt.run(id);
1309
1398
  }
1399
+ function upsertInsight(i) {
1400
+ getDb().prepare(
1401
+ `INSERT INTO insights (id, computed_at, task_type, model, provider, samples, success_rate,
1402
+ avg_tokens, baseline_tokens, savings_pct, avg_cost_usd, synced)
1403
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0)
1404
+ ON CONFLICT(id) DO UPDATE SET
1405
+ computed_at=excluded.computed_at, samples=excluded.samples,
1406
+ success_rate=excluded.success_rate, avg_tokens=excluded.avg_tokens,
1407
+ baseline_tokens=excluded.baseline_tokens, savings_pct=excluded.savings_pct,
1408
+ avg_cost_usd=excluded.avg_cost_usd, synced=0`
1409
+ ).run(
1410
+ i.id,
1411
+ i.computedAt,
1412
+ i.taskType,
1413
+ i.model,
1414
+ i.provider,
1415
+ i.samples,
1416
+ i.successRate,
1417
+ i.avgTokens,
1418
+ i.baselineTokens,
1419
+ i.savingsPct,
1420
+ i.avgCostUsd
1421
+ );
1422
+ }
1423
+ function deleteInsightsExcept(validIds) {
1424
+ const all = getDb().prepare(`SELECT id FROM insights`).all();
1425
+ const keep = new Set(validIds);
1426
+ const del = getDb().prepare(`DELETE FROM insights WHERE id=?`);
1427
+ for (const r of all) if (!keep.has(String(r.id))) del.run(String(r.id));
1428
+ }
1429
+ function listInsights() {
1430
+ const rows = getDb().prepare(`SELECT * FROM insights ORDER BY savings_pct DESC`).all();
1431
+ return rows.map(mapInsight);
1432
+ }
1433
+ function unsyncedInsights() {
1434
+ const rows = getDb().prepare(`SELECT * FROM insights WHERE synced=0`).all();
1435
+ return rows.map(mapInsight);
1436
+ }
1437
+ function mapInsight(r) {
1438
+ return {
1439
+ id: String(r.id),
1440
+ computedAt: Number(r.computed_at),
1441
+ taskType: String(r.task_type),
1442
+ model: String(r.model),
1443
+ provider: String(r.provider),
1444
+ samples: Number(r.samples),
1445
+ successRate: Number(r.success_rate),
1446
+ avgTokens: Number(r.avg_tokens),
1447
+ baselineTokens: Number(r.baseline_tokens),
1448
+ savingsPct: Number(r.savings_pct),
1449
+ avgCostUsd: Number(r.avg_cost_usd)
1450
+ };
1451
+ }
1310
1452
 
1311
1453
  // src/usage/report.ts
1312
1454
  function renderUsageReport(filter = {}) {
@@ -1349,6 +1491,80 @@ function renderUsageReport(filter = {}) {
1349
1491
  ].join("\n");
1350
1492
  }
1351
1493
 
1494
+ // src/usage/insights.ts
1495
+ var MIN_SAMPLES = 3;
1496
+ var MIN_SUCCESS = 0.7;
1497
+ var MIN_MARGIN = 0.2;
1498
+ function median(xs) {
1499
+ const s = [...xs].sort((a, b) => a - b);
1500
+ const mid = Math.floor(s.length / 2);
1501
+ return s.length % 2 ? s[mid] : (s[mid - 1] + s[mid]) / 2;
1502
+ }
1503
+ function distillInsights(now = Date.now()) {
1504
+ const eff = modelTaskEfficiency();
1505
+ const byTask = /* @__PURE__ */ new Map();
1506
+ for (const r of eff) {
1507
+ const list = byTask.get(r.taskType) ?? [];
1508
+ list.push(r);
1509
+ byTask.set(r.taskType, list);
1510
+ }
1511
+ const valid = [];
1512
+ for (const [taskType, list] of byTask) {
1513
+ const qualified = list.filter(
1514
+ (r) => r.steps >= MIN_SAMPLES && r.successRate >= MIN_SUCCESS && r.avgTokensPerSuccess > 0
1515
+ );
1516
+ if (qualified.length < 2) continue;
1517
+ const baseline = median(qualified.map((r) => r.avgTokensPerSuccess));
1518
+ for (const r of qualified) {
1519
+ const savings = 1 - r.avgTokensPerSuccess / baseline;
1520
+ if (savings >= MIN_MARGIN) {
1521
+ const id = `${taskType}__${r.model}`;
1522
+ valid.push(id);
1523
+ upsertInsight({
1524
+ id,
1525
+ computedAt: now,
1526
+ taskType,
1527
+ model: r.model,
1528
+ provider: r.model.split("/")[0] ?? "unknown",
1529
+ samples: r.steps,
1530
+ successRate: r.successRate,
1531
+ avgTokens: r.avgTokensPerSuccess,
1532
+ baselineTokens: baseline,
1533
+ savingsPct: savings * 100,
1534
+ avgCostUsd: r.avgCostPerSuccess
1535
+ });
1536
+ }
1537
+ }
1538
+ }
1539
+ deleteInsightsExcept(valid);
1540
+ return listInsights();
1541
+ }
1542
+ function insightBoostMap(insights) {
1543
+ const map = {};
1544
+ for (const i of insights) map[`${i.taskType}:${i.model}`] = i.savingsPct;
1545
+ return map;
1546
+ }
1547
+ function renderPlaybook(insights) {
1548
+ if (!insights.length) {
1549
+ return c.bold("Efficiency playbook") + "\n" + c.dim(
1550
+ `Nothing distilled yet \u2014 needs \u22652 models with \u2265${MIN_SAMPLES} successful steps on the same task type,
1551
+ where one beats the median by \u2265${MIN_MARGIN * 100}% tokens. Keep running tasks (vary models with -o / pins).`
1552
+ );
1553
+ }
1554
+ return c.bold("Efficiency playbook") + c.dim(" (the notably efficient approaches \u2014 this is what `poly sync` uploads)") + "\n" + table(
1555
+ ["Task", "Model", "Avg tok", "Baseline", "Savings", "Success", "n"],
1556
+ insights.map((i) => [
1557
+ i.taskType,
1558
+ c.green(i.model),
1559
+ tokens(Math.round(i.avgTokens)),
1560
+ tokens(Math.round(i.baselineTokens)),
1561
+ c.green(`-${i.savingsPct.toFixed(0)}%`),
1562
+ `${Math.round(i.successRate * 100)}%`,
1563
+ String(i.samples)
1564
+ ])
1565
+ );
1566
+ }
1567
+
1352
1568
  // src/usage/analyze.ts
1353
1569
  var MIN_SUCCESS_RATE = 0.5;
1354
1570
  function renderAnalysis(filter = {}) {
@@ -1359,6 +1575,9 @@ function renderAnalysis(filter = {}) {
1359
1575
  if (!byModelTask.length && !byObjective.length && !byCommand.length) {
1360
1576
  return c.dim('No analytics yet. Run `poly run "<task>"` a few times (and rate the result) first.');
1361
1577
  }
1578
+ const insights = distillInsights();
1579
+ out.push(renderPlaybook(insights));
1580
+ out.push("");
1362
1581
  if (byModelTask.length) {
1363
1582
  const byTask = /* @__PURE__ */ new Map();
1364
1583
  for (const r of byModelTask) {
@@ -1445,7 +1664,7 @@ function renderAnalysis(filter = {}) {
1445
1664
  }
1446
1665
 
1447
1666
  // src/usage/firestoreSync.ts
1448
- async function syncUsage(config) {
1667
+ async function syncUsage(config, opts = {}) {
1449
1668
  if (!config.firestore.enabled) {
1450
1669
  return { synced: 0, message: "Firestore sync is disabled (enable with `poly config firestore on`)." };
1451
1670
  }
@@ -1473,28 +1692,62 @@ async function syncUsage(config) {
1473
1692
  }
1474
1693
  }
1475
1694
  const fdb = fsMod.getFirestore();
1695
+ distillInsights();
1696
+ const insights = unsyncedInsights();
1697
+ if (insights.length) {
1698
+ const batch = fdb.batch();
1699
+ const col = fdb.collection("polymath_insights");
1700
+ for (const i of insights) {
1701
+ batch.set(col.doc(i.id), {
1702
+ computedAt: i.computedAt,
1703
+ taskType: i.taskType,
1704
+ model: i.model,
1705
+ provider: i.provider,
1706
+ samples: i.samples,
1707
+ successRate: i.successRate,
1708
+ avgTokens: i.avgTokens,
1709
+ baselineTokens: i.baselineTokens,
1710
+ savingsPct: i.savingsPct,
1711
+ avgCostUsd: i.avgCostUsd
1712
+ });
1713
+ }
1714
+ await batch.commit();
1715
+ markTableSynced("insights", insights.map((i) => i.id));
1716
+ }
1717
+ if (!opts.raw) {
1718
+ return {
1719
+ synced: insights.length,
1720
+ message: insights.length ? `Synced ${insights.length} efficiency insight(s) to polymath_insights. Raw logs stayed local (use --raw to push).` : "No new insights to sync \u2014 raw logs stay local by default (use --raw to push them)."
1721
+ };
1722
+ }
1476
1723
  const rows = unsyncedRows();
1477
- if (!rows.length) return { synced: 0, message: "Nothing to sync \u2014 all rows already pushed." };
1478
- const batch = fdb.batch();
1479
- const col = fdb.collection(config.firestore.collection);
1480
- for (const r of rows) {
1481
- const ref = col.doc(`${r.date}__${r.id}`);
1482
- batch.set(ref, {
1483
- ts: r.ts,
1484
- date: r.date,
1485
- provider: r.provider,
1486
- model: r.model,
1487
- taskType: r.taskType,
1488
- promptTokens: r.promptTokens,
1489
- completionTokens: r.completionTokens,
1490
- totalTokens: r.totalTokens,
1491
- costUsd: r.costUsd,
1492
- sessionId: r.sessionId ?? null
1493
- });
1724
+ if (!rows.length && !insights.length) return { synced: 0, message: "Nothing to sync \u2014 all rows already pushed." };
1725
+ if (rows.length) {
1726
+ const batch = fdb.batch();
1727
+ const col = fdb.collection(config.firestore.collection);
1728
+ for (const r of rows) {
1729
+ const ref = col.doc(`${r.date}__${r.id}`);
1730
+ batch.set(ref, {
1731
+ ts: r.ts,
1732
+ date: r.date,
1733
+ provider: r.provider,
1734
+ model: r.model,
1735
+ taskType: r.taskType,
1736
+ command: r.command ?? "run",
1737
+ promptTokens: r.promptTokens,
1738
+ completionTokens: r.completionTokens,
1739
+ totalTokens: r.totalTokens,
1740
+ costUsd: r.costUsd,
1741
+ sessionId: r.sessionId ?? null
1742
+ });
1743
+ }
1744
+ await batch.commit();
1745
+ markSynced(rows.map((r) => r.id));
1494
1746
  }
1495
- await batch.commit();
1496
- markSynced(rows.map((r) => r.id));
1497
- return { synced: rows.length, message: `Synced ${rows.length} rows to ${config.firestore.collection}.` };
1747
+ return {
1748
+ synced: insights.length + rows.length,
1749
+ message: `Synced ${insights.length} insights + ${rows.length} raw rows to Firestore.`
1750
+ };
1498
1751
  }
1499
1752
 
1500
1753
  // src/usage/dataconnect.ts
@@ -1539,14 +1792,56 @@ async function executeGraphql(cfg2, token, query, variables) {
1539
1792
  }
1540
1793
  }
1541
1794
  var iso = (ms) => new Date(ms).toISOString();
1542
- async function syncDataConnect(config) {
1795
+ async function syncDataConnect(config, opts = {}) {
1543
1796
  const dc = config.dataconnect;
1544
1797
  if (!dc?.enabled) {
1545
- return { sessions: 0, steps: 0, commands: 0, calls: 0, message: "Data Connect sync is disabled (enable with `poly config dataconnect on`)." };
1798
+ return { insights: 0, sessions: 0, steps: 0, commands: 0, calls: 0, message: "Data Connect sync is disabled (enable with `poly config dataconnect on`)." };
1546
1799
  }
1547
1800
  const projectId = config.firestore.projectId;
1548
1801
  const token = await adminAccessToken(projectId);
1549
1802
  const cfg2 = { projectId, location: dc.location, serviceId: dc.serviceId };
1803
+ distillInsights();
1804
+ const insights = unsyncedInsights();
1805
+ for (const i of insights) {
1806
+ await executeGraphql(
1807
+ cfg2,
1808
+ token,
1809
+ `mutation UpsertInsight($id: String!, $computedAt: Timestamp!, $taskType: String!,
1810
+ $model: String!, $provider: String!, $samples: Int!, $successRate: Float!,
1811
+ $avgTokens: Float!, $baselineTokens: Float!, $savingsPct: Float!, $avgCostUsd: Float!) {
1812
+ insight_upsert(data: {
1813
+ id: $id, computedAt: $computedAt, taskType: $taskType, model: $model,
1814
+ provider: $provider, samples: $samples, successRate: $successRate,
1815
+ avgTokens: $avgTokens, baselineTokens: $baselineTokens,
1816
+ savingsPct: $savingsPct, avgCostUsd: $avgCostUsd
1817
+ })
1818
+ }`,
1819
+ {
1820
+ id: i.id,
1821
+ computedAt: iso(i.computedAt),
1822
+ taskType: i.taskType,
1823
+ model: i.model,
1824
+ provider: i.provider,
1825
+ samples: i.samples,
1826
+ successRate: i.successRate,
1827
+ avgTokens: i.avgTokens,
1828
+ baselineTokens: i.baselineTokens,
1829
+ savingsPct: i.savingsPct,
1830
+ avgCostUsd: i.avgCostUsd
1831
+ }
1832
+ );
1833
+ }
1834
+ markTableSynced("insights", insights.map((i) => i.id));
1835
+ if (!opts.raw) {
1836
+ return {
1837
+ insights: insights.length,
1838
+ sessions: 0,
1839
+ steps: 0,
1840
+ commands: 0,
1841
+ calls: 0,
1842
+ message: `Synced ${insights.length} efficiency insight(s) to Data Connect (${cfg2.serviceId}@${cfg2.location}). Raw logs stayed local \u2014 use \`poly sync --raw\` to push everything.`
1843
+ };
1844
+ }
1550
1845
  const sessions = unsyncedSessions();
1551
1846
  for (const s of sessions) {
1552
1847
  await executeGraphql(
@@ -1679,11 +1974,12 @@ async function syncDataConnect(config) {
1679
1974
  }
1680
1975
  markSynced(calls.map((c2) => c2.id));
1681
1976
  return {
1977
+ insights: insights.length,
1682
1978
  sessions: sessions.length,
1683
1979
  steps: steps.length,
1684
1980
  commands: commands.length,
1685
1981
  calls: calls.length,
1686
- message: `Synced ${sessions.length} sessions, ${steps.length} steps, ${commands.length} commands, ${calls.length} calls to Data Connect (${cfg2.serviceId}@${cfg2.location}).`
1982
+ message: `Synced ${insights.length} insights + raw: ${sessions.length} sessions, ${steps.length} steps, ${commands.length} commands, ${calls.length} calls (${cfg2.serviceId}@${cfg2.location}).`
1687
1983
  };
1688
1984
  }
1689
1985
 
@@ -1873,6 +2169,25 @@ ${stderr}`)) };
1873
2169
 
1874
2170
  // src/agent/loop.ts
1875
2171
  var MAX_ITERS_PER_STEP = 6;
2172
+ var KNOWN_TOOLS = new Set(TOOL_SCHEMAS.map((t) => t.function.name));
2173
+ function parseTextToolCall(content) {
2174
+ if (!content) return null;
2175
+ const json = extractJson(content);
2176
+ if (!json) return null;
2177
+ try {
2178
+ const obj = JSON.parse(json);
2179
+ const name = obj?.name ?? obj?.tool ?? obj?.function?.name;
2180
+ if (typeof name !== "string" || !KNOWN_TOOLS.has(name)) return null;
2181
+ const args = obj.arguments ?? obj.parameters ?? obj.function?.arguments ?? {};
2182
+ return {
2183
+ id: `textcall_${name}`,
2184
+ type: "function",
2185
+ function: { name, arguments: typeof args === "string" ? args : JSON.stringify(args) }
2186
+ };
2187
+ } catch {
2188
+ return null;
2189
+ }
2190
+ }
1876
2191
  async function runAgent(goal, deps, emit) {
1877
2192
  const { client: client2, models, policy, sessionId, cwd } = deps;
1878
2193
  let totalCostUsd = 0;
@@ -1992,6 +2307,26 @@ async function runAgent(goal, deps, emit) {
1992
2307
  }
1993
2308
  continue;
1994
2309
  }
2310
+ const textCall = useTools ? parseTextToolCall(result.content) : null;
2311
+ if (textCall) {
2312
+ stepToolCalls++;
2313
+ emit({ type: "tool-call", name: textCall.function.name, args: textCall.function.arguments });
2314
+ const outcome = executeTool(textCall.function.name, textCall.function.arguments, toolCtx);
2315
+ emit({ type: "tool-result", name: textCall.function.name, result: outcome.result });
2316
+ if (outcome.finishSummary != null) {
2317
+ summary = outcome.finishSummary;
2318
+ finishedBy = "finish-tool";
2319
+ break;
2320
+ }
2321
+ messages.push({ role: "assistant", content: result.content });
2322
+ messages.push({
2323
+ role: "user",
2324
+ content: `Tool ${textCall.function.name} returned:
2325
+ ${outcome.result}
2326
+ Continue with this step. When the objective is met, reply with ONLY {"name":"finish","arguments":{"summary":"<one line>"}}.`
2327
+ });
2328
+ continue;
2329
+ }
1995
2330
  summary = result.content || summary;
1996
2331
  if (summary) finishedBy = "text";
1997
2332
  break;
@@ -2048,7 +2383,8 @@ function stepSystemPrompt(goal, step, priorSummaries, useTools) {
2048
2383
  What previous steps accomplished:
2049
2384
  ${priorSummaries.join("\n")}` : "";
2050
2385
  const toolNote = useTools ? `
2051
- You may use the provided tools (read_file, write_file, list_dir, run_command). Call the \`finish\` tool with a one-line summary when this step's objective is met.` : `
2386
+ You may use the provided tools (read_file, write_file, list_dir, run_command). Call the \`finish\` tool with a one-line summary when this step's objective is met.
2387
+ If you cannot call tools natively, reply with ONLY one JSON object per turn, no prose: {"name":"<tool>","arguments":{...}}` : `
2052
2388
  Return a concise result for this step. Do not ask the user questions.`;
2053
2389
  return `You are the "${step.type}" stage of an autonomous coding agent.
2054
2390
  Overall goal: ${goal}
@@ -2274,21 +2610,30 @@ function truncate2(s, n) {
2274
2610
 
2275
2611
  // src/index.ts
2276
2612
  var program = new Command();
2277
- program.name("poly").description("Polymath \u2014 cost-optimized, multi-model TUI coding agent").version("0.2.0");
2613
+ program.name("poly").description("Polymath \u2014 cost-optimized, multi-model TUI coding agent").version("0.3.1");
2278
2614
  function client(config) {
2279
2615
  return new OpenRouterClient({
2280
2616
  apiKey: resolveApiKey(config),
2281
2617
  referer: config.referer,
2282
- title: config.title
2618
+ title: config.title,
2619
+ localBaseUrl: config.local.enabled ? config.local.baseUrl : void 0
2283
2620
  });
2284
2621
  }
2285
2622
  function buildPolicy(config, opts) {
2286
2623
  const objective = opts.objective || config.defaultObjective;
2287
2624
  const maxCost = opts.maxCost != null ? parseFloat(opts.maxCost) : config.maxCostPerCallUsd;
2625
+ let empirical;
2626
+ try {
2627
+ empirical = insightBoostMap(listInsights());
2628
+ if (!Object.keys(empirical).length) empirical = void 0;
2629
+ } catch {
2630
+ empirical = void 0;
2631
+ }
2288
2632
  return {
2289
2633
  objective,
2290
2634
  maxCostPerCallUsd: Number.isFinite(maxCost) ? maxCost : void 0,
2291
- pinned: config.pinned
2635
+ pinned: config.pinned,
2636
+ empirical
2292
2637
  };
2293
2638
  }
2294
2639
  function localDate3(d = /* @__PURE__ */ new Date()) {
@@ -2315,9 +2660,23 @@ function trackCommand(opts) {
2315
2660
  }
2316
2661
  }
2317
2662
  async function loadCatalog(config, refresh = false) {
2318
- const models = await getModels(client(config), { refresh });
2663
+ const cl = client(config);
2664
+ const hasKey = !!resolveApiKey(config);
2665
+ let models = [];
2666
+ try {
2667
+ models = await getModels(cl, { refresh });
2668
+ } catch (e) {
2669
+ if (!config.local.enabled) throw e;
2670
+ }
2671
+ if (config.local.enabled) {
2672
+ const local = await getLocalModels(cl);
2673
+ if (!local.length) {
2674
+ console.error(c.yellow(`Local server (${config.local.baseUrl}) returned no models \u2014 is it running?`));
2675
+ }
2676
+ models = hasKey ? [...local, ...models] : local;
2677
+ }
2319
2678
  if (!models.length) {
2320
- console.error(c.red("Could not load the model catalog. Check your connection."));
2679
+ console.error(c.red("No models available. Check your connection, or `poly config local on` with a running Ollama/LM Studio."));
2321
2680
  process.exit(1);
2322
2681
  }
2323
2682
  return models;
@@ -2328,10 +2687,12 @@ program.command("login").description("Connect Polymath to OpenRouter (set/replac
2328
2687
  program.command("run", { isDefault: true }).description("Launch the interactive agent (TUI)").argument("[goal...]", "what to do (optional; prompts if omitted)").option("-o, --objective <name>", "routing objective: cheapest | value | quality").option("--max-cost <usd>", "exclude models whose projected per-call cost exceeds this").option("-w, --write", "allow the agent to write files (confined to --cwd)", false).option("-x, --commands", "DANGER: let the model run arbitrary shell commands in --cwd", false).option("-C, --cwd <dir>", "working directory", process.cwd()).action(async (goalParts, opts) => {
2329
2688
  const startedAt = Date.now();
2330
2689
  const config = loadConfig();
2331
- const key = await ensureApiKey(config);
2332
- if (!key) {
2333
- console.error(c.red("No API key \u2014 cannot run. Try `poly login`."));
2334
- process.exit(1);
2690
+ if (!config.local.enabled || resolveApiKey(config)) {
2691
+ const key = await ensureApiKey(config);
2692
+ if (!key && !config.local.enabled) {
2693
+ console.error(c.red("No API key \u2014 cannot run. Try `poly login`, or `poly config local on` for a local LLM."));
2694
+ process.exit(1);
2695
+ }
2335
2696
  }
2336
2697
  const reloaded = loadConfig();
2337
2698
  const models = await loadCatalog(reloaded);
@@ -2434,21 +2795,22 @@ program.command("usage").description("Show recorded usage & cost by date + model
2434
2795
  program.command("analyze").description("Which approach reaches the goal with the FEWEST tokens \u2014 per model, task, objective, command").option("--since <date>", "YYYY-MM-DD inclusive").option("--until <date>", "YYYY-MM-DD inclusive").action(async (opts) => {
2435
2796
  console.log(renderAnalysis({ since: opts.since, until: opts.until }));
2436
2797
  });
2437
- program.command("sync").description("Push the local analytics ledger to Firebase (Data Connect SQL and/or Firestore)").action(async () => {
2798
+ program.command("sync").description("Push DISTILLED efficiency insights to Firebase (raw logs stay local unless --raw)").option("--raw", "also push the full raw ledger (sessions/steps/calls/commands)", false).action(async (opts) => {
2438
2799
  const config = loadConfig();
2439
2800
  let pushed = false;
2440
2801
  if (config.dataconnect.enabled) {
2441
2802
  pushed = true;
2442
2803
  try {
2443
- const res = await syncDataConnect(config);
2444
- console.log(res.sessions + res.steps + res.commands + res.calls > 0 ? c.green(res.message) : c.dim(res.message));
2804
+ const res = await syncDataConnect(config, { raw: !!opts.raw });
2805
+ const n = res.insights + res.sessions + res.steps + res.commands + res.calls;
2806
+ console.log(n > 0 ? c.green(res.message) : c.dim(res.message));
2445
2807
  } catch (e) {
2446
2808
  console.error(c.red(`Data Connect sync failed: ${e?.message ?? e}`));
2447
2809
  }
2448
2810
  }
2449
2811
  if (config.firestore.enabled) {
2450
2812
  pushed = true;
2451
- const res = await syncUsage(config);
2813
+ const res = await syncUsage(config, { raw: !!opts.raw });
2452
2814
  console.log(res.synced > 0 ? c.green(res.message) : c.dim(res.message));
2453
2815
  }
2454
2816
  if (!pushed) {
@@ -2499,6 +2861,17 @@ cfg.command("firestore").description("Enable/disable Firestore sync: on | off").
2499
2861
  saveConfig(config);
2500
2862
  console.log(c.green(`Firestore sync ${config.firestore.enabled ? "enabled" : "disabled"}.`));
2501
2863
  });
2864
+ cfg.command("local").description("Enable/disable a local LLM server (Ollama/LM Studio): on | off [--base <url>]").argument("<state>").option("--base <url>", "OpenAI-compatible base URL (default http://localhost:11434/v1)").action((state, opts) => {
2865
+ const config = loadConfig();
2866
+ config.local.enabled = /^on|true|1$/i.test(state);
2867
+ if (opts.base) config.local.baseUrl = String(opts.base).replace(/\/$/, "");
2868
+ saveConfig(config);
2869
+ console.log(
2870
+ c.green(
2871
+ `Local LLM ${config.local.enabled ? "enabled" : "disabled"} (${config.local.baseUrl}). Models appear as local/<name> with $0 cost.`
2872
+ )
2873
+ );
2874
+ });
2502
2875
  cfg.command("dataconnect").description("Enable/disable Firebase Data Connect (SQL) sync: on | off [--location <loc>] [--service <id>]").argument("<state>").option("--location <loc>", "Data Connect location (default us-east4)").option("--service <id>", "Data Connect service id (default polymath)").action((state, opts) => {
2503
2876
  const config = loadConfig();
2504
2877
  config.dataconnect.enabled = /^on|true|1$/i.test(state);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polymath-agent",
3
- "version": "0.2.0",
3
+ "version": "0.3.1",
4
4
  "description": "Polymath — a cost-optimized, multi-model TUI coding agent. Decomposes work into typed tasks, routes each task to the cheapest capable model via OpenRouter, and logs real usage/cost by date + model.",
5
5
  "type": "module",
6
6
  "bin": {