polymath-agent 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +31 -2
  2. package/dist/cli.js +1218 -132
  3. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -44,6 +44,16 @@ var DEFAULT_CONFIG = {
44
44
  enabled: false,
45
45
  projectId: "mathology-b8e3d",
46
46
  collection: "polymath_usage"
47
+ },
48
+ dataconnect: {
49
+ enabled: false,
50
+ location: "us-east4",
51
+ serviceId: "polymath"
52
+ },
53
+ local: {
54
+ enabled: false,
55
+ baseUrl: "http://localhost:11434/v1"
56
+ // Ollama default; LM Studio: http://localhost:1234/v1
47
57
  }
48
58
  };
49
59
  function loadConfig() {
@@ -54,7 +64,9 @@ function loadConfig() {
54
64
  return {
55
65
  ...DEFAULT_CONFIG,
56
66
  ...raw,
57
- firestore: { ...DEFAULT_CONFIG.firestore, ...raw.firestore ?? {} }
67
+ firestore: { ...DEFAULT_CONFIG.firestore, ...raw.firestore ?? {} },
68
+ dataconnect: { ...DEFAULT_CONFIG.dataconnect, ...raw.dataconnect ?? {} },
69
+ local: { ...DEFAULT_CONFIG.local, ...raw.local ?? {} }
58
70
  };
59
71
  } catch {
60
72
  return { ...DEFAULT_CONFIG };
@@ -75,6 +87,7 @@ function resolveApiKey(config) {
75
87
 
76
88
  // src/providers/openrouter.ts
77
89
  var BASE = globalThis.process?.env?.OPENROUTER_BASE_URL?.replace(/\/$/, "") || "https://openrouter.ai/api/v1";
90
+ var LOCAL_PREFIX = "local/";
78
91
  var OpenRouterError = class extends Error {
79
92
  status;
80
93
  constructor(message, status) {
@@ -87,10 +100,12 @@ var OpenRouterClient = class {
87
100
  apiKey;
88
101
  referer;
89
102
  title;
103
+ localBaseUrl;
90
104
  constructor(opts = {}) {
91
105
  this.apiKey = opts.apiKey;
92
106
  this.referer = opts.referer ?? "https://github.com/polymath-agent";
93
107
  this.title = opts.title ?? "Polymath";
108
+ this.localBaseUrl = opts.localBaseUrl?.replace(/\/$/, "");
94
109
  }
95
110
  headers(json = true) {
96
111
  const h = {
@@ -101,6 +116,24 @@ var OpenRouterClient = class {
101
116
  if (json) h["Content-Type"] = "application/json";
102
117
  return h;
103
118
  }
119
+ /** Resolve where a model's request goes: the local server for `local/*`, else OpenRouter. */
120
+ target(modelId) {
121
+ if (this.localBaseUrl && modelId.startsWith(LOCAL_PREFIX)) {
122
+ return { base: this.localBaseUrl, model: modelId.slice(LOCAL_PREFIX.length), isLocal: true };
123
+ }
124
+ return { base: BASE, model: modelId, isLocal: false };
125
+ }
126
+ requireKeyFor(isLocal) {
127
+ if (!isLocal && !this.apiKey) throw new OpenRouterError("No API key set. Run `poly login`.");
128
+ }
129
+ /** List models from the local OpenAI-compatible server (Ollama / LM Studio). */
130
+ async listLocalRawModels() {
131
+ if (!this.localBaseUrl) return [];
132
+ const res = await fetch(`${this.localBaseUrl}/models`);
133
+ if (!res.ok) throw new OpenRouterError(`Local server: failed to list models (${res.status})`, res.status);
134
+ const json = await res.json();
135
+ return json.data ?? [];
136
+ }
104
137
  /** Raw /models payload (no auth required). */
105
138
  async listRawModels() {
106
139
  const res = await fetch(`${BASE}/models`, { headers: this.headers(false) });
@@ -120,24 +153,28 @@ var OpenRouterClient = class {
120
153
  const d = json.data ?? {};
121
154
  return { label: d.label, usage: d.usage, limit: d.limit };
122
155
  }
123
- buildBody(req, stream) {
156
+ buildBody(req, stream, modelOverride, isLocal) {
124
157
  return {
125
- model: req.model,
158
+ model: modelOverride,
126
159
  messages: req.messages.map(serializeMessage),
127
160
  ...req.tools && req.tools.length ? { tools: req.tools, tool_choice: "auto" } : {},
128
161
  temperature: req.temperature ?? 0.2,
129
162
  ...req.maxTokens ? { max_tokens: req.maxTokens } : {},
130
163
  stream,
131
- usage: { include: true }
164
+ // OpenRouter-specific accounting param; local servers may reject unknown fields.
165
+ ...isLocal ? {} : { usage: { include: true } },
166
+ // OpenAI-compat way to get token usage in the final stream chunk (Ollama/LM Studio).
167
+ ...isLocal && stream ? { stream_options: { include_usage: true } } : {}
132
168
  };
133
169
  }
134
170
  /** Non-streaming completion. costUsd is computed from `pricing` (deterministic). */
135
171
  async complete(req, pricing) {
136
- if (!this.apiKey) throw new OpenRouterError("No API key set. Run `poly login`.");
137
- const res = await fetch(`${BASE}/chat/completions`, {
172
+ const t = this.target(req.model);
173
+ this.requireKeyFor(t.isLocal);
174
+ const res = await fetch(`${t.base}/chat/completions`, {
138
175
  method: "POST",
139
176
  headers: this.headers(),
140
- body: JSON.stringify(this.buildBody(req, false))
177
+ body: JSON.stringify(this.buildBody(req, false, t.model, t.isLocal))
141
178
  });
142
179
  if (!res.ok) {
143
180
  const text = await res.text().catch(() => "");
@@ -158,8 +195,10 @@ var OpenRouterClient = class {
158
195
  content: typeof msg.content === "string" ? msg.content : "",
159
196
  toolCalls: parseToolCalls(msg.tool_calls),
160
197
  usage,
161
- model: json.model ?? req.model,
162
- costUsd: computeCost(usage, pricing, json.usage?.cost),
198
+ // Keep the prefixed id for local models so the ledger stays consistent.
199
+ model: t.isLocal ? req.model : json.model ?? req.model,
200
+ // Local inference is free regardless of what the server claims to report.
201
+ costUsd: computeCost(usage, pricing, t.isLocal ? void 0 : json.usage?.cost),
163
202
  finishReason: choice.finish_reason ?? null
164
203
  };
165
204
  }
@@ -168,11 +207,12 @@ var OpenRouterClient = class {
168
207
  * Tool-call deltas are accumulated and surfaced in the final result.
169
208
  */
170
209
  async *stream(req, pricing) {
171
- if (!this.apiKey) throw new OpenRouterError("No API key set. Run `poly login`.");
172
- const res = await fetch(`${BASE}/chat/completions`, {
210
+ const t = this.target(req.model);
211
+ this.requireKeyFor(t.isLocal);
212
+ const res = await fetch(`${t.base}/chat/completions`, {
173
213
  method: "POST",
174
214
  headers: this.headers(),
175
- body: JSON.stringify(this.buildBody(req, true))
215
+ body: JSON.stringify(this.buildBody(req, true, t.model, t.isLocal))
176
216
  });
177
217
  if (!res.ok || !res.body) {
178
218
  const text = await res.text().catch(() => "");
@@ -206,7 +246,7 @@ var OpenRouterClient = class {
206
246
  if (evt?.error) {
207
247
  throw new OpenRouterError(evt.error.message ?? "Stream provider error", evt.error.code);
208
248
  }
209
- if (evt.model) model = evt.model;
249
+ if (evt.model && !t.isLocal) model = evt.model;
210
250
  if (evt.usage) usageJson = evt.usage;
211
251
  const choice = evt.choices?.[0];
212
252
  if (!choice) continue;
@@ -233,17 +273,17 @@ var OpenRouterClient = class {
233
273
  completionTokens: usageJson?.completion_tokens ?? 0,
234
274
  totalTokens: usageJson?.total_tokens ?? 0
235
275
  };
236
- const toolCalls = [...toolAcc.values()].filter((t) => t.name).map((t) => ({
237
- id: t.id || `call_${t.name}`,
276
+ const toolCalls = [...toolAcc.values()].filter((t2) => t2.name).map((t2) => ({
277
+ id: t2.id || `call_${t2.name}`,
238
278
  type: "function",
239
- function: { name: t.name, arguments: t.args || "{}" }
279
+ function: { name: t2.name, arguments: t2.args || "{}" }
240
280
  }));
241
281
  return {
242
282
  content,
243
283
  toolCalls,
244
284
  usage,
245
285
  model,
246
- costUsd: computeCost(usage, pricing, usageJson?.cost),
286
+ costUsd: computeCost(usage, pricing, t.isLocal ? void 0 : usageJson?.cost),
247
287
  finishReason
248
288
  };
249
289
  }
@@ -381,6 +421,38 @@ async function getModels(client2, opts = {}) {
381
421
  return models;
382
422
  }
383
423
 
424
+ // src/models/local.ts
425
/**
 * Convert raw `/models` entries from a local server into model descriptors.
 *
 * Entries without a truthy `id` are dropped. Every descriptor is priced at zero,
 * gets the `local/` id prefix, and defaults to an 8192-token context when the
 * server does not report `context_length`.
 *
 * @param {Iterable<object>} raw - Raw entries from the local server.
 * @returns {Array<object>} Model descriptors, in input order.
 */
function parseLocalModels(raw) {
  const toDescriptor = (entry) => {
    const name = String(entry.id);
    return {
      id: LOCAL_PREFIX + name,
      name: `${name} (local)`,
      provider: "local",
      contextLength: entry.context_length ?? 8192,
      pricing: { promptUsdPerMTok: 0, completionUsdPerMTok: 0 },
      tier: classifyTier(name, 0),
      capabilities: {
        // OpenAI-compatible local servers pass tool schemas through; models that
        // can't call tools simply reply with text, which the agent loop handles.
        tools: true,
        vision: /llava|vision|vl\b|moondream/i.test(name)
      }
    };
  };
  return [...raw].filter((entry) => entry?.id).map((entry) => toDescriptor(entry));
}
447
/**
 * Best-effort listing of local models.
 *
 * @param {object} client2 - Client exposing `listLocalRawModels()`.
 * @returns {Promise<Array<object>>} Parsed descriptors, or `[]` if listing or
 *   parsing throws.
 */
async function getLocalModels(client2) {
  let models = [];
  try {
    models = parseLocalModels(await client2.listLocalRawModels());
  } catch {
    // Deliberate: any failure while listing or parsing yields an empty list.
  }
  return models;
}
455
+
384
456
  // src/auth/onboarding.ts
385
457
  import readline from "node:readline";
386
458
 
@@ -560,7 +632,7 @@ function heuristicPlan(goal) {
560
632
  ];
561
633
  return { goal, steps };
562
634
  }
563
- async function planRequest(goal, client2, planModel) {
635
+ async function planRequest(goal, client2, planModel, onUsage) {
564
636
  const result = await client2.complete(
565
637
  {
566
638
  model: planModel.id,
@@ -573,6 +645,7 @@ async function planRequest(goal, client2, planModel) {
573
645
  },
574
646
  planModel.pricing
575
647
  );
648
+ onUsage?.(result);
576
649
  const parsed = extractPlan(result.content);
577
650
  if (!parsed) return heuristicPlan(goal);
578
651
  return { goal, steps: parsed };
@@ -716,8 +789,11 @@ var HEADLINE_SKILLS = ["coding", "reasoning", "retrieval", "speed"];
716
789
  function projectCost(m, est) {
717
790
  return est.promptTokens / 1e6 * m.pricing.promptUsdPerMTok + est.completionTokens / 1e6 * m.pricing.completionUsdPerMTok;
718
791
  }
719
/**
 * Strength-per-dollar score for model `m` on `taskType`, optionally boosted by
 * observed token savings.
 *
 * @param {object} m - Candidate model (its `id` is used for the lookup key).
 * @param {string} taskType - Task type being routed.
 * @param {Object<string, number>} [empirical] - Savings percentages keyed by
 *   `"<taskType>:<modelId>"`.
 * @returns {number} Base score, multiplied by `1 + min(savings, 100) / 100`
 *   when a truthy savings entry exists.
 */
function taskValue(m, taskType, empirical) {
  // Price is floored at 0.01 so free models do not divide by zero.
  const perDollar = taskStrength(m, taskType) / Math.max(blendedPrice(m), 0.01);
  const savingsPct = empirical?.[`${taskType}:${m.id}`];
  if (!savingsPct) {
    return perDollar * 1;
  }
  const multiplier = 1 + Math.min(savingsPct, 100) / 100;
  return perDollar * multiplier;
}
722
798
  function candidatesFor(taskType, models, policy, est) {
723
799
  const spec = TASK_SPECS[taskType];
@@ -746,7 +822,9 @@ function rank(models, policy, taskType) {
746
822
  break;
747
823
  case "value":
748
824
  default:
749
- sorted.sort((a, b) => taskValue(b, taskType) - taskValue(a, taskType));
825
+ sorted.sort(
826
+ (a, b) => taskValue(b, taskType, policy.empirical) - taskValue(a, taskType, policy.empirical)
827
+ );
750
828
  break;
751
829
  }
752
830
  return sorted;
@@ -764,7 +842,8 @@ function route(taskType, models, policy, est = { promptTokens: 4e3, completionTo
764
842
  const ranked = rank(cands, policy, taskType);
765
843
  const chosen = ranked[0];
766
844
  const skill = TASK_SKILL[taskType];
767
- const reason = policy.objective === "cheapest" ? `cheapest model that covers ${skill}` : policy.objective === "quality" ? `strongest at ${skill}` : `best ${skill}-per-dollar`;
845
+ const proven = policy.empirical?.[`${taskType}:${chosen.id}`];
846
+ const reason = policy.objective === "cheapest" ? `cheapest model that covers ${skill}` : policy.objective === "quality" ? `strongest at ${skill}` : proven ? `proven ${Math.round(proven)}% fewer tokens on ${taskType} (playbook)` : `best ${skill}-per-dollar`;
768
847
  return { model: chosen, reason, estCostUsd: projectCost(chosen, est) };
769
848
  }
770
849
 
@@ -935,14 +1014,95 @@ function getDb() {
935
1014
  );
936
1015
  CREATE INDEX IF NOT EXISTS idx_usage_date ON usage_log(date);
937
1016
  CREATE INDEX IF NOT EXISTS idx_usage_model ON usage_log(model);
1017
+
1018
+ -- One row per agent session (a \`poly run\`): goal + outcome + achievement scores.
1019
+ CREATE TABLE IF NOT EXISTS sessions (
1020
+ id TEXT PRIMARY KEY,
1021
+ ts INTEGER NOT NULL,
1022
+ date TEXT NOT NULL,
1023
+ goal TEXT NOT NULL,
1024
+ command TEXT NOT NULL DEFAULT 'run',
1025
+ objective TEXT NOT NULL,
1026
+ planned_steps INTEGER NOT NULL DEFAULT 0,
1027
+ completed_steps INTEGER NOT NULL DEFAULT 0,
1028
+ failed_steps INTEGER NOT NULL DEFAULT 0,
1029
+ auto_score REAL, -- 0..1 = completed/planned (agent-computed)
1030
+ user_score INTEGER, -- 0..9 user-rated goal achievement (nullable)
1031
+ prompt_tokens INTEGER NOT NULL DEFAULT 0,
1032
+ completion_tokens INTEGER NOT NULL DEFAULT 0,
1033
+ cost_usd REAL NOT NULL DEFAULT 0,
1034
+ duration_ms INTEGER NOT NULL DEFAULT 0,
1035
+ synced INTEGER NOT NULL DEFAULT 0
1036
+ );
1037
+ CREATE INDEX IF NOT EXISTS idx_sessions_date ON sessions(date);
1038
+
1039
+ -- One row per executed plan step: which model, how many round-trips, how it ended.
1040
+ CREATE TABLE IF NOT EXISTS step_runs (
1041
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
1042
+ session_id TEXT NOT NULL,
1043
+ step_no INTEGER NOT NULL,
1044
+ task_type TEXT NOT NULL,
1045
+ skill TEXT NOT NULL,
1046
+ model TEXT NOT NULL,
1047
+ provider TEXT NOT NULL,
1048
+ iterations INTEGER NOT NULL, -- LLM round-trips used for this step
1049
+ tool_calls INTEGER NOT NULL,
1050
+ prompt_tokens INTEGER NOT NULL,
1051
+ completion_tokens INTEGER NOT NULL,
1052
+ cost_usd REAL NOT NULL,
1053
+ finished_by TEXT NOT NULL, -- 'finish-tool' | 'text' | 'max-iters' | 'error'
1054
+ success INTEGER NOT NULL, -- 1 = ended cleanly (finish-tool or text)
1055
+ duration_ms INTEGER NOT NULL,
1056
+ synced INTEGER NOT NULL DEFAULT 0
1057
+ );
1058
+ CREATE INDEX IF NOT EXISTS idx_steps_session ON step_runs(session_id);
1059
+ CREATE INDEX IF NOT EXISTS idx_steps_model ON step_runs(model, task_type);
1060
+
1061
+ -- One row per CLI command invocation (run/recommend/...): tokens spent per command.
1062
+ CREATE TABLE IF NOT EXISTS command_runs (
1063
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
1064
+ session_id TEXT,
1065
+ ts INTEGER NOT NULL,
1066
+ date TEXT NOT NULL,
1067
+ command TEXT NOT NULL,
1068
+ args TEXT,
1069
+ objective TEXT,
1070
+ prompt_tokens INTEGER NOT NULL DEFAULT 0,
1071
+ completion_tokens INTEGER NOT NULL DEFAULT 0,
1072
+ cost_usd REAL NOT NULL DEFAULT 0,
1073
+ duration_ms INTEGER NOT NULL DEFAULT 0,
1074
+ synced INTEGER NOT NULL DEFAULT 0
1075
+ );
1076
+ CREATE INDEX IF NOT EXISTS idx_cmd_date ON command_runs(date);
1077
+
1078
+ -- Distilled efficiency insights: ONLY the notably cost-efficient approaches.
1079
+ -- This is what syncs to the cloud by default (raw logs stay local).
1080
+ CREATE TABLE IF NOT EXISTS insights (
1081
+ id TEXT PRIMARY KEY, -- "<task_type>__<model>"
1082
+ computed_at INTEGER NOT NULL,
1083
+ task_type TEXT NOT NULL,
1084
+ model TEXT NOT NULL,
1085
+ provider TEXT NOT NULL,
1086
+ samples INTEGER NOT NULL, -- successful steps observed
1087
+ success_rate REAL NOT NULL,
1088
+ avg_tokens REAL NOT NULL, -- per successful step
1089
+ baseline_tokens REAL NOT NULL, -- median across qualified competitors
1090
+ savings_pct REAL NOT NULL, -- vs baseline (the "\uC720\uB3C5" margin)
1091
+ avg_cost_usd REAL NOT NULL,
1092
+ synced INTEGER NOT NULL DEFAULT 0
1093
+ );
938
1094
  `);
1095
+ const cols = db.prepare(`PRAGMA table_info(usage_log)`).all();
1096
+ if (!cols.some((c2) => c2.name === "command")) {
1097
+ db.exec(`ALTER TABLE usage_log ADD COLUMN command TEXT NOT NULL DEFAULT 'run'`);
1098
+ }
939
1099
  return db;
940
1100
  }
941
1101
  function recordUsage(e) {
942
1102
  const stmt = getDb().prepare(`
943
1103
  INSERT INTO usage_log
944
- (ts, date, provider, model, task_type, prompt_tokens, completion_tokens, total_tokens, cost_usd, session_id)
945
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1104
+ (ts, date, provider, model, task_type, prompt_tokens, completion_tokens, total_tokens, cost_usd, session_id, command)
1105
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
946
1106
  `);
947
1107
  stmt.run(
948
1108
  e.ts,
@@ -954,7 +1114,8 @@ function recordUsage(e) {
954
1114
  e.completionTokens,
955
1115
  e.totalTokens,
956
1116
  e.costUsd,
957
- e.sessionId ?? null
1117
+ e.sessionId ?? null,
1118
+ e.command ?? "run"
958
1119
  );
959
1120
  }
960
1121
  function reportByDateModel(filter = {}) {
@@ -1016,7 +1177,8 @@ function unsyncedRows() {
1016
1177
  completionTokens: Number(r.completion_tokens),
1017
1178
  totalTokens: Number(r.total_tokens),
1018
1179
  costUsd: Number(r.cost_usd),
1019
- sessionId: r.session_id ? String(r.session_id) : void 0
1180
+ sessionId: r.session_id ? String(r.session_id) : void 0,
1181
+ command: r.command ? String(r.command) : "run"
1020
1182
  }));
1021
1183
  }
1022
1184
  function markSynced(ids) {
@@ -1024,6 +1186,269 @@ function markSynced(ids) {
1024
1186
  const stmt = getDb().prepare(`UPDATE usage_log SET synced = 1 WHERE id = ?`);
1025
1187
  for (const id of ids) stmt.run(id);
1026
1188
  }
1189
/**
 * Insert (or replace) the `sessions` row for a session that is starting.
 *
 * @param {object} s - Fields `id`, `ts`, `date`, `goal`, `command`, `objective`,
 *   `plannedSteps`.
 */
function startSession(s) {
  const insert = getDb().prepare(
    `INSERT OR REPLACE INTO sessions (id, ts, date, goal, command, objective, planned_steps)
 VALUES (?, ?, ?, ?, ?, ?, ?)`
  );
  const { id, ts, date, goal, command, objective, plannedSteps } = s;
  insert.run(id, ts, date, goal, command, objective, plannedSteps);
}
1195
/**
 * Write the final step counts, score, token totals, cost and duration onto an
 * existing `sessions` row.
 *
 * @param {string} id - Session id to update.
 * @param {object} u - Fields `plannedSteps`, `completedSteps`, `failedSteps`,
 *   `autoScore`, `promptTokens`, `completionTokens`, `costUsd`, `durationMs`.
 */
function finishSession(id, u) {
  const update = getDb().prepare(
    `UPDATE sessions SET planned_steps=?, completed_steps=?, failed_steps=?, auto_score=?,
 prompt_tokens=?, completion_tokens=?, cost_usd=?, duration_ms=? WHERE id=?`
  );
  // Values in placeholder order; the session id binds the trailing WHERE clause.
  const values = [
    u.plannedSteps,
    u.completedSteps,
    u.failedSteps,
    u.autoScore,
    u.promptTokens,
    u.completionTokens,
    u.costUsd,
    u.durationMs
  ];
  update.run(...values, id);
}
1211
/**
 * Store the user's rating on a session row.
 *
 * @param {string} sessionId - Session to rate.
 * @param {number} score - Rating written to `user_score` as given.
 */
function setUserScore(sessionId, score) {
  const update = getDb().prepare(`UPDATE sessions SET user_score=? WHERE id=?`);
  update.run(score, sessionId);
}
1214
/**
 * Append one `step_runs` row describing an executed plan step.
 *
 * @param {object} s - Step record; `success` is stored as 1 when truthy, else 0.
 */
function recordStepRun(s) {
  const insert = getDb().prepare(
    `INSERT INTO step_runs
 (session_id, step_no, task_type, skill, model, provider, iterations, tool_calls,
 prompt_tokens, completion_tokens, cost_usd, finished_by, success, duration_ms)
 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  const successFlag = s.success ? 1 : 0;
  insert.run(
    s.sessionId, s.stepNo, s.taskType, s.skill,
    s.model, s.provider, s.iterations, s.toolCalls,
    s.promptTokens, s.completionTokens, s.costUsd,
    s.finishedBy, successFlag, s.durationMs
  );
}
1237
/**
 * Append one `command_runs` row for a CLI command invocation.
 *
 * @param {object} c2 - Command record. Missing `sessionId`, `args` and
 *   `objective` are stored as NULL.
 */
function recordCommandRun(c2) {
  const insert = getDb().prepare(
    `INSERT INTO command_runs
 (session_id, ts, date, command, args, objective, prompt_tokens, completion_tokens, cost_usd, duration_ms)
 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  const orNull = (value) => value ?? null;
  insert.run(
    orNull(c2.sessionId),
    c2.ts,
    c2.date,
    c2.command,
    orNull(c2.args),
    orNull(c2.objective),
    c2.promptTokens,
    c2.completionTokens,
    c2.costUsd,
    c2.durationMs
  );
}
1255
/**
 * Sum the `usage_log` rows belonging to one session.
 *
 * @param {string} sessionId - Session whose usage is totalled.
 * @returns {{promptTokens: number, completionTokens: number, costUsd: number}}
 *   Totals; each is 0 when there are no rows.
 */
function sessionUsageTotals(sessionId) {
  const query = getDb().prepare(
    `SELECT COALESCE(SUM(prompt_tokens),0) AS p, COALESCE(SUM(completion_tokens),0) AS c, COALESCE(SUM(cost_usd),0) AS cost
 FROM usage_log WHERE session_id = ?`
  );
  const row = query.get(sessionId);
  const total = (column) => Number(row?.[column] ?? 0);
  return {
    promptTokens: total("p"),
    completionTokens: total("c"),
    costUsd: total("cost")
  };
}
1262
/**
 * Per (task type, model) efficiency aggregated from `step_runs`.
 *
 * @param {{since?: string, until?: string}} [filter] - Optional date bounds,
 *   compared against the local date of the owning session's timestamp.
 * @returns {Array<{taskType: string, model: string, steps: number,
 *   successRate: number, avgTokensPerSuccess: number, avgCostPerSuccess: number,
 *   avgIterations: number}>} Rows ordered by task type, then by average tokens
 *   per successful step. Averages that SQL reports as NULL become 0.
 */
function modelTaskEfficiency(filter = {}) {
  // step_runs has no timestamp column of its own, so filtering on `s.ts` fails
  // with "no such column" as soon as a bound is supplied. Use the owning
  // session's `ts` through a correlated scalar subquery instead; the unfiltered
  // query is unchanged. Steps whose session row is missing yield NULL and are
  // excluded only when a date bound is given.
  const sessionTs = "(SELECT ss.ts FROM sessions ss WHERE ss.id = s.session_id)";
  const { whereSql, params } = dateWhere(filter, sessionTs);
  const rows = getDb().prepare(
    `SELECT task_type AS taskType, model,
 COUNT(*) AS steps,
 AVG(success) AS successRate,
 AVG(CASE WHEN success=1 THEN prompt_tokens + completion_tokens END) AS avgTokensPerSuccess,
 AVG(CASE WHEN success=1 THEN cost_usd END) AS avgCostPerSuccess,
 AVG(iterations) AS avgIterations
 FROM step_runs s ${whereSql}
 GROUP BY task_type, model
 ORDER BY task_type, avgTokensPerSuccess ASC`
  ).all(...params);
  return rows.map((r) => ({
    taskType: String(r.taskType),
    model: String(r.model),
    steps: Number(r.steps),
    successRate: Number(r.successRate ?? 0),
    avgTokensPerSuccess: Number(r.avgTokensPerSuccess ?? 0),
    avgCostPerSuccess: Number(r.avgCostPerSuccess ?? 0),
    avgIterations: Number(r.avgIterations ?? 0)
  }));
}
1285
/**
 * Per routing objective: session count, average tokens and cost, and average
 * auto/user scores, aggregated from `sessions`.
 *
 * @param {{since?: string, until?: string}} [filter] - Optional date bounds.
 * @returns {Array<object>} Rows ordered by average tokens ascending; score
 *   averages stay `null` when SQL reports NULL.
 */
function objectiveEfficiency(filter = {}) {
  const { whereSql, params } = dateWhere(filter, "ts");
  const query = getDb().prepare(
    `SELECT objective,
 COUNT(*) AS sessions,
 AVG(prompt_tokens + completion_tokens) AS avgTokens,
 AVG(cost_usd) AS avgCostUsd,
 AVG(auto_score) AS avgAutoScore,
 AVG(user_score) AS avgUserScore
 FROM sessions ${whereSql}
 GROUP BY objective ORDER BY avgTokens ASC`
  );
  const nullableNumber = (value) => (value == null ? null : Number(value));
  return query.all(...params).map((row) => ({
    objective: String(row.objective),
    sessions: Number(row.sessions),
    avgTokens: Number(row.avgTokens ?? 0),
    avgCostUsd: Number(row.avgCostUsd ?? 0),
    avgAutoScore: nullableNumber(row.avgAutoScore),
    avgUserScore: nullableNumber(row.avgUserScore)
  }));
}
1306
/**
 * Per CLI command: run count and summed tokens/cost from `command_runs`.
 *
 * @param {{since?: string, until?: string}} [filter] - Optional date bounds.
 * @returns {Array<object>} Rows ordered by total cost descending; NULL sums
 *   become 0.
 */
function commandUsage(filter = {}) {
  const { whereSql, params } = dateWhere(filter, "ts");
  const query = getDb().prepare(
    `SELECT command, COUNT(*) AS runs,
 SUM(prompt_tokens) AS promptTokens,
 SUM(completion_tokens) AS completionTokens,
 SUM(cost_usd) AS costUsd
 FROM command_runs ${whereSql}
 GROUP BY command ORDER BY costUsd DESC`
  );
  const sumOrZero = (value) => Number(value ?? 0);
  return query.all(...params).map((row) => ({
    command: String(row.command),
    runs: Number(row.runs),
    promptTokens: sumOrZero(row.promptTokens),
    completionTokens: sumOrZero(row.completionTokens),
    costUsd: sumOrZero(row.costUsd)
  }));
}
1324
/**
 * Build a SQL WHERE clause bounding the local date of a millisecond timestamp.
 *
 * @param {{since?: string, until?: string}} filter - Inclusive date bounds;
 *   falsy bounds are ignored.
 * @param {string} tsCol - SQL expression for the millisecond timestamp. It is
 *   interpolated verbatim into the clause.
 * @returns {{whereSql: string, params: string[]}} Clause (empty string when no
 *   bound is set) and its positional parameters.
 */
function dateWhere(filter, tsCol) {
  const localDay = `date(${tsCol}/1000, 'unixepoch', 'localtime')`;
  const bounds = [
    [filter.since, ">="],
    [filter.until, "<="]
  ];
  const clauses = [];
  const params = [];
  for (const [bound, operator] of bounds) {
    if (!bound) continue;
    clauses.push(`${localDay} ${operator} ?`);
    params.push(bound);
  }
  const whereSql = clauses.length ? `WHERE ${clauses.join(" AND ")}` : "";
  return { whereSql, params };
}
1337
/**
 * Load up to 200 `sessions` rows not yet marked synced, as camelCase records.
 *
 * @returns {Array<object>} Records tagged with `_table: "sessions"`; the two
 *   score fields stay `null` when unset.
 */
function unsyncedSessions() {
  const pending = getDb().prepare(`SELECT * FROM sessions WHERE synced=0 LIMIT 200`).all();
  const nullableNumber = (value) => (value == null ? null : Number(value));
  return pending.map((row) => ({
    _table: "sessions",
    id: String(row.id),
    ts: Number(row.ts),
    date: String(row.date),
    goal: String(row.goal),
    command: String(row.command),
    objective: String(row.objective),
    plannedSteps: Number(row.planned_steps),
    completedSteps: Number(row.completed_steps),
    failedSteps: Number(row.failed_steps),
    autoScore: nullableNumber(row.auto_score),
    userScore: nullableNumber(row.user_score),
    promptTokens: Number(row.prompt_tokens),
    completionTokens: Number(row.completion_tokens),
    costUsd: Number(row.cost_usd),
    durationMs: Number(row.duration_ms)
  }));
}
1358
/**
 * Load up to 500 `step_runs` rows not yet marked synced, as camelCase records.
 *
 * @returns {Array<object>} Records; `success` is true only when the stored
 *   flag equals 1.
 */
function unsyncedStepRuns() {
  const pending = getDb().prepare(`SELECT * FROM step_runs WHERE synced=0 LIMIT 500`).all();
  const toRecord = (row) => ({
    id: Number(row.id),
    sessionId: String(row.session_id),
    stepNo: Number(row.step_no),
    taskType: String(row.task_type),
    skill: String(row.skill),
    model: String(row.model),
    provider: String(row.provider),
    iterations: Number(row.iterations),
    toolCalls: Number(row.tool_calls),
    promptTokens: Number(row.prompt_tokens),
    completionTokens: Number(row.completion_tokens),
    costUsd: Number(row.cost_usd),
    finishedBy: String(row.finished_by),
    success: Number(row.success) === 1,
    durationMs: Number(row.duration_ms)
  });
  return pending.map((row) => toRecord(row));
}
1378
/**
 * Load up to 500 `command_runs` rows not yet marked synced, as camelCase records.
 *
 * @returns {Array<object>} Records; falsy `session_id`, `args` and `objective`
 *   columns map to `undefined`.
 */
function unsyncedCommandRuns() {
  const pending = getDb().prepare(`SELECT * FROM command_runs WHERE synced=0 LIMIT 500`).all();
  const optionalString = (value) => (value ? String(value) : void 0);
  return pending.map((row) => ({
    id: Number(row.id),
    sessionId: optionalString(row.session_id),
    ts: Number(row.ts),
    date: String(row.date),
    command: String(row.command),
    args: optionalString(row.args),
    objective: optionalString(row.objective),
    promptTokens: Number(row.prompt_tokens),
    completionTokens: Number(row.completion_tokens),
    costUsd: Number(row.cost_usd),
    durationMs: Number(row.duration_ms)
  }));
}
1394
/**
 * Set `synced=1` on the given rows of a table.
 *
 * @param {string} table2 - Table name; interpolated verbatim into the SQL, so
 *   callers must pass a trusted identifier.
 * @param {Array<string|number>} ids - Primary-key values; no-op when empty.
 */
function markTableSynced(table2, ids) {
  if (!ids.length) {
    return;
  }
  const markRow = getDb().prepare(`UPDATE ${table2} SET synced=1 WHERE id=?`);
  for (const rowId of ids) {
    markRow.run(rowId);
  }
}
1399
/**
 * Insert or refresh one row of the `insights` table.
 *
 * A new row starts with `synced=0`. On conflict the metrics and `computed_at`
 * are refreshed, but `synced` is reset to 0 only when at least one metric
 * actually changed. Resetting it unconditionally would make every recompute
 * mark all insights as unsynced again, so each sync would re-upload everything.
 *
 * @param {object} i - Insight with `id`, `computedAt`, `taskType`, `model`,
 *   `provider`, `samples`, `successRate`, `avgTokens`, `baselineTokens`,
 *   `savingsPct`, `avgCostUsd`.
 */
function upsertInsight(i) {
  getDb().prepare(
    // In DO UPDATE, `insights.<col>` is the stored value and `excluded.<col>` the
    // incoming one; every right-hand side is evaluated against the pre-update row.
    `INSERT INTO insights (id, computed_at, task_type, model, provider, samples, success_rate,
 avg_tokens, baseline_tokens, savings_pct, avg_cost_usd, synced)
 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0)
 ON CONFLICT(id) DO UPDATE SET
 computed_at=excluded.computed_at, samples=excluded.samples,
 success_rate=excluded.success_rate, avg_tokens=excluded.avg_tokens,
 baseline_tokens=excluded.baseline_tokens, savings_pct=excluded.savings_pct,
 avg_cost_usd=excluded.avg_cost_usd,
 synced = CASE
   WHEN insights.samples = excluded.samples
    AND insights.success_rate = excluded.success_rate
    AND insights.avg_tokens = excluded.avg_tokens
    AND insights.baseline_tokens = excluded.baseline_tokens
    AND insights.savings_pct = excluded.savings_pct
    AND insights.avg_cost_usd = excluded.avg_cost_usd
   THEN insights.synced
   ELSE 0
 END`
  ).run(
    i.id,
    i.computedAt,
    i.taskType,
    i.model,
    i.provider,
    i.samples,
    i.successRate,
    i.avgTokens,
    i.baselineTokens,
    i.savingsPct,
    i.avgCostUsd
  );
}
1423
/**
 * Delete every `insights` row whose id is not in `validIds`.
 *
 * @param {Iterable<string>} validIds - Ids that must be kept.
 */
function deleteInsightsExcept(validIds) {
  const storedIds = getDb().prepare(`SELECT id FROM insights`).all().map((row) => String(row.id));
  const keep = new Set(validIds);
  const remove = getDb().prepare(`DELETE FROM insights WHERE id=?`);
  for (const storedId of storedIds) {
    if (keep.has(storedId)) continue;
    remove.run(storedId);
  }
}
1429
/**
 * All stored insights, highest savings first.
 *
 * @returns {Array<object>} Insight records as produced by `mapInsight`.
 */
function listInsights() {
  const query = getDb().prepare(`SELECT * FROM insights ORDER BY savings_pct DESC`);
  return query.all().map((row) => mapInsight(row));
}
1433
/**
 * Stored insights that are not yet marked synced.
 *
 * @returns {Array<object>} Insight records as produced by `mapInsight`.
 */
function unsyncedInsights() {
  const query = getDb().prepare(`SELECT * FROM insights WHERE synced=0`);
  return query.all().map((row) => mapInsight(row));
}
1437
/**
 * Convert a snake_case `insights` row into a camelCase insight record.
 *
 * @param {object} r - Raw database row.
 * @returns {object} Record with string ids/names and numeric metrics.
 */
function mapInsight(r) {
  const text = (column) => String(r[column]);
  const num = (column) => Number(r[column]);
  return {
    id: text("id"),
    computedAt: num("computed_at"),
    taskType: text("task_type"),
    model: text("model"),
    provider: text("provider"),
    samples: num("samples"),
    successRate: num("success_rate"),
    avgTokens: num("avg_tokens"),
    baselineTokens: num("baseline_tokens"),
    savingsPct: num("savings_pct"),
    avgCostUsd: num("avg_cost_usd")
  };
}
1027
1452
 
1028
1453
  // src/usage/report.ts
1029
1454
  function renderUsageReport(filter = {}) {
@@ -1066,8 +1491,180 @@ function renderUsageReport(filter = {}) {
1066
1491
  ].join("\n");
1067
1492
  }
1068
1493
 
1494
+ // src/usage/insights.ts
1495
+ var MIN_SAMPLES = 3;
1496
+ var MIN_SUCCESS = 0.7;
1497
+ var MIN_MARGIN = 0.2;
1498
/**
 * Median of a list of numbers; the input is not modified.
 *
 * @param {Iterable<number>} xs - Values to summarize.
 * @returns {number} The middle value, or the mean of the two middle values for
 *   an even count. An empty input yields NaN.
 */
function median(xs) {
  const ordered = [...xs];
  ordered.sort((left, right) => left - right);
  const half = Math.floor(ordered.length / 2);
  if (ordered.length % 2) {
    return ordered[half];
  }
  return (ordered[half - 1] + ordered[half]) / 2;
}
1503
/**
 * Recompute the efficiency playbook from all recorded step runs.
 *
 * Within each task type, models with at least `MIN_SAMPLES` steps, a success
 * rate of at least `MIN_SUCCESS` and a positive average token count are
 * "qualified". When two or more qualify, the baseline is the median of their
 * average tokens per successful step, and every model beating it by at least
 * `MIN_MARGIN` is upserted as an insight. Insights that no longer qualify are
 * deleted.
 *
 * @param {number} [now=Date.now()] - Timestamp stored as `computedAt`.
 * @returns {Array<object>} The stored insights after recomputation.
 */
function distillInsights(now = Date.now()) {
  const byTask = /* @__PURE__ */ new Map();
  for (const r of modelTaskEfficiency()) {
    const list = byTask.get(r.taskType) ?? [];
    list.push(r);
    byTask.set(r.taskType, list);
  }
  const valid = [];
  for (const [taskType, list] of byTask) {
    const qualified = list.filter(
      (r) => r.steps >= MIN_SAMPLES && r.successRate >= MIN_SUCCESS && r.avgTokensPerSuccess > 0
    );
    // A single qualified model has nothing to be compared against.
    if (qualified.length < 2) continue;
    const baseline = median(qualified.map((r) => r.avgTokensPerSuccess));
    for (const r of qualified) {
      const savings = 1 - r.avgTokensPerSuccess / baseline;
      if (savings < MIN_MARGIN) continue;
      const id = `${taskType}__${r.model}`;
      valid.push(id);
      // The provider is the segment before the first "/". `split("/")[0]` is
      // never nullish, so a slash-less id used to be recorded as its own
      // provider instead of the intended "unknown".
      const slash = r.model.indexOf("/");
      upsertInsight({
        id,
        computedAt: now,
        taskType,
        model: r.model,
        provider: slash > 0 ? r.model.slice(0, slash) : "unknown",
        samples: r.steps,
        successRate: r.successRate,
        avgTokens: r.avgTokensPerSuccess,
        baselineTokens: baseline,
        savingsPct: savings * 100,
        avgCostUsd: r.avgCostPerSuccess
      });
    }
  }
  deleteInsightsExcept(valid);
  return listInsights();
}
1542
/**
 * Index insights by `"<taskType>:<model>"` for routing lookups.
 *
 * @param {Iterable<object>} insights - Insight records.
 * @returns {Object<string, number>} Savings percentage per key; a later
 *   insight with the same key overwrites an earlier one.
 */
function insightBoostMap(insights) {
  const boosts = {};
  for (const { taskType, model, savingsPct } of insights) {
    const key = `${taskType}:${model}`;
    boosts[key] = savingsPct;
  }
  return boosts;
}
1547
/**
 * Render the efficiency playbook as terminal text.
 *
 * @param {Array<object>} insights - Insight records to display.
 * @returns {string} A heading plus either a hint explaining what is needed
 *   before anything can be distilled, or a table with one row per insight.
 */
function renderPlaybook(insights) {
  const heading = c.bold("Efficiency playbook");
  if (insights.length) {
    const rows = insights.map((insight) => [
      insight.taskType,
      c.green(insight.model),
      tokens(Math.round(insight.avgTokens)),
      tokens(Math.round(insight.baselineTokens)),
      c.green(`-${insight.savingsPct.toFixed(0)}%`),
      `${Math.round(insight.successRate * 100)}%`,
      String(insight.samples)
    ]);
    const subtitle = c.dim(" (the notably efficient approaches \u2014 this is what `poly sync` uploads)");
    return heading + subtitle + "\n" + table(
      ["Task", "Model", "Avg tok", "Baseline", "Savings", "Success", "n"],
      rows
    );
  }
  return heading + "\n" + c.dim(
    `Nothing distilled yet \u2014 needs \u22652 models with \u2265${MIN_SAMPLES} successful steps on the same task type,
where one beats the median by \u2265${MIN_MARGIN * 100}% tokens. Keep running tasks (vary models with -o / pins).`
  );
}
1567
+
1568
+ // src/usage/analyze.ts
1569
+ var MIN_SUCCESS_RATE = 0.5;
1570
/**
 * Render the full analytics report: playbook, best model per task, the raw
 * model × task table, per-objective efficiency and per-command usage.
 *
 * Note that the playbook is recomputed over all data (the filter is not passed
 * to `distillInsights`), while the three tables honor `filter`.
 *
 * @param {{since?: string, until?: string}} [filter] - Optional date bounds.
 * @returns {string} Report text, or a short hint when nothing is recorded.
 */
function renderAnalysis(filter = {}) {
  const modelTaskRows = modelTaskEfficiency(filter);
  const objectiveRows = objectiveEfficiency(filter);
  const commandRows = commandUsage(filter);
  const nothingRecorded = !modelTaskRows.length && !objectiveRows.length && !commandRows.length;
  if (nothingRecorded) {
    return c.dim('No analytics yet. Run `poly run "<task>"` a few times (and rate the result) first.');
  }

  const percent = (ratio) => `${Math.round(ratio * 100)}%`;
  const roundedTokens = (count) => tokens(Math.round(count));
  const sections = [renderPlaybook(distillInsights()), ""];

  if (modelTaskRows.length) {
    // Group observations by task type, keeping first-seen order.
    const grouped = /* @__PURE__ */ new Map();
    for (const row of modelTaskRows) {
      if (!grouped.has(row.taskType)) grouped.set(row.taskType, []);
      grouped.get(row.taskType).push(row);
    }
    const bestRows = [...grouped].map(([task, candidates]) => {
      const [best, runnerUp] = candidates
        .filter((row) => row.successRate >= MIN_SUCCESS_RATE && row.avgTokensPerSuccess > 0)
        .sort((a, b) => a.avgTokensPerSuccess - b.avgTokensPerSuccess);
      if (!best) {
        return [task, c.dim("(no reliable model yet)"), "-", "-", "-"];
      }
      return [
        task,
        c.green(best.model),
        roundedTokens(best.avgTokensPerSuccess),
        percent(best.successRate),
        runnerUp ? `${runnerUp.model} ${c.dim(roundedTokens(runnerUp.avgTokensPerSuccess))}` : c.dim("\u2014")
      ];
    });
    sections.push(
      c.bold("Minimum-token model per task") + c.dim(` (successful steps only, success \u2265 ${MIN_SUCCESS_RATE * 100}%)`)
    );
    sections.push(table(["Task", "Best model", "Avg tok/success", "Success", "Runner-up"], bestRows));
    sections.push("");
    sections.push(c.bold("Model \xD7 task efficiency (all observations)"));
    sections.push(
      table(
        ["Task", "Model", "Steps", "Success", "Avg tok", "Avg iters", "Avg cost"],
        modelTaskRows.map((row) => [
          row.taskType,
          row.model,
          String(row.steps),
          percent(row.successRate),
          row.avgTokensPerSuccess ? roundedTokens(row.avgTokensPerSuccess) : c.dim("-"),
          row.avgIterations.toFixed(1),
          row.avgCostPerSuccess ? usd(row.avgCostPerSuccess) : c.dim("-")
        ])
      )
    );
    sections.push("");
  }

  if (objectiveRows.length) {
    sections.push(c.bold("Approach efficiency") + c.dim(" (routing objective: tokens spent vs goal achievement)"));
    sections.push(
      table(
        ["Objective", "Sessions", "Avg tokens", "Avg cost", "Auto score", "Your rating"],
        objectiveRows.map((row) => [
          row.objective,
          String(row.sessions),
          roundedTokens(row.avgTokens),
          usd(row.avgCostUsd),
          row.avgAutoScore == null ? c.dim("-") : percent(row.avgAutoScore),
          row.avgUserScore == null ? c.dim("unrated") : `${row.avgUserScore.toFixed(1)}/9`
        ])
      )
    );
    const scored = objectiveRows.filter((row) => row.avgAutoScore != null);
    if (scored.length >= 2) {
      // Among objectives within 0.1 of the best auto score, pick the cheapest in tokens.
      const bestScore = Math.max(...scored.map((row) => row.avgAutoScore));
      const [winner] = scored
        .filter((row) => row.avgAutoScore >= bestScore - 0.1)
        .sort((a, b) => a.avgTokens - b.avgTokens);
      if (winner) {
        sections.push(
          c.green(
            `\u2192 Lowest-token approach with top-tier achievement: "${winner.objective}" (${tokens(Math.round(winner.avgTokens))} avg tokens, ${Math.round(winner.avgAutoScore * 100)}% auto score)`
          )
        );
      }
    }
    sections.push("");
  }

  if (commandRows.length) {
    sections.push(c.bold("Usage by command"));
    sections.push(
      table(
        ["Command", "Runs", "Prompt", "Compl.", "Cost"],
        commandRows.map((row) => [
          row.command,
          String(row.runs),
          tokens(row.promptTokens),
          tokens(row.completionTokens),
          usd(row.costUsd)
        ])
      )
    );
  }
  return sections.join("\n");
}
1665
+
1069
1666
  // src/usage/firestoreSync.ts
1070
- async function syncUsage(config) {
1667
+ async function syncUsage(config, opts = {}) {
1071
1668
  if (!config.firestore.enabled) {
1072
1669
  return { synced: 0, message: "Firestore sync is disabled (enable with `poly config firestore on`)." };
1073
1670
  }
@@ -1095,28 +1692,324 @@ async function syncUsage(config) {
1095
1692
  }
1096
1693
  }
1097
1694
  const fdb = fsMod.getFirestore();
1695
+ distillInsights();
1696
+ const insights = unsyncedInsights();
1697
+ if (insights.length) {
1698
+ const batch = fdb.batch();
1699
+ const col = fdb.collection("polymath_insights");
1700
+ for (const i of insights) {
1701
+ batch.set(col.doc(i.id), {
1702
+ computedAt: i.computedAt,
1703
+ taskType: i.taskType,
1704
+ model: i.model,
1705
+ provider: i.provider,
1706
+ samples: i.samples,
1707
+ successRate: i.successRate,
1708
+ avgTokens: i.avgTokens,
1709
+ baselineTokens: i.baselineTokens,
1710
+ savingsPct: i.savingsPct,
1711
+ avgCostUsd: i.avgCostUsd
1712
+ });
1713
+ }
1714
+ await batch.commit();
1715
+ markTableSynced("insights", insights.map((i) => i.id));
1716
+ }
1717
+ if (!opts.raw) {
1718
+ return {
1719
+ synced: insights.length,
1720
+ message: insights.length ? `Synced ${insights.length} efficiency insight(s) to polymath_insights. Raw logs stayed local (use --raw to push).` : "No new insights to sync \u2014 raw logs stay local by default (use --raw to push them)."
1721
+ };
1722
+ }
1098
1723
  const rows = unsyncedRows();
1099
- if (!rows.length) return { synced: 0, message: "Nothing to sync \u2014 all rows already pushed." };
1100
- const batch = fdb.batch();
1101
- const col = fdb.collection(config.firestore.collection);
1102
- for (const r of rows) {
1103
- const ref = col.doc(`${r.date}__${r.id}`);
1104
- batch.set(ref, {
1105
- ts: r.ts,
1106
- date: r.date,
1107
- provider: r.provider,
1108
- model: r.model,
1109
- taskType: r.taskType,
1110
- promptTokens: r.promptTokens,
1111
- completionTokens: r.completionTokens,
1112
- totalTokens: r.totalTokens,
1113
- costUsd: r.costUsd,
1114
- sessionId: r.sessionId ?? null
1115
- });
1724
+ if (!rows.length && !insights.length) return { synced: 0, message: "Nothing to sync \u2014 all rows already pushed." };
1725
+ if (rows.length) {
1726
+ const batch = fdb.batch();
1727
+ const col = fdb.collection(config.firestore.collection);
1728
+ for (const r of rows) {
1729
+ const ref = col.doc(`${r.date}__${r.id}`);
1730
+ batch.set(ref, {
1731
+ ts: r.ts,
1732
+ date: r.date,
1733
+ provider: r.provider,
1734
+ model: r.model,
1735
+ taskType: r.taskType,
1736
+ command: r.command ?? "run",
1737
+ promptTokens: r.promptTokens,
1738
+ completionTokens: r.completionTokens,
1739
+ totalTokens: r.totalTokens,
1740
+ costUsd: r.costUsd,
1741
+ sessionId: r.sessionId ?? null
1742
+ });
1743
+ }
1744
+ await batch.commit();
1745
+ markSynced(rows.map((r) => r.id));
1746
+ }
1747
+ return {
1748
+ synced: insights.length + rows.length,
1749
+ message: `Synced ${insights.length} insights + ${rows.length} raw rows to Firestore.`
1750
+ };
1751
+ }
1752
+
1753
+ // src/usage/dataconnect.ts
1754
/**
 * Obtains a Google OAuth2 access token through the firebase-admin SDK.
 *
 * Reuses the first already-initialized firebase-admin app when there is one.
 * Otherwise it initializes an app for `projectId`, preferring the service
 * account JSON in the FIREBASE_SERVICE_ACCOUNT_KEY environment variable and
 * falling back to application default credentials.
 *
 * @param {string} projectId - Project id passed to `initializeApp`.
 * @returns {Promise<string>} The `access_token` reported by the credential.
 * @throws {Error} When firebase-admin cannot be imported (original failure is
 *   kept as `cause`) or when the credential yields no access token.
 */
async function adminAccessToken(projectId) {
  let appMod;
  try {
    appMod = await import("firebase-admin/app");
  } catch (err) {
    throw new Error("firebase-admin is not installed. Run `npm install firebase-admin`.", { cause: err });
  }
  const { initializeApp, getApps, cert, applicationDefault } = appMod;
  let app = getApps()[0];
  if (!app) {
    const saJson = process.env.FIREBASE_SERVICE_ACCOUNT_KEY;
    if (saJson) {
      try {
        app = initializeApp({ credential: cert(JSON.parse(saJson)), projectId });
      } catch {
        // Best-effort fallback, but say so: a malformed key silently replaced by
        // ADC is very hard to diagnose. The parse error is deliberately not
        // printed because its message can echo part of the secret.
        console.error(
          "FIREBASE_SERVICE_ACCOUNT_KEY could not be used; falling back to application default credentials."
        );
        app = initializeApp({ credential: applicationDefault(), projectId });
      }
    } else {
      app = initializeApp({ credential: applicationDefault(), projectId });
    }
  }
  const token = await app.options.credential.getAccessToken();
  if (!token?.access_token) {
    // Without this guard callers would send the literal header "Bearer undefined".
    throw new Error("Could not obtain an access token from the Firebase credential.");
  }
  return token.access_token;
}
1778
/**
 * Executes one GraphQL operation against the Firebase Data Connect REST
 * endpoint (`...:executeGraphql`).
 *
 * @param {{projectId: string, location: string, serviceId: string}} cfg2 -
 *   Identifies the Data Connect service; each value becomes one URL path segment.
 * @param {string} token - Bearer token sent in the Authorization header.
 * @param {string} query - GraphQL document.
 * @param {object} variables - Variables for the document.
 * @returns {Promise<*>} The `data` member of the JSON response (may be undefined).
 * @throws {Error} On a non-2xx HTTP status, or when the response carries a
 *   non-empty `errors` array. Error text is capped at 300 characters.
 */
async function executeGraphql(cfg2, token, query, variables) {
  // Encode each segment so an unexpected character in user config cannot
  // change the shape of the request path. Plain ids pass through unchanged.
  const seg = encodeURIComponent;
  const url = `https://firebasedataconnect.googleapis.com/v1/projects/${seg(cfg2.projectId)}/locations/${seg(cfg2.location)}/services/${seg(cfg2.serviceId)}:executeGraphql`;
  const res = await fetch(url, {
    method: "POST",
    headers: { Authorization: `Bearer ${token}`, "Content-Type": "application/json" },
    body: JSON.stringify({ query, variables })
  });
  if (!res.ok) {
    // The body is only used for diagnostics; never let reading it hide the status.
    const text = await res.text().catch(() => "");
    throw new Error(`Data Connect ${res.status}: ${text.slice(0, 300)}`);
  }
  const json = await res.json();
  if (json.errors?.length) {
    throw new Error(`Data Connect GraphQL errors: ${JSON.stringify(json.errors).slice(0, 300)}`);
  }
  return json.data;
}
1794
// Converts an epoch-milliseconds value into an ISO-8601 UTC timestamp string.
var iso = function (ms) {
  const moment = new Date(ms);
  return moment.toISOString();
};
1795
// GraphQL mutations issued by syncDataConnect, one per local ledger table.
const DC_MUTATIONS = {
  insight: `mutation UpsertInsight($id: String!, $computedAt: Timestamp!, $taskType: String!,
    $model: String!, $provider: String!, $samples: Int!, $successRate: Float!,
    $avgTokens: Float!, $baselineTokens: Float!, $savingsPct: Float!, $avgCostUsd: Float!) {
    insight_upsert(data: {
      id: $id, computedAt: $computedAt, taskType: $taskType, model: $model,
      provider: $provider, samples: $samples, successRate: $successRate,
      avgTokens: $avgTokens, baselineTokens: $baselineTokens,
      savingsPct: $savingsPct, avgCostUsd: $avgCostUsd
    })
  }`,
  session: `mutation UpsertSession($id: String!, $startedAt: Timestamp!, $date: Date!, $goal: String!,
    $command: String!, $objective: String!, $plannedSteps: Int!, $completedSteps: Int!,
    $failedSteps: Int!, $autoScore: Float, $userScore: Int, $promptTokens: Int!,
    $completionTokens: Int!, $costUsd: Float!, $durationMs: Int!) {
    session_upsert(data: {
      id: $id, startedAt: $startedAt, date: $date, goal: $goal, command: $command,
      objective: $objective, plannedSteps: $plannedSteps, completedSteps: $completedSteps,
      failedSteps: $failedSteps, autoScore: $autoScore, userScore: $userScore,
      promptTokens: $promptTokens, completionTokens: $completionTokens,
      costUsd: $costUsd, durationMs: $durationMs
    })
  }`,
  step: `mutation InsertStep($sessionId: String!, $stepNo: Int!, $taskType: String!, $skill: String!,
    $model: String!, $provider: String!, $iterations: Int!, $toolCalls: Int!,
    $promptTokens: Int!, $completionTokens: Int!, $costUsd: Float!,
    $finishedBy: String!, $success: Boolean!, $durationMs: Int!) {
    stepRun_insert(data: {
      sessionId: $sessionId, stepNo: $stepNo, taskType: $taskType, skill: $skill,
      model: $model, provider: $provider, iterations: $iterations, toolCalls: $toolCalls,
      promptTokens: $promptTokens, completionTokens: $completionTokens, costUsd: $costUsd,
      finishedBy: $finishedBy, success: $success, durationMs: $durationMs
    })
  }`,
  command: `mutation InsertCommand($sessionId: String, $ts: Timestamp!, $date: Date!, $command: String!,
    $args: String, $objective: String, $promptTokens: Int!, $completionTokens: Int!,
    $costUsd: Float!, $durationMs: Int!) {
    commandRun_insert(data: {
      sessionId: $sessionId, ts: $ts, date: $date, command: $command, args: $args,
      objective: $objective, promptTokens: $promptTokens, completionTokens: $completionTokens,
      costUsd: $costUsd, durationMs: $durationMs
    })
  }`,
  call: `mutation InsertCall($sessionId: String, $ts: Timestamp!, $date: Date!, $command: String!,
    $taskType: String!, $model: String!, $provider: String!, $promptTokens: Int!,
    $completionTokens: Int!, $totalTokens: Int!, $costUsd: Float!) {
    modelCall_insert(data: {
      sessionId: $sessionId, ts: $ts, date: $date, command: $command, taskType: $taskType,
      model: $model, provider: $provider, promptTokens: $promptTokens,
      completionTokens: $completionTokens, totalTokens: $totalTokens, costUsd: $costUsd
    })
  }`
};

// Sends rows one by one and then reports the ids that were actually delivered.
// If a send fails part-way, the rows delivered before the failure are still
// reported, so a later sync does not push them a second time.
async function pushDataConnectRows(rows, send, markDone) {
  const sentIds = [];
  try {
    for (const row of rows) {
      await send(row);
      sentIds.push(row.id);
    }
  } finally {
    // Full success (including an empty batch) keeps the original single call;
    // on failure only call when there is partial progress worth recording.
    if (sentIds.length > 0 || rows.length === 0) markDone(sentIds);
  }
}

/**
 * Pushes locally recorded data to Firebase Data Connect.
 *
 * Distilled insights are always pushed. The raw ledger (sessions, step runs,
 * command runs, model calls) is pushed only when `opts.raw` is truthy. Rows
 * are sent sequentially, one mutation per row, and marked synced locally.
 *
 * @param {object} config - Loaded CLI config; reads `config.dataconnect`
 *   (`enabled`, `location`, `serviceId`) and `config.firestore.projectId`.
 * @param {{raw?: boolean}} [opts] - `raw: true` also pushes the raw ledger.
 * @returns {Promise<{insights: number, sessions: number, steps: number,
 *   commands: number, calls: number, message: string}>} Counts of pushed rows
 *   plus a human-readable summary.
 * @throws {Error} Propagates token and mutation failures. Rows delivered
 *   before the failure have already been marked synced.
 */
async function syncDataConnect(config, opts = {}) {
  const dc = config.dataconnect;
  if (!dc?.enabled) {
    return { insights: 0, sessions: 0, steps: 0, commands: 0, calls: 0, message: "Data Connect sync is disabled (enable with `poly config dataconnect on`)." };
  }
  const projectId = config.firestore.projectId;
  const token = await adminAccessToken(projectId);
  const cfg2 = { projectId, location: dc.location, serviceId: dc.serviceId };
  const run = (query, variables) => executeGraphql(cfg2, token, query, variables);
  const markTable = (table) => (ids) => markTableSynced(table, ids);

  distillInsights();
  const insights = unsyncedInsights();
  await pushDataConnectRows(
    insights,
    (i) => run(DC_MUTATIONS.insight, {
      id: i.id,
      computedAt: iso(i.computedAt),
      taskType: i.taskType,
      model: i.model,
      provider: i.provider,
      samples: i.samples,
      successRate: i.successRate,
      avgTokens: i.avgTokens,
      baselineTokens: i.baselineTokens,
      savingsPct: i.savingsPct,
      avgCostUsd: i.avgCostUsd
    }),
    markTable("insights")
  );
  if (!opts.raw) {
    return {
      insights: insights.length,
      sessions: 0,
      steps: 0,
      commands: 0,
      calls: 0,
      message: `Synced ${insights.length} efficiency insight(s) to Data Connect (${cfg2.serviceId}@${cfg2.location}). Raw logs stayed local \u2014 use \`poly sync --raw\` to push everything.`
    };
  }

  const sessions = unsyncedSessions();
  await pushDataConnectRows(
    sessions,
    (s) => run(DC_MUTATIONS.session, {
      id: s.id,
      startedAt: iso(s.ts),
      date: s.date,
      goal: s.goal,
      command: s.command,
      objective: s.objective,
      plannedSteps: s.plannedSteps,
      completedSteps: s.completedSteps,
      failedSteps: s.failedSteps,
      autoScore: s.autoScore,
      userScore: s.userScore,
      promptTokens: s.promptTokens,
      completionTokens: s.completionTokens,
      costUsd: s.costUsd,
      durationMs: s.durationMs
    }),
    markTable("sessions")
  );

  const steps = unsyncedStepRuns();
  await pushDataConnectRows(
    steps,
    (st) => run(DC_MUTATIONS.step, {
      sessionId: st.sessionId,
      stepNo: st.stepNo,
      taskType: st.taskType,
      skill: st.skill,
      model: st.model,
      provider: st.provider,
      iterations: st.iterations,
      toolCalls: st.toolCalls,
      promptTokens: st.promptTokens,
      completionTokens: st.completionTokens,
      costUsd: st.costUsd,
      finishedBy: st.finishedBy,
      // NOTE(review): the mutation declares $success as Boolean! — confirm the
      // local store yields a real boolean here rather than 0/1.
      success: st.success,
      durationMs: st.durationMs
    }),
    markTable("step_runs")
  );

  const commands = unsyncedCommandRuns();
  await pushDataConnectRows(
    commands,
    (cr) => run(DC_MUTATIONS.command, {
      sessionId: cr.sessionId ?? null,
      ts: iso(cr.ts),
      date: cr.date,
      command: cr.command,
      args: cr.args ?? null,
      objective: cr.objective ?? null,
      promptTokens: cr.promptTokens,
      completionTokens: cr.completionTokens,
      costUsd: cr.costUsd,
      durationMs: cr.durationMs
    }),
    markTable("command_runs")
  );

  const calls = unsyncedRows();
  await pushDataConnectRows(
    calls,
    (u) => run(DC_MUTATIONS.call, {
      sessionId: u.sessionId ?? null,
      ts: iso(u.ts),
      date: u.date,
      command: u.command ?? "run",
      taskType: u.taskType,
      model: u.model,
      provider: u.provider,
      promptTokens: u.promptTokens,
      completionTokens: u.completionTokens,
      totalTokens: u.totalTokens,
      costUsd: u.costUsd
    }),
    (ids) => markSynced(ids)
  );

  return {
    insights: insights.length,
    sessions: sessions.length,
    steps: steps.length,
    commands: commands.length,
    calls: calls.length,
    message: `Synced ${insights.length} insights + raw: ${sessions.length} sessions, ${steps.length} steps, ${commands.length} commands, ${calls.length} calls (${cfg2.serviceId}@${cfg2.location}).`
  };
}
1985
+
1986
+ // src/usage/logger.ts
1987
/**
 * Formats a Date as `YYYY-MM-DD` using the process's local time zone.
 * Month and day are zero-padded to two digits; the year is used as-is.
 *
 * @param {Date} [d] - Date to format; defaults to the current time.
 * @returns {string} The local calendar date.
 */
function localDate(d = /* @__PURE__ */ new Date()) {
  const twoDigits = (value) => String(value).padStart(2, "0");
  return [d.getFullYear(), twoDigits(d.getMonth() + 1), twoDigits(d.getDate())].join("-");
}
1993
/**
 * Derives the provider name from a model id of the form `provider/model`.
 *
 * @param {string} modelId - Model identifier, e.g. `openai/gpt-4o`.
 * @returns {string} The text before the first `/` (the whole id when it has
 *   no `/`), or `"unknown"` when that prefix is empty or `modelId` is not a
 *   string.
 */
function providerOf(modelId) {
  if (typeof modelId !== "string") return "unknown";
  // split() always yields at least one element, so a nullish check can never
  // trigger the fallback; test for an empty prefix instead.
  const [prefix] = modelId.split("/");
  return prefix || "unknown";
}
1996
/**
 * Builds a usage-ledger entry for one model completion, hands it to
 * `recordUsage`, and returns it.
 *
 * @param {object} result - Completion result; reads `model`, `usage`
 *   (`promptTokens`, `completionTokens`, `totalTokens`) and `costUsd`.
 * @param {string} taskType - Task type stored on the entry.
 * @param {string} [sessionId] - Session the call belongs to.
 * @param {string} [command="run"] - CLI command that triggered the call.
 * @returns {object} The entry that was recorded.
 */
function logCompletion(result, taskType, sessionId, command = "run") {
  const stamp = /* @__PURE__ */ new Date();
  const modelId = result.model;
  // Timestamp, local date and provider are derived first; the token and cost
  // figures are copied from the result afterwards.
  const entry = {
    ts: stamp.getTime(),
    date: localDate(stamp),
    provider: providerOf(modelId),
    model: modelId,
    taskType
  };
  const usage = result.usage;
  entry.promptTokens = usage.promptTokens;
  entry.completionTokens = usage.completionTokens;
  entry.totalTokens = usage.totalTokens;
  entry.costUsd = result.costUsd;
  entry.sessionId = sessionId;
  entry.command = command;
  recordUsage(entry);
  return entry;
}
1121
2014
 
1122
2015
  // src/tui/App.tsx
@@ -1274,46 +2167,31 @@ ${stderr}`)) };
1274
2167
  }
1275
2168
  }
1276
2169
 
1277
- // src/usage/logger.ts
1278
- function localDate(d = /* @__PURE__ */ new Date()) {
1279
- const y = d.getFullYear();
1280
- const m = String(d.getMonth() + 1).padStart(2, "0");
1281
- const day = String(d.getDate()).padStart(2, "0");
1282
- return `${y}-${m}-${day}`;
1283
- }
1284
- function providerOf(modelId) {
1285
- return modelId.split("/")[0] ?? "unknown";
1286
- }
1287
- function logCompletion(result, taskType, sessionId) {
1288
- const now = /* @__PURE__ */ new Date();
1289
- const entry = {
1290
- ts: now.getTime(),
1291
- date: localDate(now),
1292
- provider: providerOf(result.model),
1293
- model: result.model,
1294
- taskType,
1295
- promptTokens: result.usage.promptTokens,
1296
- completionTokens: result.usage.completionTokens,
1297
- totalTokens: result.usage.totalTokens,
1298
- costUsd: result.costUsd,
1299
- sessionId
1300
- };
1301
- recordUsage(entry);
1302
- return entry;
1303
- }
1304
-
1305
2170
  // src/agent/loop.ts
1306
2171
  var MAX_ITERS_PER_STEP = 6;
1307
2172
  async function runAgent(goal, deps, emit) {
1308
2173
  const { client: client2, models, policy, sessionId, cwd } = deps;
1309
2174
  let totalCostUsd = 0;
1310
2175
  let totalTokens = 0;
2176
+ let totalPromptTokens = 0;
2177
+ let totalCompletionTokens = 0;
1311
2178
  let calls = 0;
2179
+ const sessionStart = Date.now();
2180
+ let completedSteps = 0;
2181
+ let failedSteps = 0;
1312
2182
  const planRoute = route("plan", models, policy);
1313
2183
  let plan;
1314
2184
  if (planRoute) {
1315
2185
  try {
1316
- plan = await planRequest(goal, client2, planRoute.model);
2186
+ plan = await planRequest(goal, client2, planRoute.model, (result) => {
2187
+ const entry = logCompletion(result, "plan", sessionId);
2188
+ emit({ type: "usage", entry });
2189
+ totalCostUsd += entry.costUsd;
2190
+ totalTokens += entry.totalTokens;
2191
+ totalPromptTokens += entry.promptTokens;
2192
+ totalCompletionTokens += entry.completionTokens;
2193
+ calls++;
2194
+ });
1317
2195
  } catch {
1318
2196
  plan = heuristicPlan(goal);
1319
2197
  }
@@ -1321,6 +2199,15 @@ async function runAgent(goal, deps, emit) {
1321
2199
  plan = heuristicPlan(goal);
1322
2200
  }
1323
2201
  emit({ type: "plan", plan, planModel: planRoute?.model.id ?? "heuristic" });
2202
+ startSession({
2203
+ id: sessionId,
2204
+ ts: sessionStart,
2205
+ date: localDate2(),
2206
+ goal,
2207
+ command: "run",
2208
+ objective: policy.objective,
2209
+ plannedSteps: plan.steps.length
2210
+ });
1324
2211
  const toolCtx = {
1325
2212
  cwd,
1326
2213
  allowWrite: deps.allowWrite,
@@ -1333,6 +2220,7 @@ async function runAgent(goal, deps, emit) {
1333
2220
  completionTokens: step.estCompletionTokens
1334
2221
  });
1335
2222
  if (!r) {
2223
+ failedSteps++;
1336
2224
  emit({ type: "error", message: `No capable model for step ${step.id} (${step.type}).` });
1337
2225
  continue;
1338
2226
  }
@@ -1343,55 +2231,113 @@ async function runAgent(goal, deps, emit) {
1343
2231
  { role: "system", content: stepSystemPrompt(goal, step, priorSummaries, useTools) },
1344
2232
  { role: "user", content: step.description }
1345
2233
  ];
2234
+ const stepStart = Date.now();
2235
+ let stepPrompt = 0;
2236
+ let stepCompletion = 0;
2237
+ let stepCost = 0;
2238
+ let stepToolCalls = 0;
2239
+ let iterations = 0;
2240
+ let finishedBy = "max-iters";
1346
2241
  let summary = "";
1347
- for (let iter = 0; iter < MAX_ITERS_PER_STEP; iter++) {
1348
- const gen = client2.stream(
1349
- {
1350
- model: model.id,
1351
- messages,
1352
- tools: useTools ? TOOL_SCHEMAS : void 0,
1353
- temperature: 0.2,
1354
- maxTokens: 2e3
1355
- },
1356
- model.pricing
1357
- );
1358
- let next = await gen.next();
1359
- while (!next.done) {
1360
- emit({ type: "text", delta: next.value });
1361
- next = await gen.next();
1362
- }
1363
- const result = next.value;
1364
- const entry = logCompletion(result, step.type, sessionId);
1365
- emit({ type: "usage", entry });
1366
- totalCostUsd += entry.costUsd;
1367
- totalTokens += entry.totalTokens;
1368
- calls++;
1369
- if (result.toolCalls.length && useTools) {
1370
- messages.push({ role: "assistant", content: result.content, tool_calls: result.toolCalls });
1371
- let finished = false;
1372
- for (const tc of result.toolCalls) {
1373
- emit({ type: "tool-call", name: tc.function.name, args: tc.function.arguments });
1374
- const outcome = executeTool(tc.function.name, tc.function.arguments, toolCtx);
1375
- emit({ type: "tool-result", name: tc.function.name, result: outcome.result });
1376
- messages.push({ role: "tool", tool_call_id: tc.id, name: tc.function.name, content: outcome.result });
1377
- if (outcome.finishSummary != null) {
1378
- summary = outcome.finishSummary;
1379
- finished = true;
2242
+ try {
2243
+ for (let iter = 0; iter < MAX_ITERS_PER_STEP; iter++) {
2244
+ iterations = iter + 1;
2245
+ const gen = client2.stream(
2246
+ {
2247
+ model: model.id,
2248
+ messages,
2249
+ tools: useTools ? TOOL_SCHEMAS : void 0,
2250
+ temperature: 0.2,
2251
+ maxTokens: 2e3
2252
+ },
2253
+ model.pricing
2254
+ );
2255
+ let next = await gen.next();
2256
+ while (!next.done) {
2257
+ emit({ type: "text", delta: next.value });
2258
+ next = await gen.next();
2259
+ }
2260
+ const result = next.value;
2261
+ const entry = logCompletion(result, step.type, sessionId);
2262
+ emit({ type: "usage", entry });
2263
+ totalCostUsd += entry.costUsd;
2264
+ totalTokens += entry.totalTokens;
2265
+ totalPromptTokens += entry.promptTokens;
2266
+ totalCompletionTokens += entry.completionTokens;
2267
+ stepPrompt += entry.promptTokens;
2268
+ stepCompletion += entry.completionTokens;
2269
+ stepCost += entry.costUsd;
2270
+ calls++;
2271
+ if (result.toolCalls.length && useTools) {
2272
+ messages.push({ role: "assistant", content: result.content, tool_calls: result.toolCalls });
2273
+ let finished = false;
2274
+ for (const tc of result.toolCalls) {
2275
+ stepToolCalls++;
2276
+ emit({ type: "tool-call", name: tc.function.name, args: tc.function.arguments });
2277
+ const outcome = executeTool(tc.function.name, tc.function.arguments, toolCtx);
2278
+ emit({ type: "tool-result", name: tc.function.name, result: outcome.result });
2279
+ messages.push({ role: "tool", tool_call_id: tc.id, name: tc.function.name, content: outcome.result });
2280
+ if (outcome.finishSummary != null) {
2281
+ summary = outcome.finishSummary;
2282
+ finished = true;
2283
+ }
2284
+ }
2285
+ if (finished) {
2286
+ finishedBy = "finish-tool";
2287
+ break;
1380
2288
  }
2289
+ continue;
1381
2290
  }
1382
- if (finished) break;
1383
- continue;
2291
+ summary = result.content || summary;
2292
+ if (summary) finishedBy = "text";
2293
+ break;
1384
2294
  }
1385
- summary = result.content || summary;
1386
- break;
2295
+ } catch (err) {
2296
+ finishedBy = "error";
2297
+ emit({ type: "error", message: `Step ${step.id} failed: ${err?.message ?? err}` });
1387
2298
  }
2299
+ const success = finishedBy === "finish-tool" || finishedBy === "text";
2300
+ if (success) completedSteps++;
2301
+ else failedSteps++;
2302
+ recordStepRun({
2303
+ sessionId,
2304
+ stepNo: step.id,
2305
+ taskType: step.type,
2306
+ skill: TASK_SKILL[step.type],
2307
+ model: model.id,
2308
+ provider: model.provider,
2309
+ iterations,
2310
+ toolCalls: stepToolCalls,
2311
+ promptTokens: stepPrompt,
2312
+ completionTokens: stepCompletion,
2313
+ costUsd: stepCost,
2314
+ finishedBy,
2315
+ success,
2316
+ durationMs: Date.now() - stepStart
2317
+ });
1388
2318
  if (!summary) summary = "(no summary)";
1389
2319
  priorSummaries.push(`Step ${step.id} (${step.type}): ${summary}`);
1390
2320
  emit({ type: "step-end", step, summary });
1391
2321
  }
2322
+ finishSession(sessionId, {
2323
+ plannedSteps: plan.steps.length,
2324
+ completedSteps,
2325
+ failedSteps,
2326
+ autoScore: plan.steps.length ? completedSteps / plan.steps.length : null,
2327
+ promptTokens: totalPromptTokens,
2328
+ completionTokens: totalCompletionTokens,
2329
+ costUsd: totalCostUsd,
2330
+ durationMs: Date.now() - sessionStart
2331
+ });
1392
2332
  emit({ type: "done", totalCostUsd, totalTokens, calls });
1393
2333
  return { totalCostUsd, totalTokens, calls };
1394
2334
  }
2335
/**
 * Returns the local-time calendar date of `d` as `YYYY-MM-DD`
 * (month and day zero-padded to two digits).
 *
 * @param {Date} [d] - Date to format; defaults to now.
 * @returns {string} Formatted local date.
 */
function localDate2(d = /* @__PURE__ */ new Date()) {
  const year = d.getFullYear();
  const month = `${d.getMonth() + 1}`.padStart(2, "0");
  const dayOfMonth = `${d.getDate()}`.padStart(2, "0");
  return year + "-" + month + "-" + dayOfMonth;
}
1395
2341
  function stepSystemPrompt(goal, step, priorSummaries, useTools) {
1396
2342
  const context = priorSummaries.length ? `
1397
2343
 
@@ -1418,6 +2364,7 @@ function App(props) {
1418
2364
  const [cost, setCost] = useState(0);
1419
2365
  const [tok, setTok] = useState(0);
1420
2366
  const [calls, setCalls] = useState(0);
2367
+ const [rated, setRated] = useState(null);
1421
2368
  const push = useCallback((text, color) => {
1422
2369
  setLog((l) => [...l, { key: l.length, text, color }]);
1423
2370
  }, []);
@@ -1484,7 +2431,7 @@ function App(props) {
1484
2431
  } catch (err) {
1485
2432
  push(`Fatal: ${err?.message ?? err}`, "red");
1486
2433
  }
1487
- setPhase("done");
2434
+ setPhase("rate");
1488
2435
  }, [goal, props, push]);
1489
2436
  useInput((input, key) => {
1490
2437
  if (phase === "preview") {
@@ -1493,6 +2440,18 @@ function App(props) {
1493
2440
  setDraft(goal);
1494
2441
  setPhase("input");
1495
2442
  } else if (input === "q") exit();
2443
+ } else if (phase === "rate") {
2444
+ if (/^[0-9]$/.test(input)) {
2445
+ const score = parseInt(input, 10);
2446
+ try {
2447
+ setUserScore(props.sessionId, score);
2448
+ } catch {
2449
+ }
2450
+ setRated(score);
2451
+ setPhase("done");
2452
+ } else if (key.return || input === "q") {
2453
+ setPhase("done");
2454
+ }
1496
2455
  } else if (phase === "done") {
1497
2456
  if (input === "q" || key.return) exit();
1498
2457
  }
@@ -1516,12 +2475,26 @@ function App(props) {
1516
2475
  )
1517
2476
  ] }),
1518
2477
  phase === "preview" && rec && /* @__PURE__ */ jsx(Preview, { rec }),
1519
- (phase === "running" || phase === "done") && /* @__PURE__ */ jsxs(Box, { flexDirection: "column", marginTop: 1, children: [
2478
+ (phase === "running" || phase === "rate" || phase === "done") && /* @__PURE__ */ jsxs(Box, { flexDirection: "column", marginTop: 1, children: [
1520
2479
  log.slice(-18).map((l) => /* @__PURE__ */ jsx(Text, { color: l.color, children: l.text }, l.key)),
1521
2480
  phase === "running" && /* @__PURE__ */ jsxs(Text, { color: "cyan", children: [
1522
2481
  /* @__PURE__ */ jsx(Spinner, { type: "dots" }),
1523
2482
  " working\u2026"
1524
2483
  ] }),
2484
+ phase === "rate" && /* @__PURE__ */ jsxs(Text, { children: [
2485
+ /* @__PURE__ */ jsxs(Text, { color: "green", children: [
2486
+ "\u2713 Done \xB7 ",
2487
+ calls,
2488
+ " calls \xB7 ",
2489
+ tokens(tok),
2490
+ " tokens \xB7 ",
2491
+ usd(cost)
2492
+ ] }),
2493
+ "\n",
2494
+ /* @__PURE__ */ jsx(Text, { color: "cyan", children: "How well was your goal achieved? " }),
2495
+ /* @__PURE__ */ jsx(Text, { color: "yellow", children: "[0-9]" }),
2496
+ /* @__PURE__ */ jsx(Text, { color: "gray", children: " (9 = perfect \xB7 enter = skip) \u2014 feeds `poly analyze`" })
2497
+ ] }),
1525
2498
  phase === "done" && /* @__PURE__ */ jsxs(Text, { color: "green", children: [
1526
2499
  "\u2713 Done \xB7 ",
1527
2500
  calls,
@@ -1529,6 +2502,7 @@ function App(props) {
1529
2502
  tokens(tok),
1530
2503
  " tokens \xB7 ",
1531
2504
  usd(cost),
2505
+ rated != null ? ` \xB7 rated ${rated}/9` : "",
1532
2506
  " \u2014 press q to quit"
1533
2507
  ] })
1534
2508
  ] })
@@ -1596,27 +2570,73 @@ function truncate2(s, n) {
1596
2570
 
1597
2571
  // src/index.ts
1598
2572
  var program = new Command();
1599
- program.name("poly").description("Polymath \u2014 cost-optimized, multi-model TUI coding agent").version("0.1.0");
2573
+ program.name("poly").description("Polymath \u2014 cost-optimized, multi-model TUI coding agent").version("0.3.0");
1600
2574
  // Builds an OpenRouterClient from the loaded CLI config.
  // The API key comes from resolveApiKey(config); referer and title are passed through.
  // localBaseUrl is only supplied when config.local.enabled is truthy, otherwise it is undefined.
  function client(config) {
1601
2575
  return new OpenRouterClient({
1602
2576
  apiKey: resolveApiKey(config),
1603
2577
  referer: config.referer,
1604
- title: config.title
2578
+ title: config.title,
2579
+ localBaseUrl: config.local.enabled ? config.local.baseUrl : void 0
1605
2580
  });
1606
2581
  }
1607
2582
  // Assembles the routing policy object from config plus per-command CLI options.
  // - objective: opts.objective when truthy, else config.defaultObjective.
  // - maxCostPerCallUsd: opts.maxCost parsed with parseFloat when it is not null/undefined,
  //   else config.maxCostPerCallUsd; reported as undefined when the value is not finite.
  // - pinned: taken from config.pinned.
  // - empirical: insightBoostMap(listInsights()), or undefined when that map has no keys
  //   or when computing it throws.
  function buildPolicy(config, opts) {
1608
2583
  const objective = opts.objective || config.defaultObjective;
1609
2584
  const maxCost = opts.maxCost != null ? parseFloat(opts.maxCost) : config.maxCostPerCallUsd;
2585
+ let empirical;
2586
+ try {
2587
+ empirical = insightBoostMap(listInsights());
2588
+ if (!Object.keys(empirical).length) empirical = void 0;
2589
+ } catch {
  // Best-effort: a failure while loading insights leaves the policy without empirical data.
2590
+ empirical = void 0;
2591
+ }
1610
2592
  return {
1611
2593
  objective,
1612
2594
  maxCostPerCallUsd: Number.isFinite(maxCost) ? maxCost : void 0,
1613
- pinned: config.pinned
2595
+ pinned: config.pinned,
2596
+ empirical
1614
2597
  };
1615
2598
  }
2599
/**
 * Renders `d` as a `YYYY-MM-DD` string in local time, zero-padding the
 * month and the day of month to two digits.
 *
 * @param {Date} [d] - Date to render; defaults to the current moment.
 * @returns {string} Local calendar date.
 */
function localDate3(d = /* @__PURE__ */ new Date()) {
  const [mm, dd] = [d.getMonth() + 1, d.getDate()].map((part) => String(part).padStart(2, "0"));
  return `${d.getFullYear()}-${mm}-${dd}`;
}
2605
/**
 * Records one CLI command invocation via `recordCommandRun`.
 *
 * Tracking is best-effort: any error raised while building or storing the
 * record is swallowed so that it can never fail the command itself.
 *
 * @param {object} opts - Reads `command`, `startedAt` (epoch ms), and the
 *   optional `sessionId`, `args`, `objective`, `promptTokens`,
 *   `completionTokens`, `costUsd`. `args` is cut to 300 characters and the
 *   numeric totals default to 0 when null/undefined.
 * @returns {void}
 */
function trackCommand(opts) {
  try {
    const began = opts.startedAt;
    const run = {
      sessionId: opts.sessionId,
      ts: began,
      date: localDate3(new Date(began)),
      command: opts.command,
      args: opts.args?.slice(0, 300),
      objective: opts.objective,
      promptTokens: opts.promptTokens ?? 0,
      completionTokens: opts.completionTokens ?? 0,
      costUsd: opts.costUsd ?? 0,
      durationMs: Date.now() - began
    };
    recordCommandRun(run);
  } catch {
    // Intentionally ignored: usage tracking must never break a command.
  }
}
1616
2622
  async function loadCatalog(config, refresh = false) {
1617
- const models = await getModels(client(config), { refresh });
2623
+ const cl = client(config);
2624
+ const hasKey = !!resolveApiKey(config);
2625
+ let models = [];
2626
+ try {
2627
+ models = await getModels(cl, { refresh });
2628
+ } catch (e) {
2629
+ if (!config.local.enabled) throw e;
2630
+ }
2631
+ if (config.local.enabled) {
2632
+ const local = await getLocalModels(cl);
2633
+ if (!local.length) {
2634
+ console.error(c.yellow(`Local server (${config.local.baseUrl}) returned no models \u2014 is it running?`));
2635
+ }
2636
+ models = hasKey ? [...local, ...models] : local;
2637
+ }
1618
2638
  if (!models.length) {
1619
- console.error(c.red("Could not load the model catalog. Check your connection."));
2639
+ console.error(c.red("No models available. Check your connection, or `poly config local on` with a running Ollama/LM Studio."));
1620
2640
  process.exit(1);
1621
2641
  }
1622
2642
  return models;
@@ -1625,22 +2645,26 @@ program.command("login").description("Connect Polymath to OpenRouter (set/replac
1625
2645
  await runLogin();
1626
2646
  });
1627
2647
  program.command("run", { isDefault: true }).description("Launch the interactive agent (TUI)").argument("[goal...]", "what to do (optional; prompts if omitted)").option("-o, --objective <name>", "routing objective: cheapest | value | quality").option("--max-cost <usd>", "exclude models whose projected per-call cost exceeds this").option("-w, --write", "allow the agent to write files (confined to --cwd)", false).option("-x, --commands", "DANGER: let the model run arbitrary shell commands in --cwd", false).option("-C, --cwd <dir>", "working directory", process.cwd()).action(async (goalParts, opts) => {
2648
+ const startedAt = Date.now();
1628
2649
  const config = loadConfig();
1629
- const key = await ensureApiKey(config);
1630
- if (!key) {
1631
- console.error(c.red("No API key \u2014 cannot run. Try `poly login`."));
1632
- process.exit(1);
2650
+ if (!config.local.enabled || resolveApiKey(config)) {
2651
+ const key = await ensureApiKey(config);
2652
+ if (!key && !config.local.enabled) {
2653
+ console.error(c.red("No API key \u2014 cannot run. Try `poly login`, or `poly config local on` for a local LLM."));
2654
+ process.exit(1);
2655
+ }
1633
2656
  }
1634
2657
  const reloaded = loadConfig();
1635
2658
  const models = await loadCatalog(reloaded);
1636
2659
  const policy = buildPolicy(reloaded, opts);
1637
2660
  const goal = goalParts?.join(" ").trim() || void 0;
2661
+ const sessionId = randomUUID();
1638
2662
  const instance = render(
1639
2663
  createElement(App, {
1640
2664
  client: client(reloaded),
1641
2665
  models,
1642
2666
  policy,
1643
- sessionId: randomUUID(),
2667
+ sessionId,
1644
2668
  cwd: opts.cwd,
1645
2669
  allowWrite: !!opts.write,
1646
2670
  allowCommands: !!opts.commands,
@@ -1649,11 +2673,22 @@ program.command("run", { isDefault: true }).description("Launch the interactive
1649
2673
  })
1650
2674
  );
1651
2675
  await instance.waitUntilExit();
2676
+ const totals2 = sessionUsageTotals(sessionId);
2677
+ trackCommand({
2678
+ command: "run",
2679
+ startedAt,
2680
+ sessionId,
2681
+ args: goal,
2682
+ objective: policy.objective,
2683
+ ...totals2
2684
+ });
1652
2685
  });
1653
2686
  program.command("recommend").description("Recommend the best / best-value model combos for a task BEFORE running").argument("<goal...>", "task description").option("--smart", "use an LLM to produce a tailored plan (costs a few cents)", false).option("-o, --objective <name>", "highlight a specific objective").action(async (goalParts, opts) => {
2687
+ const startedAt = Date.now();
1654
2688
  const config = loadConfig();
1655
2689
  const models = await loadCatalog(config);
1656
2690
  const goal = goalParts.join(" ");
2691
+ const sessionId = randomUUID();
1657
2692
  let plan = heuristicPlan(goal);
1658
2693
  if (opts.smart) {
1659
2694
  const key = resolveApiKey(config);
@@ -1663,7 +2698,9 @@ program.command("recommend").description("Recommend the best / best-value model
1663
2698
  const planRoute = route("plan", models, buildPolicy(config, {}));
1664
2699
  if (planRoute) {
1665
2700
  try {
1666
- plan = await planRequest(goal, client(config), planRoute.model);
2701
+ plan = await planRequest(goal, client(config), planRoute.model, (result) => {
2702
+ logCompletion(result, "plan", sessionId, "recommend");
2703
+ });
1667
2704
  } catch (e) {
1668
2705
  console.error(c.yellow(`Smart plan failed (${e?.message}); using heuristic.`));
1669
2706
  }
@@ -1671,6 +2708,8 @@ program.command("recommend").description("Recommend the best / best-value model
1671
2708
  }
1672
2709
  }
1673
2710
  console.log(renderRecommendation(buildRecommendation(plan, models)));
2711
+ const totals2 = sessionUsageTotals(sessionId);
2712
+ trackCommand({ command: "recommend", startedAt, sessionId, args: goal, objective: config.defaultObjective, ...totals2 });
1674
2713
  });
1675
2714
  program.command("models").description("Browse the model catalog with pricing and tiers").option("-t, --tier <tier>", "filter by tier: cheap | standard | frontier").option("--tools", "only models that support tool/function calling", false).option("-s, --search <text>", "filter by id/name substring").option("--refresh", "force-refresh the catalog from OpenRouter", false).option("-n, --limit <n>", "max rows", "40").action(async (opts) => {
1676
2715
  const config = loadConfig();
@@ -1701,11 +2740,11 @@ program.command("usage").description("Show recorded usage & cost by date + model
1701
2740
  let until = opts.until;
1702
2741
  if (opts.today) {
1703
2742
  const d = /* @__PURE__ */ new Date();
1704
- const iso = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(
2743
+ const iso2 = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(
1705
2744
  d.getDate()
1706
2745
  ).padStart(2, "0")}`;
1707
- since = iso;
1708
- until = iso;
2746
+ since = iso2;
2747
+ until = iso2;
1709
2748
  }
1710
2749
  console.log(renderUsageReport({ since, until }));
1711
2750
  if (opts.sync) {
@@ -1713,10 +2752,34 @@ program.command("usage").description("Show recorded usage & cost by date + model
1713
2752
  console.log(res.synced > 0 ? c.green(res.message) : c.dim(res.message));
1714
2753
  }
1715
2754
  });
1716
- program.command("sync").description("Push unsynced usage rows to Firestore (mathology-b8e3d)").action(async () => {
2755
// `poly analyze [--since <date>] [--until <date>]`
// Prints the report produced by renderAnalysis() for the optional date window.
// Both bounds are forwarded as given; an omitted flag is passed as undefined.
program
  .command("analyze")
  .description("Which approach reaches the goal with the FEWEST tokens \u2014 per model, task, objective, command")
  .option("--since <date>", "YYYY-MM-DD inclusive")
  .option("--until <date>", "YYYY-MM-DD inclusive")
  .action(async ({ since, until }) => {
    const report = renderAnalysis({ since, until });
    console.log(report);
  });
2758
// `poly sync [--raw]`
// Pushes to every sync target that is enabled in the config:
//   - Data Connect, via syncDataConnect(), when config.dataconnect.enabled
//   - Firestore, via syncUsage(), when config.firestore.enabled
// `--raw` is forwarded to both targets as `{ raw: true }`.
// If neither target is enabled, a hint on how to enable one is printed.
program
  .command("sync")
  .description("Push DISTILLED efficiency insights to Firebase (raw logs stay local unless --raw)")
  .option("--raw", "also push the full raw ledger (sessions/steps/calls/commands)", false)
  .action(async (opts) => {
    const config = loadConfig();
    const raw = !!opts.raw;
    // True once at least one target was attempted (not necessarily successful).
    let attempted = false;

    if (config.dataconnect.enabled) {
      attempted = true;
      try {
        const res = await syncDataConnect(config, { raw });
        const n = res.insights + res.sessions + res.steps + res.commands + res.calls;
        console.log(n > 0 ? c.green(res.message) : c.dim(res.message));
      } catch (e) {
        // Report the failure but keep going so the Firestore target still runs.
        console.error(c.red(`Data Connect sync failed: ${e?.message ?? e}`));
        // Without this the command would exit 0 after a failed sync, while a
        // Firestore failure (which propagates to the top-level handler) exits 1.
        process.exitCode = 1;
      }
    }

    if (config.firestore.enabled) {
      attempted = true;
      const res = await syncUsage(config, { raw });
      console.log(res.synced > 0 ? c.green(res.message) : c.dim(res.message));
    }

    if (!attempted) {
      console.log(
        c.yellow(
          "No sync target enabled. Use `poly config dataconnect on` (SQL) or `poly config firestore on`."
        )
      );
    }
  });
1721
2784
  var cfg = program.command("config").description("View or change Polymath settings");
1722
2785
  cfg.command("show").description("Print the current config (key is masked)").action(() => {
@@ -1758,6 +2821,29 @@ cfg.command("firestore").description("Enable/disable Firestore sync: on | off").
1758
2821
  saveConfig(config);
1759
2822
  console.log(c.green(`Firestore sync ${config.firestore.enabled ? "enabled" : "disabled"}.`));
1760
2823
  });
2824
// `poly config local <state> [--base <url>]`
// Turns the local LLM server setting on or off and optionally changes its
// base URL, then saves the config and prints the resulting state.
//   state   "on", "true" or "1" (case-insensitive) enables; anything else disables.
//   --base  OpenAI-compatible base URL; one trailing slash is stripped.
cfg
  .command("local")
  .description("Enable/disable a local LLM server (Ollama/LM Studio): on | off [--base <url>]")
  .argument("<state>")
  .option("--base <url>", "OpenAI-compatible base URL (default http://localhost:11434/v1)")
  .action((state, opts) => {
    const config = loadConfig();
    // The alternation must be grouped. Written as /^on|true|1$/ the anchors bind
    // only to the first and last branch, so "once", "untrue" or "21" would
    // all be treated as "on".
    config.local.enabled = /^(?:on|true|1)$/i.test(state);
    if (opts.base) config.local.baseUrl = String(opts.base).replace(/\/$/, "");
    saveConfig(config);
    console.log(
      c.green(
        `Local LLM ${config.local.enabled ? "enabled" : "disabled"} (${config.local.baseUrl}). Models appear as local/<name> with $0 cost.`
      )
    );
  });
2835
// `poly config dataconnect <state> [--location <loc>] [--service <id>]`
// Turns Firebase Data Connect sync on or off and optionally overrides the
// location and service id, then saves the config and prints the resulting state.
//   state       "on", "true" or "1" (case-insensitive) enables; anything else disables.
//   --location  stored as config.dataconnect.location when given.
//   --service   stored as config.dataconnect.serviceId when given.
cfg
  .command("dataconnect")
  .description("Enable/disable Firebase Data Connect (SQL) sync: on | off [--location <loc>] [--service <id>]")
  .argument("<state>")
  .option("--location <loc>", "Data Connect location (default us-east4)")
  .option("--service <id>", "Data Connect service id (default polymath)")
  .action((state, opts) => {
    const config = loadConfig();
    // The alternation must be grouped. Written as /^on|true|1$/ the anchors bind
    // only to the first and last branch, so "once", "untrue" or "21" would
    // all be treated as "on".
    config.dataconnect.enabled = /^(?:on|true|1)$/i.test(state);
    if (opts.location) config.dataconnect.location = opts.location;
    if (opts.service) config.dataconnect.serviceId = opts.service;
    saveConfig(config);
    console.log(
      c.green(
        `Data Connect sync ${config.dataconnect.enabled ? "enabled" : "disabled"} (service ${config.dataconnect.serviceId} @ ${config.dataconnect.location}).`
      )
    );
  });
1761
2847
  program.parseAsync().catch((err) => {
1762
2848
  console.error(c.red(err?.message ?? String(err)));
1763
2849
  process.exit(1);