polymath-agent 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -3
- package/dist/cli.js +392 -59
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -93,13 +93,38 @@ poly usage # cost by date + model
|
|
|
93
93
|
| `poly recommend <goal>` | Pre-run recommendation: cheapest / best-value / best-quality model combos + savings. |
|
|
94
94
|
| `poly models` | Browse the catalog with pricing, tier, tool support. Filters: `--tier`, `--tools`, `--search`. |
|
|
95
95
|
| `poly usage` | Recorded usage & cost grouped by **date + model**. `--today`, `--since`, `--sync`. |
|
|
96
|
-
| `poly analyze` | **Which approach reaches the goal with the fewest tokens** — best model per task type, objective × achievement, usage per command. |
|
|
97
|
-
| `poly sync` | Push
|
|
98
|
-
| `poly config show\|set\|firestore\|dataconnect` | View/change settings. |
|
|
96
|
+
| `poly analyze` | **Which approach reaches the goal with the fewest tokens** — efficiency playbook, best model per task type, objective × achievement, usage per command. |
|
|
97
|
+
| `poly sync` | Push **distilled efficiency insights** to Firebase ([Data Connect SQL](dataconnect/) / Firestore). Raw logs stay local unless `--raw`. |
|
|
98
|
+
| `poly config show\|set\|firestore\|dataconnect\|local` | View/change settings. |
|
|
99
99
|
|
|
100
100
|
After each `poly run`, rate the result 0–9 (one keypress) — your goal-achievement
|
|
101
101
|
rating joins the auto score (completed/planned steps) to power `poly analyze`.
|
|
102
102
|
|
|
103
|
+
### The efficiency playbook (learned routing)
|
|
104
|
+
|
|
105
|
+
Everything is captured locally (SQLite). `poly analyze` distills it into a **playbook**
|
|
106
|
+
of *notably* efficient approaches — a (task, model) pair qualifies only with ≥3
|
|
107
|
+
successful steps, ≥70% success, and **≥20% fewer tokens than the median** of its
|
|
108
|
+
competitors (so at least two models must qualify on the same task type). The playbook then **boosts routing**: proven-efficient models get
|
|
109
|
+
preferred under the `value` objective (`reason: proven 54% fewer tokens on edit (playbook)`).
|
|
110
|
+
`poly sync` uploads *only* the playbook by default — your goals and raw logs never
|
|
111
|
+
leave the machine unless you pass `--raw`.
|
|
112
|
+
|
|
113
|
+
### Local LLMs (Ollama / LM Studio) — $0 routing
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
ollama serve # or LM Studio's local server
|
|
117
|
+
poly config local on # default base: http://localhost:11434/v1
|
|
118
|
+
poly config local on --base http://localhost:1234/v1 # LM Studio
|
|
119
|
+
poly models -s local/ # local models join the catalog at $0
|
|
120
|
+
poly run "..." # cheapest objective → local wins what it can
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Local models appear as `local/<name>`, cost $0, and need **no API key** — with
|
|
124
|
+
`local on` and no OpenRouter key, Polymath runs fully offline on your machine.
|
|
125
|
+
Tokens are still tracked, so the playbook learns when your local model is the
|
|
126
|
+
most efficient approach.
|
|
127
|
+
|
|
103
128
|
### Routing objectives
|
|
104
129
|
|
|
105
130
|
Routing is **skill-aware**: each task type maps to a skill (coding / reasoning /
|
package/dist/cli.js
CHANGED
|
@@ -49,6 +49,11 @@ var DEFAULT_CONFIG = {
|
|
|
49
49
|
enabled: false,
|
|
50
50
|
location: "us-east4",
|
|
51
51
|
serviceId: "polymath"
|
|
52
|
+
},
|
|
53
|
+
local: {
|
|
54
|
+
enabled: false,
|
|
55
|
+
baseUrl: "http://localhost:11434/v1"
|
|
56
|
+
// Ollama default; LM Studio: http://localhost:1234/v1
|
|
52
57
|
}
|
|
53
58
|
};
|
|
54
59
|
function loadConfig() {
|
|
@@ -60,7 +65,8 @@ function loadConfig() {
|
|
|
60
65
|
...DEFAULT_CONFIG,
|
|
61
66
|
...raw,
|
|
62
67
|
firestore: { ...DEFAULT_CONFIG.firestore, ...raw.firestore ?? {} },
|
|
63
|
-
dataconnect: { ...DEFAULT_CONFIG.dataconnect, ...raw.dataconnect ?? {} }
|
|
68
|
+
dataconnect: { ...DEFAULT_CONFIG.dataconnect, ...raw.dataconnect ?? {} },
|
|
69
|
+
local: { ...DEFAULT_CONFIG.local, ...raw.local ?? {} }
|
|
64
70
|
};
|
|
65
71
|
} catch {
|
|
66
72
|
return { ...DEFAULT_CONFIG };
|
|
@@ -81,6 +87,7 @@ function resolveApiKey(config) {
|
|
|
81
87
|
|
|
82
88
|
// src/providers/openrouter.ts
|
|
83
89
|
var BASE = globalThis.process?.env?.OPENROUTER_BASE_URL?.replace(/\/$/, "") || "https://openrouter.ai/api/v1";
|
|
90
|
+
var LOCAL_PREFIX = "local/";
|
|
84
91
|
var OpenRouterError = class extends Error {
|
|
85
92
|
status;
|
|
86
93
|
constructor(message, status) {
|
|
@@ -93,10 +100,12 @@ var OpenRouterClient = class {
|
|
|
93
100
|
apiKey;
|
|
94
101
|
referer;
|
|
95
102
|
title;
|
|
103
|
+
localBaseUrl;
|
|
96
104
|
constructor(opts = {}) {
|
|
97
105
|
this.apiKey = opts.apiKey;
|
|
98
106
|
this.referer = opts.referer ?? "https://github.com/polymath-agent";
|
|
99
107
|
this.title = opts.title ?? "Polymath";
|
|
108
|
+
this.localBaseUrl = opts.localBaseUrl?.replace(/\/$/, "");
|
|
100
109
|
}
|
|
101
110
|
headers(json = true) {
|
|
102
111
|
const h = {
|
|
@@ -107,6 +116,24 @@ var OpenRouterClient = class {
|
|
|
107
116
|
if (json) h["Content-Type"] = "application/json";
|
|
108
117
|
return h;
|
|
109
118
|
}
|
|
119
|
+
/** Resolve where a model's request goes: the local server for `local/*`, else OpenRouter. */
|
|
120
|
+
target(modelId) {
|
|
121
|
+
if (this.localBaseUrl && modelId.startsWith(LOCAL_PREFIX)) {
|
|
122
|
+
return { base: this.localBaseUrl, model: modelId.slice(LOCAL_PREFIX.length), isLocal: true };
|
|
123
|
+
}
|
|
124
|
+
return { base: BASE, model: modelId, isLocal: false };
|
|
125
|
+
}
|
|
126
|
+
requireKeyFor(isLocal) {
|
|
127
|
+
if (!isLocal && !this.apiKey) throw new OpenRouterError("No API key set. Run `poly login`.");
|
|
128
|
+
}
|
|
129
|
+
/** List models from the local OpenAI-compatible server (Ollama / LM Studio). */
|
|
130
|
+
async listLocalRawModels() {
|
|
131
|
+
if (!this.localBaseUrl) return [];
|
|
132
|
+
const res = await fetch(`${this.localBaseUrl}/models`);
|
|
133
|
+
if (!res.ok) throw new OpenRouterError(`Local server: failed to list models (${res.status})`, res.status);
|
|
134
|
+
const json = await res.json();
|
|
135
|
+
return json.data ?? [];
|
|
136
|
+
}
|
|
110
137
|
/** Raw /models payload (no auth required). */
|
|
111
138
|
async listRawModels() {
|
|
112
139
|
const res = await fetch(`${BASE}/models`, { headers: this.headers(false) });
|
|
@@ -126,24 +153,28 @@ var OpenRouterClient = class {
|
|
|
126
153
|
const d = json.data ?? {};
|
|
127
154
|
return { label: d.label, usage: d.usage, limit: d.limit };
|
|
128
155
|
}
|
|
129
|
-
buildBody(req, stream) {
|
|
156
|
+
buildBody(req, stream, modelOverride, isLocal) {
|
|
130
157
|
return {
|
|
131
|
-
model:
|
|
158
|
+
model: modelOverride,
|
|
132
159
|
messages: req.messages.map(serializeMessage),
|
|
133
160
|
...req.tools && req.tools.length ? { tools: req.tools, tool_choice: "auto" } : {},
|
|
134
161
|
temperature: req.temperature ?? 0.2,
|
|
135
162
|
...req.maxTokens ? { max_tokens: req.maxTokens } : {},
|
|
136
163
|
stream,
|
|
137
|
-
|
|
164
|
+
// OpenRouter-specific accounting param; local servers may reject unknown fields.
|
|
165
|
+
...isLocal ? {} : { usage: { include: true } },
|
|
166
|
+
// OpenAI-compat way to get token usage in the final stream chunk (Ollama/LM Studio).
|
|
167
|
+
...isLocal && stream ? { stream_options: { include_usage: true } } : {}
|
|
138
168
|
};
|
|
139
169
|
}
|
|
140
170
|
/** Non-streaming completion. costUsd is computed from `pricing` (deterministic). */
|
|
141
171
|
async complete(req, pricing) {
|
|
142
|
-
|
|
143
|
-
|
|
172
|
+
const t = this.target(req.model);
|
|
173
|
+
this.requireKeyFor(t.isLocal);
|
|
174
|
+
const res = await fetch(`${t.base}/chat/completions`, {
|
|
144
175
|
method: "POST",
|
|
145
176
|
headers: this.headers(),
|
|
146
|
-
body: JSON.stringify(this.buildBody(req, false))
|
|
177
|
+
body: JSON.stringify(this.buildBody(req, false, t.model, t.isLocal))
|
|
147
178
|
});
|
|
148
179
|
if (!res.ok) {
|
|
149
180
|
const text = await res.text().catch(() => "");
|
|
@@ -164,8 +195,10 @@ var OpenRouterClient = class {
|
|
|
164
195
|
content: typeof msg.content === "string" ? msg.content : "",
|
|
165
196
|
toolCalls: parseToolCalls(msg.tool_calls),
|
|
166
197
|
usage,
|
|
167
|
-
|
|
168
|
-
|
|
198
|
+
// Keep the prefixed id for local models so the ledger stays consistent.
|
|
199
|
+
model: t.isLocal ? req.model : json.model ?? req.model,
|
|
200
|
+
// Local inference is free regardless of what the server claims to report.
|
|
201
|
+
costUsd: computeCost(usage, pricing, t.isLocal ? void 0 : json.usage?.cost),
|
|
169
202
|
finishReason: choice.finish_reason ?? null
|
|
170
203
|
};
|
|
171
204
|
}
|
|
@@ -174,11 +207,12 @@ var OpenRouterClient = class {
|
|
|
174
207
|
* Tool-call deltas are accumulated and surfaced in the final result.
|
|
175
208
|
*/
|
|
176
209
|
async *stream(req, pricing) {
|
|
177
|
-
|
|
178
|
-
|
|
210
|
+
const t = this.target(req.model);
|
|
211
|
+
this.requireKeyFor(t.isLocal);
|
|
212
|
+
const res = await fetch(`${t.base}/chat/completions`, {
|
|
179
213
|
method: "POST",
|
|
180
214
|
headers: this.headers(),
|
|
181
|
-
body: JSON.stringify(this.buildBody(req, true))
|
|
215
|
+
body: JSON.stringify(this.buildBody(req, true, t.model, t.isLocal))
|
|
182
216
|
});
|
|
183
217
|
if (!res.ok || !res.body) {
|
|
184
218
|
const text = await res.text().catch(() => "");
|
|
@@ -212,7 +246,7 @@ var OpenRouterClient = class {
|
|
|
212
246
|
if (evt?.error) {
|
|
213
247
|
throw new OpenRouterError(evt.error.message ?? "Stream provider error", evt.error.code);
|
|
214
248
|
}
|
|
215
|
-
if (evt.model) model = evt.model;
|
|
249
|
+
if (evt.model && !t.isLocal) model = evt.model;
|
|
216
250
|
if (evt.usage) usageJson = evt.usage;
|
|
217
251
|
const choice = evt.choices?.[0];
|
|
218
252
|
if (!choice) continue;
|
|
@@ -239,17 +273,17 @@ var OpenRouterClient = class {
|
|
|
239
273
|
completionTokens: usageJson?.completion_tokens ?? 0,
|
|
240
274
|
totalTokens: usageJson?.total_tokens ?? 0
|
|
241
275
|
};
|
|
242
|
-
const toolCalls = [...toolAcc.values()].filter((
|
|
243
|
-
id:
|
|
276
|
+
const toolCalls = [...toolAcc.values()].filter((t2) => t2.name).map((t2) => ({
|
|
277
|
+
id: t2.id || `call_${t2.name}`,
|
|
244
278
|
type: "function",
|
|
245
|
-
function: { name:
|
|
279
|
+
function: { name: t2.name, arguments: t2.args || "{}" }
|
|
246
280
|
}));
|
|
247
281
|
return {
|
|
248
282
|
content,
|
|
249
283
|
toolCalls,
|
|
250
284
|
usage,
|
|
251
285
|
model,
|
|
252
|
-
costUsd: computeCost(usage, pricing, usageJson?.cost),
|
|
286
|
+
costUsd: computeCost(usage, pricing, t.isLocal ? void 0 : usageJson?.cost),
|
|
253
287
|
finishReason
|
|
254
288
|
};
|
|
255
289
|
}
|
|
@@ -387,6 +421,38 @@ async function getModels(client2, opts = {}) {
|
|
|
387
421
|
return models;
|
|
388
422
|
}
|
|
389
423
|
|
|
424
|
+
// src/models/local.ts
|
|
425
|
+
/**
 * Turn the raw model listing of a local server into catalog entries.
 *
 * Entries without a truthy `id` are dropped. Every kept entry gets the
 * LOCAL_PREFIX-ed id, zero pricing, the "local" provider, and a context length
 * that falls back to 8192 when the server does not report one.
 *
 * @param {Iterable<object>} raw - items from the local server's model listing
 * @returns {Array<object>} catalog entries, in the order the server listed them
 */
function parseLocalModels(raw) {
  return [...raw]
    .filter((entry) => entry?.id)
    .map((entry) => {
      const label = String(entry.id);
      return {
        id: LOCAL_PREFIX + label,
        name: `${label} (local)`,
        provider: "local",
        contextLength: entry.context_length ?? 8192,
        pricing: { promptUsdPerMTok: 0, completionUsdPerMTok: 0 },
        tier: classifyTier(label, 0),
        capabilities: {
          // Tool schemas are always advertised; a model that cannot call tools
          // just answers in text, which the agent loop is expected to handle.
          tools: true,
          // Vision support is guessed from the model name only.
          vision: /llava|vision|vl\b|moondream/i.test(label)
        }
      };
    });
}
|
|
447
|
+
/**
 * Best-effort listing of local models: asks the client for the raw listing and
 * parses it. Any failure along the way (request, HTTP status, parsing) is
 * deliberately collapsed into an empty list so the caller can carry on.
 *
 * @param {object} client2 - client exposing `listLocalRawModels()`
 * @returns {Promise<Array<object>>} parsed catalog entries, or `[]` on any error
 */
async function getLocalModels(client2) {
  return Promise.resolve()
    .then(() => client2.listLocalRawModels())
    .then((raw) => parseLocalModels(raw))
    .catch(() => []);
}
|
|
455
|
+
|
|
390
456
|
// src/auth/onboarding.ts
|
|
391
457
|
import readline from "node:readline";
|
|
392
458
|
|
|
@@ -723,8 +789,11 @@ var HEADLINE_SKILLS = ["coding", "reasoning", "retrieval", "speed"];
|
|
|
723
789
|
function projectCost(m, est) {
|
|
724
790
|
return est.promptTokens / 1e6 * m.pricing.promptUsdPerMTok + est.completionTokens / 1e6 * m.pricing.completionUsdPerMTok;
|
|
725
791
|
}
|
|
726
|
-
function taskValue(m, taskType) {
|
|
727
|
-
|
|
792
|
+
function taskValue(m, taskType, empirical) {
|
|
793
|
+
const base = taskStrength(m, taskType) / Math.max(blendedPrice(m), 0.01);
|
|
794
|
+
const savings = empirical?.[`${taskType}:${m.id}`];
|
|
795
|
+
const boost = savings ? 1 + Math.min(savings, 100) / 100 : 1;
|
|
796
|
+
return base * boost;
|
|
728
797
|
}
|
|
729
798
|
function candidatesFor(taskType, models, policy, est) {
|
|
730
799
|
const spec = TASK_SPECS[taskType];
|
|
@@ -753,7 +822,9 @@ function rank(models, policy, taskType) {
|
|
|
753
822
|
break;
|
|
754
823
|
case "value":
|
|
755
824
|
default:
|
|
756
|
-
sorted.sort(
|
|
825
|
+
sorted.sort(
|
|
826
|
+
(a, b) => taskValue(b, taskType, policy.empirical) - taskValue(a, taskType, policy.empirical)
|
|
827
|
+
);
|
|
757
828
|
break;
|
|
758
829
|
}
|
|
759
830
|
return sorted;
|
|
@@ -771,7 +842,8 @@ function route(taskType, models, policy, est = { promptTokens: 4e3, completionTo
|
|
|
771
842
|
const ranked = rank(cands, policy, taskType);
|
|
772
843
|
const chosen = ranked[0];
|
|
773
844
|
const skill = TASK_SKILL[taskType];
|
|
774
|
-
const
|
|
845
|
+
const proven = policy.empirical?.[`${taskType}:${chosen.id}`];
|
|
846
|
+
const reason = policy.objective === "cheapest" ? `cheapest model that covers ${skill}` : policy.objective === "quality" ? `strongest at ${skill}` : proven ? `proven ${Math.round(proven)}% fewer tokens on ${taskType} (playbook)` : `best ${skill}-per-dollar`;
|
|
775
847
|
return { model: chosen, reason, estCostUsd: projectCost(chosen, est) };
|
|
776
848
|
}
|
|
777
849
|
|
|
@@ -1002,6 +1074,23 @@ function getDb() {
|
|
|
1002
1074
|
synced INTEGER NOT NULL DEFAULT 0
|
|
1003
1075
|
);
|
|
1004
1076
|
CREATE INDEX IF NOT EXISTS idx_cmd_date ON command_runs(date);
|
|
1077
|
+
|
|
1078
|
+
-- Distilled efficiency insights: ONLY the notably cost-efficient approaches.
|
|
1079
|
+
-- This is what syncs to the cloud by default (raw logs stay local).
|
|
1080
|
+
CREATE TABLE IF NOT EXISTS insights (
|
|
1081
|
+
id TEXT PRIMARY KEY, -- "<task_type>__<model>"
|
|
1082
|
+
computed_at INTEGER NOT NULL,
|
|
1083
|
+
task_type TEXT NOT NULL,
|
|
1084
|
+
model TEXT NOT NULL,
|
|
1085
|
+
provider TEXT NOT NULL,
|
|
1086
|
+
samples INTEGER NOT NULL, -- successful steps observed
|
|
1087
|
+
success_rate REAL NOT NULL,
|
|
1088
|
+
avg_tokens REAL NOT NULL, -- per successful step
|
|
1089
|
+
baseline_tokens REAL NOT NULL, -- median across qualified competitors
|
|
1090
|
+
savings_pct REAL NOT NULL, -- vs baseline (the "\uC720\uB3C5" margin)
|
|
1091
|
+
avg_cost_usd REAL NOT NULL,
|
|
1092
|
+
synced INTEGER NOT NULL DEFAULT 0
|
|
1093
|
+
);
|
|
1005
1094
|
`);
|
|
1006
1095
|
const cols = db.prepare(`PRAGMA table_info(usage_log)`).all();
|
|
1007
1096
|
if (!cols.some((c2) => c2.name === "command")) {
|
|
@@ -1304,9 +1393,62 @@ function unsyncedCommandRuns() {
|
|
|
1304
1393
|
}
|
|
1305
1394
|
function markTableSynced(table2, ids) {
|
|
1306
1395
|
if (!ids.length) return;
|
|
1307
|
-
const stmt = getDb().prepare(`UPDATE ${table2} SET synced=1 WHERE
|
|
1396
|
+
const stmt = getDb().prepare(`UPDATE ${table2} SET synced=1 WHERE id=?`);
|
|
1308
1397
|
for (const id of ids) stmt.run(id);
|
|
1309
1398
|
}
|
|
1399
|
+
/**
 * Insert an insight row, or refresh the metrics of the existing row with the
 * same id. Either way the row ends up with `synced = 0`.
 *
 * @param {object} i - insight with id, computedAt, taskType, model, provider,
 *   samples, successRate, avgTokens, baselineTokens, savingsPct, avgCostUsd
 */
function upsertInsight(i) {
  const sql = `INSERT INTO insights (id, computed_at, task_type, model, provider, samples, success_rate,
avg_tokens, baseline_tokens, savings_pct, avg_cost_usd, synced)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0)
ON CONFLICT(id) DO UPDATE SET
computed_at=excluded.computed_at, samples=excluded.samples,
success_rate=excluded.success_rate, avg_tokens=excluded.avg_tokens,
baseline_tokens=excluded.baseline_tokens, savings_pct=excluded.savings_pct,
avg_cost_usd=excluded.avg_cost_usd, synced=0`;
  const statement = getDb().prepare(sql);
  // Positional parameters, in the column order of the INSERT above.
  const params = [
    i.id,
    i.computedAt,
    i.taskType,
    i.model,
    i.provider,
    i.samples,
    i.successRate,
    i.avgTokens,
    i.baselineTokens,
    i.savingsPct,
    i.avgCostUsd
  ];
  statement.run(...params);
}
|
|
1423
|
+
/**
 * Remove every stored insight whose id is not in `validIds`.
 *
 * @param {Iterable<string>} validIds - ids of the insights to keep
 */
function deleteInsightsExcept(validIds) {
  const storedIds = getDb()
    .prepare(`SELECT id FROM insights`)
    .all()
    .map((row) => String(row.id));
  const retained = new Set(validIds);
  const remove = getDb().prepare(`DELETE FROM insights WHERE id=?`);
  storedIds
    .filter((id) => !retained.has(id))
    .forEach((id) => {
      remove.run(id);
    });
}
|
|
1429
|
+
/**
 * Read all stored insights, largest token savings first.
 *
 * @returns {Array<object>} insight objects as produced by mapInsight
 */
function listInsights() {
  const query = getDb().prepare(`SELECT * FROM insights ORDER BY savings_pct DESC`);
  return query.all().map((row) => mapInsight(row));
}
|
|
1433
|
+
/**
 * Read the stored insights whose `synced` flag is still 0.
 *
 * @returns {Array<object>} insight objects as produced by mapInsight
 */
function unsyncedInsights() {
  const query = getDb().prepare(`SELECT * FROM insights WHERE synced=0`);
  return query.all().map((row) => mapInsight(row));
}
|
|
1437
|
+
/**
 * Convert one `insights` table row (snake_case columns) into the camelCase
 * insight object used by the rest of the code, coercing text columns with
 * String() and numeric columns with Number().
 *
 * @param {object} r - raw database row
 * @returns {object} insight with id, computedAt, taskType, model, provider,
 *   samples, successRate, avgTokens, baselineTokens, savingsPct, avgCostUsd
 */
function mapInsight(r) {
  const text = (column) => String(r[column]);
  const number = (column) => Number(r[column]);
  return {
    id: text("id"),
    computedAt: number("computed_at"),
    taskType: text("task_type"),
    model: text("model"),
    provider: text("provider"),
    samples: number("samples"),
    successRate: number("success_rate"),
    avgTokens: number("avg_tokens"),
    baselineTokens: number("baseline_tokens"),
    savingsPct: number("savings_pct"),
    avgCostUsd: number("avg_cost_usd")
  };
}
|
|
1310
1452
|
|
|
1311
1453
|
// src/usage/report.ts
|
|
1312
1454
|
function renderUsageReport(filter = {}) {
|
|
@@ -1349,6 +1491,80 @@ function renderUsageReport(filter = {}) {
|
|
|
1349
1491
|
].join("\n");
|
|
1350
1492
|
}
|
|
1351
1493
|
|
|
1494
|
+
// src/usage/insights.ts
|
|
1495
|
+
var MIN_SAMPLES = 3;
|
|
1496
|
+
var MIN_SUCCESS = 0.7;
|
|
1497
|
+
var MIN_MARGIN = 0.2;
|
|
1498
|
+
/**
 * Median of a collection of numbers; the input is not modified.
 * For an even count the two middle values are averaged. An empty input
 * yields NaN.
 *
 * @param {Iterable<number>} xs - values to summarise
 * @returns {number} the median value
 */
function median(xs) {
  const ordered = [];
  for (const value of xs) ordered.push(value);
  ordered.sort((left, right) => left - right);
  const half = Math.floor(ordered.length / 2);
  if (ordered.length % 2) {
    return ordered[half];
  }
  return (ordered[half - 1] + ordered[half]) / 2;
}
|
|
1503
|
+
/**
 * Rebuild the efficiency playbook from the rows of modelTaskEfficiency().
 *
 * Rows are grouped by task type. Within a group a row qualifies when it has at
 * least MIN_SAMPLES steps, a success rate of at least MIN_SUCCESS and a
 * positive average token count per success. Groups with fewer than two
 * qualified rows are ignored. The baseline of a group is the median token
 * count of its qualified rows; a row becomes an insight when it undercuts that
 * baseline by at least MIN_MARGIN. Stored insights that no longer qualify are
 * deleted.
 *
 * @param {number} [now=Date.now()] - timestamp recorded as `computedAt`
 * @returns {Array<object>} the stored insights after the rebuild (listInsights())
 */
function distillInsights(now = Date.now()) {
  const rowsByTask = /* @__PURE__ */ new Map();
  for (const row of modelTaskEfficiency()) {
    const bucket = rowsByTask.get(row.taskType);
    if (bucket) bucket.push(row);
    else rowsByTask.set(row.taskType, [row]);
  }

  const isContender = (row) =>
    row.steps >= MIN_SAMPLES && row.successRate >= MIN_SUCCESS && row.avgTokensPerSuccess > 0;

  const keptIds = [];
  rowsByTask.forEach((rows, taskType) => {
    const contenders = rows.filter(isContender);
    // A lone model has nothing to be compared against.
    if (contenders.length < 2) return;
    const baseline = median(contenders.map((row) => row.avgTokensPerSuccess));
    for (const row of contenders) {
      const savings = 1 - row.avgTokensPerSuccess / baseline;
      // Written as a negated >= so a NaN margin is skipped as well.
      if (!(savings >= MIN_MARGIN)) continue;
      const id = `${taskType}__${row.model}`;
      keptIds.push(id);
      // Provider is whatever precedes the first "/" in the model id.
      const [provider = "unknown"] = row.model.split("/");
      upsertInsight({
        id,
        computedAt: now,
        taskType,
        model: row.model,
        provider,
        samples: row.steps,
        successRate: row.successRate,
        avgTokens: row.avgTokensPerSuccess,
        baselineTokens: baseline,
        savingsPct: savings * 100,
        avgCostUsd: row.avgCostPerSuccess
      });
    }
  });

  deleteInsightsExcept(keptIds);
  return listInsights();
}
|
|
1542
|
+
/**
 * Index insights for routing lookups: maps "<taskType>:<model>" to that
 * insight's savings percentage. When two insights share a key the later one
 * wins.
 *
 * @param {Iterable<object>} insights - insights with taskType, model, savingsPct
 * @returns {Object<string, number>} savings percentage per "<taskType>:<model>"
 */
function insightBoostMap(insights) {
  const pairs = Array.from(insights, (insight) => [
    `${insight.taskType}:${insight.model}`,
    insight.savingsPct
  ]);
  return Object.fromEntries(pairs);
}
|
|
1547
|
+
/**
 * Render the efficiency playbook for the terminal.
 *
 * With no insights, returns the heading followed by a dimmed hint about what
 * is needed before anything can be distilled. Otherwise returns the heading,
 * a dimmed subtitle and a table with one row per insight.
 *
 * @param {Array<object>} insights - insights as returned by listInsights()
 * @returns {string} formatted text ready to print
 */
function renderPlaybook(insights) {
  const heading = c.bold("Efficiency playbook");
  if (!insights.length) {
    const hint = c.dim(
      `Nothing distilled yet \u2014 needs \u22652 models with \u2265${MIN_SAMPLES} successful steps on the same task type,
where one beats the median by \u2265${MIN_MARGIN * 100}% tokens. Keep running tasks (vary models with -o / pins).`
    );
    return `${heading}\n${hint}`;
  }
  const subtitle = c.dim(" (the notably efficient approaches \u2014 this is what `poly sync` uploads)");
  const columns = ["Task", "Model", "Avg tok", "Baseline", "Savings", "Success", "n"];
  const toRow = (insight) => [
    insight.taskType,
    c.green(insight.model),
    tokens(Math.round(insight.avgTokens)),
    tokens(Math.round(insight.baselineTokens)),
    c.green(`-${insight.savingsPct.toFixed(0)}%`),
    `${Math.round(insight.successRate * 100)}%`,
    String(insight.samples)
  ];
  const rows = insights.map((insight) => toRow(insight));
  return `${heading}${subtitle}\n${table(columns, rows)}`;
}
|
|
1567
|
+
|
|
1352
1568
|
// src/usage/analyze.ts
|
|
1353
1569
|
var MIN_SUCCESS_RATE = 0.5;
|
|
1354
1570
|
function renderAnalysis(filter = {}) {
|
|
@@ -1359,6 +1575,9 @@ function renderAnalysis(filter = {}) {
|
|
|
1359
1575
|
if (!byModelTask.length && !byObjective.length && !byCommand.length) {
|
|
1360
1576
|
return c.dim('No analytics yet. Run `poly run "<task>"` a few times (and rate the result) first.');
|
|
1361
1577
|
}
|
|
1578
|
+
const insights = distillInsights();
|
|
1579
|
+
out.push(renderPlaybook(insights));
|
|
1580
|
+
out.push("");
|
|
1362
1581
|
if (byModelTask.length) {
|
|
1363
1582
|
const byTask = /* @__PURE__ */ new Map();
|
|
1364
1583
|
for (const r of byModelTask) {
|
|
@@ -1445,7 +1664,7 @@ function renderAnalysis(filter = {}) {
|
|
|
1445
1664
|
}
|
|
1446
1665
|
|
|
1447
1666
|
// src/usage/firestoreSync.ts
|
|
1448
|
-
async function syncUsage(config) {
|
|
1667
|
+
async function syncUsage(config, opts = {}) {
|
|
1449
1668
|
if (!config.firestore.enabled) {
|
|
1450
1669
|
return { synced: 0, message: "Firestore sync is disabled (enable with `poly config firestore on`)." };
|
|
1451
1670
|
}
|
|
@@ -1473,28 +1692,62 @@ async function syncUsage(config) {
|
|
|
1473
1692
|
}
|
|
1474
1693
|
}
|
|
1475
1694
|
const fdb = fsMod.getFirestore();
|
|
1695
|
+
distillInsights();
|
|
1696
|
+
const insights = unsyncedInsights();
|
|
1697
|
+
if (insights.length) {
|
|
1698
|
+
const batch = fdb.batch();
|
|
1699
|
+
const col = fdb.collection("polymath_insights");
|
|
1700
|
+
for (const i of insights) {
|
|
1701
|
+
batch.set(col.doc(i.id), {
|
|
1702
|
+
computedAt: i.computedAt,
|
|
1703
|
+
taskType: i.taskType,
|
|
1704
|
+
model: i.model,
|
|
1705
|
+
provider: i.provider,
|
|
1706
|
+
samples: i.samples,
|
|
1707
|
+
successRate: i.successRate,
|
|
1708
|
+
avgTokens: i.avgTokens,
|
|
1709
|
+
baselineTokens: i.baselineTokens,
|
|
1710
|
+
savingsPct: i.savingsPct,
|
|
1711
|
+
avgCostUsd: i.avgCostUsd
|
|
1712
|
+
});
|
|
1713
|
+
}
|
|
1714
|
+
await batch.commit();
|
|
1715
|
+
markTableSynced("insights", insights.map((i) => i.id));
|
|
1716
|
+
}
|
|
1717
|
+
if (!opts.raw) {
|
|
1718
|
+
return {
|
|
1719
|
+
synced: insights.length,
|
|
1720
|
+
message: insights.length ? `Synced ${insights.length} efficiency insight(s) to polymath_insights. Raw logs stayed local (use --raw to push).` : "No new insights to sync \u2014 raw logs stay local by default (use --raw to push them)."
|
|
1721
|
+
};
|
|
1722
|
+
}
|
|
1476
1723
|
const rows = unsyncedRows();
|
|
1477
|
-
if (!rows.length) return { synced: 0, message: "Nothing to sync \u2014 all rows already pushed." };
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
const
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1724
|
+
if (!rows.length && !insights.length) return { synced: 0, message: "Nothing to sync \u2014 all rows already pushed." };
|
|
1725
|
+
if (rows.length) {
|
|
1726
|
+
const batch = fdb.batch();
|
|
1727
|
+
const col = fdb.collection(config.firestore.collection);
|
|
1728
|
+
for (const r of rows) {
|
|
1729
|
+
const ref = col.doc(`${r.date}__${r.id}`);
|
|
1730
|
+
batch.set(ref, {
|
|
1731
|
+
ts: r.ts,
|
|
1732
|
+
date: r.date,
|
|
1733
|
+
provider: r.provider,
|
|
1734
|
+
model: r.model,
|
|
1735
|
+
taskType: r.taskType,
|
|
1736
|
+
command: r.command ?? "run",
|
|
1737
|
+
promptTokens: r.promptTokens,
|
|
1738
|
+
completionTokens: r.completionTokens,
|
|
1739
|
+
totalTokens: r.totalTokens,
|
|
1740
|
+
costUsd: r.costUsd,
|
|
1741
|
+
sessionId: r.sessionId ?? null
|
|
1742
|
+
});
|
|
1743
|
+
}
|
|
1744
|
+
await batch.commit();
|
|
1745
|
+
markSynced(rows.map((r) => r.id));
|
|
1494
1746
|
}
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1747
|
+
return {
|
|
1748
|
+
synced: insights.length + rows.length,
|
|
1749
|
+
message: `Synced ${insights.length} insights + ${rows.length} raw rows to Firestore.`
|
|
1750
|
+
};
|
|
1498
1751
|
}
|
|
1499
1752
|
|
|
1500
1753
|
// src/usage/dataconnect.ts
|
|
@@ -1539,14 +1792,56 @@ async function executeGraphql(cfg2, token, query, variables) {
|
|
|
1539
1792
|
}
|
|
1540
1793
|
}
|
|
1541
1794
|
var iso = (ms) => new Date(ms).toISOString();
|
|
1542
|
-
async function syncDataConnect(config) {
|
|
1795
|
+
async function syncDataConnect(config, opts = {}) {
|
|
1543
1796
|
const dc = config.dataconnect;
|
|
1544
1797
|
if (!dc?.enabled) {
|
|
1545
|
-
return { sessions: 0, steps: 0, commands: 0, calls: 0, message: "Data Connect sync is disabled (enable with `poly config dataconnect on`)." };
|
|
1798
|
+
return { insights: 0, sessions: 0, steps: 0, commands: 0, calls: 0, message: "Data Connect sync is disabled (enable with `poly config dataconnect on`)." };
|
|
1546
1799
|
}
|
|
1547
1800
|
const projectId = config.firestore.projectId;
|
|
1548
1801
|
const token = await adminAccessToken(projectId);
|
|
1549
1802
|
const cfg2 = { projectId, location: dc.location, serviceId: dc.serviceId };
|
|
1803
|
+
distillInsights();
|
|
1804
|
+
const insights = unsyncedInsights();
|
|
1805
|
+
for (const i of insights) {
|
|
1806
|
+
await executeGraphql(
|
|
1807
|
+
cfg2,
|
|
1808
|
+
token,
|
|
1809
|
+
`mutation UpsertInsight($id: String!, $computedAt: Timestamp!, $taskType: String!,
|
|
1810
|
+
$model: String!, $provider: String!, $samples: Int!, $successRate: Float!,
|
|
1811
|
+
$avgTokens: Float!, $baselineTokens: Float!, $savingsPct: Float!, $avgCostUsd: Float!) {
|
|
1812
|
+
insight_upsert(data: {
|
|
1813
|
+
id: $id, computedAt: $computedAt, taskType: $taskType, model: $model,
|
|
1814
|
+
provider: $provider, samples: $samples, successRate: $successRate,
|
|
1815
|
+
avgTokens: $avgTokens, baselineTokens: $baselineTokens,
|
|
1816
|
+
savingsPct: $savingsPct, avgCostUsd: $avgCostUsd
|
|
1817
|
+
})
|
|
1818
|
+
}`,
|
|
1819
|
+
{
|
|
1820
|
+
id: i.id,
|
|
1821
|
+
computedAt: iso(i.computedAt),
|
|
1822
|
+
taskType: i.taskType,
|
|
1823
|
+
model: i.model,
|
|
1824
|
+
provider: i.provider,
|
|
1825
|
+
samples: i.samples,
|
|
1826
|
+
successRate: i.successRate,
|
|
1827
|
+
avgTokens: i.avgTokens,
|
|
1828
|
+
baselineTokens: i.baselineTokens,
|
|
1829
|
+
savingsPct: i.savingsPct,
|
|
1830
|
+
avgCostUsd: i.avgCostUsd
|
|
1831
|
+
}
|
|
1832
|
+
);
|
|
1833
|
+
}
|
|
1834
|
+
markTableSynced("insights", insights.map((i) => i.id));
|
|
1835
|
+
if (!opts.raw) {
|
|
1836
|
+
return {
|
|
1837
|
+
insights: insights.length,
|
|
1838
|
+
sessions: 0,
|
|
1839
|
+
steps: 0,
|
|
1840
|
+
commands: 0,
|
|
1841
|
+
calls: 0,
|
|
1842
|
+
message: `Synced ${insights.length} efficiency insight(s) to Data Connect (${cfg2.serviceId}@${cfg2.location}). Raw logs stayed local \u2014 use \`poly sync --raw\` to push everything.`
|
|
1843
|
+
};
|
|
1844
|
+
}
|
|
1550
1845
|
const sessions = unsyncedSessions();
|
|
1551
1846
|
for (const s of sessions) {
|
|
1552
1847
|
await executeGraphql(
|
|
@@ -1679,11 +1974,12 @@ async function syncDataConnect(config) {
|
|
|
1679
1974
|
}
|
|
1680
1975
|
markSynced(calls.map((c2) => c2.id));
|
|
1681
1976
|
return {
|
|
1977
|
+
insights: insights.length,
|
|
1682
1978
|
sessions: sessions.length,
|
|
1683
1979
|
steps: steps.length,
|
|
1684
1980
|
commands: commands.length,
|
|
1685
1981
|
calls: calls.length,
|
|
1686
|
-
message: `Synced ${sessions.length} sessions, ${steps.length} steps, ${commands.length} commands, ${calls.length} calls
|
|
1982
|
+
message: `Synced ${insights.length} insights + raw: ${sessions.length} sessions, ${steps.length} steps, ${commands.length} commands, ${calls.length} calls (${cfg2.serviceId}@${cfg2.location}).`
|
|
1687
1983
|
};
|
|
1688
1984
|
}
|
|
1689
1985
|
|
|
@@ -2274,21 +2570,30 @@ function truncate2(s, n) {
|
|
|
2274
2570
|
|
|
2275
2571
|
// src/index.ts
|
|
2276
2572
|
var program = new Command();
|
|
2277
|
-
program.name("poly").description("Polymath \u2014 cost-optimized, multi-model TUI coding agent").version("0.
|
|
2573
|
+
program.name("poly").description("Polymath \u2014 cost-optimized, multi-model TUI coding agent").version("0.3.0");
|
|
2278
2574
|
function client(config) {
|
|
2279
2575
|
return new OpenRouterClient({
|
|
2280
2576
|
apiKey: resolveApiKey(config),
|
|
2281
2577
|
referer: config.referer,
|
|
2282
|
-
title: config.title
|
|
2578
|
+
title: config.title,
|
|
2579
|
+
localBaseUrl: config.local.enabled ? config.local.baseUrl : void 0
|
|
2283
2580
|
});
|
|
2284
2581
|
}
|
|
2285
2582
|
function buildPolicy(config, opts) {
|
|
2286
2583
|
const objective = opts.objective || config.defaultObjective;
|
|
2287
2584
|
const maxCost = opts.maxCost != null ? parseFloat(opts.maxCost) : config.maxCostPerCallUsd;
|
|
2585
|
+
let empirical;
|
|
2586
|
+
try {
|
|
2587
|
+
empirical = insightBoostMap(listInsights());
|
|
2588
|
+
if (!Object.keys(empirical).length) empirical = void 0;
|
|
2589
|
+
} catch {
|
|
2590
|
+
empirical = void 0;
|
|
2591
|
+
}
|
|
2288
2592
|
return {
|
|
2289
2593
|
objective,
|
|
2290
2594
|
maxCostPerCallUsd: Number.isFinite(maxCost) ? maxCost : void 0,
|
|
2291
|
-
pinned: config.pinned
|
|
2595
|
+
pinned: config.pinned,
|
|
2596
|
+
empirical
|
|
2292
2597
|
};
|
|
2293
2598
|
}
|
|
2294
2599
|
function localDate3(d = /* @__PURE__ */ new Date()) {
|
|
@@ -2315,9 +2620,23 @@ function trackCommand(opts) {
|
|
|
2315
2620
|
}
|
|
2316
2621
|
}
|
|
2317
2622
|
async function loadCatalog(config, refresh = false) {
|
|
2318
|
-
const
|
|
2623
|
+
const cl = client(config);
|
|
2624
|
+
const hasKey = !!resolveApiKey(config);
|
|
2625
|
+
let models = [];
|
|
2626
|
+
try {
|
|
2627
|
+
models = await getModels(cl, { refresh });
|
|
2628
|
+
} catch (e) {
|
|
2629
|
+
if (!config.local.enabled) throw e;
|
|
2630
|
+
}
|
|
2631
|
+
if (config.local.enabled) {
|
|
2632
|
+
const local = await getLocalModels(cl);
|
|
2633
|
+
if (!local.length) {
|
|
2634
|
+
console.error(c.yellow(`Local server (${config.local.baseUrl}) returned no models \u2014 is it running?`));
|
|
2635
|
+
}
|
|
2636
|
+
models = hasKey ? [...local, ...models] : local;
|
|
2637
|
+
}
|
|
2319
2638
|
if (!models.length) {
|
|
2320
|
-
console.error(c.red("
|
|
2639
|
+
console.error(c.red("No models available. Check your connection, or `poly config local on` with a running Ollama/LM Studio."));
|
|
2321
2640
|
process.exit(1);
|
|
2322
2641
|
}
|
|
2323
2642
|
return models;
|
|
@@ -2328,10 +2647,12 @@ program.command("login").description("Connect Polymath to OpenRouter (set/replac
|
|
|
2328
2647
|
program.command("run", { isDefault: true }).description("Launch the interactive agent (TUI)").argument("[goal...]", "what to do (optional; prompts if omitted)").option("-o, --objective <name>", "routing objective: cheapest | value | quality").option("--max-cost <usd>", "exclude models whose projected per-call cost exceeds this").option("-w, --write", "allow the agent to write files (confined to --cwd)", false).option("-x, --commands", "DANGER: let the model run arbitrary shell commands in --cwd", false).option("-C, --cwd <dir>", "working directory", process.cwd()).action(async (goalParts, opts) => {
|
|
2329
2648
|
const startedAt = Date.now();
|
|
2330
2649
|
const config = loadConfig();
|
|
2331
|
-
|
|
2332
|
-
|
|
2333
|
-
|
|
2334
|
-
|
|
2650
|
+
if (!config.local.enabled || resolveApiKey(config)) {
|
|
2651
|
+
const key = await ensureApiKey(config);
|
|
2652
|
+
if (!key && !config.local.enabled) {
|
|
2653
|
+
console.error(c.red("No API key \u2014 cannot run. Try `poly login`, or `poly config local on` for a local LLM."));
|
|
2654
|
+
process.exit(1);
|
|
2655
|
+
}
|
|
2335
2656
|
}
|
|
2336
2657
|
const reloaded = loadConfig();
|
|
2337
2658
|
const models = await loadCatalog(reloaded);
|
|
@@ -2434,21 +2755,22 @@ program.command("usage").description("Show recorded usage & cost by date + model
|
|
|
2434
2755
|
// `analyze` subcommand: prints whatever renderAnalysis() returns for the
// optional --since / --until window passed on the command line.
program
  .command("analyze")
  .description("Which approach reaches the goal with the FEWEST tokens \u2014 per model, task, objective, command")
  .option("--since <date>", "YYYY-MM-DD inclusive")
  .option("--until <date>", "YYYY-MM-DD inclusive")
  .action(async (opts) => {
    const range = { since: opts.since, until: opts.until };
    const report = renderAnalysis(range);
    console.log(report);
  });
|
|
2437
|
-
program.command("sync").description("Push
|
|
2758
|
+
program.command("sync").description("Push DISTILLED efficiency insights to Firebase (raw logs stay local unless --raw)").option("--raw", "also push the full raw ledger (sessions/steps/calls/commands)", false).action(async (opts) => {
|
|
2438
2759
|
const config = loadConfig();
|
|
2439
2760
|
let pushed = false;
|
|
2440
2761
|
if (config.dataconnect.enabled) {
|
|
2441
2762
|
pushed = true;
|
|
2442
2763
|
try {
|
|
2443
|
-
const res = await syncDataConnect(config);
|
|
2444
|
-
|
|
2764
|
+
const res = await syncDataConnect(config, { raw: !!opts.raw });
|
|
2765
|
+
const n = res.insights + res.sessions + res.steps + res.commands + res.calls;
|
|
2766
|
+
console.log(n > 0 ? c.green(res.message) : c.dim(res.message));
|
|
2445
2767
|
} catch (e) {
|
|
2446
2768
|
console.error(c.red(`Data Connect sync failed: ${e?.message ?? e}`));
|
|
2447
2769
|
}
|
|
2448
2770
|
}
|
|
2449
2771
|
if (config.firestore.enabled) {
|
|
2450
2772
|
pushed = true;
|
|
2451
|
-
const res = await syncUsage(config);
|
|
2773
|
+
const res = await syncUsage(config, { raw: !!opts.raw });
|
|
2452
2774
|
console.log(res.synced > 0 ? c.green(res.message) : c.dim(res.message));
|
|
2453
2775
|
}
|
|
2454
2776
|
if (!pushed) {
|
|
@@ -2499,6 +2821,17 @@ cfg.command("firestore").description("Enable/disable Firestore sync: on | off").
|
|
|
2499
2821
|
saveConfig(config);
|
|
2500
2822
|
console.log(c.green(`Firestore sync ${config.firestore.enabled ? "enabled" : "disabled"}.`));
|
|
2501
2823
|
});
|
|
2824
|
+
// `config local <state> [--base <url>]`: turns the local LLM server setting on
// or off, optionally records its base URL, saves the config and prints the
// resulting state.
cfg.command("local").description("Enable/disable a local LLM server (Ollama/LM Studio): on | off [--base <url>]").argument("<state>").option("--base <url>", "OpenAI-compatible base URL (default http://localhost:11434/v1)").action((state, opts) => {
  const config = loadConfig();
  // The alternation must be grouped before anchoring. Ungrouped, /^on|true|1$/i
  // parses as (^on)|(true)|(1$), so inputs such as "only", "untrue" or "off1"
  // would switch the feature on. Only exactly on / true / 1 enable it.
  config.local.enabled = /^(?:on|true|1)$/i.test(state);
  // Drop a single trailing slash from the stored base URL.
  if (opts.base) config.local.baseUrl = String(opts.base).replace(/\/$/, "");
  saveConfig(config);
  console.log(
    c.green(
      `Local LLM ${config.local.enabled ? "enabled" : "disabled"} (${config.local.baseUrl}). Models appear as local/<name> with $0 cost.`
    )
  );
});
|
|
2502
2835
|
cfg.command("dataconnect").description("Enable/disable Firebase Data Connect (SQL) sync: on | off [--location <loc>] [--service <id>]").argument("<state>").option("--location <loc>", "Data Connect location (default us-east4)").option("--service <id>", "Data Connect service id (default polymath)").action((state, opts) => {
|
|
2503
2836
|
const config = loadConfig();
|
|
2504
2837
|
config.dataconnect.enabled = /^on|true|1$/i.test(state);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "polymath-agent",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Polymath — a cost-optimized, multi-model TUI coding agent. Decomposes work into typed tasks, routes each task to the cheapest capable model via OpenRouter, and logs real usage/cost by date + model.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|