polymath-agent 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -2
- package/dist/cli.js +1218 -132
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -44,6 +44,16 @@ var DEFAULT_CONFIG = {
|
|
|
44
44
|
enabled: false,
|
|
45
45
|
projectId: "mathology-b8e3d",
|
|
46
46
|
collection: "polymath_usage"
|
|
47
|
+
},
|
|
48
|
+
dataconnect: {
|
|
49
|
+
enabled: false,
|
|
50
|
+
location: "us-east4",
|
|
51
|
+
serviceId: "polymath"
|
|
52
|
+
},
|
|
53
|
+
local: {
|
|
54
|
+
enabled: false,
|
|
55
|
+
baseUrl: "http://localhost:11434/v1"
|
|
56
|
+
// Ollama default; LM Studio: http://localhost:1234/v1
|
|
47
57
|
}
|
|
48
58
|
};
|
|
49
59
|
function loadConfig() {
|
|
@@ -54,7 +64,9 @@ function loadConfig() {
|
|
|
54
64
|
return {
|
|
55
65
|
...DEFAULT_CONFIG,
|
|
56
66
|
...raw,
|
|
57
|
-
firestore: { ...DEFAULT_CONFIG.firestore, ...raw.firestore ?? {} }
|
|
67
|
+
firestore: { ...DEFAULT_CONFIG.firestore, ...raw.firestore ?? {} },
|
|
68
|
+
dataconnect: { ...DEFAULT_CONFIG.dataconnect, ...raw.dataconnect ?? {} },
|
|
69
|
+
local: { ...DEFAULT_CONFIG.local, ...raw.local ?? {} }
|
|
58
70
|
};
|
|
59
71
|
} catch {
|
|
60
72
|
return { ...DEFAULT_CONFIG };
|
|
@@ -75,6 +87,7 @@ function resolveApiKey(config) {
|
|
|
75
87
|
|
|
76
88
|
// src/providers/openrouter.ts
|
|
77
89
|
// OpenRouter API root. The OPENROUTER_BASE_URL environment variable overrides it (one trailing
// "/" is stripped); an unset or empty override falls back to the public endpoint.
var BASE = globalThis.process?.env?.OPENROUTER_BASE_URL?.replace(/\/$/, "") || "https://openrouter.ai/api/v1";
|
|
90
|
+
// Model-id prefix marking a model served by the local OpenAI-compatible server; it is prepended
// when local models are parsed and stripped again before the request is sent.
var LOCAL_PREFIX = "local/";
|
|
78
91
|
var OpenRouterError = class extends Error {
|
|
79
92
|
status;
|
|
80
93
|
constructor(message, status) {
|
|
@@ -87,10 +100,12 @@ var OpenRouterClient = class {
|
|
|
87
100
|
apiKey;
|
|
88
101
|
referer;
|
|
89
102
|
title;
|
|
103
|
+
localBaseUrl;
|
|
90
104
|
constructor(opts = {}) {
|
|
91
105
|
this.apiKey = opts.apiKey;
|
|
92
106
|
this.referer = opts.referer ?? "https://github.com/polymath-agent";
|
|
93
107
|
this.title = opts.title ?? "Polymath";
|
|
108
|
+
this.localBaseUrl = opts.localBaseUrl?.replace(/\/$/, "");
|
|
94
109
|
}
|
|
95
110
|
headers(json = true) {
|
|
96
111
|
const h = {
|
|
@@ -101,6 +116,24 @@ var OpenRouterClient = class {
|
|
|
101
116
|
if (json) h["Content-Type"] = "application/json";
|
|
102
117
|
return h;
|
|
103
118
|
}
|
|
119
|
+
/** Resolve where a model's request goes: the local server for `local/*`, else OpenRouter. */
|
|
120
|
+
target(modelId) {
|
|
121
|
+
if (this.localBaseUrl && modelId.startsWith(LOCAL_PREFIX)) {
|
|
122
|
+
return { base: this.localBaseUrl, model: modelId.slice(LOCAL_PREFIX.length), isLocal: true };
|
|
123
|
+
}
|
|
124
|
+
return { base: BASE, model: modelId, isLocal: false };
|
|
125
|
+
}
|
|
126
|
+
requireKeyFor(isLocal) {
|
|
127
|
+
if (!isLocal && !this.apiKey) throw new OpenRouterError("No API key set. Run `poly login`.");
|
|
128
|
+
}
|
|
129
|
+
/** List models from the local OpenAI-compatible server (Ollama / LM Studio). */
|
|
130
|
+
async listLocalRawModels() {
|
|
131
|
+
if (!this.localBaseUrl) return [];
|
|
132
|
+
const res = await fetch(`${this.localBaseUrl}/models`);
|
|
133
|
+
if (!res.ok) throw new OpenRouterError(`Local server: failed to list models (${res.status})`, res.status);
|
|
134
|
+
const json = await res.json();
|
|
135
|
+
return json.data ?? [];
|
|
136
|
+
}
|
|
104
137
|
/** Raw /models payload (no auth required). */
|
|
105
138
|
async listRawModels() {
|
|
106
139
|
const res = await fetch(`${BASE}/models`, { headers: this.headers(false) });
|
|
@@ -120,24 +153,28 @@ var OpenRouterClient = class {
|
|
|
120
153
|
const d = json.data ?? {};
|
|
121
154
|
return { label: d.label, usage: d.usage, limit: d.limit };
|
|
122
155
|
}
|
|
123
|
-
buildBody(req, stream) {
|
|
156
|
+
buildBody(req, stream, modelOverride, isLocal) {
|
|
124
157
|
return {
|
|
125
|
-
model:
|
|
158
|
+
model: modelOverride,
|
|
126
159
|
messages: req.messages.map(serializeMessage),
|
|
127
160
|
...req.tools && req.tools.length ? { tools: req.tools, tool_choice: "auto" } : {},
|
|
128
161
|
temperature: req.temperature ?? 0.2,
|
|
129
162
|
...req.maxTokens ? { max_tokens: req.maxTokens } : {},
|
|
130
163
|
stream,
|
|
131
|
-
|
|
164
|
+
// OpenRouter-specific accounting param; local servers may reject unknown fields.
|
|
165
|
+
...isLocal ? {} : { usage: { include: true } },
|
|
166
|
+
// OpenAI-compat way to get token usage in the final stream chunk (Ollama/LM Studio).
|
|
167
|
+
...isLocal && stream ? { stream_options: { include_usage: true } } : {}
|
|
132
168
|
};
|
|
133
169
|
}
|
|
134
170
|
/** Non-streaming completion. costUsd is computed from `pricing` (deterministic). */
|
|
135
171
|
async complete(req, pricing) {
|
|
136
|
-
|
|
137
|
-
|
|
172
|
+
const t = this.target(req.model);
|
|
173
|
+
this.requireKeyFor(t.isLocal);
|
|
174
|
+
const res = await fetch(`${t.base}/chat/completions`, {
|
|
138
175
|
method: "POST",
|
|
139
176
|
headers: this.headers(),
|
|
140
|
-
body: JSON.stringify(this.buildBody(req, false))
|
|
177
|
+
body: JSON.stringify(this.buildBody(req, false, t.model, t.isLocal))
|
|
141
178
|
});
|
|
142
179
|
if (!res.ok) {
|
|
143
180
|
const text = await res.text().catch(() => "");
|
|
@@ -158,8 +195,10 @@ var OpenRouterClient = class {
|
|
|
158
195
|
content: typeof msg.content === "string" ? msg.content : "",
|
|
159
196
|
toolCalls: parseToolCalls(msg.tool_calls),
|
|
160
197
|
usage,
|
|
161
|
-
|
|
162
|
-
|
|
198
|
+
// Keep the prefixed id for local models so the ledger stays consistent.
|
|
199
|
+
model: t.isLocal ? req.model : json.model ?? req.model,
|
|
200
|
+
// Local inference is free regardless of what the server claims to report.
|
|
201
|
+
costUsd: computeCost(usage, pricing, t.isLocal ? void 0 : json.usage?.cost),
|
|
163
202
|
finishReason: choice.finish_reason ?? null
|
|
164
203
|
};
|
|
165
204
|
}
|
|
@@ -168,11 +207,12 @@ var OpenRouterClient = class {
|
|
|
168
207
|
* Tool-call deltas are accumulated and surfaced in the final result.
|
|
169
208
|
*/
|
|
170
209
|
async *stream(req, pricing) {
|
|
171
|
-
|
|
172
|
-
|
|
210
|
+
const t = this.target(req.model);
|
|
211
|
+
this.requireKeyFor(t.isLocal);
|
|
212
|
+
const res = await fetch(`${t.base}/chat/completions`, {
|
|
173
213
|
method: "POST",
|
|
174
214
|
headers: this.headers(),
|
|
175
|
-
body: JSON.stringify(this.buildBody(req, true))
|
|
215
|
+
body: JSON.stringify(this.buildBody(req, true, t.model, t.isLocal))
|
|
176
216
|
});
|
|
177
217
|
if (!res.ok || !res.body) {
|
|
178
218
|
const text = await res.text().catch(() => "");
|
|
@@ -206,7 +246,7 @@ var OpenRouterClient = class {
|
|
|
206
246
|
if (evt?.error) {
|
|
207
247
|
throw new OpenRouterError(evt.error.message ?? "Stream provider error", evt.error.code);
|
|
208
248
|
}
|
|
209
|
-
if (evt.model) model = evt.model;
|
|
249
|
+
if (evt.model && !t.isLocal) model = evt.model;
|
|
210
250
|
if (evt.usage) usageJson = evt.usage;
|
|
211
251
|
const choice = evt.choices?.[0];
|
|
212
252
|
if (!choice) continue;
|
|
@@ -233,17 +273,17 @@ var OpenRouterClient = class {
|
|
|
233
273
|
completionTokens: usageJson?.completion_tokens ?? 0,
|
|
234
274
|
totalTokens: usageJson?.total_tokens ?? 0
|
|
235
275
|
};
|
|
236
|
-
const toolCalls = [...toolAcc.values()].filter((
|
|
237
|
-
id:
|
|
276
|
+
const toolCalls = [...toolAcc.values()].filter((t2) => t2.name).map((t2) => ({
|
|
277
|
+
id: t2.id || `call_${t2.name}`,
|
|
238
278
|
type: "function",
|
|
239
|
-
function: { name:
|
|
279
|
+
function: { name: t2.name, arguments: t2.args || "{}" }
|
|
240
280
|
}));
|
|
241
281
|
return {
|
|
242
282
|
content,
|
|
243
283
|
toolCalls,
|
|
244
284
|
usage,
|
|
245
285
|
model,
|
|
246
|
-
costUsd: computeCost(usage, pricing, usageJson?.cost),
|
|
286
|
+
costUsd: computeCost(usage, pricing, t.isLocal ? void 0 : usageJson?.cost),
|
|
247
287
|
finishReason
|
|
248
288
|
};
|
|
249
289
|
}
|
|
@@ -381,6 +421,38 @@ async function getModels(client2, opts = {}) {
|
|
|
381
421
|
return models;
|
|
382
422
|
}
|
|
383
423
|
|
|
424
|
+
// src/models/local.ts
|
|
425
|
+
/**
 * Convert the local server's raw model entries into catalogue records.
 *
 * Entries without a truthy `id` are dropped. Every record gets the local prefix on its id,
 * zero pricing, and a context length of 8192 when the server reports none.
 *
 * @param {Iterable<object>} raw - Entries from the local `/models` payload.
 * @returns {object[]} one record per usable entry, in input order.
 */
function parseLocalModels(raw) {
  const visionHint = /llava|vision|vl\b|moondream/i;
  return [...raw]
    .filter((entry) => entry?.id)
    .map((entry) => {
      const bareName = String(entry.id);
      return {
        id: LOCAL_PREFIX + bareName,
        name: `${bareName} (local)`,
        provider: "local",
        contextLength: entry.context_length ?? 8192,
        pricing: { promptUsdPerMTok: 0, completionUsdPerMTok: 0 },
        tier: classifyTier(bareName, 0),
        capabilities: {
          // Tool schemas are passed through by OpenAI-compatible local servers; a model that
          // cannot call tools just answers in text, which the agent loop copes with.
          tools: true,
          vision: visionHint.test(bareName)
        }
      };
    });
}
|
|
447
|
+
/**
 * Best-effort listing of local models: any failure (server down, bad payload, parse error)
 * yields an empty list instead of an exception.
 *
 * @param {object} client2 - Client exposing `listLocalRawModels()`.
 * @returns {Promise<object[]>}
 */
async function getLocalModels(client2) {
  const fetchAndParse = async () => parseLocalModels(await client2.listLocalRawModels());
  try {
    return await fetchAndParse();
  } catch {
    return [];
  }
}
|
|
455
|
+
|
|
384
456
|
// src/auth/onboarding.ts
|
|
385
457
|
import readline from "node:readline";
|
|
386
458
|
|
|
@@ -560,7 +632,7 @@ function heuristicPlan(goal) {
|
|
|
560
632
|
];
|
|
561
633
|
return { goal, steps };
|
|
562
634
|
}
|
|
563
|
-
async function planRequest(goal, client2, planModel) {
|
|
635
|
+
async function planRequest(goal, client2, planModel, onUsage) {
|
|
564
636
|
const result = await client2.complete(
|
|
565
637
|
{
|
|
566
638
|
model: planModel.id,
|
|
@@ -573,6 +645,7 @@ async function planRequest(goal, client2, planModel) {
|
|
|
573
645
|
},
|
|
574
646
|
planModel.pricing
|
|
575
647
|
);
|
|
648
|
+
onUsage?.(result);
|
|
576
649
|
const parsed = extractPlan(result.content);
|
|
577
650
|
if (!parsed) return heuristicPlan(goal);
|
|
578
651
|
return { goal, steps: parsed };
|
|
@@ -716,8 +789,11 @@ var HEADLINE_SKILLS = ["coding", "reasoning", "retrieval", "speed"];
|
|
|
716
789
|
/**
 * Estimated USD cost of running `est` tokens on model `m`.
 *
 * @param {{ pricing: { promptUsdPerMTok: number, completionUsdPerMTok: number } }} m
 * @param {{ promptTokens: number, completionTokens: number }} est
 * @returns {number} prompt cost plus completion cost, prices being per million tokens.
 */
function projectCost(m, est) {
  const { promptUsdPerMTok, completionUsdPerMTok } = m.pricing;
  const promptUsd = est.promptTokens / 1e6 * promptUsdPerMTok;
  const completionUsd = est.completionTokens / 1e6 * completionUsdPerMTok;
  return promptUsd + completionUsd;
}
|
|
719
|
-
function taskValue(m, taskType) {
|
|
720
|
-
|
|
792
|
+
/**
 * Strength-per-dollar score of model `m` for `taskType`, optionally boosted by measured savings.
 *
 * @param {object} m - Model record (needs `id` plus whatever taskStrength/blendedPrice read).
 * @param {string} taskType
 * @param {Record<string, number>} [empirical] - Savings percentages keyed by "<taskType>:<modelId>".
 * @returns {number} base score multiplied by 1..2 (savings are capped at 100%).
 */
function taskValue(m, taskType, empirical) {
  const strength = taskStrength(m, taskType);
  // Price floor of 0.01 keeps free or near-free models from producing an unbounded score.
  const price = Math.max(blendedPrice(m), 0.01);
  const baseScore = strength / price;
  const savingsPct = empirical?.[`${taskType}:${m.id}`];
  if (!savingsPct) {
    return baseScore * 1;
  }
  const multiplier = 1 + Math.min(savingsPct, 100) / 100;
  return baseScore * multiplier;
}
|
|
722
798
|
function candidatesFor(taskType, models, policy, est) {
|
|
723
799
|
const spec = TASK_SPECS[taskType];
|
|
@@ -746,7 +822,9 @@ function rank(models, policy, taskType) {
|
|
|
746
822
|
break;
|
|
747
823
|
case "value":
|
|
748
824
|
default:
|
|
749
|
-
sorted.sort(
|
|
825
|
+
sorted.sort(
|
|
826
|
+
(a, b) => taskValue(b, taskType, policy.empirical) - taskValue(a, taskType, policy.empirical)
|
|
827
|
+
);
|
|
750
828
|
break;
|
|
751
829
|
}
|
|
752
830
|
return sorted;
|
|
@@ -764,7 +842,8 @@ function route(taskType, models, policy, est = { promptTokens: 4e3, completionTo
|
|
|
764
842
|
const ranked = rank(cands, policy, taskType);
|
|
765
843
|
const chosen = ranked[0];
|
|
766
844
|
const skill = TASK_SKILL[taskType];
|
|
767
|
-
const
|
|
845
|
+
const proven = policy.empirical?.[`${taskType}:${chosen.id}`];
|
|
846
|
+
const reason = policy.objective === "cheapest" ? `cheapest model that covers ${skill}` : policy.objective === "quality" ? `strongest at ${skill}` : proven ? `proven ${Math.round(proven)}% fewer tokens on ${taskType} (playbook)` : `best ${skill}-per-dollar`;
|
|
768
847
|
return { model: chosen, reason, estCostUsd: projectCost(chosen, est) };
|
|
769
848
|
}
|
|
770
849
|
|
|
@@ -935,14 +1014,95 @@ function getDb() {
|
|
|
935
1014
|
);
|
|
936
1015
|
CREATE INDEX IF NOT EXISTS idx_usage_date ON usage_log(date);
|
|
937
1016
|
CREATE INDEX IF NOT EXISTS idx_usage_model ON usage_log(model);
|
|
1017
|
+
|
|
1018
|
+
-- One row per agent session (a \`poly run\`): goal + outcome + achievement scores.
|
|
1019
|
+
CREATE TABLE IF NOT EXISTS sessions (
|
|
1020
|
+
id TEXT PRIMARY KEY,
|
|
1021
|
+
ts INTEGER NOT NULL,
|
|
1022
|
+
date TEXT NOT NULL,
|
|
1023
|
+
goal TEXT NOT NULL,
|
|
1024
|
+
command TEXT NOT NULL DEFAULT 'run',
|
|
1025
|
+
objective TEXT NOT NULL,
|
|
1026
|
+
planned_steps INTEGER NOT NULL DEFAULT 0,
|
|
1027
|
+
completed_steps INTEGER NOT NULL DEFAULT 0,
|
|
1028
|
+
failed_steps INTEGER NOT NULL DEFAULT 0,
|
|
1029
|
+
auto_score REAL, -- 0..1 = completed/planned (agent-computed)
|
|
1030
|
+
user_score INTEGER, -- 0..9 user-rated goal achievement (nullable)
|
|
1031
|
+
prompt_tokens INTEGER NOT NULL DEFAULT 0,
|
|
1032
|
+
completion_tokens INTEGER NOT NULL DEFAULT 0,
|
|
1033
|
+
cost_usd REAL NOT NULL DEFAULT 0,
|
|
1034
|
+
duration_ms INTEGER NOT NULL DEFAULT 0,
|
|
1035
|
+
synced INTEGER NOT NULL DEFAULT 0
|
|
1036
|
+
);
|
|
1037
|
+
CREATE INDEX IF NOT EXISTS idx_sessions_date ON sessions(date);
|
|
1038
|
+
|
|
1039
|
+
-- One row per executed plan step: which model, how many round-trips, how it ended.
|
|
1040
|
+
CREATE TABLE IF NOT EXISTS step_runs (
|
|
1041
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
1042
|
+
session_id TEXT NOT NULL,
|
|
1043
|
+
step_no INTEGER NOT NULL,
|
|
1044
|
+
task_type TEXT NOT NULL,
|
|
1045
|
+
skill TEXT NOT NULL,
|
|
1046
|
+
model TEXT NOT NULL,
|
|
1047
|
+
provider TEXT NOT NULL,
|
|
1048
|
+
iterations INTEGER NOT NULL, -- LLM round-trips used for this step
|
|
1049
|
+
tool_calls INTEGER NOT NULL,
|
|
1050
|
+
prompt_tokens INTEGER NOT NULL,
|
|
1051
|
+
completion_tokens INTEGER NOT NULL,
|
|
1052
|
+
cost_usd REAL NOT NULL,
|
|
1053
|
+
finished_by TEXT NOT NULL, -- 'finish-tool' | 'text' | 'max-iters' | 'error'
|
|
1054
|
+
success INTEGER NOT NULL, -- 1 = ended cleanly (finish-tool or text)
|
|
1055
|
+
duration_ms INTEGER NOT NULL,
|
|
1056
|
+
synced INTEGER NOT NULL DEFAULT 0
|
|
1057
|
+
);
|
|
1058
|
+
CREATE INDEX IF NOT EXISTS idx_steps_session ON step_runs(session_id);
|
|
1059
|
+
CREATE INDEX IF NOT EXISTS idx_steps_model ON step_runs(model, task_type);
|
|
1060
|
+
|
|
1061
|
+
-- One row per CLI command invocation (run/recommend/...): tokens spent per command.
|
|
1062
|
+
CREATE TABLE IF NOT EXISTS command_runs (
|
|
1063
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
1064
|
+
session_id TEXT,
|
|
1065
|
+
ts INTEGER NOT NULL,
|
|
1066
|
+
date TEXT NOT NULL,
|
|
1067
|
+
command TEXT NOT NULL,
|
|
1068
|
+
args TEXT,
|
|
1069
|
+
objective TEXT,
|
|
1070
|
+
prompt_tokens INTEGER NOT NULL DEFAULT 0,
|
|
1071
|
+
completion_tokens INTEGER NOT NULL DEFAULT 0,
|
|
1072
|
+
cost_usd REAL NOT NULL DEFAULT 0,
|
|
1073
|
+
duration_ms INTEGER NOT NULL DEFAULT 0,
|
|
1074
|
+
synced INTEGER NOT NULL DEFAULT 0
|
|
1075
|
+
);
|
|
1076
|
+
CREATE INDEX IF NOT EXISTS idx_cmd_date ON command_runs(date);
|
|
1077
|
+
|
|
1078
|
+
-- Distilled efficiency insights: ONLY the notably cost-efficient approaches.
|
|
1079
|
+
-- This is what syncs to the cloud by default (raw logs stay local).
|
|
1080
|
+
CREATE TABLE IF NOT EXISTS insights (
|
|
1081
|
+
id TEXT PRIMARY KEY, -- "<task_type>__<model>"
|
|
1082
|
+
computed_at INTEGER NOT NULL,
|
|
1083
|
+
task_type TEXT NOT NULL,
|
|
1084
|
+
model TEXT NOT NULL,
|
|
1085
|
+
provider TEXT NOT NULL,
|
|
1086
|
+
samples INTEGER NOT NULL, -- successful steps observed
|
|
1087
|
+
success_rate REAL NOT NULL,
|
|
1088
|
+
avg_tokens REAL NOT NULL, -- per successful step
|
|
1089
|
+
baseline_tokens REAL NOT NULL, -- median across qualified competitors
|
|
1090
|
+
savings_pct REAL NOT NULL, -- vs baseline (the "\uC720\uB3C5" margin)
|
|
1091
|
+
avg_cost_usd REAL NOT NULL,
|
|
1092
|
+
synced INTEGER NOT NULL DEFAULT 0
|
|
1093
|
+
);
|
|
938
1094
|
`);
|
|
1095
|
+
const cols = db.prepare(`PRAGMA table_info(usage_log)`).all();
|
|
1096
|
+
if (!cols.some((c2) => c2.name === "command")) {
|
|
1097
|
+
db.exec(`ALTER TABLE usage_log ADD COLUMN command TEXT NOT NULL DEFAULT 'run'`);
|
|
1098
|
+
}
|
|
939
1099
|
return db;
|
|
940
1100
|
}
|
|
941
1101
|
function recordUsage(e) {
|
|
942
1102
|
const stmt = getDb().prepare(`
|
|
943
1103
|
INSERT INTO usage_log
|
|
944
|
-
(ts, date, provider, model, task_type, prompt_tokens, completion_tokens, total_tokens, cost_usd, session_id)
|
|
945
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
1104
|
+
(ts, date, provider, model, task_type, prompt_tokens, completion_tokens, total_tokens, cost_usd, session_id, command)
|
|
1105
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
946
1106
|
`);
|
|
947
1107
|
stmt.run(
|
|
948
1108
|
e.ts,
|
|
@@ -954,7 +1114,8 @@ function recordUsage(e) {
|
|
|
954
1114
|
e.completionTokens,
|
|
955
1115
|
e.totalTokens,
|
|
956
1116
|
e.costUsd,
|
|
957
|
-
e.sessionId ?? null
|
|
1117
|
+
e.sessionId ?? null,
|
|
1118
|
+
e.command ?? "run"
|
|
958
1119
|
);
|
|
959
1120
|
}
|
|
960
1121
|
function reportByDateModel(filter = {}) {
|
|
@@ -1016,7 +1177,8 @@ function unsyncedRows() {
|
|
|
1016
1177
|
completionTokens: Number(r.completion_tokens),
|
|
1017
1178
|
totalTokens: Number(r.total_tokens),
|
|
1018
1179
|
costUsd: Number(r.cost_usd),
|
|
1019
|
-
sessionId: r.session_id ? String(r.session_id) : void 0
|
|
1180
|
+
sessionId: r.session_id ? String(r.session_id) : void 0,
|
|
1181
|
+
command: r.command ? String(r.command) : "run"
|
|
1020
1182
|
}));
|
|
1021
1183
|
}
|
|
1022
1184
|
function markSynced(ids) {
|
|
@@ -1024,6 +1186,269 @@ function markSynced(ids) {
|
|
|
1024
1186
|
const stmt = getDb().prepare(`UPDATE usage_log SET synced = 1 WHERE id = ?`);
|
|
1025
1187
|
for (const id of ids) stmt.run(id);
|
|
1026
1188
|
}
|
|
1189
|
+
/**
 * Create the `sessions` row for a new run. An existing row with the same id is replaced,
 * which resets every column not listed here to its default.
 *
 * @param {{ id: string, ts: number, date: string, goal: string, command: string,
 *           objective: string, plannedSteps: number }} s
 */
function startSession(s) {
  const insert = getDb().prepare(
    `INSERT OR REPLACE INTO sessions (id, ts, date, goal, command, objective, planned_steps)
     VALUES (?, ?, ?, ?, ?, ?, ?)`
  );
  const { id, ts, date, goal, command, objective, plannedSteps } = s;
  insert.run(id, ts, date, goal, command, objective, plannedSteps);
}
|
|
1195
|
+
/**
 * Write the final outcome of a session (step counts, auto score, token/cost totals, duration).
 *
 * @param {string} id - Session id.
 * @param {{ plannedSteps: number, completedSteps: number, failedSteps: number, autoScore: number,
 *           promptTokens: number, completionTokens: number, costUsd: number, durationMs: number }} u
 */
function finishSession(id, u) {
  const update = getDb().prepare(
    `UPDATE sessions SET planned_steps=?, completed_steps=?, failed_steps=?, auto_score=?,
     prompt_tokens=?, completion_tokens=?, cost_usd=?, duration_ms=? WHERE id=?`
  );
  // Values in the same order as the SET list above; the id binds the trailing WHERE.
  const values = [
    u.plannedSteps,
    u.completedSteps,
    u.failedSteps,
    u.autoScore,
    u.promptTokens,
    u.completionTokens,
    u.costUsd,
    u.durationMs
  ];
  update.run(...values, id);
}
|
|
1211
|
+
/**
 * Store the user's rating for a session.
 *
 * @param {string} sessionId
 * @param {number} score
 */
function setUserScore(sessionId, score) {
  const update = getDb().prepare(`UPDATE sessions SET user_score=? WHERE id=?`);
  update.run(score, sessionId);
}
|
|
1214
|
+
/**
 * Append one `step_runs` row describing an executed plan step.
 *
 * @param {object} s - Step record; `success` is stored as 1/0, all other fields as given.
 */
function recordStepRun(s) {
  const insert = getDb().prepare(
    `INSERT INTO step_runs
     (session_id, step_no, task_type, skill, model, provider, iterations, tool_calls,
     prompt_tokens, completion_tokens, cost_usd, finished_by, success, duration_ms)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  // Same order as the column list above.
  const values = [
    s.sessionId,
    s.stepNo,
    s.taskType,
    s.skill,
    s.model,
    s.provider,
    s.iterations,
    s.toolCalls,
    s.promptTokens,
    s.completionTokens,
    s.costUsd,
    s.finishedBy,
    s.success ? 1 : 0,
    s.durationMs
  ];
  insert.run(...values);
}
|
|
1237
|
+
/**
 * Append one `command_runs` row for a CLI invocation.
 *
 * @param {object} c2 - Command record; `sessionId`, `args` and `objective` are optional and
 *   stored as NULL when missing.
 */
function recordCommandRun(c2) {
  const insert = getDb().prepare(
    `INSERT INTO command_runs
     (session_id, ts, date, command, args, objective, prompt_tokens, completion_tokens, cost_usd, duration_ms)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  const orNull = (value) => value ?? null;
  insert.run(
    orNull(c2.sessionId),
    c2.ts,
    c2.date,
    c2.command,
    orNull(c2.args),
    orNull(c2.objective),
    c2.promptTokens,
    c2.completionTokens,
    c2.costUsd,
    c2.durationMs
  );
}
|
|
1255
|
+
/**
 * Sum the `usage_log` rows of one session.
 *
 * @param {string} sessionId
 * @returns {{ promptTokens: number, completionTokens: number, costUsd: number }} zeros when
 *   the session has no usage rows.
 */
function sessionUsageTotals(sessionId) {
  const totals = getDb().prepare(
    `SELECT COALESCE(SUM(prompt_tokens),0) AS p, COALESCE(SUM(completion_tokens),0) AS c, COALESCE(SUM(cost_usd),0) AS cost
     FROM usage_log WHERE session_id = ?`
  ).get(sessionId);
  const asNumber = (value) => Number(value ?? 0);
  return {
    promptTokens: asNumber(totals?.p),
    completionTokens: asNumber(totals?.c),
    costUsd: asNumber(totals?.cost)
  };
}
|
|
1262
|
+
/**
 * Aggregate `step_runs` per (task type, model): step count, success rate, average tokens and
 * cost per successful step, and average iterations.
 *
 * @param {{ since?: string, until?: string }} [filter] - Optional inclusive date window
 *   (compared against the local calendar date of the owning session's timestamp).
 * @returns {Array<{ taskType: string, model: string, steps: number, successRate: number,
 *   avgTokensPerSuccess: number, avgCostPerSuccess: number, avgIterations: number }>}
 *   Averages that SQL reports as NULL (no successful step) are returned as 0.
 */
function modelTaskEfficiency(filter = {}) {
  // step_runs has no timestamp column of its own, so the date window has to be applied to the
  // owning session's ts. LEFT JOIN keeps every step when no window is given; with a window,
  // steps whose session row is missing drop out because a NULL ts never satisfies the bound.
  const { whereSql, params } = dateWhere(filter, "ss.ts");
  const rows = getDb().prepare(
    `SELECT s.task_type AS taskType, s.model AS model,
     COUNT(*) AS steps,
     AVG(s.success) AS successRate,
     AVG(CASE WHEN s.success=1 THEN s.prompt_tokens + s.completion_tokens END) AS avgTokensPerSuccess,
     AVG(CASE WHEN s.success=1 THEN s.cost_usd END) AS avgCostPerSuccess,
     AVG(s.iterations) AS avgIterations
     FROM step_runs s
     LEFT JOIN sessions ss ON ss.id = s.session_id
     ${whereSql}
     GROUP BY s.task_type, s.model
     ORDER BY s.task_type, avgTokensPerSuccess ASC`
  ).all(...params);
  return rows.map((r) => ({
    taskType: String(r.taskType),
    model: String(r.model),
    steps: Number(r.steps),
    successRate: Number(r.successRate ?? 0),
    avgTokensPerSuccess: Number(r.avgTokensPerSuccess ?? 0),
    avgCostPerSuccess: Number(r.avgCostPerSuccess ?? 0),
    avgIterations: Number(r.avgIterations ?? 0)
  }));
}
|
|
1285
|
+
/**
 * Aggregate `sessions` per routing objective: session count, average tokens, cost and scores.
 *
 * @param {{ since?: string, until?: string }} [filter] - Optional inclusive date window.
 * @returns {Array<{ objective: string, sessions: number, avgTokens: number, avgCostUsd: number,
 *   avgAutoScore: number|null, avgUserScore: number|null }>} ordered by average tokens, ascending;
 *   score averages stay null when no session in the group has a score.
 */
function objectiveEfficiency(filter = {}) {
  const { whereSql, params } = dateWhere(filter, "ts");
  const sql = `SELECT objective,
     COUNT(*) AS sessions,
     AVG(prompt_tokens + completion_tokens) AS avgTokens,
     AVG(cost_usd) AS avgCostUsd,
     AVG(auto_score) AS avgAutoScore,
     AVG(user_score) AS avgUserScore
     FROM sessions ${whereSql}
     GROUP BY objective ORDER BY avgTokens ASC`;
  const numberOrNull = (value) => (value == null ? null : Number(value));
  const groups = getDb().prepare(sql).all(...params);
  return groups.map((group) => ({
    objective: String(group.objective),
    sessions: Number(group.sessions),
    avgTokens: Number(group.avgTokens ?? 0),
    avgCostUsd: Number(group.avgCostUsd ?? 0),
    avgAutoScore: numberOrNull(group.avgAutoScore),
    avgUserScore: numberOrNull(group.avgUserScore)
  }));
}
|
|
1306
|
+
/**
 * Total tokens and cost per CLI command, from `command_runs`.
 *
 * @param {{ since?: string, until?: string }} [filter] - Optional inclusive date window.
 * @returns {Array<{ command: string, runs: number, promptTokens: number,
 *   completionTokens: number, costUsd: number }>} ordered by cost, highest first.
 */
function commandUsage(filter = {}) {
  const { whereSql, params } = dateWhere(filter, "ts");
  const sql = `SELECT command, COUNT(*) AS runs,
     SUM(prompt_tokens) AS promptTokens,
     SUM(completion_tokens) AS completionTokens,
     SUM(cost_usd) AS costUsd
     FROM command_runs ${whereSql}
     GROUP BY command ORDER BY costUsd DESC`;
  const orZero = (value) => Number(value ?? 0);
  const groups = getDb().prepare(sql).all(...params);
  return groups.map((group) => ({
    command: String(group.command),
    runs: Number(group.runs),
    promptTokens: orZero(group.promptTokens),
    completionTokens: orZero(group.completionTokens),
    costUsd: orZero(group.costUsd)
  }));
}
|
|
1324
|
+
/**
 * Build an optional SQL WHERE clause restricting a millisecond-epoch column to a date window.
 *
 * @param {{ since?: string, until?: string }} filter - Inclusive bounds; falsy bounds are ignored.
 * @param {string} tsCol - Column expression holding epoch milliseconds. It is interpolated into
 *   the SQL text verbatim, so it must be a trusted identifier, never user input.
 * @returns {{ whereSql: string, params: string[] }} `whereSql` is "" when no bound is set;
 *   `params` holds the bound values in placeholder order.
 */
function dateWhere(filter, tsCol) {
  const localDay = `date(${tsCol}/1000, 'unixepoch', 'localtime')`;
  const bounds = [
    [filter.since, ">="],
    [filter.until, "<="]
  ].filter(([value]) => value);
  const clauses = bounds.map(([, operator]) => `${localDay} ${operator} ?`);
  const params = bounds.map(([value]) => value);
  const whereSql = clauses.length ? `WHERE ${clauses.join(" AND ")}` : "";
  return { whereSql, params };
}
|
|
1337
|
+
/**
 * Read up to 200 `sessions` rows not yet marked as synced, converted to camelCase records.
 *
 * @returns {object[]} each record carries `_table: "sessions"`; `autoScore` and `userScore`
 *   stay null when the column is NULL.
 */
function unsyncedSessions() {
  const pending = getDb().prepare(`SELECT * FROM sessions WHERE synced=0 LIMIT 200`).all();
  const numberOrNull = (value) => (value == null ? null : Number(value));
  return pending.map((row) => {
    const record = {
      _table: "sessions",
      id: String(row.id),
      ts: Number(row.ts),
      date: String(row.date),
      goal: String(row.goal),
      command: String(row.command),
      objective: String(row.objective),
      plannedSteps: Number(row.planned_steps),
      completedSteps: Number(row.completed_steps),
      failedSteps: Number(row.failed_steps),
      autoScore: numberOrNull(row.auto_score),
      userScore: numberOrNull(row.user_score),
      promptTokens: Number(row.prompt_tokens),
      completionTokens: Number(row.completion_tokens),
      costUsd: Number(row.cost_usd),
      durationMs: Number(row.duration_ms)
    };
    return record;
  });
}
|
|
1358
|
+
/**
 * Read up to 500 `step_runs` rows not yet marked as synced, converted to camelCase records.
 *
 * @returns {object[]} `success` is a boolean (true when the stored value is 1).
 */
function unsyncedStepRuns() {
  const pending = getDb().prepare(`SELECT * FROM step_runs WHERE synced=0 LIMIT 500`).all();
  return pending.map((row) => {
    const record = {
      id: Number(row.id),
      sessionId: String(row.session_id),
      stepNo: Number(row.step_no),
      taskType: String(row.task_type),
      skill: String(row.skill),
      model: String(row.model),
      provider: String(row.provider),
      iterations: Number(row.iterations),
      toolCalls: Number(row.tool_calls),
      promptTokens: Number(row.prompt_tokens),
      completionTokens: Number(row.completion_tokens),
      costUsd: Number(row.cost_usd),
      finishedBy: String(row.finished_by),
      success: Number(row.success) === 1,
      durationMs: Number(row.duration_ms)
    };
    return record;
  });
}
|
|
1378
|
+
/**
 * Read up to 500 `command_runs` rows not yet marked as synced, converted to camelCase records.
 *
 * @returns {object[]} `sessionId`, `args` and `objective` are undefined when the column is
 *   NULL or empty.
 */
function unsyncedCommandRuns() {
  const pending = getDb().prepare(`SELECT * FROM command_runs WHERE synced=0 LIMIT 500`).all();
  const optionalText = (value) => (value ? String(value) : void 0);
  return pending.map((row) => {
    const record = {
      id: Number(row.id),
      sessionId: optionalText(row.session_id),
      ts: Number(row.ts),
      date: String(row.date),
      command: String(row.command),
      args: optionalText(row.args),
      objective: optionalText(row.objective),
      promptTokens: Number(row.prompt_tokens),
      completionTokens: Number(row.completion_tokens),
      costUsd: Number(row.cost_usd),
      durationMs: Number(row.duration_ms)
    };
    return record;
  });
}
|
|
1394
|
+
/**
 * Flag rows of `table2` as synced, one UPDATE per id. Does nothing for an empty id list.
 *
 * @param {string} table2 - Table name; interpolated into the SQL text verbatim, so it must be
 *   a trusted identifier, never user input.
 * @param {Array<string|number>} ids - Primary-key values of the rows to flag.
 */
function markTableSynced(table2, ids) {
  const nothingToMark = !ids.length;
  if (nothingToMark) return;
  const markRow = getDb().prepare(`UPDATE ${table2} SET synced=1 WHERE id=?`);
  for (const rowId of ids) {
    markRow.run(rowId);
  }
}
|
|
1399
|
+
/**
 * Insert an insight row, or refresh the measured fields of the existing row with the same id.
 * Either way the row ends up with `synced = 0`, so it is uploaded again on the next sync.
 * On conflict, task_type, model and provider are left as originally stored.
 *
 * @param {object} i - Insight record (camelCase fields as produced by `mapInsight`).
 */
function upsertInsight(i) {
  const upsert = getDb().prepare(
    `INSERT INTO insights (id, computed_at, task_type, model, provider, samples, success_rate,
     avg_tokens, baseline_tokens, savings_pct, avg_cost_usd, synced)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0)
     ON CONFLICT(id) DO UPDATE SET
     computed_at=excluded.computed_at, samples=excluded.samples,
     success_rate=excluded.success_rate, avg_tokens=excluded.avg_tokens,
     baseline_tokens=excluded.baseline_tokens, savings_pct=excluded.savings_pct,
     avg_cost_usd=excluded.avg_cost_usd, synced=0`
  );
  // Same order as the INSERT column list (synced is a literal 0 in the statement).
  const values = [
    i.id,
    i.computedAt,
    i.taskType,
    i.model,
    i.provider,
    i.samples,
    i.successRate,
    i.avgTokens,
    i.baselineTokens,
    i.savingsPct,
    i.avgCostUsd
  ];
  upsert.run(...values);
}
|
|
1423
|
+
/**
 * Remove every stored insight whose id is not in `validIds`.
 *
 * @param {Iterable<string>} validIds - Ids to keep.
 */
function deleteInsightsExcept(validIds) {
  const stored = getDb().prepare(`SELECT id FROM insights`).all();
  const keep = new Set(validIds);
  const remove = getDb().prepare(`DELETE FROM insights WHERE id=?`);
  stored
    .map((row) => String(row.id))
    .filter((storedId) => !keep.has(storedId))
    .forEach((staleId) => {
      remove.run(staleId);
    });
}
|
|
1429
|
+
/**
 * All stored insights, largest savings first.
 *
 * @returns {object[]} camelCase insight records.
 */
function listInsights() {
  const select = getDb().prepare(`SELECT * FROM insights ORDER BY savings_pct DESC`);
  return select.all().map((row) => mapInsight(row));
}
|
|
1433
|
+
/**
 * Stored insights that are not yet marked as synced.
 *
 * @returns {object[]} camelCase insight records.
 */
function unsyncedInsights() {
  const select = getDb().prepare(`SELECT * FROM insights WHERE synced=0`);
  return select.all().map((row) => mapInsight(row));
}
|
|
1437
|
+
/**
 * Convert one `insights` database row (snake_case columns) into a camelCase record,
 * coercing text columns with String() and numeric columns with Number().
 *
 * @param {object} r - Raw row.
 * @returns {object} insight record.
 */
function mapInsight(r) {
  const text = (column) => String(r[column]);
  const number = (column) => Number(r[column]);
  return {
    id: text("id"),
    computedAt: number("computed_at"),
    taskType: text("task_type"),
    model: text("model"),
    provider: text("provider"),
    samples: number("samples"),
    successRate: number("success_rate"),
    avgTokens: number("avg_tokens"),
    baselineTokens: number("baseline_tokens"),
    savingsPct: number("savings_pct"),
    avgCostUsd: number("avg_cost_usd")
  };
}
|
|
1027
1452
|
|
|
1028
1453
|
// src/usage/report.ts
|
|
1029
1454
|
function renderUsageReport(filter = {}) {
|
|
@@ -1066,8 +1491,180 @@ function renderUsageReport(filter = {}) {
|
|
|
1066
1491
|
].join("\n");
|
|
1067
1492
|
}
|
|
1068
1493
|
|
|
1494
|
+
// src/usage/insights.ts
|
|
1495
|
+
// Minimum number of recorded steps (successful or not) a (task type, model) pair needs
// before it can qualify for an insight.
var MIN_SAMPLES = 3;
|
|
1496
|
+
// Minimum success rate (0..1) a (task type, model) pair needs to qualify for an insight.
var MIN_SUCCESS = 0.7;
|
|
1497
|
+
// Minimum fractional token saving versus the median baseline (0.2 = 20%) for an insight to be kept.
var MIN_MARGIN = 0.2;
|
|
1498
|
+
/**
 * Median of a list of numbers; the input is not modified.
 *
 * @param {Iterable<number>} xs
 * @returns {number} the middle value, or the mean of the two middle values for an even count.
 *   An empty input yields NaN.
 */
function median(xs) {
  const ascending = [...xs];
  ascending.sort((left, right) => left - right);
  const count = ascending.length;
  const upperMiddle = Math.floor(count / 2);
  if (count % 2) {
    return ascending[upperMiddle];
  }
  const lowerMiddle = upperMiddle - 1;
  return (ascending[lowerMiddle] + ascending[upperMiddle]) / 2;
}
|
|
1503
|
+
/**
 * Recompute the insights table from all recorded step runs.
 *
 * For each task type, models with at least MIN_SAMPLES steps, a success rate of at least
 * MIN_SUCCESS and a positive average token count are "qualified". When two or more models
 * qualify, the median of their average tokens per successful step is the baseline; every
 * qualified model that undercuts it by at least MIN_MARGIN is upserted as an insight.
 * Insights that no longer qualify are deleted.
 *
 * @param {number} [now] - Timestamp stored as `computedAt` (defaults to Date.now()).
 * @returns {object[]} the insights stored after the recomputation, largest savings first.
 */
function distillInsights(now = Date.now()) {
  const byTask = /* @__PURE__ */ new Map();
  for (const row of modelTaskEfficiency()) {
    const bucket = byTask.get(row.taskType) ?? [];
    bucket.push(row);
    byTask.set(row.taskType, bucket);
  }
  const keptIds = [];
  for (const [taskType, rows] of byTask) {
    const qualified = rows.filter(
      (r) => r.steps >= MIN_SAMPLES && r.successRate >= MIN_SUCCESS && r.avgTokensPerSuccess > 0
    );
    // A single model has nothing to be compared against.
    if (qualified.length < 2) continue;
    const baseline = median(qualified.map((r) => r.avgTokensPerSuccess));
    for (const r of qualified) {
      const savings = 1 - r.avgTokensPerSuccess / baseline;
      if (!(savings >= MIN_MARGIN)) continue;
      const id = `${taskType}__${r.model}`;
      keptIds.push(id);
      // The provider is the part of the model id before the first "/". split("/")[0] is never
      // nullish, so an id without a slash used to be stored as its own provider; such ids
      // now get the intended "unknown".
      const slashAt = r.model.indexOf("/");
      const provider = slashAt > 0 ? r.model.slice(0, slashAt) : "unknown";
      upsertInsight({
        id,
        computedAt: now,
        taskType,
        model: r.model,
        provider,
        samples: r.steps,
        successRate: r.successRate,
        avgTokens: r.avgTokensPerSuccess,
        baselineTokens: baseline,
        savingsPct: savings * 100,
        avgCostUsd: r.avgCostPerSuccess
      });
    }
  }
  deleteInsightsExcept(keptIds);
  return listInsights();
}
|
|
1542
|
+
/**
 * Index insights by "<taskType>:<model>" for the router's empirical boost lookup.
 *
 * @param {Iterable<{ taskType: string, model: string, savingsPct: number }>} insights
 * @returns {Record<string, number>} savings percentage per key; a later duplicate key wins.
 */
function insightBoostMap(insights) {
  const entries = [...insights].map((insight) => [
    `${insight.taskType}:${insight.model}`,
    insight.savingsPct
  ]);
  return Object.fromEntries(entries);
}
|
|
1547
|
+
/**
 * Renders the "Efficiency playbook" section.
 *
 * With no insights it returns the heading plus a dimmed hint explaining what
 * is needed before anything can be distilled; otherwise it returns the heading
 * followed by one table row per insight.
 *
 * @param {Array<object>} insights - insight records (taskType, model, avgTokens,
 *   baselineTokens, savingsPct, successRate, samples)
 * @returns {string}
 */
function renderPlaybook(insights) {
  const heading = c.bold("Efficiency playbook");
  if (!insights.length) {
    const hint = c.dim(
      `Nothing distilled yet \u2014 needs \u22652 models with \u2265${MIN_SAMPLES} successful steps on the same task type,
where one beats the median by \u2265${MIN_MARGIN * 100}% tokens. Keep running tasks (vary models with -o / pins).`
    );
    return heading + "\n" + hint;
  }

  const subtitle = c.dim(" (the notably efficient approaches \u2014 this is what `poly sync` uploads)");
  const columns = ["Task", "Model", "Avg tok", "Baseline", "Savings", "Success", "n"];
  const body = insights.map((insight) => [
    insight.taskType,
    c.green(insight.model),
    tokens(Math.round(insight.avgTokens)),
    tokens(Math.round(insight.baselineTokens)),
    c.green(`-${insight.savingsPct.toFixed(0)}%`),
    `${Math.round(insight.successRate * 100)}%`,
    String(insight.samples)
  ]);
  return heading + subtitle + "\n" + table(columns, body);
}
|
|
1567
|
+
|
|
1568
|
+
// src/usage/analyze.ts
|
|
1569
|
+
// Minimum success rate a model needs to be eligible for the
// "Minimum-token model per task" table built by renderAnalysis().
var MIN_SUCCESS_RATE = 0.5;
|
|
1570
|
+
/**
 * Builds the full text report for the analysis command.
 *
 * Sections, in order: the efficiency playbook (insights are re-distilled
 * first), the cheapest reliable model per task plus the raw model x task
 * table, per-objective efficiency with an optional "winner" line, and usage
 * by command. A section is omitted when its data set is empty; if all three
 * data sets are empty a single dimmed hint is returned instead.
 *
 * @param {object} [filter={}] - passed through unchanged to the three query helpers
 * @returns {string} the report, sections joined by newlines
 */
function renderAnalysis(filter = {}) {
  const modelTaskRows = modelTaskEfficiency(filter);
  const objectiveRows = objectiveEfficiency(filter);
  const commandRows = commandUsage(filter);
  if (!modelTaskRows.length && !objectiveRows.length && !commandRows.length) {
    return c.dim('No analytics yet. Run `poly run "<task>"` a few times (and rate the result) first.');
  }

  const asPercent = (ratio) => `${Math.round(ratio * 100)}%`;
  const wholeTokens = (value) => tokens(Math.round(value));

  const sections = [];
  sections.push(renderPlaybook(distillInsights()));
  sections.push("");

  if (modelTaskRows.length) {
    // Group observations per task type, preserving first-seen order.
    const grouped = /* @__PURE__ */ new Map();
    for (const row of modelTaskRows) {
      const bucket = grouped.get(row.taskType);
      if (bucket) {
        bucket.push(row);
      } else {
        grouped.set(row.taskType, [row]);
      }
    }

    const bestRows = [];
    for (const [task, candidates] of grouped) {
      // Cheapest-first among models that are reliable enough.
      const ranked = candidates
        .filter((row) => row.successRate >= MIN_SUCCESS_RATE && row.avgTokensPerSuccess > 0)
        .sort((a, b) => a.avgTokensPerSuccess - b.avgTokensPerSuccess);
      const [best, runnerUp] = ranked;
      if (!best) {
        bestRows.push([task, c.dim("(no reliable model yet)"), "-", "-", "-"]);
        continue;
      }
      bestRows.push([
        task,
        c.green(best.model),
        wholeTokens(best.avgTokensPerSuccess),
        asPercent(best.successRate),
        runnerUp ? `${runnerUp.model} ${c.dim(wholeTokens(runnerUp.avgTokensPerSuccess))}` : c.dim("\u2014")
      ]);
    }

    sections.push(
      c.bold("Minimum-token model per task") + c.dim(` (successful steps only, success \u2265 ${MIN_SUCCESS_RATE * 100}%)`)
    );
    sections.push(table(["Task", "Best model", "Avg tok/success", "Success", "Runner-up"], bestRows));
    sections.push("");

    sections.push(c.bold("Model \xD7 task efficiency (all observations)"));
    const observationRows = modelTaskRows.map((row) => [
      row.taskType,
      row.model,
      String(row.steps),
      asPercent(row.successRate),
      row.avgTokensPerSuccess ? wholeTokens(row.avgTokensPerSuccess) : c.dim("-"),
      row.avgIterations.toFixed(1),
      row.avgCostPerSuccess ? usd(row.avgCostPerSuccess) : c.dim("-")
    ]);
    sections.push(
      table(["Task", "Model", "Steps", "Success", "Avg tok", "Avg iters", "Avg cost"], observationRows)
    );
    sections.push("");
  }

  if (objectiveRows.length) {
    sections.push(c.bold("Approach efficiency") + c.dim(" (routing objective: tokens spent vs goal achievement)"));
    const approachRows = objectiveRows.map((row) => [
      row.objective,
      String(row.sessions),
      wholeTokens(row.avgTokens),
      usd(row.avgCostUsd),
      row.avgAutoScore == null ? c.dim("-") : asPercent(row.avgAutoScore),
      row.avgUserScore == null ? c.dim("unrated") : `${row.avgUserScore.toFixed(1)}/9`
    ]);
    sections.push(
      table(["Objective", "Sessions", "Avg tokens", "Avg cost", "Auto score", "Your rating"], approachRows)
    );

    // Among objectives within 0.1 of the best auto score, highlight the cheapest.
    const scored = objectiveRows.filter((row) => row.avgAutoScore != null);
    if (scored.length >= 2) {
      const bestScore = Math.max(...scored.map((row) => row.avgAutoScore));
      const contenders = scored.filter((row) => row.avgAutoScore >= bestScore - 0.1);
      contenders.sort((a, b) => a.avgTokens - b.avgTokens);
      const winner = contenders[0];
      if (winner) {
        sections.push(
          c.green(
            `\u2192 Lowest-token approach with top-tier achievement: "${winner.objective}" (${tokens(Math.round(winner.avgTokens))} avg tokens, ${Math.round(winner.avgAutoScore * 100)}% auto score)`
          )
        );
      }
    }
    sections.push("");
  }

  if (commandRows.length) {
    sections.push(c.bold("Usage by command"));
    const usageRows = commandRows.map((row) => [
      row.command,
      String(row.runs),
      tokens(row.promptTokens),
      tokens(row.completionTokens),
      usd(row.costUsd)
    ]);
    sections.push(table(["Command", "Runs", "Prompt", "Compl.", "Cost"], usageRows));
  }

  return sections.join("\n");
}
|
|
1665
|
+
|
|
1069
1666
|
// src/usage/firestoreSync.ts
|
|
1070
|
-
async function syncUsage(config) {
|
|
1667
|
+
async function syncUsage(config, opts = {}) {
|
|
1071
1668
|
if (!config.firestore.enabled) {
|
|
1072
1669
|
return { synced: 0, message: "Firestore sync is disabled (enable with `poly config firestore on`)." };
|
|
1073
1670
|
}
|
|
@@ -1095,28 +1692,324 @@ async function syncUsage(config) {
|
|
|
1095
1692
|
}
|
|
1096
1693
|
}
|
|
1097
1694
|
const fdb = fsMod.getFirestore();
|
|
1695
|
+
distillInsights();
|
|
1696
|
+
const insights = unsyncedInsights();
|
|
1697
|
+
if (insights.length) {
|
|
1698
|
+
const batch = fdb.batch();
|
|
1699
|
+
const col = fdb.collection("polymath_insights");
|
|
1700
|
+
for (const i of insights) {
|
|
1701
|
+
batch.set(col.doc(i.id), {
|
|
1702
|
+
computedAt: i.computedAt,
|
|
1703
|
+
taskType: i.taskType,
|
|
1704
|
+
model: i.model,
|
|
1705
|
+
provider: i.provider,
|
|
1706
|
+
samples: i.samples,
|
|
1707
|
+
successRate: i.successRate,
|
|
1708
|
+
avgTokens: i.avgTokens,
|
|
1709
|
+
baselineTokens: i.baselineTokens,
|
|
1710
|
+
savingsPct: i.savingsPct,
|
|
1711
|
+
avgCostUsd: i.avgCostUsd
|
|
1712
|
+
});
|
|
1713
|
+
}
|
|
1714
|
+
await batch.commit();
|
|
1715
|
+
markTableSynced("insights", insights.map((i) => i.id));
|
|
1716
|
+
}
|
|
1717
|
+
if (!opts.raw) {
|
|
1718
|
+
return {
|
|
1719
|
+
synced: insights.length,
|
|
1720
|
+
message: insights.length ? `Synced ${insights.length} efficiency insight(s) to polymath_insights. Raw logs stayed local (use --raw to push).` : "No new insights to sync \u2014 raw logs stay local by default (use --raw to push them)."
|
|
1721
|
+
};
|
|
1722
|
+
}
|
|
1098
1723
|
const rows = unsyncedRows();
|
|
1099
|
-
if (!rows.length) return { synced: 0, message: "Nothing to sync \u2014 all rows already pushed." };
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
const
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1724
|
+
if (!rows.length && !insights.length) return { synced: 0, message: "Nothing to sync \u2014 all rows already pushed." };
|
|
1725
|
+
if (rows.length) {
|
|
1726
|
+
const batch = fdb.batch();
|
|
1727
|
+
const col = fdb.collection(config.firestore.collection);
|
|
1728
|
+
for (const r of rows) {
|
|
1729
|
+
const ref = col.doc(`${r.date}__${r.id}`);
|
|
1730
|
+
batch.set(ref, {
|
|
1731
|
+
ts: r.ts,
|
|
1732
|
+
date: r.date,
|
|
1733
|
+
provider: r.provider,
|
|
1734
|
+
model: r.model,
|
|
1735
|
+
taskType: r.taskType,
|
|
1736
|
+
command: r.command ?? "run",
|
|
1737
|
+
promptTokens: r.promptTokens,
|
|
1738
|
+
completionTokens: r.completionTokens,
|
|
1739
|
+
totalTokens: r.totalTokens,
|
|
1740
|
+
costUsd: r.costUsd,
|
|
1741
|
+
sessionId: r.sessionId ?? null
|
|
1742
|
+
});
|
|
1743
|
+
}
|
|
1744
|
+
await batch.commit();
|
|
1745
|
+
markSynced(rows.map((r) => r.id));
|
|
1746
|
+
}
|
|
1747
|
+
return {
|
|
1748
|
+
synced: insights.length + rows.length,
|
|
1749
|
+
message: `Synced ${insights.length} insights + ${rows.length} raw rows to Firestore.`
|
|
1750
|
+
};
|
|
1751
|
+
}
|
|
1752
|
+
|
|
1753
|
+
// src/usage/dataconnect.ts
|
|
1754
|
+
/**
 * Obtains an OAuth access token through the firebase-admin app credential.
 *
 * Reuses the first already-initialised firebase-admin app if there is one.
 * Otherwise it initialises an app from the JSON in the
 * FIREBASE_SERVICE_ACCOUNT_KEY environment variable, falling back to
 * application-default credentials when that variable is unset or cannot be
 * turned into a credential.
 *
 * @param {string} projectId - project id passed to initializeApp
 * @returns {Promise<string>} the `access_token` field of the credential's token
 * @throws {Error} when `firebase-admin/app` cannot be imported
 */
async function adminAccessToken(projectId) {
  const appMod = await import("firebase-admin/app").catch(() => {
    throw new Error("firebase-admin is not installed. Run `npm install firebase-admin`.");
  });
  const { initializeApp, getApps, cert, applicationDefault } = appMod;

  const initWithDefaultCredentials = () => initializeApp({ credential: applicationDefault(), projectId });

  let app = getApps()[0];
  if (!app) {
    const saJson = process.env.FIREBASE_SERVICE_ACCOUNT_KEY;
    if (!saJson) {
      app = initWithDefaultCredentials();
    } else {
      try {
        app = initializeApp({ credential: cert(JSON.parse(saJson)), projectId });
      } catch {
        // Unusable service-account JSON: fall back to ambient credentials.
        app = initWithDefaultCredentials();
      }
    }
  }

  const issued = await app.options.credential.getAccessToken();
  return issued.access_token;
}
|
|
1778
|
+
/**
 * POSTs one GraphQL operation to the Data Connect `executeGraphql` endpoint.
 *
 * @param {{projectId: string, location: string, serviceId: string}} cfg2 - target service
 * @param {string} token - bearer token sent in the Authorization header
 * @param {string} query - GraphQL document
 * @param {object} variables - variables for the operation
 * @returns {Promise<void>} resolves with no value when the call succeeds
 * @throws {Error} on a non-2xx response (status plus up to 300 chars of the body)
 *   or when the JSON response carries a non-empty `errors` array
 */
async function executeGraphql(cfg2, token, query, variables) {
  const { projectId, location, serviceId } = cfg2;
  const endpoint = `https://firebasedataconnect.googleapis.com/v1/projects/${projectId}/locations/${location}/services/${serviceId}:executeGraphql`;
  const request = {
    method: "POST",
    headers: { Authorization: `Bearer ${token}`, "Content-Type": "application/json" },
    body: JSON.stringify({ query, variables })
  };

  const response = await fetch(endpoint, request);
  if (!response.ok) {
    // Body is best-effort detail only; a failed read must not hide the status.
    const detail = await response.text().catch(() => "");
    throw new Error(`Data Connect ${response.status}: ${detail.slice(0, 300)}`);
  }

  const payload = await response.json();
  if (payload.errors?.length) {
    const summary = JSON.stringify(payload.errors).slice(0, 300);
    throw new Error(`Data Connect GraphQL errors: ${summary}`);
  }
}
|
|
1794
|
+
// Converts an epoch-milliseconds value to an ISO-8601 string (Date#toISOString).
var iso = (ms) => {
  const instant = new Date(ms);
  return instant.toISOString();
};
|
|
1795
|
+
/**
 * Pushes locally stored analytics to Firebase Data Connect.
 *
 * Insights are always re-distilled and pushed. With `opts.raw` set, sessions,
 * step runs, command runs and individual model calls are pushed as well, in
 * that order. The project id is read from `config.firestore.projectId`.
 *
 * Rows are marked as synced per table. If a mutation fails part-way through a
 * table, the rows that were already accepted are still marked before the error
 * is re-thrown, so a retry does not send them a second time (the step, command
 * and call mutations are plain inserts).
 *
 * @param {object} config - loaded CLI config (`dataconnect`, `firestore.projectId`)
 * @param {{raw?: boolean}} [opts={}] - `raw: true` also pushes the raw logs
 * @returns {Promise<{insights: number, sessions: number, steps: number,
 *   commands: number, calls: number, message: string}>} per-table counts and a summary
 * @throws whatever adminAccessToken / executeGraphql throw
 */
async function syncDataConnect(config, opts = {}) {
  const dc = config.dataconnect;
  if (!dc?.enabled) {
    return { insights: 0, sessions: 0, steps: 0, commands: 0, calls: 0, message: "Data Connect sync is disabled (enable with `poly config dataconnect on`)." };
  }
  const projectId = config.firestore.projectId;
  const token = await adminAccessToken(projectId);
  const cfg2 = { projectId, location: dc.location, serviceId: dc.serviceId };

  // Sends one mutation per row, then marks the rows as synced. On failure the
  // rows that did go through are marked before the error propagates.
  const pushRows = async (rows, mutation, toVariables, markDone) => {
    const sentIds = [];
    try {
      for (const row of rows) {
        await executeGraphql(cfg2, token, mutation, toVariables(row));
        sentIds.push(row.id);
      }
    } catch (err) {
      if (sentIds.length) markDone(sentIds);
      throw err;
    }
    markDone(sentIds);
  };

  distillInsights();
  const insights = unsyncedInsights();
  await pushRows(
    insights,
    `mutation UpsertInsight($id: String!, $computedAt: Timestamp!, $taskType: String!,
      $model: String!, $provider: String!, $samples: Int!, $successRate: Float!,
      $avgTokens: Float!, $baselineTokens: Float!, $savingsPct: Float!, $avgCostUsd: Float!) {
      insight_upsert(data: {
        id: $id, computedAt: $computedAt, taskType: $taskType, model: $model,
        provider: $provider, samples: $samples, successRate: $successRate,
        avgTokens: $avgTokens, baselineTokens: $baselineTokens,
        savingsPct: $savingsPct, avgCostUsd: $avgCostUsd
      })
    }`,
    (i) => ({
      id: i.id,
      computedAt: iso(i.computedAt),
      taskType: i.taskType,
      model: i.model,
      provider: i.provider,
      samples: i.samples,
      successRate: i.successRate,
      avgTokens: i.avgTokens,
      baselineTokens: i.baselineTokens,
      savingsPct: i.savingsPct,
      avgCostUsd: i.avgCostUsd
    }),
    (ids) => markTableSynced("insights", ids)
  );

  if (!opts.raw) {
    return {
      insights: insights.length,
      sessions: 0,
      steps: 0,
      commands: 0,
      calls: 0,
      message: `Synced ${insights.length} efficiency insight(s) to Data Connect (${cfg2.serviceId}@${cfg2.location}). Raw logs stayed local \u2014 use \`poly sync --raw\` to push everything.`
    };
  }

  const sessions = unsyncedSessions();
  await pushRows(
    sessions,
    `mutation UpsertSession($id: String!, $startedAt: Timestamp!, $date: Date!, $goal: String!,
      $command: String!, $objective: String!, $plannedSteps: Int!, $completedSteps: Int!,
      $failedSteps: Int!, $autoScore: Float, $userScore: Int, $promptTokens: Int!,
      $completionTokens: Int!, $costUsd: Float!, $durationMs: Int!) {
      session_upsert(data: {
        id: $id, startedAt: $startedAt, date: $date, goal: $goal, command: $command,
        objective: $objective, plannedSteps: $plannedSteps, completedSteps: $completedSteps,
        failedSteps: $failedSteps, autoScore: $autoScore, userScore: $userScore,
        promptTokens: $promptTokens, completionTokens: $completionTokens,
        costUsd: $costUsd, durationMs: $durationMs
      })
    }`,
    (s) => ({
      id: s.id,
      startedAt: iso(s.ts),
      date: s.date,
      goal: s.goal,
      command: s.command,
      objective: s.objective,
      plannedSteps: s.plannedSteps,
      completedSteps: s.completedSteps,
      failedSteps: s.failedSteps,
      autoScore: s.autoScore,
      userScore: s.userScore,
      promptTokens: s.promptTokens,
      completionTokens: s.completionTokens,
      costUsd: s.costUsd,
      durationMs: s.durationMs
    }),
    (ids) => markTableSynced("sessions", ids)
  );

  const steps = unsyncedStepRuns();
  await pushRows(
    steps,
    `mutation InsertStep($sessionId: String!, $stepNo: Int!, $taskType: String!, $skill: String!,
      $model: String!, $provider: String!, $iterations: Int!, $toolCalls: Int!,
      $promptTokens: Int!, $completionTokens: Int!, $costUsd: Float!,
      $finishedBy: String!, $success: Boolean!, $durationMs: Int!) {
      stepRun_insert(data: {
        sessionId: $sessionId, stepNo: $stepNo, taskType: $taskType, skill: $skill,
        model: $model, provider: $provider, iterations: $iterations, toolCalls: $toolCalls,
        promptTokens: $promptTokens, completionTokens: $completionTokens, costUsd: $costUsd,
        finishedBy: $finishedBy, success: $success, durationMs: $durationMs
      })
    }`,
    (st) => ({
      sessionId: st.sessionId,
      stepNo: st.stepNo,
      taskType: st.taskType,
      skill: st.skill,
      model: st.model,
      provider: st.provider,
      iterations: st.iterations,
      toolCalls: st.toolCalls,
      promptTokens: st.promptTokens,
      completionTokens: st.completionTokens,
      costUsd: st.costUsd,
      finishedBy: st.finishedBy,
      success: st.success,
      durationMs: st.durationMs
    }),
    (ids) => markTableSynced("step_runs", ids)
  );

  const commands = unsyncedCommandRuns();
  await pushRows(
    commands,
    `mutation InsertCommand($sessionId: String, $ts: Timestamp!, $date: Date!, $command: String!,
      $args: String, $objective: String, $promptTokens: Int!, $completionTokens: Int!,
      $costUsd: Float!, $durationMs: Int!) {
      commandRun_insert(data: {
        sessionId: $sessionId, ts: $ts, date: $date, command: $command, args: $args,
        objective: $objective, promptTokens: $promptTokens, completionTokens: $completionTokens,
        costUsd: $costUsd, durationMs: $durationMs
      })
    }`,
    (cr) => ({
      sessionId: cr.sessionId ?? null,
      ts: iso(cr.ts),
      date: cr.date,
      command: cr.command,
      args: cr.args ?? null,
      objective: cr.objective ?? null,
      promptTokens: cr.promptTokens,
      completionTokens: cr.completionTokens,
      costUsd: cr.costUsd,
      durationMs: cr.durationMs
    }),
    (ids) => markTableSynced("command_runs", ids)
  );

  const calls = unsyncedRows();
  await pushRows(
    calls,
    `mutation InsertCall($sessionId: String, $ts: Timestamp!, $date: Date!, $command: String!,
      $taskType: String!, $model: String!, $provider: String!, $promptTokens: Int!,
      $completionTokens: Int!, $totalTokens: Int!, $costUsd: Float!) {
      modelCall_insert(data: {
        sessionId: $sessionId, ts: $ts, date: $date, command: $command, taskType: $taskType,
        model: $model, provider: $provider, promptTokens: $promptTokens,
        completionTokens: $completionTokens, totalTokens: $totalTokens, costUsd: $costUsd
      })
    }`,
    (u) => ({
      sessionId: u.sessionId ?? null,
      ts: iso(u.ts),
      date: u.date,
      command: u.command ?? "run",
      taskType: u.taskType,
      model: u.model,
      provider: u.provider,
      promptTokens: u.promptTokens,
      completionTokens: u.completionTokens,
      totalTokens: u.totalTokens,
      costUsd: u.costUsd
    }),
    (ids) => markSynced(ids)
  );

  return {
    insights: insights.length,
    sessions: sessions.length,
    steps: steps.length,
    commands: commands.length,
    calls: calls.length,
    message: `Synced ${insights.length} insights + raw: ${sessions.length} sessions, ${steps.length} steps, ${commands.length} commands, ${calls.length} calls (${cfg2.serviceId}@${cfg2.location}).`
  };
}
|
|
1985
|
+
|
|
1986
|
+
// src/usage/logger.ts
|
|
1987
|
+
/**
 * Formats a date as `YYYY-MM-DD` using the local-time getters of `Date`
 * (getFullYear / getMonth / getDate). Month and day are zero-padded to two
 * digits; the year is used as-is.
 *
 * @param {Date} [d=new Date()] - the date to format
 * @returns {string}
 */
function localDate(d = /* @__PURE__ */ new Date()) {
  const twoDigits = (value) => String(value).padStart(2, "0");
  const parts = [d.getFullYear(), twoDigits(d.getMonth() + 1), twoDigits(d.getDate())];
  return parts.join("-");
}
|
|
1993
|
+
/**
 * Extracts the provider prefix from a model id of the form "<provider>/<name>".
 *
 * An id without a slash is returned whole. When the prefix is empty (empty id,
 * or an id that starts with "/") the result is "unknown". `split` always
 * yields a string for the first segment, so a nullish check can never trigger
 * that fallback; the emptiness check is what makes it reachable.
 *
 * @param {string} modelId - model identifier, e.g. "openai/gpt-4o"
 * @returns {string} the provider segment, or "unknown" when it is empty
 */
function providerOf(modelId) {
  const [prefix] = modelId.split("/");
  return prefix || "unknown";
}
|
|
1996
|
+
/**
 * Builds a usage entry for one model completion, hands it to recordUsage(),
 * and returns it.
 *
 * @param {object} result - completion result; reads `model`, `costUsd` and
 *   `usage.{promptTokens, completionTokens, totalTokens}`
 * @param {string} taskType - task type stored on the entry
 * @param {string} [sessionId] - session the call belongs to
 * @param {string} [command="run"] - CLI command stored on the entry
 * @returns {object} the entry that was recorded
 */
function logCompletion(result, taskType, sessionId, command = "run") {
  const stamp = /* @__PURE__ */ new Date();
  const entry = {
    ts: stamp.getTime(),
    date: localDate(stamp),
    provider: providerOf(result.model),
    model: result.model
  };
  entry.taskType = taskType;
  const counts = result.usage;
  entry.promptTokens = counts.promptTokens;
  entry.completionTokens = counts.completionTokens;
  entry.totalTokens = counts.totalTokens;
  entry.costUsd = result.costUsd;
  entry.sessionId = sessionId;
  entry.command = command;
  recordUsage(entry);
  return entry;
}
|
|
1121
2014
|
|
|
1122
2015
|
// src/tui/App.tsx
|
|
@@ -1274,46 +2167,31 @@ ${stderr}`)) };
|
|
|
1274
2167
|
}
|
|
1275
2168
|
}
|
|
1276
2169
|
|
|
1277
|
-
// src/usage/logger.ts
|
|
1278
|
-
function localDate(d = /* @__PURE__ */ new Date()) {
|
|
1279
|
-
const y = d.getFullYear();
|
|
1280
|
-
const m = String(d.getMonth() + 1).padStart(2, "0");
|
|
1281
|
-
const day = String(d.getDate()).padStart(2, "0");
|
|
1282
|
-
return `${y}-${m}-${day}`;
|
|
1283
|
-
}
|
|
1284
|
-
function providerOf(modelId) {
|
|
1285
|
-
return modelId.split("/")[0] ?? "unknown";
|
|
1286
|
-
}
|
|
1287
|
-
function logCompletion(result, taskType, sessionId) {
|
|
1288
|
-
const now = /* @__PURE__ */ new Date();
|
|
1289
|
-
const entry = {
|
|
1290
|
-
ts: now.getTime(),
|
|
1291
|
-
date: localDate(now),
|
|
1292
|
-
provider: providerOf(result.model),
|
|
1293
|
-
model: result.model,
|
|
1294
|
-
taskType,
|
|
1295
|
-
promptTokens: result.usage.promptTokens,
|
|
1296
|
-
completionTokens: result.usage.completionTokens,
|
|
1297
|
-
totalTokens: result.usage.totalTokens,
|
|
1298
|
-
costUsd: result.costUsd,
|
|
1299
|
-
sessionId
|
|
1300
|
-
};
|
|
1301
|
-
recordUsage(entry);
|
|
1302
|
-
return entry;
|
|
1303
|
-
}
|
|
1304
|
-
|
|
1305
2170
|
// src/agent/loop.ts
|
|
1306
2171
|
var MAX_ITERS_PER_STEP = 6;
|
|
1307
2172
|
async function runAgent(goal, deps, emit) {
|
|
1308
2173
|
const { client: client2, models, policy, sessionId, cwd } = deps;
|
|
1309
2174
|
let totalCostUsd = 0;
|
|
1310
2175
|
let totalTokens = 0;
|
|
2176
|
+
let totalPromptTokens = 0;
|
|
2177
|
+
let totalCompletionTokens = 0;
|
|
1311
2178
|
let calls = 0;
|
|
2179
|
+
const sessionStart = Date.now();
|
|
2180
|
+
let completedSteps = 0;
|
|
2181
|
+
let failedSteps = 0;
|
|
1312
2182
|
const planRoute = route("plan", models, policy);
|
|
1313
2183
|
let plan;
|
|
1314
2184
|
if (planRoute) {
|
|
1315
2185
|
try {
|
|
1316
|
-
plan = await planRequest(goal, client2, planRoute.model)
|
|
2186
|
+
plan = await planRequest(goal, client2, planRoute.model, (result) => {
|
|
2187
|
+
const entry = logCompletion(result, "plan", sessionId);
|
|
2188
|
+
emit({ type: "usage", entry });
|
|
2189
|
+
totalCostUsd += entry.costUsd;
|
|
2190
|
+
totalTokens += entry.totalTokens;
|
|
2191
|
+
totalPromptTokens += entry.promptTokens;
|
|
2192
|
+
totalCompletionTokens += entry.completionTokens;
|
|
2193
|
+
calls++;
|
|
2194
|
+
});
|
|
1317
2195
|
} catch {
|
|
1318
2196
|
plan = heuristicPlan(goal);
|
|
1319
2197
|
}
|
|
@@ -1321,6 +2199,15 @@ async function runAgent(goal, deps, emit) {
|
|
|
1321
2199
|
plan = heuristicPlan(goal);
|
|
1322
2200
|
}
|
|
1323
2201
|
emit({ type: "plan", plan, planModel: planRoute?.model.id ?? "heuristic" });
|
|
2202
|
+
startSession({
|
|
2203
|
+
id: sessionId,
|
|
2204
|
+
ts: sessionStart,
|
|
2205
|
+
date: localDate2(),
|
|
2206
|
+
goal,
|
|
2207
|
+
command: "run",
|
|
2208
|
+
objective: policy.objective,
|
|
2209
|
+
plannedSteps: plan.steps.length
|
|
2210
|
+
});
|
|
1324
2211
|
const toolCtx = {
|
|
1325
2212
|
cwd,
|
|
1326
2213
|
allowWrite: deps.allowWrite,
|
|
@@ -1333,6 +2220,7 @@ async function runAgent(goal, deps, emit) {
|
|
|
1333
2220
|
completionTokens: step.estCompletionTokens
|
|
1334
2221
|
});
|
|
1335
2222
|
if (!r) {
|
|
2223
|
+
failedSteps++;
|
|
1336
2224
|
emit({ type: "error", message: `No capable model for step ${step.id} (${step.type}).` });
|
|
1337
2225
|
continue;
|
|
1338
2226
|
}
|
|
@@ -1343,55 +2231,113 @@ async function runAgent(goal, deps, emit) {
|
|
|
1343
2231
|
{ role: "system", content: stepSystemPrompt(goal, step, priorSummaries, useTools) },
|
|
1344
2232
|
{ role: "user", content: step.description }
|
|
1345
2233
|
];
|
|
2234
|
+
const stepStart = Date.now();
|
|
2235
|
+
let stepPrompt = 0;
|
|
2236
|
+
let stepCompletion = 0;
|
|
2237
|
+
let stepCost = 0;
|
|
2238
|
+
let stepToolCalls = 0;
|
|
2239
|
+
let iterations = 0;
|
|
2240
|
+
let finishedBy = "max-iters";
|
|
1346
2241
|
let summary = "";
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
next
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
2242
|
+
try {
|
|
2243
|
+
for (let iter = 0; iter < MAX_ITERS_PER_STEP; iter++) {
|
|
2244
|
+
iterations = iter + 1;
|
|
2245
|
+
const gen = client2.stream(
|
|
2246
|
+
{
|
|
2247
|
+
model: model.id,
|
|
2248
|
+
messages,
|
|
2249
|
+
tools: useTools ? TOOL_SCHEMAS : void 0,
|
|
2250
|
+
temperature: 0.2,
|
|
2251
|
+
maxTokens: 2e3
|
|
2252
|
+
},
|
|
2253
|
+
model.pricing
|
|
2254
|
+
);
|
|
2255
|
+
let next = await gen.next();
|
|
2256
|
+
while (!next.done) {
|
|
2257
|
+
emit({ type: "text", delta: next.value });
|
|
2258
|
+
next = await gen.next();
|
|
2259
|
+
}
|
|
2260
|
+
const result = next.value;
|
|
2261
|
+
const entry = logCompletion(result, step.type, sessionId);
|
|
2262
|
+
emit({ type: "usage", entry });
|
|
2263
|
+
totalCostUsd += entry.costUsd;
|
|
2264
|
+
totalTokens += entry.totalTokens;
|
|
2265
|
+
totalPromptTokens += entry.promptTokens;
|
|
2266
|
+
totalCompletionTokens += entry.completionTokens;
|
|
2267
|
+
stepPrompt += entry.promptTokens;
|
|
2268
|
+
stepCompletion += entry.completionTokens;
|
|
2269
|
+
stepCost += entry.costUsd;
|
|
2270
|
+
calls++;
|
|
2271
|
+
if (result.toolCalls.length && useTools) {
|
|
2272
|
+
messages.push({ role: "assistant", content: result.content, tool_calls: result.toolCalls });
|
|
2273
|
+
let finished = false;
|
|
2274
|
+
for (const tc of result.toolCalls) {
|
|
2275
|
+
stepToolCalls++;
|
|
2276
|
+
emit({ type: "tool-call", name: tc.function.name, args: tc.function.arguments });
|
|
2277
|
+
const outcome = executeTool(tc.function.name, tc.function.arguments, toolCtx);
|
|
2278
|
+
emit({ type: "tool-result", name: tc.function.name, result: outcome.result });
|
|
2279
|
+
messages.push({ role: "tool", tool_call_id: tc.id, name: tc.function.name, content: outcome.result });
|
|
2280
|
+
if (outcome.finishSummary != null) {
|
|
2281
|
+
summary = outcome.finishSummary;
|
|
2282
|
+
finished = true;
|
|
2283
|
+
}
|
|
2284
|
+
}
|
|
2285
|
+
if (finished) {
|
|
2286
|
+
finishedBy = "finish-tool";
|
|
2287
|
+
break;
|
|
1380
2288
|
}
|
|
2289
|
+
continue;
|
|
1381
2290
|
}
|
|
1382
|
-
|
|
1383
|
-
|
|
2291
|
+
summary = result.content || summary;
|
|
2292
|
+
if (summary) finishedBy = "text";
|
|
2293
|
+
break;
|
|
1384
2294
|
}
|
|
1385
|
-
|
|
1386
|
-
|
|
2295
|
+
} catch (err) {
|
|
2296
|
+
finishedBy = "error";
|
|
2297
|
+
emit({ type: "error", message: `Step ${step.id} failed: ${err?.message ?? err}` });
|
|
1387
2298
|
}
|
|
2299
|
+
const success = finishedBy === "finish-tool" || finishedBy === "text";
|
|
2300
|
+
if (success) completedSteps++;
|
|
2301
|
+
else failedSteps++;
|
|
2302
|
+
recordStepRun({
|
|
2303
|
+
sessionId,
|
|
2304
|
+
stepNo: step.id,
|
|
2305
|
+
taskType: step.type,
|
|
2306
|
+
skill: TASK_SKILL[step.type],
|
|
2307
|
+
model: model.id,
|
|
2308
|
+
provider: model.provider,
|
|
2309
|
+
iterations,
|
|
2310
|
+
toolCalls: stepToolCalls,
|
|
2311
|
+
promptTokens: stepPrompt,
|
|
2312
|
+
completionTokens: stepCompletion,
|
|
2313
|
+
costUsd: stepCost,
|
|
2314
|
+
finishedBy,
|
|
2315
|
+
success,
|
|
2316
|
+
durationMs: Date.now() - stepStart
|
|
2317
|
+
});
|
|
1388
2318
|
if (!summary) summary = "(no summary)";
|
|
1389
2319
|
priorSummaries.push(`Step ${step.id} (${step.type}): ${summary}`);
|
|
1390
2320
|
emit({ type: "step-end", step, summary });
|
|
1391
2321
|
}
|
|
2322
|
+
finishSession(sessionId, {
|
|
2323
|
+
plannedSteps: plan.steps.length,
|
|
2324
|
+
completedSteps,
|
|
2325
|
+
failedSteps,
|
|
2326
|
+
autoScore: plan.steps.length ? completedSteps / plan.steps.length : null,
|
|
2327
|
+
promptTokens: totalPromptTokens,
|
|
2328
|
+
completionTokens: totalCompletionTokens,
|
|
2329
|
+
costUsd: totalCostUsd,
|
|
2330
|
+
durationMs: Date.now() - sessionStart
|
|
2331
|
+
});
|
|
1392
2332
|
emit({ type: "done", totalCostUsd, totalTokens, calls });
|
|
1393
2333
|
return { totalCostUsd, totalTokens, calls };
|
|
1394
2334
|
}
|
|
2335
|
+
/**
 * Formats a date as `YYYY-MM-DD` from its local-time year, month and day.
 * Month and day are left-padded with "0" to two characters.
 *
 * @param {Date} [d=new Date()] - the date to format
 * @returns {string}
 */
function localDate2(d = /* @__PURE__ */ new Date()) {
  const year = d.getFullYear();
  const month = d.getMonth() + 1;
  const dayOfMonth = d.getDate();
  return [String(year), String(month).padStart(2, "0"), String(dayOfMonth).padStart(2, "0")].join("-");
}
|
|
1395
2341
|
function stepSystemPrompt(goal, step, priorSummaries, useTools) {
|
|
1396
2342
|
const context = priorSummaries.length ? `
|
|
1397
2343
|
|
|
@@ -1418,6 +2364,7 @@ function App(props) {
|
|
|
1418
2364
|
const [cost, setCost] = useState(0);
|
|
1419
2365
|
const [tok, setTok] = useState(0);
|
|
1420
2366
|
const [calls, setCalls] = useState(0);
|
|
2367
|
+
const [rated, setRated] = useState(null);
|
|
1421
2368
|
const push = useCallback((text, color) => {
|
|
1422
2369
|
setLog((l) => [...l, { key: l.length, text, color }]);
|
|
1423
2370
|
}, []);
|
|
@@ -1484,7 +2431,7 @@ function App(props) {
|
|
|
1484
2431
|
} catch (err) {
|
|
1485
2432
|
push(`Fatal: ${err?.message ?? err}`, "red");
|
|
1486
2433
|
}
|
|
1487
|
-
setPhase("
|
|
2434
|
+
setPhase("rate");
|
|
1488
2435
|
}, [goal, props, push]);
|
|
1489
2436
|
useInput((input, key) => {
|
|
1490
2437
|
if (phase === "preview") {
|
|
@@ -1493,6 +2440,18 @@ function App(props) {
|
|
|
1493
2440
|
setDraft(goal);
|
|
1494
2441
|
setPhase("input");
|
|
1495
2442
|
} else if (input === "q") exit();
|
|
2443
|
+
} else if (phase === "rate") {
|
|
2444
|
+
if (/^[0-9]$/.test(input)) {
|
|
2445
|
+
const score = parseInt(input, 10);
|
|
2446
|
+
try {
|
|
2447
|
+
setUserScore(props.sessionId, score);
|
|
2448
|
+
} catch {
|
|
2449
|
+
}
|
|
2450
|
+
setRated(score);
|
|
2451
|
+
setPhase("done");
|
|
2452
|
+
} else if (key.return || input === "q") {
|
|
2453
|
+
setPhase("done");
|
|
2454
|
+
}
|
|
1496
2455
|
} else if (phase === "done") {
|
|
1497
2456
|
if (input === "q" || key.return) exit();
|
|
1498
2457
|
}
|
|
@@ -1516,12 +2475,26 @@ function App(props) {
|
|
|
1516
2475
|
)
|
|
1517
2476
|
] }),
|
|
1518
2477
|
phase === "preview" && rec && /* @__PURE__ */ jsx(Preview, { rec }),
|
|
1519
|
-
(phase === "running" || phase === "done") && /* @__PURE__ */ jsxs(Box, { flexDirection: "column", marginTop: 1, children: [
|
|
2478
|
+
(phase === "running" || phase === "rate" || phase === "done") && /* @__PURE__ */ jsxs(Box, { flexDirection: "column", marginTop: 1, children: [
|
|
1520
2479
|
log.slice(-18).map((l) => /* @__PURE__ */ jsx(Text, { color: l.color, children: l.text }, l.key)),
|
|
1521
2480
|
phase === "running" && /* @__PURE__ */ jsxs(Text, { color: "cyan", children: [
|
|
1522
2481
|
/* @__PURE__ */ jsx(Spinner, { type: "dots" }),
|
|
1523
2482
|
" working\u2026"
|
|
1524
2483
|
] }),
|
|
2484
|
+
phase === "rate" && /* @__PURE__ */ jsxs(Text, { children: [
|
|
2485
|
+
/* @__PURE__ */ jsxs(Text, { color: "green", children: [
|
|
2486
|
+
"\u2713 Done \xB7 ",
|
|
2487
|
+
calls,
|
|
2488
|
+
" calls \xB7 ",
|
|
2489
|
+
tokens(tok),
|
|
2490
|
+
" tokens \xB7 ",
|
|
2491
|
+
usd(cost)
|
|
2492
|
+
] }),
|
|
2493
|
+
"\n",
|
|
2494
|
+
/* @__PURE__ */ jsx(Text, { color: "cyan", children: "How well was your goal achieved? " }),
|
|
2495
|
+
/* @__PURE__ */ jsx(Text, { color: "yellow", children: "[0-9]" }),
|
|
2496
|
+
/* @__PURE__ */ jsx(Text, { color: "gray", children: " (9 = perfect \xB7 enter = skip) \u2014 feeds `poly analyze`" })
|
|
2497
|
+
] }),
|
|
1525
2498
|
phase === "done" && /* @__PURE__ */ jsxs(Text, { color: "green", children: [
|
|
1526
2499
|
"\u2713 Done \xB7 ",
|
|
1527
2500
|
calls,
|
|
@@ -1529,6 +2502,7 @@ function App(props) {
|
|
|
1529
2502
|
tokens(tok),
|
|
1530
2503
|
" tokens \xB7 ",
|
|
1531
2504
|
usd(cost),
|
|
2505
|
+
rated != null ? ` \xB7 rated ${rated}/9` : "",
|
|
1532
2506
|
" \u2014 press q to quit"
|
|
1533
2507
|
] })
|
|
1534
2508
|
] })
|
|
@@ -1596,27 +2570,73 @@ function truncate2(s, n) {
|
|
|
1596
2570
|
|
|
1597
2571
|
// src/index.ts
|
|
1598
2572
|
var program = new Command();
|
|
1599
|
-
program.name("poly").description("Polymath \u2014 cost-optimized, multi-model TUI coding agent").version("0.
|
|
2573
|
+
program.name("poly").description("Polymath \u2014 cost-optimized, multi-model TUI coding agent").version("0.3.0");
|
|
1600
2574
|
/**
 * Build an OpenRouterClient from the loaded config.
 *
 * `localBaseUrl` is forwarded only when `config.local.enabled` is truthy;
 * otherwise it is passed as `undefined`.
 *
 * @param {object} config - loaded Polymath config (must contain `local`).
 * @returns {OpenRouterClient} a new client instance.
 */
function client(config) {
  const options = {
    apiKey: resolveApiKey(config),
    referer: config.referer,
    title: config.title,
    localBaseUrl: void 0
  };
  if (config.local.enabled) {
    options.localBaseUrl = config.local.baseUrl;
  }
  return new OpenRouterClient(options);
}
|
|
1607
2582
|
/**
 * Assemble the routing policy from the config and per-invocation CLI options.
 *
 * - `objective`: the CLI value when truthy, else `config.defaultObjective`.
 * - `maxCostPerCallUsd`: `opts.maxCost` parsed as a float when provided,
 *   else the config value; anything non-finite becomes `undefined`.
 * - `empirical`: result of `insightBoostMap(listInsights())`, or `undefined`
 *   when that map has no keys or the lookup throws.
 *
 * @param {object} config - loaded Polymath config.
 * @param {object} opts - CLI options (`objective`, `maxCost`).
 * @returns {{objective: *, maxCostPerCallUsd: (number|undefined), pinned: *, empirical: (object|undefined)}}
 */
function buildPolicy(config, opts) {
  const objective = opts.objective || config.defaultObjective;
  const costCap = opts.maxCost == null ? config.maxCostPerCallUsd : parseFloat(opts.maxCost);

  let empirical = void 0;
  try {
    const boosts = insightBoostMap(listInsights());
    if (Object.keys(boosts).length > 0) {
      empirical = boosts;
    }
  } catch {
    // Best-effort: a failed insight lookup simply means no empirical boosts.
  }

  const policy = {
    objective,
    maxCostPerCallUsd: Number.isFinite(costCap) ? costCap : void 0,
    pinned: config.pinned,
    empirical
  };
  return policy;
}
|
|
2599
|
+
/**
 * Format a Date as `YYYY-MM-DD` using its local-time fields
 * (`getFullYear` / `getMonth` / `getDate`), zero-padding month and day.
 *
 * @param {Date} [d] - date to format; defaults to the current time.
 * @returns {string} the formatted calendar date.
 */
function localDate3(d = /* @__PURE__ */ new Date()) {
  const pad2 = (n) => String(n).padStart(2, "0");
  const parts = [d.getFullYear(), pad2(d.getMonth() + 1), pad2(d.getDate())];
  return parts.join("-");
}
|
|
2605
|
+
/**
 * Record one CLI command invocation via `recordCommandRun`.
 *
 * Token and cost fields default to 0 when nullish, `args` is cut to its first
 * 300 characters, and `durationMs` is measured from `opts.startedAt` to now.
 * Any error raised while building or recording the entry is swallowed so
 * that bookkeeping can never break the command itself.
 *
 * @param {object} opts - `command`, `startedAt` (epoch ms), `sessionId`,
 *   and optional `args`, `objective`, `promptTokens`, `completionTokens`,
 *   `costUsd`.
 * @returns {void}
 */
function trackCommand(opts) {
  try {
    const { sessionId, startedAt, command, args, objective } = opts;
    const entry = {
      sessionId,
      ts: startedAt,
      date: localDate3(new Date(startedAt)),
      command,
      args: args?.slice(0, 300),
      objective,
      promptTokens: opts.promptTokens ?? 0,
      completionTokens: opts.completionTokens ?? 0,
      costUsd: opts.costUsd ?? 0,
      durationMs: Date.now() - startedAt
    };
    recordCommandRun(entry);
  } catch {
    // Intentionally ignored: tracking is best-effort.
  }
}
|
|
1616
2622
|
async function loadCatalog(config, refresh = false) {
|
|
1617
|
-
const
|
|
2623
|
+
const cl = client(config);
|
|
2624
|
+
const hasKey = !!resolveApiKey(config);
|
|
2625
|
+
let models = [];
|
|
2626
|
+
try {
|
|
2627
|
+
models = await getModels(cl, { refresh });
|
|
2628
|
+
} catch (e) {
|
|
2629
|
+
if (!config.local.enabled) throw e;
|
|
2630
|
+
}
|
|
2631
|
+
if (config.local.enabled) {
|
|
2632
|
+
const local = await getLocalModels(cl);
|
|
2633
|
+
if (!local.length) {
|
|
2634
|
+
console.error(c.yellow(`Local server (${config.local.baseUrl}) returned no models \u2014 is it running?`));
|
|
2635
|
+
}
|
|
2636
|
+
models = hasKey ? [...local, ...models] : local;
|
|
2637
|
+
}
|
|
1618
2638
|
if (!models.length) {
|
|
1619
|
-
console.error(c.red("
|
|
2639
|
+
console.error(c.red("No models available. Check your connection, or `poly config local on` with a running Ollama/LM Studio."));
|
|
1620
2640
|
process.exit(1);
|
|
1621
2641
|
}
|
|
1622
2642
|
return models;
|
|
@@ -1625,22 +2645,26 @@ program.command("login").description("Connect Polymath to OpenRouter (set/replac
|
|
|
1625
2645
|
await runLogin();
|
|
1626
2646
|
});
|
|
1627
2647
|
program.command("run", { isDefault: true }).description("Launch the interactive agent (TUI)").argument("[goal...]", "what to do (optional; prompts if omitted)").option("-o, --objective <name>", "routing objective: cheapest | value | quality").option("--max-cost <usd>", "exclude models whose projected per-call cost exceeds this").option("-w, --write", "allow the agent to write files (confined to --cwd)", false).option("-x, --commands", "DANGER: let the model run arbitrary shell commands in --cwd", false).option("-C, --cwd <dir>", "working directory", process.cwd()).action(async (goalParts, opts) => {
|
|
2648
|
+
const startedAt = Date.now();
|
|
1628
2649
|
const config = loadConfig();
|
|
1629
|
-
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
|
|
2650
|
+
if (!config.local.enabled || resolveApiKey(config)) {
|
|
2651
|
+
const key = await ensureApiKey(config);
|
|
2652
|
+
if (!key && !config.local.enabled) {
|
|
2653
|
+
console.error(c.red("No API key \u2014 cannot run. Try `poly login`, or `poly config local on` for a local LLM."));
|
|
2654
|
+
process.exit(1);
|
|
2655
|
+
}
|
|
1633
2656
|
}
|
|
1634
2657
|
const reloaded = loadConfig();
|
|
1635
2658
|
const models = await loadCatalog(reloaded);
|
|
1636
2659
|
const policy = buildPolicy(reloaded, opts);
|
|
1637
2660
|
const goal = goalParts?.join(" ").trim() || void 0;
|
|
2661
|
+
const sessionId = randomUUID();
|
|
1638
2662
|
const instance = render(
|
|
1639
2663
|
createElement(App, {
|
|
1640
2664
|
client: client(reloaded),
|
|
1641
2665
|
models,
|
|
1642
2666
|
policy,
|
|
1643
|
-
sessionId
|
|
2667
|
+
sessionId,
|
|
1644
2668
|
cwd: opts.cwd,
|
|
1645
2669
|
allowWrite: !!opts.write,
|
|
1646
2670
|
allowCommands: !!opts.commands,
|
|
@@ -1649,11 +2673,22 @@ program.command("run", { isDefault: true }).description("Launch the interactive
|
|
|
1649
2673
|
})
|
|
1650
2674
|
);
|
|
1651
2675
|
await instance.waitUntilExit();
|
|
2676
|
+
const totals2 = sessionUsageTotals(sessionId);
|
|
2677
|
+
trackCommand({
|
|
2678
|
+
command: "run",
|
|
2679
|
+
startedAt,
|
|
2680
|
+
sessionId,
|
|
2681
|
+
args: goal,
|
|
2682
|
+
objective: policy.objective,
|
|
2683
|
+
...totals2
|
|
2684
|
+
});
|
|
1652
2685
|
});
|
|
1653
2686
|
program.command("recommend").description("Recommend the best / best-value model combos for a task BEFORE running").argument("<goal...>", "task description").option("--smart", "use an LLM to produce a tailored plan (costs a few cents)", false).option("-o, --objective <name>", "highlight a specific objective").action(async (goalParts, opts) => {
|
|
2687
|
+
const startedAt = Date.now();
|
|
1654
2688
|
const config = loadConfig();
|
|
1655
2689
|
const models = await loadCatalog(config);
|
|
1656
2690
|
const goal = goalParts.join(" ");
|
|
2691
|
+
const sessionId = randomUUID();
|
|
1657
2692
|
let plan = heuristicPlan(goal);
|
|
1658
2693
|
if (opts.smart) {
|
|
1659
2694
|
const key = resolveApiKey(config);
|
|
@@ -1663,7 +2698,9 @@ program.command("recommend").description("Recommend the best / best-value model
|
|
|
1663
2698
|
const planRoute = route("plan", models, buildPolicy(config, {}));
|
|
1664
2699
|
if (planRoute) {
|
|
1665
2700
|
try {
|
|
1666
|
-
plan = await planRequest(goal, client(config), planRoute.model)
|
|
2701
|
+
plan = await planRequest(goal, client(config), planRoute.model, (result) => {
|
|
2702
|
+
logCompletion(result, "plan", sessionId, "recommend");
|
|
2703
|
+
});
|
|
1667
2704
|
} catch (e) {
|
|
1668
2705
|
console.error(c.yellow(`Smart plan failed (${e?.message}); using heuristic.`));
|
|
1669
2706
|
}
|
|
@@ -1671,6 +2708,8 @@ program.command("recommend").description("Recommend the best / best-value model
|
|
|
1671
2708
|
}
|
|
1672
2709
|
}
|
|
1673
2710
|
console.log(renderRecommendation(buildRecommendation(plan, models)));
|
|
2711
|
+
const totals2 = sessionUsageTotals(sessionId);
|
|
2712
|
+
trackCommand({ command: "recommend", startedAt, sessionId, args: goal, objective: config.defaultObjective, ...totals2 });
|
|
1674
2713
|
});
|
|
1675
2714
|
program.command("models").description("Browse the model catalog with pricing and tiers").option("-t, --tier <tier>", "filter by tier: cheap | standard | frontier").option("--tools", "only models that support tool/function calling", false).option("-s, --search <text>", "filter by id/name substring").option("--refresh", "force-refresh the catalog from OpenRouter", false).option("-n, --limit <n>", "max rows", "40").action(async (opts) => {
|
|
1676
2715
|
const config = loadConfig();
|
|
@@ -1701,11 +2740,11 @@ program.command("usage").description("Show recorded usage & cost by date + model
|
|
|
1701
2740
|
let until = opts.until;
|
|
1702
2741
|
if (opts.today) {
|
|
1703
2742
|
const d = /* @__PURE__ */ new Date();
|
|
1704
|
-
const
|
|
2743
|
+
const iso2 = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(
|
|
1705
2744
|
d.getDate()
|
|
1706
2745
|
).padStart(2, "0")}`;
|
|
1707
|
-
since =
|
|
1708
|
-
until =
|
|
2746
|
+
since = iso2;
|
|
2747
|
+
until = iso2;
|
|
1709
2748
|
}
|
|
1710
2749
|
console.log(renderUsageReport({ since, until }));
|
|
1711
2750
|
if (opts.sync) {
|
|
@@ -1713,10 +2752,34 @@ program.command("usage").description("Show recorded usage & cost by date + model
|
|
|
1713
2752
|
console.log(res.synced > 0 ? c.green(res.message) : c.dim(res.message));
|
|
1714
2753
|
}
|
|
1715
2754
|
});
|
|
1716
|
-
program.command("
|
|
2755
|
+
// `poly analyze`: print the report produced by renderAnalysis, optionally
// bounded by the --since / --until dates.
program
  .command("analyze")
  .description("Which approach reaches the goal with the FEWEST tokens \u2014 per model, task, objective, command")
  .option("--since <date>", "YYYY-MM-DD inclusive")
  .option("--until <date>", "YYYY-MM-DD inclusive")
  .action(async ({ since, until }) => {
    const report = renderAnalysis({ since, until });
    console.log(report);
  });
|
|
2758
|
+
// `poly sync`: push data to every enabled sync target (Data Connect and/or
// Firestore). `--raw` is forwarded to both targets as `{ raw: true }`.
//
// Each target is attempted independently: a failure in one is reported and
// does not prevent the other from running. Any failure sets a non-zero exit
// code so that scripts (`poly sync && ...`) can detect it.
program
  .command("sync")
  .description("Push DISTILLED efficiency insights to Firebase (raw logs stay local unless --raw)")
  .option("--raw", "also push the full raw ledger (sessions/steps/calls/commands)", false)
  .action(async (opts) => {
    const config = loadConfig();
    const raw = !!opts.raw;
    let pushed = false;

    if (config.dataconnect.enabled) {
      pushed = true;
      try {
        const res = await syncDataConnect(config, { raw });
        // Green only when at least one record of any kind was pushed.
        const n = res.insights + res.sessions + res.steps + res.commands + res.calls;
        console.log(n > 0 ? c.green(res.message) : c.dim(res.message));
      } catch (e) {
        console.error(c.red(`Data Connect sync failed: ${e?.message ?? e}`));
        // Signal the failure without aborting, so Firestore still gets its turn.
        process.exitCode = 1;
      }
    }

    if (config.firestore.enabled) {
      pushed = true;
      try {
        const res = await syncUsage(config, { raw });
        console.log(res.synced > 0 ? c.green(res.message) : c.dim(res.message));
      } catch (e) {
        console.error(c.red(`Firestore sync failed: ${e?.message ?? e}`));
        process.exitCode = 1;
      }
    }

    if (!pushed) {
      console.log(
        c.yellow(
          "No sync target enabled. Use `poly config dataconnect on` (SQL) or `poly config firestore on`."
        )
      );
    }
  });
|
|
1721
2784
|
var cfg = program.command("config").description("View or change Polymath settings");
|
|
1722
2785
|
cfg.command("show").description("Print the current config (key is masked)").action(() => {
|
|
@@ -1758,6 +2821,29 @@ cfg.command("firestore").description("Enable/disable Firestore sync: on | off").
|
|
|
1758
2821
|
saveConfig(config);
|
|
1759
2822
|
console.log(c.green(`Firestore sync ${config.firestore.enabled ? "enabled" : "disabled"}.`));
|
|
1760
2823
|
});
|
|
2824
|
+
// `poly config local <state> [--base <url>]`: switch the local LLM server
// option on or off and optionally change its base URL.
//
// <state> enables the feature only when it is exactly `on`, `true` or `1`
// (case-insensitive); any other value disables it.
cfg
  .command("local")
  .description("Enable/disable a local LLM server (Ollama/LM Studio): on | off [--base <url>]")
  .argument("<state>")
  .option("--base <url>", "OpenAI-compatible base URL (default http://localhost:11434/v1)")
  .action((state, opts) => {
    const config = loadConfig();
    // The alternation must be grouped: an ungrouped /^on|true|1$/ means
    // "starts with on" OR "contains true" OR "ends with 1", which would
    // enable the feature for inputs such as "only", "untrue" or "21".
    config.local.enabled = /^(?:on|true|1)$/i.test(state);
    // Store the base URL without a single trailing slash.
    if (opts.base) config.local.baseUrl = String(opts.base).replace(/\/$/, "");
    saveConfig(config);
    console.log(
      c.green(
        `Local LLM ${config.local.enabled ? "enabled" : "disabled"} (${config.local.baseUrl}). Models appear as local/<name> with $0 cost.`
      )
    );
  });
|
|
2835
|
+
// `poly config dataconnect <state> [--location <loc>] [--service <id>]`:
// switch Firebase Data Connect sync on or off and optionally change the
// location / service id stored in the config.
//
// <state> enables the feature only when it is exactly `on`, `true` or `1`
// (case-insensitive); any other value disables it.
cfg
  .command("dataconnect")
  .description("Enable/disable Firebase Data Connect (SQL) sync: on | off [--location <loc>] [--service <id>]")
  .argument("<state>")
  .option("--location <loc>", "Data Connect location (default us-east4)")
  .option("--service <id>", "Data Connect service id (default polymath)")
  .action((state, opts) => {
    const config = loadConfig();
    // The alternation must be grouped: an ungrouped /^on|true|1$/ means
    // "starts with on" OR "contains true" OR "ends with 1", which would
    // enable the feature for inputs such as "only", "untrue" or "21".
    config.dataconnect.enabled = /^(?:on|true|1)$/i.test(state);
    if (opts.location) config.dataconnect.location = opts.location;
    if (opts.service) config.dataconnect.serviceId = opts.service;
    saveConfig(config);
    console.log(
      c.green(
        `Data Connect sync ${config.dataconnect.enabled ? "enabled" : "disabled"} (service ${config.dataconnect.serviceId} @ ${config.dataconnect.location}).`
      )
    );
  });
|
|
1761
2847
|
program.parseAsync().catch((err) => {
|
|
1762
2848
|
console.error(c.red(err?.message ?? String(err)));
|
|
1763
2849
|
process.exit(1);
|