incremnt 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -1
- package/src/browse.js +37 -2
- package/src/contract.js +37 -1
- package/src/format.js +5 -0
- package/src/openrouter.js +81 -24
- package/src/prompt-security.js +13 -0
- package/src/queries.js +190 -25
- package/src/remote.js +98 -1
- package/src/stored-summary-eval-report.js +138 -0
- package/src/summary-evals.js +839 -0
- package/src/sync-service.js +370 -39
- package/src/workout-prompt-variants.js +52 -0
package/src/remote.js
CHANGED
|
@@ -39,7 +39,8 @@ const remoteCommandHandlers = {
|
|
|
39
39
|
'health-ai': executeRemoteRead,
|
|
40
40
|
'training-load': executeRemoteRead,
|
|
41
41
|
'ask-history': executeRemoteRead,
|
|
42
|
-
'ask-show': executeRemoteRead
|
|
42
|
+
'ask-show': executeRemoteRead,
|
|
43
|
+
'program-share-fetch': executeRemoteRead
|
|
43
44
|
};
|
|
44
45
|
|
|
45
46
|
async function executeRemoteRead(options, sessionState, normalizedCommand) {
|
|
@@ -158,6 +159,8 @@ function endpointForCommand(baseUrl, normalizedCommand, options) {
|
|
|
158
159
|
}
|
|
159
160
|
case 'ask-show':
|
|
160
161
|
return resolveServiceUrl(baseUrl, `/cli/ask/history/${options.id}`);
|
|
162
|
+
case 'program-share-fetch':
|
|
163
|
+
return resolveServiceUrl(baseUrl, `/program-share/${options.token}`);
|
|
161
164
|
default:
|
|
162
165
|
return resolveServiceUrl(baseUrl, '/');
|
|
163
166
|
}
|
|
@@ -180,6 +183,10 @@ function resourceNotFoundMessage(normalizedCommand, options) {
|
|
|
180
183
|
return `Conversation not found: ${options.id}`;
|
|
181
184
|
}
|
|
182
185
|
|
|
186
|
+
if (normalizedCommand === 'program-share-fetch') {
|
|
187
|
+
return `Program share not found: ${options.token}`;
|
|
188
|
+
}
|
|
189
|
+
|
|
183
190
|
return 'Requested resource was not found.';
|
|
184
191
|
}
|
|
185
192
|
|
|
@@ -278,6 +285,96 @@ const remoteWriteCommandHandlers = {
|
|
|
278
285
|
throw error;
|
|
279
286
|
}
|
|
280
287
|
|
|
288
|
+
return response.json();
|
|
289
|
+
},
|
|
290
|
+
|
|
291
|
+
'program-share-create': async (options, sessionState) => {
|
|
292
|
+
const baseUrl = sessionState.session?.transport?.baseUrl;
|
|
293
|
+
if (!baseUrl) throw notImplementedError();
|
|
294
|
+
if (!options['program-id']) {
|
|
295
|
+
const error = new Error('--program-id is required for programs share create.');
|
|
296
|
+
error.code = 'MISSING_OPTION';
|
|
297
|
+
throw error;
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
const endpoint = resolveServiceUrl(baseUrl, `/cli/programs/${options['program-id']}/share`);
|
|
301
|
+
const response = await fetch(endpoint, {
|
|
302
|
+
method: 'POST',
|
|
303
|
+
headers: {
|
|
304
|
+
Authorization: `Bearer ${sessionState.session?.auth?.accessToken ?? ''}`
|
|
305
|
+
}
|
|
306
|
+
});
|
|
307
|
+
|
|
308
|
+
if (response.status === 401 || response.status === 403) throw authenticationFailedError();
|
|
309
|
+
if (response.status === 404) {
|
|
310
|
+
const error = new Error(`Program not found: ${options['program-id']}`);
|
|
311
|
+
error.code = 'REMOTE_NOT_FOUND';
|
|
312
|
+
throw error;
|
|
313
|
+
}
|
|
314
|
+
if (!response.ok) {
|
|
315
|
+
const payload = await response.json().catch(() => null);
|
|
316
|
+
const error = new Error(payload?.error ?? `Unexpected error (HTTP ${response.status}).`);
|
|
317
|
+
error.code = 'REMOTE_HTTP_ERROR';
|
|
318
|
+
throw error;
|
|
319
|
+
}
|
|
320
|
+
return response.json();
|
|
321
|
+
},
|
|
322
|
+
|
|
323
|
+
'program-share-list': async (options, sessionState) => {
|
|
324
|
+
const baseUrl = sessionState.session?.transport?.baseUrl;
|
|
325
|
+
if (!baseUrl) throw notImplementedError();
|
|
326
|
+
if (!options['program-id']) {
|
|
327
|
+
const error = new Error('--program-id is required for programs share list.');
|
|
328
|
+
error.code = 'MISSING_OPTION';
|
|
329
|
+
throw error;
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
const endpoint = resolveServiceUrl(baseUrl, `/cli/programs/${options['program-id']}/shares`);
|
|
333
|
+
const response = await fetch(endpoint, {
|
|
334
|
+
headers: {
|
|
335
|
+
Authorization: `Bearer ${sessionState.session?.auth?.accessToken ?? ''}`
|
|
336
|
+
}
|
|
337
|
+
});
|
|
338
|
+
|
|
339
|
+
if (response.status === 401 || response.status === 403) throw authenticationFailedError();
|
|
340
|
+
if (!response.ok) {
|
|
341
|
+
const payload = await response.json().catch(() => null);
|
|
342
|
+
const error = new Error(payload?.error ?? `Unexpected error (HTTP ${response.status}).`);
|
|
343
|
+
error.code = 'REMOTE_HTTP_ERROR';
|
|
344
|
+
throw error;
|
|
345
|
+
}
|
|
346
|
+
return response.json();
|
|
347
|
+
},
|
|
348
|
+
|
|
349
|
+
'program-share-revoke': async (options, sessionState) => {
|
|
350
|
+
const baseUrl = sessionState.session?.transport?.baseUrl;
|
|
351
|
+
if (!baseUrl) throw notImplementedError();
|
|
352
|
+
if (!options.token) {
|
|
353
|
+
const error = new Error('--token is required for programs share revoke.');
|
|
354
|
+
error.code = 'MISSING_OPTION';
|
|
355
|
+
throw error;
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
const endpoint = resolveServiceUrl(baseUrl, `/cli/program-share/${options.token}/revoke`);
|
|
359
|
+
const response = await fetch(endpoint, {
|
|
360
|
+
method: 'POST',
|
|
361
|
+
headers: {
|
|
362
|
+
Authorization: `Bearer ${sessionState.session?.auth?.accessToken ?? ''}`
|
|
363
|
+
}
|
|
364
|
+
});
|
|
365
|
+
|
|
366
|
+
if (response.status === 401 || response.status === 403) throw authenticationFailedError();
|
|
367
|
+
if (response.status === 404) {
|
|
368
|
+
const error = new Error(`Program share not found: ${options.token}`);
|
|
369
|
+
error.code = 'REMOTE_NOT_FOUND';
|
|
370
|
+
throw error;
|
|
371
|
+
}
|
|
372
|
+
if (!response.ok) {
|
|
373
|
+
const payload = await response.json().catch(() => null);
|
|
374
|
+
const error = new Error(payload?.error ?? `Unexpected error (HTTP ${response.status}).`);
|
|
375
|
+
error.code = 'REMOTE_HTTP_ERROR';
|
|
376
|
+
throw error;
|
|
377
|
+
}
|
|
281
378
|
return response.json();
|
|
282
379
|
}
|
|
283
380
|
};
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
 * Tallies pass/fail totals for a flat list of eval results.
 *
 * @param {Array<{passed: *}>} results - Eval results; an entry counts as
 *   passed when its `passed` property is truthy.
 * @returns {{total: number, passed: number, failed: number, passRate: number}}
 *   Counts plus `passRate` (`passed / total`, or 0 for an empty list).
 */
export function summarizeResults(results) {
  const total = results.length;
  const passed = results.filter((result) => result.passed).length;

  return {
    total,
    passed,
    failed: total - passed,
    // Guard the division so an empty run reports 0 instead of NaN.
    passRate: total > 0 ? passed / total : 0
  };
}
|
|
10
|
+
|
|
11
|
+
/**
 * Groups eval results by their `surface` property and tallies each group.
 *
 * @param {Array<{surface: *, passed: *}>} results - Eval results; an entry
 *   counts as passed when its `passed` property is truthy.
 * @returns {Object<string, {total: number, passed: number, failed: number, passRate: number}>}
 *   One entry per distinct surface, in first-seen order for string keys.
 */
export function summarizeBySurface(results) {
  const grouped = new Map();

  for (const result of results) {
    let entry = grouped.get(result.surface);
    if (!entry) {
      entry = { total: 0, passed: 0, failed: 0 };
      grouped.set(result.surface, entry);
    }

    entry.total += 1;
    if (result.passed) {
      entry.passed += 1;
    } else {
      entry.failed += 1;
    }
  }

  return Object.fromEntries(
    [...grouped].map(([surface, entry]) => [
      surface,
      // Every group holds at least one result, so the division is always defined.
      { ...entry, passRate: entry.passed / entry.total }
    ])
  );
}
|
|
31
|
+
|
|
32
|
+
/**
 * Builds the report object for one stored-summary snapshot.
 *
 * @param {*} snapshotPath - Identifier of the snapshot; copied through unchanged.
 * @param {Array<{id: *, surface: *, passed: *, output: *, checks?: Array<{passed: *}>}>} results
 *   Eval results for the snapshot.
 * @returns {{snapshotPath: *, summary: object, bySurface: object, results: Array<object>}}
 *   Overall counts (`summarizeResults`), per-surface counts
 *   (`summarizeBySurface`), and a trimmed copy of each result that keeps only
 *   the checks whose `passed` is falsy.
 */
export function buildStoredSummaryReport(snapshotPath, results) {
  const trimmedResults = results.map((result) => ({
    id: result.id,
    surface: result.surface,
    passed: result.passed,
    output: result.output,
    // A result without a `checks` array is reported as having no failed
    // checks rather than aborting the whole report with a TypeError.
    failedChecks: (result.checks ?? []).filter((check) => !check.passed)
  }));

  return {
    snapshotPath,
    summary: summarizeResults(results),
    bySurface: summarizeBySurface(results),
    results: trimmedResults
  };
}
|
|
46
|
+
|
|
47
|
+
/**
 * Aggregates several per-snapshot reports into one batch summary.
 *
 * @param {Array<{summary: {total: number, passed: number}, bySurface?: Object<string, {total: number, passed: number, failed: number}>}>} reports
 *   Per-snapshot reports; a report without `bySurface` contributes only to the
 *   overall totals.
 * @returns {{snapshotCount: number, total: number, passed: number, failed: number, passRate: number, bySurface: object}}
 *   Summed totals, overall `passRate`, and per-surface sums with their own
 *   `passRate` (0 whenever the corresponding total is 0).
 */
export function summarizeBatchReports(reports) {
  const surfaceTotals = new Map();
  let total = 0;
  let passed = 0;

  for (const report of reports) {
    total += report.summary.total;
    passed += report.summary.passed;

    for (const [surface, entry] of Object.entries(report.bySurface ?? {})) {
      let running = surfaceTotals.get(surface);
      if (!running) {
        running = { total: 0, passed: 0, failed: 0 };
        surfaceTotals.set(surface, running);
      }
      running.total += entry.total;
      running.passed += entry.passed;
      running.failed += entry.failed;
    }
  }

  const bySurface = Object.fromEntries(
    [...surfaceTotals].map(([surface, entry]) => [
      surface,
      // Incoming entries may carry a zero total, so keep the division guarded.
      { ...entry, passRate: entry.total > 0 ? entry.passed / entry.total : 0 }
    ])
  );

  return {
    snapshotCount: reports.length,
    total,
    passed,
    failed: total - passed,
    passRate: total > 0 ? passed / total : 0,
    bySurface
  };
}
|
|
82
|
+
|
|
83
|
+
/**
 * Formats a 0..1 ratio as a percentage string with one decimal place.
 *
 * @param {number} value - Ratio to format.
 * @returns {string} For example `0.5` becomes `"50.0%"`.
 */
function percentage(value) {
  return `${(value * 100).toFixed(1)}%`;
}

/**
 * Checks a batch summary against minimum pass-rate thresholds.
 *
 * @param {{passRate: number, bySurface?: Object<string, {total: number, passRate: number}>}} summary
 *   Batch summary to check.
 * @param {object} [thresholds]
 * @param {?number} [thresholds.minPassRate=null] - Required overall pass rate
 *   (0..1); ignored unless it is a number.
 * @param {Object<string, number>} [thresholds.minSurfacePassRates={}] -
 *   Required pass rate per surface name.
 * @returns {string[]} One human-readable message per violated threshold; empty
 *   when every threshold is met.
 */
export function evaluateBatchThresholds(summary, {
  minPassRate = null,
  minSurfacePassRates = {}
} = {}) {
  const failures = [];

  if (typeof minPassRate === 'number' && summary.passRate < minPassRate) {
    failures.push(
      `Overall pass rate ${percentage(summary.passRate)} is below required ${percentage(minPassRate)}.`
    );
  }

  for (const [surface, minimum] of Object.entries(minSurfacePassRates)) {
    const surfaceSummary = summary.bySurface?.[surface];

    // A surface that is absent from the summary, or that has no results, is
    // skipped rather than reported as a failure.
    if (!surfaceSummary || surfaceSummary.total === 0) continue;

    if (surfaceSummary.passRate < minimum) {
      failures.push(
        `Surface ${surface} pass rate ${percentage(surfaceSummary.passRate)} is below required ${percentage(minimum)}.`
      );
    }
  }

  return failures;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Renders a batch summary as a Markdown document.
 *
 * @param {{snapshotCount: number, total: number, passed: number, failed: number, passRate: number, bySurface?: Object<string, {passed: number, total: number, passRate: number}>}} summary
 *   Batch summary to render; a missing `bySurface` renders an empty
 *   "By Surface" section.
 * @param {Array<{snapshotLabel?: string, snapshotPath?: string, summary: {passed: number, total: number}}>} reports
 *   Per-snapshot reports, listed under "Snapshots". Each line is labelled with
 *   `snapshotLabel`, falling back to `snapshotPath` when no label is set.
 * @param {string[]} [failures=[]] - Threshold failure messages; the
 *   "Threshold Failures" section is emitted only when this is non-empty.
 * @returns {string} Markdown text terminated by a single newline.
 */
export function formatBatchSummaryMarkdown(summary, reports, failures = []) {
  const formatRate = (rate) => `${(rate * 100).toFixed(1)}%`;

  const lines = [
    '# Stored Summary Eval Report',
    '',
    `- Snapshots: ${summary.snapshotCount}`,
    `- Total summaries: ${summary.total}`,
    `- Passed: ${summary.passed}`,
    `- Failed: ${summary.failed}`,
    `- Pass rate: ${formatRate(summary.passRate)}`,
    '',
    '## By Surface'
  ];

  for (const [surface, entry] of Object.entries(summary.bySurface ?? {})) {
    lines.push(`- ${surface}: ${entry.passed}/${entry.total} passed (${formatRate(entry.passRate)})`);
  }

  lines.push('', '## Snapshots');
  for (const report of reports) {
    // Reports that carry only a snapshot path would otherwise be listed as
    // "undefined".
    const label = report.snapshotLabel ?? report.snapshotPath;
    lines.push(`- ${label}: ${report.summary.passed}/${report.summary.total} passed`);
  }

  if (failures.length > 0) {
    lines.push('', '## Threshold Failures');
    for (const failure of failures) {
      lines.push(`- ${failure}`);
    }
  }

  return `${lines.join('\n')}\n`;
}
|