@aion0/forge 0.10.12 → 0.10.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RELEASE_NOTES.md +3 -3
- package/app/api/public-info/[resource]/route.ts +40 -0
- package/components/ProjectDetail.tsx +1 -1
- package/components/SettingsModal.tsx +42 -33
- package/components/WebTerminal.tsx +13 -5
- package/components/WorkspaceView.tsx +5 -3
- package/lib/agents/index.ts +6 -1
- package/lib/agents/known-models.ts +75 -0
- package/lib/chat/tool-dispatcher.ts +33 -0
- package/lib/help-docs/05-pipelines.md +9 -0
- package/lib/public-info/fetch.ts +116 -0
- package/lib/public-info/types.ts +38 -0
- package/lib/public-info/use-models-registry.ts +66 -0
- package/lib/settings.ts +9 -0
- package/next-env.d.ts +1 -1
- package/package.json +1 -1
- package/lib/__tests__/foreach-batch-yaml.test.ts +0 -33
- package/lib/__tests__/foreach-before.test.ts +0 -201
- package/lib/__tests__/foreach-parse.test.ts +0 -114
- package/lib/__tests__/foreach-snapshot.test.ts +0 -112
- package/lib/__tests__/foreach-source.test.ts +0 -105
- package/lib/__tests__/foreach-template.test.ts +0 -112
- package/lib/workspace/__tests__/state-machine.test.ts +0 -388
- package/lib/workspace/__tests__/workspace.test.ts +0 -311
- package/scripts/bench/README.md +0 -66
- package/scripts/bench/results/.gitignore +0 -2
- package/scripts/bench/run.ts +0 -635
- package/scripts/bench/tasks/01-text-utils/task.md +0 -26
- package/scripts/bench/tasks/01-text-utils/validator.sh +0 -46
- package/scripts/bench/tasks/02-pagination/setup.sh +0 -19
- package/scripts/bench/tasks/02-pagination/task.md +0 -48
- package/scripts/bench/tasks/02-pagination/validator.sh +0 -69
- package/scripts/bench/tasks/03-bug-fix/setup.sh +0 -82
- package/scripts/bench/tasks/03-bug-fix/task.md +0 -30
- package/scripts/bench/tasks/03-bug-fix/validator.sh +0 -29
- package/scripts/test-agents-migrate.ts +0 -149
- package/scripts/test-mantis.ts +0 -223
- package/scripts/test-memory-local.ts +0 -139
- package/scripts/test-memory-upsert.ts +0 -106
- package/scripts/verify-usage.ts +0 -178
package/scripts/test-mantis.ts
DELETED
|
@@ -1,223 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Mantis connector regression suite.
|
|
3
|
-
*
|
|
4
|
-
* pnpm tsx scripts/test-mantis.ts # all cases
|
|
5
|
-
* pnpm tsx scripts/test-mantis.ts --case source # one case
|
|
6
|
-
* pnpm tsx scripts/test-mantis.ts --bail # stop on first fail
|
|
7
|
-
*
|
|
8
|
-
* Hits the LIVE Mantis via the running Forge's dispatchTool. The
|
|
9
|
-
* browser extension must be paired + logged into Mantis. Each case:
|
|
10
|
-
*
|
|
11
|
-
* 1. Calls mantis.<tool> with `args`.
|
|
12
|
-
* 2. Validates response shape (parseable, has `bugs`).
|
|
13
|
-
* 3. Runs row-level assertions (e.g. every result.source matches
|
|
14
|
-
* the filter).
|
|
15
|
-
*
|
|
16
|
-
* No DB writes, no Job dispatch — pure connector smoke tests.
|
|
17
|
-
*/
|
|
18
|
-
import { dispatchTool } from '@/lib/chat/tool-dispatcher';
|
|
19
|
-
|
|
20
|
-
// ─── Test case shape ──────────────────────────────────────────
|
|
21
|
-
/**
 * One regression case: a single tool invocation plus the checks applied
 * to its parsed response.
 */
interface Case {
  /** Human-readable label; the `--case` flag matches against it by substring. */
  name: string;
  /** Tool name handed to the dispatcher, e.g. `mantis.search_bugs`. */
  tool: string;
  /** Input payload forwarded to the tool unchanged. */
  args: Record<string, unknown>;
  /** Checks run on the parsed response; each returns null on pass or a failure reason. */
  assertions?: ((resp: any) => string | null)[];
  /** Debug aid: when set, a failing case also prints the response keys. */
  dump?: boolean;
}
|
|
29
|
-
|
|
30
|
-
// ─── Assertion helpers ────────────────────────────────────────
|
|
31
|
-
/**
 * Builds an assertion that passes when `r[key]` is an array or a non-null
 * object, and otherwise reports the field as missing.
 *
 * `typeof null` is `'object'`, so null is rejected explicitly; without that
 * guard a response like `{ bugs: null }` would count as having the field.
 */
const has = (key: string) => (r: any): string | null => {
  const value = r?.[key];
  // Arrays are objects too, so a single non-null object check covers both.
  return value !== null && typeof value === 'object' ? null : `missing field '${key}'`;
};
|
|
33
|
-
|
|
34
|
-
/**
 * Builds an assertion that passes only when `r[key]` is an array holding at
 * least one element; the failure reason reports the observed length, or
 * `n/a` when the field has no length.
 */
const nonEmpty = (key: string) => (r: any) => {
  const field = r?.[key];
  if (Array.isArray(field) && field.length > 0) {
    return null;
  }
  return `expected non-empty '${key}', got len=${field?.length ?? 'n/a'}`;
};
|
|
36
|
-
|
|
37
|
-
/**
 * Builds an assertion that every row in `r.bugs` has `row[col]` containing
 * at least one of `needles` (case-insensitive substring match).
 *
 * Returns null when all rows match, `no bugs to check` when there is nothing
 * to inspect (including a null or malformed response), and otherwise a reason
 * naming the first offending row.
 */
const everyRowMatches = (col: string, needles: string[]) => (r: any): string | null => {
  // Optional access: a null response is a failed check, not a crash.
  const bugs = r?.bugs;
  if (!Array.isArray(bugs) || bugs.length === 0) return `no bugs to check`;

  const lowered = needles.map(n => n.toLowerCase());
  // findIndex rather than find, so that a null/falsy row is still reported.
  const badIndex = bugs.findIndex((b: any) => {
    const v = String(b?.[col] || '').toLowerCase();
    return !lowered.some(n => v.includes(n));
  });
  if (badIndex === -1) return null;

  const bad = bugs[badIndex];
  return `bug ${bad?.id} ${col}="${bad?.[col]}" doesn't match any of ${JSON.stringify(needles)}`;
};
|
|
48
|
-
|
|
49
|
-
/**
 * Status name → numeric id used in the `show_status` URL parameter.
 */
const STATUS_TO_ID: Record<string, number> = {
  new: 10, feedback: 20, acknowledged: 30, confirmed: 40,
  assigned: 50, resolved: 80, closed: 90,
};

/**
 * Builds an assertion that the URL Mantis was hit with (`r._filter_url`)
 * carries `show_status` ids for every requested state.
 *
 * Mantis applies the status filter server-side via show_status=<id>; per-row
 * b.status is unreliable on customized themes that put the handler name in
 * the Status column, so the URL is trusted instead.
 *
 * A state name missing from STATUS_TO_ID is reported as a failure. Dropping
 * it silently would let a misspelled status pass without checking anything.
 */
const urlHasStatus = (states: string[]) => (r: any): string | null => {
  // typeof check (not `in`) so inherited keys like "constructor" are not accepted.
  const unknown = states.filter(s => typeof STATUS_TO_ID[s.toLowerCase()] !== 'number');
  if (unknown.length > 0) return `unknown status name(s): ${unknown.join(',')}`;

  const wantIds = states.map(s => STATUS_TO_ID[s.toLowerCase()]);
  // A missing or non-string URL is treated as "no query string".
  const url: string = typeof r?._filter_url === 'string' ? r._filter_url : '';
  const params = new URLSearchParams(url.split('?')[1] || '');
  const got = (params.get('show_status') || '')
    .split(',')
    .map(s => parseInt(s, 10))
    .filter(Boolean);
  const missing = wantIds.filter(w => !got.includes(w));
  return missing.length === 0
    ? null
    : `URL missing show_status for ${missing.join(',')} — got "${params.get('show_status')}"`;
};
|
|
66
|
-
|
|
67
|
-
/**
 * Assertion: the response carries no truthy `_error` field. On failure the
 * reason includes at most the first 200 characters of the error.
 */
const noError = (r: any) => {
  if (!r._error) {
    return null;
  }
  const detail = String(r._error).slice(0, 200);
  return `connector reported _error: ${detail}`;
};
|
|
70
|
-
|
|
71
|
-
/**
 * Builds an assertion that `r[key]` does not exceed `max`. For an array the
 * compared value is its length; any other value is compared directly.
 * A missing field fails the check.
 */
const countLessOrEq = (key: string, max: number) => (r: any) => {
  const field = r?.[key];
  const n = Array.isArray(field) ? field.length : field;
  if (n == null || !(n <= max)) {
    return `expected ${key} ≤ ${max}, got ${n}`;
  }
  return null;
};
|
|
75
|
-
|
|
76
|
-
// ─── Cases ────────────────────────────────────────────────────
|
|
77
|
-
/**
 * The regression cases, in execution order. Every case calls
 * `mantis.search_bugs` for the FortiNAC project restricted to assigned bugs;
 * only the extra arguments and the assertions differ.
 */
const CASES: Case[] = (() => {
  // Factory for the shared shape; `extra` is appended after the common args.
  const search = (
    name: string,
    extra: Record<string, unknown>,
    assertions: Case['assertions'],
  ): Case => ({
    name,
    tool: 'mantis.search_bugs',
    args: { project_name: 'FortiNAC', status: 'assigned', ...extra },
    assertions,
  });

  return [
    search(
      'baseline / project_name resolves',
      { limit: 5 },
      [noError, has('bugs'), countLessOrEq('bugs', 5)],
    ),
    search(
      'status=assigned filter',
      { limit: 10 },
      [noError, urlHasStatus(['assigned'])],
    ),
    search(
      'source=QA filter (client-side, was broken pre-v0.13.0)',
      { source: 'QA', limit: 10 },
      [noError, everyRowMatches('source', ['QA'])],
    ),
    search(
      'source=DEV filter — different value, same path',
      { source: 'DEV', limit: 10 },
      [noError, everyRowMatches('source', ['DEV'])],
    ),
    search(
      'fix_schedule filter',
      { fix_schedule: '8.0.0', limit: 10 },
      [noError, everyRowMatches('fix_schedule', ['8.0.0'])],
    ),
    search(
      'combined: status + project + source + fix_schedule',
      { fix_schedule: '8.0.0', source: 'QA', limit: 5 },
      [
        noError,
        everyRowMatches('source', ['QA']),
        everyRowMatches('fix_schedule', ['8.0.0']),
        urlHasStatus(['assigned']),
      ],
    ),
    search(
      'empty-match: source=NONEXISTENT → 0 bugs, no error',
      { source: 'NONEXISTENT_VALUE_XYZ', limit: 5 },
      [
        noError,
        (r: any) => r.bugs?.length === 0 ? null : `expected 0 bugs, got ${r.bugs?.length}`,
      ],
    ),
    search(
      'resolution=open via extra_params (URL-layer)',
      { extra_params: { 'resolution[]': 10 }, limit: 5 },
      [noError, has('bugs')],
    ),
    // NOTE(review): despite its name this case only runs search_bugs; no get_bug call is made here.
    search(
      'get_bug round-trip on bug from search',
      { limit: 1 },
      [noError, nonEmpty('bugs')],
    ),
  ];
})();
|
|
147
|
-
|
|
148
|
-
// ─── Runner ───────────────────────────────────────────────────
|
|
149
|
-
// Command-line flags: `--case <substring>` selects cases by name, `--bail`
// stops the run at the first failing case.
const args = process.argv.slice(2);
const caseFlagIndex = args.indexOf('--case');
// The token following `--case`, or null when the flag is absent.
const caseFilter = caseFlagIndex === -1 ? null : args[caseFlagIndex + 1];
const bail = args.indexOf('--bail') !== -1;
|
|
155
|
-
|
|
156
|
-
/**
 * ANSI terminal styling helpers. Each wraps a string in the given SGR code
 * and appends the reset sequence.
 */
const C = (() => {
  const sgr = (code: number) => (s: string) => `\x1b[${code}m${s}\x1b[0m`;
  return {
    red: sgr(31),
    green: sgr(32),
    yellow: sgr(33),
    dim: sgr(2),
    bold: sgr(1),
  };
})();
|
|
163
|
-
|
|
164
|
-
/**
 * Executes one case: dispatches the tool call, parses the JSON content of
 * the result, then runs every assertion against the parsed response.
 *
 * @param c The case to run.
 * @returns `pass` is true when no failure was recorded; `failures` lists the
 *   reasons; `resp` is the parsed response, or null when the call itself
 *   failed (tool error, thrown exception, or unparseable content).
 *
 * An assertion that throws (for example on a response of an unexpected
 * shape) is recorded as a failure of this case rather than propagating,
 * so one malformed response cannot abort the whole run.
 */
async function runCase(c: Case): Promise<{ pass: boolean; failures: string[]; resp: any }> {
  // Thrown values are not guaranteed to be Error instances.
  const reasonOf = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  let resp: any = null;
  try {
    const r = await dispatchTool(
      { id: `test-${Date.now()}`, name: c.tool, input: c.args },
      { noTruncation: true },
    );
    if (r.is_error) {
      return { pass: false, failures: [`is_error=true: ${r.content.slice(0, 300)}`], resp: null };
    }
    resp = JSON.parse(r.content);
  } catch (e) {
    return { pass: false, failures: [`exception: ${reasonOf(e)}`], resp: null };
  }

  const failures: string[] = [];
  for (const check of c.assertions ?? []) {
    try {
      const failure = check(resp);
      if (failure) failures.push(failure);
    } catch (e) {
      failures.push(`assertion threw: ${reasonOf(e)}`);
    }
  }
  return { pass: failures.length === 0, failures, resp };
}
|
|
185
|
-
|
|
186
|
-
// Entry point: select the cases, run them sequentially, print one line per
// case plus a summary, and exit 0 (all passed), 1 (some failed) or 2 (the
// --case filter matched nothing).
(async () => {
  let selected = CASES;
  if (caseFilter) {
    const needle = caseFilter.toLowerCase();
    selected = CASES.filter(({ name }) => name.toLowerCase().includes(needle));
  }
  if (selected.length === 0) {
    console.error(`no cases match '${caseFilter}'`);
    console.error(`available: ${CASES.map(c => c.name).join(', ')}`);
    process.exit(2);
  }

  const tally = { passed: 0, failed: 0 };
  const suiteStart = Date.now();

  for (const testCase of selected) {
    process.stdout.write(`${C.dim('▶')} ${testCase.name.padEnd(60)} `);
    const caseStart = Date.now();
    const outcome = await runCase(testCase);
    const elapsed = Date.now() - caseStart;

    if (!outcome.pass) {
      console.log(`${C.red('FAIL')} ${C.dim(`(${elapsed}ms)`)}`);
      outcome.failures.forEach(reason => console.log(` ${C.red('×')} ${reason}`));
      tally.failed += 1;
      if (testCase.dump && outcome.resp) {
        console.log(C.dim(' response keys: ' + Object.keys(outcome.resp).join(', ')));
        if (outcome.resp._filter_diagnostics) {
          console.log(C.dim(' _filter_diagnostics: ' + JSON.stringify(outcome.resp._filter_diagnostics)));
        }
      }
      if (bail) break;
      continue;
    }

    // '?' stands in when the response has no bugs array to count.
    const bugCount = outcome.resp?.bugs?.length ?? '?';
    const plural = bugCount === 1 ? '' : 's';
    console.log(`${C.green('PASS')} ${C.dim(`(${elapsed}ms, ${bugCount} bug${plural})`)}`);
    tally.passed += 1;
  }

  const suiteMs = Date.now() - suiteStart;
  console.log('');
  console.log(`${C.bold(`${tally.passed}/${tally.passed + tally.failed} passed`)} in ${(suiteMs / 1000).toFixed(1)}s`);
  process.exit(tally.failed === 0 ? 0 : 1);
})();
|
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Phase A §15.4 self-check — LocalMemoryStore must satisfy the
|
|
3
|
-
* summarizer's needs end-to-end without Temper.
|
|
4
|
-
*
|
|
5
|
-
* pnpm tsx scripts/test-memory-local.ts
|
|
6
|
-
*
|
|
7
|
-
* Constructs LocalMemoryStore directly (bypasses getMemoryStore so the
|
|
8
|
-
* user's settings don't matter — we always exercise the local SQLite
|
|
9
|
-
* path). Writes summary + fact + cursor + health blocks via the key
|
|
10
|
-
* helpers, then asserts:
|
|
11
|
-
* 1. Summarizer happy-path round trip works (put → get → search)
|
|
12
|
-
* 2. buildMemoryContext recalls user-relevant blocks via search
|
|
13
|
-
* 3. Cursor putBlock(k, v1) then putBlock(k, v2) replaces, not appends
|
|
14
|
-
* 4. INTERNAL_KEY_PREFIXES filtering keeps cursor/health out of context
|
|
15
|
-
*/
|
|
16
|
-
|
|
17
|
-
import { LocalMemoryStore } from '../lib/chat/local-memory';
|
|
18
|
-
import { buildMemoryContext } from '../lib/chat/build-memory-context';
|
|
19
|
-
import {
|
|
20
|
-
cursorKey,
|
|
21
|
-
factKey,
|
|
22
|
-
healthKey,
|
|
23
|
-
stableHash,
|
|
24
|
-
summaryKey,
|
|
25
|
-
INTERNAL_KEY_PREFIXES,
|
|
26
|
-
type CursorValue,
|
|
27
|
-
} from '../lib/memory/keys';
|
|
28
|
-
|
|
29
|
-
const NS = '__phaseA_selfcheck__';
const SID = 'sid-test-1';
const NOW = Date.now();

/**
 * Runs the four LocalMemoryStore self-checks in order, printing one ✓/✗ line
 * per check, then exits with 0 when every check passed and 1 otherwise.
 */
async function main() {
  const store = new LocalMemoryStore(NS);
  console.log(`Backend: ${store.kind} ns=${store.currentNamespace}`);

  let failures = 0;
  // Prints the pass or fail line for one check and counts failures.
  const check = (ok: boolean, okMsg: string, failMsg: string): void => {
    if (ok) {
      console.log(` ✓ ${okMsg}`);
      return;
    }
    console.log(` ✗ ${failMsg}`);
    failures += 1;
  };

  // ── 1. Summarizer happy-path round trip ──────────────────────────
  console.log('Test 1 — summarizer round trip');
  const summaryBlockKey = summaryKey(SID, NOW);
  await store.putBlock(
    summaryBlockKey,
    {
      text: 'User asked about Forge architecture. Discussed memory layer + summarizer design.',
      from_ts: NOW - 1000, to_ts: NOW, message_count: 10,
      model: 'haiku', provider: 'anthropic', ingest_ts: NOW,
    },
    { description: 'session summary', scope: 'own' },
  );

  const factBlockKey = factKey('user', 'zliu', stableHash('prefers terse responses with code'));
  await store.putBlock(
    factBlockKey,
    {
      content: 'prefers terse responses with code',
      subject_kind: 'preference', subject: 'zliu',
      source_ref: `chat:${SID}@${NOW}`, confidence: null,
      extracted_by: 'summarizer',
    },
    { description: 'prefers terse responses with code', scope: 'own' },
  );

  const summaryBlock = await store.getBlock(summaryBlockKey);
  const factBlock = await store.getBlock(factBlockKey);
  check(Boolean(summaryBlock?.value), `summary readable at ${summaryBlockKey}`, 'summary block missing');
  check(Boolean(factBlock?.value), `fact readable at ${factBlockKey}`, 'fact block missing');

  // ── 2. buildMemoryContext recalls relevant content ───────────────
  console.log('Test 2 — buildMemoryContext recalls summary via search');
  const recallCtx = await buildMemoryContext({
    store,
    currentUserMessage: 'tell me about the memory summarizer',
  });
  check(
    recallCtx.hits.length > 0,
    `got ${recallCtx.hits.length} hit(s) for query`,
    'expected search hits for "memory summarizer" query, got 0',
  );
  const hitText = recallCtx.hits.map((h) => h.fact ?? '').join(' | ');
  check(
    /summarizer|memory layer/i.test(hitText),
    'hit contains expected keywords',
    `hit text didn't match: ${hitText.slice(0, 200)}`,
  );

  // ── 3. Cursor upsert replaces ────────────────────────────────────
  console.log('Test 3 — cursor upsert replaces');
  const cursorBlockKey = cursorKey(SID);
  const first: CursorValue = { last_ingested_ts: 1, last_run_ts: 1, ingest_count: 1 };
  const second: CursorValue = { last_ingested_ts: 999, last_run_ts: 999, ingest_count: 2 };
  await store.putBlock(cursorBlockKey, first);
  await store.putBlock(cursorBlockKey, second);
  const stored = (await store.getBlock(cursorBlockKey))?.value as CursorValue | undefined;
  check(
    stored?.last_ingested_ts === 999,
    'cursor replaced (v2 wins)',
    `expected last_ingested_ts=999, got ${stored?.last_ingested_ts}`,
  );
  const cursorRows = (await store.listBlocks()).filter((b) => b.key === cursorBlockKey);
  check(
    cursorRows.length === 1,
    'cursor row count = 1 (no append)',
    `expected 1 cursor row, got ${cursorRows.length}`,
  );

  // ── 4. INTERNAL_KEY_PREFIXES filters cursor/health out of context ─
  console.log('Test 4 — buildMemoryContext excludes cursor/health by prefix');
  await store.putBlock(healthKey(SID), { last_run_ts: NOW, error: null, ingest_count: 1, last_token_estimate: 100 });

  // The query deliberately contains tokens that the cursor/health blocks
  // would match, so any leak through the prefix filter shows up here.
  const filterCtx = await buildMemoryContext({
    store,
    currentUserMessage: 'summarizer cursor health status',
  });
  const returnedKeys = [
    ...filterCtx.blocks.map((b) => b.key),
    ...filterCtx.hits.map((h) => h.id),
  ];
  const leaked = returnedKeys.filter((k) =>
    INTERNAL_KEY_PREFIXES.some((p) => [p, 'block:' + p].some((prefix) => k.startsWith(prefix))),
  );
  check(
    leaked.length === 0,
    'no cursor/health leaked into context',
    `leaked internal blocks: ${leaked.join(', ')}`,
  );

  // Sanity: the rendered text must not literally contain an internal key prefix.
  const mentionsInternal =
    filterCtx.text.includes('forge.summarizer.cursor:') ||
    filterCtx.text.includes('forge.summarizer.health:');
  check(
    !mentionsInternal,
    'rendered context does not contain internal prefixes',
    'rendered context still mentions internal prefix',
  );

  console.log('');
  const allPassed = failures === 0;
  console.log(allPassed ? '✓ Phase A LocalMemoryStore self-check passed.' : `✗ ${failures} check(s) failed.`);
  process.exit(allPassed ? 0 : 1);
}

// Any unexpected rejection is reported and mapped to exit code 2.
main().catch((err) => {
  console.error('Self-check crashed:', err);
  process.exit(2);
});
|
|
@@ -1,106 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Memory store upsert idempotency check.
|
|
3
|
-
*
|
|
4
|
-
* pnpm tsx scripts/test-memory-upsert.ts
|
|
5
|
-
*
|
|
6
|
-
* Verifies putBlock semantics needed by the chat summarizer:
|
|
7
|
-
* - Same key written twice → second value wins (replace, not append)
|
|
8
|
-
* - listBlocks shows one row, not two
|
|
9
|
-
* - factKey() produces stable hashes across runs
|
|
10
|
-
*
|
|
11
|
-
* Runs against whichever backend getMemoryStore() picks — typically
|
|
12
|
-
* LocalMemoryStore unless Temper creds are set. Writes to a sentinel
|
|
13
|
-
* namespace key prefix and leaves those blocks in place (the store interface has no deleteBlock); re-runs overwrite them.
|
|
14
|
-
*/
|
|
15
|
-
|
|
16
|
-
import { getMemoryStore } from '../lib/chat/memory-store';
|
|
17
|
-
import {
|
|
18
|
-
cursorKey,
|
|
19
|
-
factKey,
|
|
20
|
-
summaryKey,
|
|
21
|
-
healthKey,
|
|
22
|
-
stableHash,
|
|
23
|
-
type CursorValue,
|
|
24
|
-
} from '../lib/memory/keys';
|
|
25
|
-
|
|
26
|
-
const SENTINEL_SESSION = '__upsert_test__';

/**
 * Runs the upsert idempotency checks against the store returned by
 * getMemoryStore(), printing one ✓/✗ line per check. Exits with 0 when all
 * checks pass and 1 when any check fails or the store is not enabled.
 *
 * The blocks written under the sentinel session are NOT removed afterwards;
 * the closing log line says so instead of claiming a cleanup happened.
 */
async function main() {
  const store = getMemoryStore();
  console.log(`Backend: ${store.kind} (enabled=${store.enabled})`);
  if (!store.enabled) {
    console.error('Store not enabled — cannot run test.');
    process.exit(1);
  }

  let failures = 0;
  const fail = (msg: string) => {
    console.log(` ✗ ${msg}`);
    failures += 1;
  };
  const pass = (msg: string) => console.log(` ✓ ${msg}`);

  // ── 1. cursor upsert: v1 then v2 → v2 wins ───────────────────────
  console.log('Test 1 — cursor upsert');
  const ck = cursorKey(SENTINEL_SESSION);
  const v1: CursorValue = { last_ingested_ts: 100, last_run_ts: 200, ingest_count: 1 };
  const v2: CursorValue = { last_ingested_ts: 500, last_run_ts: 600, ingest_count: 2 };
  await store.putBlock(ck, v1);
  await store.putBlock(ck, v2);
  const read = (await store.getBlock(ck))?.value as CursorValue | undefined;
  if (!read) fail('getBlock returned null after putBlock');
  else if (read.last_ingested_ts !== 500) fail(`expected last_ingested_ts=500, got ${read.last_ingested_ts}`);
  else if (read.ingest_count !== 2) fail(`expected ingest_count=2, got ${read.ingest_count}`);
  else pass('second putBlock replaced first');

  // ── 2. listBlocks has one row for the key, not two ───────────────
  console.log('Test 2 — listBlocks dedup');
  const all = await store.listBlocks();
  const matching = all.filter((b) => b.key === ck);
  if (matching.length === 1) pass(`exactly one row for ${ck}`);
  else fail(`expected 1 row for ${ck}, got ${matching.length}`);

  // ── 3. factKey is stable across calls ────────────────────────────
  console.log('Test 3 — factKey stable hash');
  const fk1 = factKey('user', 'zliu', stableHash('prefers terse responses'));
  const fk2 = factKey('user', 'zliu', stableHash('prefers terse responses'));
  if (fk1 === fk2) pass(`same content → same key (${fk1})`);
  else fail(`hash drift: ${fk1} vs ${fk2}`);

  // Different content → different key
  const fk3 = factKey('user', 'zliu', stableHash('uses Chinese in chat'));
  if (fk3 !== fk1) pass('different content → different key');
  else fail('hash collision: distinct content produced same key');

  // ── 4. summary / health keys round-trip ──────────────────────────
  console.log('Test 4 — summary/health keys round-trip');
  const sk = summaryKey(SENTINEL_SESSION, 12345);
  const hk = healthKey(SENTINEL_SESSION);
  await store.putBlock(sk, { text: 'hello', from_ts: 1, to_ts: 12345, message_count: 5, model: 'm', provider: 'p', ingest_ts: Date.now() });
  await store.putBlock(hk, { last_run_ts: Date.now(), error: null, ingest_count: 1, last_token_estimate: 100 });
  const sb = await store.getBlock(sk);
  const hb = await store.getBlock(hk);
  if (sb) pass('summary block round-tripped');
  else fail('summary block missing after putBlock');
  if (hb) pass('health block round-tripped');
  else fail('health block missing after putBlock');

  // ── cleanup ──────────────────────────────────────────────────────
  // There is no deleteBlock on the interface, so nothing is removed. The
  // sentinel session id keeps the blocks identifiable, and re-running the
  // test overwrites them in place. The log line states this honestly.
  console.log('Cleanup: skipped — no deleteBlock on the store interface; sentinel blocks left in place');

  console.log('');
  if (failures === 0) {
    console.log('✓ All checks passed.');
    process.exit(0);
  } else {
    console.log(`✗ ${failures} check(s) failed.`);
    process.exit(1);
  }
}

// Any unexpected rejection is reported and mapped to exit code 2.
main().catch((err) => {
  console.error('Test crashed:', err);
  process.exit(2);
});
|
package/scripts/verify-usage.ts
DELETED
|
@@ -1,178 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Verification script — compares direct JSONL scanning with DB scanner results.
|
|
3
|
-
* Run: npx tsx scripts/verify-usage.ts
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import { readdirSync, readFileSync, statSync } from 'fs';
|
|
7
|
-
import { join, basename } from 'path';
|
|
8
|
-
import { homedir } from 'os';
|
|
9
|
-
|
|
10
|
-
/** Directory whose sub-directories hold the per-project JSONL session logs. */
const CLAUDE_DIR = join(homedir(), '.claude', 'projects');

/**
 * Token prices per model family, expressed per one million tokens (the cost
 * formula divides by 1_000_000). `default` is the fallback for families
 * without their own entry.
 */
const PRICING: Record<string, { input: number; output: number }> = {
  'claude-opus-4': {
    input: 15,
    output: 75,
  },
  'claude-sonnet-4': {
    input: 3,
    output: 15,
  },
  'claude-haiku-4': {
    input: 0.80,
    output: 4,
  },
  'default': {
    input: 3,
    output: 15,
  },
};
|
|
18
|
-
|
|
19
|
-
/**
 * Maps a model id to its pricing family by keyword.
 *
 * @param model Model id as recorded in a log entry; may be empty.
 * @returns The family key, or `'unknown'` for an empty id or one containing
 *   none of the keywords.
 */
function getModelFamily(model: string): string {
  if (!model) return 'unknown';

  // First match wins, checked in this order: opus, haiku, sonnet.
  const keywordToFamily: [string, string][] = [
    ['opus', 'claude-opus-4'],
    ['haiku', 'claude-haiku-4'],
    ['sonnet', 'claude-sonnet-4'],
  ];
  for (const [keyword, family] of keywordToFamily) {
    if (model.includes(keyword)) return family;
  }
  return 'unknown';
}
|
|
26
|
-
|
|
27
|
-
/**
 * Computes the cost of one message from its token counts.
 *
 * @param family Pricing family key; families without an entry use `default`.
 * @param input Input token count.
 * @param output Output token count.
 * @param cacheRead Cache-read token count, charged at 0.1× the input rate.
 * @param cacheCreate Cache-creation token count, charged at 0.25× the input rate.
 * @returns The summed cost of the four components.
 */
function calcCost(family: string, input: number, output: number, cacheRead: number, cacheCreate: number): number {
  const rate = PRICING[family] || PRICING['default'];

  const inputCost = input * rate.input / 1_000_000;
  const outputCost = output * rate.output / 1_000_000;
  const cacheReadCost = cacheRead * rate.input * 0.1 / 1_000_000;
  // NOTE(review): 0.25× for cache creation presumably mirrors the DB scanner this
  // script is compared against — confirm against the scanner and provider pricing.
  const cacheCreateCost = cacheCreate * rate.input * 0.25 / 1_000_000;

  return inputCost + outputCost + cacheReadCost + cacheCreateCost;
}
|
|
36
|
-
|
|
37
|
-
/** Per-project totals accumulated by the direct scan. */
interface ProjectStats {
  /** Summed input tokens. */
  input: number;
  /** Summed output tokens. */
  output: number;
  /** Summed computed cost. */
  cost: number;
  /** Number of session files that contributed at least one message. */
  sessions: number;
  /** Number of assistant messages counted. */
  messages: number;
  cacheRead: number;
  cacheCreate: number;
}

/** Per-model-family totals accumulated by the direct scan. */
interface ModelStats {
  input: number;
  output: number;
  cost: number;
  messages: number;
}

/** Per-day totals accumulated by the direct scan. */
interface DayStats {
  input: number;
  output: number;
  cost: number;
}
|
|
49
|
-
|
|
50
|
-
// Accumulators filled by the direct JSONL scan below.
const byProject: Record<string, ProjectStats> = {};
const byModel: Record<string, ModelStats> = {};
const byDay: Record<string, DayStats> = {};
let totalInput = 0;
let totalOutput = 0;
let totalCost = 0;
let totalSessions = 0;
let totalMessages = 0;

console.log('Scanning JSONL files...\n');

const projectDirs = readdirSync(CLAUDE_DIR);
let fileCount = 0;

for (const projDir of projectDirs) {
  const projPath = join(CLAUDE_DIR, projDir);

  // Entries that cannot be stat'ed, or are not directories, are skipped.
  let isDirectory = false;
  try {
    isDirectory = statSync(projPath).isDirectory();
  } catch {
    continue;
  }
  if (!isDirectory) continue;

  // Display name: the last segment after turning every '-' into '/'.
  const projectName = projDir.replace(/^-/, '/').replace(/-/g, '/').split('/').pop() || projDir;
  const sessionFiles = readdirSync(projPath).filter(f => f.endsWith('.jsonl') && !f.startsWith('agent-'));

  for (const file of sessionFiles) {
    const filePath = join(projPath, file);
    fileCount++;
    const session = { input: 0, output: 0, cost: 0, messages: 0 };

    // An unreadable file is counted in fileCount but contributes nothing else.
    let lines: string[];
    try {
      lines = readFileSync(filePath, 'utf-8').split('\n');
    } catch {
      continue;
    }

    for (const line of lines) {
      if (!line.trim()) continue;
      try {
        const obj = JSON.parse(line);
        if (obj.type !== 'assistant' || !obj.message?.usage) continue;

        const usage = obj.message.usage;
        const family = getModelFamily(obj.message.model || '');
        const input = usage.input_tokens || 0;
        const output = usage.output_tokens || 0;
        const cacheRead = usage.cache_read_input_tokens || 0;
        const cacheCreate = usage.cache_creation_input_tokens || 0;
        const cost = calcCost(family, input, output, cacheRead, cacheCreate);

        session.input += input;
        session.output += output;
        session.cost += cost;
        session.messages++;

        const modelStats = byModel[family] ?? (byModel[family] = { input: 0, output: 0, cost: 0, messages: 0 });
        modelStats.input += input;
        modelStats.output += output;
        modelStats.cost += cost;
        modelStats.messages++;

        // Day bucket is the first 10 characters of the timestamp.
        const day = (obj.timestamp || '').slice(0, 10) || 'unknown';
        const dayStats = byDay[day] ?? (byDay[day] = { input: 0, output: 0, cost: 0 });
        dayStats.input += input;
        dayStats.output += output;
        dayStats.cost += cost;
      } catch {
        // Best-effort scan: a line that fails to parse or process is ignored.
      }
    }

    // Sessions without any counted message do not affect the totals.
    if (session.messages === 0) continue;

    totalSessions++;
    totalMessages += session.messages;
    totalInput += session.input;
    totalOutput += session.output;
    totalCost += session.cost;

    const projectStats =
      byProject[projectName] ??
      (byProject[projectName] = { input: 0, output: 0, cost: 0, sessions: 0, messages: 0, cacheRead: 0, cacheCreate: 0 });
    projectStats.input += session.input;
    projectStats.output += session.output;
    projectStats.cost += session.cost;
    projectStats.sessions++;
    projectStats.messages += session.messages;
  }
}
|
|
125
|
-
|
|
126
|
-
// Second half: run the DB scanner and print its figures next to the direct scan.
console.log('Running DB scanner...\n');

// The scanner reads FORGE_DATA_DIR; fall back to ~/.forge/data when it is unset.
if (!process.env.FORGE_DATA_DIR) {
  process.env.FORGE_DATA_DIR = join(homedir(), '.forge', 'data');
}

// Imported dynamically so the environment above is in place before the module loads.
const { scanUsage, queryUsage } = await import('../lib/usage-scanner');

const scanResult = scanUsage();
console.log(`Scan result: ${scanResult.scanned} files scanned, ${scanResult.updated} updated, ${scanResult.errors} errors\n`);

const dbData = queryUsage({});

// Two costs agree when they differ by less than ten cents.
const costsAgree = (direct: number, db: number): boolean => Math.abs(direct - db) < 0.1;
const verdict = (ok: boolean): string => (ok ? '✅' : '❌');
const money = (amount: number): string => amount.toFixed(2).padStart(8);
// Prints one "label / direct cost / DB cost / verdict" row.
const printRow = (label: string, direct: number, db: number): void => {
  console.log(` ${label} Direct: $${money(direct)} DB: $${money(db)} ${verdict(costsAgree(direct, db))}`);
};

console.log('=== COMPARISON ===\n');

console.log('TOTAL:');
console.log(` Direct: ${(totalInput/1000).toFixed(0)}K in, ${(totalOutput/1000).toFixed(0)}K out, $${totalCost.toFixed(2)}, ${totalSessions} sessions, ${totalMessages} msgs`);
console.log(` DB: ${(dbData.total.input/1000).toFixed(0)}K in, ${(dbData.total.output/1000).toFixed(0)}K out, $${dbData.total.cost.toFixed(2)}, ${dbData.total.sessions} sessions, ${dbData.total.messages} msgs`);

const costDiff = Math.abs(totalCost - dbData.total.cost);
console.log(` Match: ${verdict(costDiff < 0.1)} (diff: $${costDiff.toFixed(2)})\n`);

console.log('BY MODEL:');
const modelsByCost = Object.entries(byModel).sort((a, b) => b[1].cost - a[1].cost);
for (const [model, stats] of modelsByCost) {
  const dbEntry = dbData.byModel.find(m => m.model === model);
  printRow(model.padEnd(20), stats.cost, dbEntry?.cost || 0);
}

console.log('\nBY PROJECT (top 10):');
const topProjects = Object.entries(byProject).sort((a, b) => b[1].cost - a[1].cost).slice(0, 10);
for (const [name, stats] of topProjects) {
  const dbEntry = dbData.byProject.find(p => p.name === name);
  printRow(name.padEnd(25), stats.cost, dbEntry?.cost || 0);
}

console.log('\nBY DAY (last 7):');
// Newest first; entries without a usable timestamp are left out.
const recentDays = Object.entries(byDay)
  .filter(([date]) => date !== 'unknown')
  .sort((a, b) => b[0].localeCompare(a[0]))
  .slice(0, 7);
for (const [day, stats] of recentDays) {
  const dbEntry = dbData.byDay.find(dd => dd.date === day);
  printRow(day, stats.cost, dbEntry?.cost || 0);
}

console.log(`\nFiles scanned: ${fileCount}`);
console.log('');
|