@spences10/pi-context 0.0.14 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/context-format.d.ts +2 -1
- package/dist/context-format.js +34 -0
- package/dist/context-format.js.map +1 -1
- package/dist/eval/checks.d.ts +9 -0
- package/dist/eval/checks.js +212 -0
- package/dist/eval/checks.js.map +1 -0
- package/dist/eval/fixtures.d.ts +13 -0
- package/dist/eval/fixtures.js +79 -0
- package/dist/eval/fixtures.js.map +1 -0
- package/dist/eval/index.d.ts +4 -0
- package/dist/eval/index.js +56 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/report.d.ts +3 -0
- package/dist/eval/report.js +43 -0
- package/dist/eval/report.js.map +1 -0
- package/dist/eval/scenarios.d.ts +3 -0
- package/dist/eval/scenarios.js +52 -0
- package/dist/eval/scenarios.js.map +1 -0
- package/dist/eval/types.d.ts +48 -0
- package/dist/eval/types.js +2 -0
- package/dist/eval/types.js.map +1 -0
- package/dist/eval.d.ts +31 -0
- package/dist/eval.js +418 -0
- package/dist/eval.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +10 -27
- package/dist/index.js.map +1 -1
- package/dist/store/chunking-helpers.d.ts +8 -0
- package/dist/store/chunking-helpers.js +114 -0
- package/dist/store/chunking-helpers.js.map +1 -0
- package/dist/store/purge-helpers.d.ts +8 -0
- package/dist/store/purge-helpers.js +80 -0
- package/dist/store/purge-helpers.js.map +1 -0
- package/dist/store/query-helpers.d.ts +24 -0
- package/dist/store/query-helpers.js +125 -0
- package/dist/store/query-helpers.js.map +1 -0
- package/dist/store/schema-helpers.d.ts +3 -0
- package/dist/store/schema-helpers.js +18 -0
- package/dist/store/schema-helpers.js.map +1 -0
- package/dist/store/search-helpers.d.ts +11 -0
- package/dist/store/search-helpers.js +157 -0
- package/dist/store/search-helpers.js.map +1 -0
- package/dist/store.d.ts +4 -3
- package/dist/store.js +30 -9
- package/dist/store.js.map +1 -1
- package/dist/text.d.ts +1 -0
- package/dist/text.js +50 -13
- package/dist/text.js.map +1 -1
- package/package.json +9 -5
package/dist/context-format.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import { type ContextListResult, type ContextPurgeDetails, type ContextSearchResult, type ContextStats } from './store.js';
|
|
1
|
+
import { type ContextChunk, type ContextChunkSummary, type ContextListResult, type ContextPurgeDetails, type ContextSearchResult, type ContextStats } from './store.js';
|
|
2
2
|
export declare function format_search_results(results: ContextSearchResult[]): string;
|
|
3
|
+
export declare function format_get_result(source_id: string, chunk_id: string | undefined, chunks: ContextChunk[], summary: ContextChunkSummary | null): string;
|
|
3
4
|
export declare function format_list_results(results: ContextListResult[]): string;
|
|
4
5
|
export declare function format_purge_details(details: ContextPurgeDetails): string;
|
|
5
6
|
export declare function format_stats(stats: ContextStats): string;
|
package/dist/context-format.js
CHANGED
|
@@ -12,6 +12,40 @@ export function format_search_results(results) {
|
|
|
12
12
|
].join('\n'))
|
|
13
13
|
.join('\n\n---\n\n');
|
|
14
14
|
}
|
|
15
|
+
/**
 * Render the text returned for a context "get" request.
 *
 * Outcomes, in order of precedence:
 * 1. `chunks` is non-empty: every chunk is rendered as a `## <id>` heading,
 *    a `Source: … • Chunk …` line, a blank line and the chunk content, with
 *    chunks separated by a `---` rule.
 * 2. No chunks and no `summary`: the source is reported as missing, with
 *    guidance on how to recover.
 * 3. The source has a summary but no `chunk_id` was requested:
 *    `'No chunks found.'`.
 * 4. The requested `chunk_id` did not match: the message names the bad id and
 *    describes what the source does contain.
 *
 * @param source_id Identifier of the source that was queried.
 * @param chunk_id Requested chunk id, or a falsy value when none was given.
 * @param chunks Chunks returned by the lookup (may be empty).
 * @param summary Chunk summary for the source, or a falsy value when the
 *   source is unknown.
 * @returns The formatted, newline-joined message.
 */
export function format_get_result(source_id, chunk_id, chunks, summary) {
    if (chunks.length > 0) {
        return chunks
            .map((chunk) => [
                `## ${chunk.id}`,
                `Source: ${chunk.source_id} • Chunk ${chunk.ordinal}`,
                '',
                chunk.content,
            ].join('\n'))
            .join('\n\n---\n\n');
    }
    if (!summary) {
        return [
            `Source ${source_id} was not found in the context sidecar.`,
            'It may have expired, been purged, or belonged to a different local context database.',
            'Try context_list to inspect available sources, or rerun the original tool if the content is still needed.',
        ].join('\n');
    }
    if (!chunk_id)
        return 'No chunks found.';
    const lines = [`No chunk found for chunk_id "${chunk_id}".`];
    if (!summary.first_chunk_id) {
        // The summary carries no chunk ids, so there is no id range or ordinal
        // range worth printing; interpolating them would render "null" or
        // "undefined" in the message.
        lines.push(`Source ${source_id} has ${summary.chunk_count} chunk(s).`);
        return lines.join('\n');
    }
    // Collapse the range to a single id when first and last are the same chunk.
    const range = summary.first_chunk_id === summary.last_chunk_id
        ? summary.first_chunk_id
        : `${summary.first_chunk_id} … ${summary.last_chunk_id}`;
    lines.push(
        `Source ${source_id} has ${summary.chunk_count} chunk(s): ${range}.`,
        `Valid ordinals: ${summary.first_ordinal} … ${summary.last_ordinal}.`,
        `Try chunk_id:"${summary.first_chunk_id}" or chunk_id:"1".`,
    );
    return lines.join('\n');
}
|
|
15
49
|
export function format_list_results(results) {
|
|
16
50
|
if (results.length === 0)
|
|
17
51
|
return 'No indexed context sources found.';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"context-format.js","sourceRoot":"","sources":["../src/context-format.ts"],"names":[],"mappings":"AAAA,OAAO,EACN,wBAAwB,EACxB,0BAA0B,EAC1B,6BAA6B,EAC7B,gCAAgC,EAChC,4BAA4B,GAC5B,MAAM,aAAa,CAAC;AACrB,OAAO,EACN,0BAA0B,
|
|
1
|
+
{"version":3,"file":"context-format.js","sourceRoot":"","sources":["../src/context-format.ts"],"names":[],"mappings":"AAAA,OAAO,EACN,wBAAwB,EACxB,0BAA0B,EAC1B,6BAA6B,EAC7B,gCAAgC,EAChC,4BAA4B,GAC5B,MAAM,aAAa,CAAC;AACrB,OAAO,EACN,0BAA0B,GAO1B,MAAM,YAAY,CAAC;AAEpB,MAAM,UAAU,qBAAqB,CACpC,OAA8B;IAE9B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,6BAA6B,CAAC;IAC/D,OAAO,OAAO;SACZ,GAAG,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CACtB;QACC,MAAM,KAAK,GAAG,CAAC,KAAK,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,QAAQ,EAAE;QACrD,WAAW,MAAM,CAAC,SAAS,aAAa,MAAM,CAAC,QAAQ,YAAY,MAAM,CAAC,SAAS,EAAE;QACrF,EAAE;QACF,MAAM,CAAC,OAAO;KACd,CAAC,IAAI,CAAC,IAAI,CAAC,CACZ;SACA,IAAI,CAAC,aAAa,CAAC,CAAC;AACvB,CAAC;AAED,MAAM,UAAU,iBAAiB,CAChC,SAAiB,EACjB,QAA4B,EAC5B,MAAsB,EACtB,OAAmC;IAEnC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,MAAM;aACX,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CACd;YACC,MAAM,KAAK,CAAC,EAAE,EAAE;YAChB,WAAW,KAAK,CAAC,SAAS,YAAY,KAAK,CAAC,OAAO,EAAE;YACrD,EAAE;YACF,KAAK,CAAC,OAAO;SACb,CAAC,IAAI,CAAC,IAAI,CAAC,CACZ;aACA,IAAI,CAAC,aAAa,CAAC,CAAC;IACvB,CAAC;IAED,IAAI,CAAC,OAAO,EAAE,CAAC;QACd,OAAO;YACN,UAAU,SAAS,wCAAwC;YAC3D,sFAAsF;YACtF,2GAA2G;SAC3G,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACd,CAAC;IAED,IAAI,CAAC,QAAQ;QAAE,OAAO,kBAAkB,CAAC;IACzC,MAAM,KAAK,GACV,OAAO,CAAC,cAAc,KAAK,OAAO,CAAC,aAAa;QAC/C,CAAC,CAAC,OAAO,CAAC,cAAc;QACxB,CAAC,CAAC,GAAG,OAAO,CAAC,cAAc,MAAM,OAAO,CAAC,aAAa,EAAE,CAAC;IAC3D,OAAO;QACN,gCAAgC,QAAQ,IAAI;QAC5C,UAAU,SAAS,QAAQ,OAAO,CAAC,WAAW,cAAc,KAAK,GAAG;QACpE,mBAAmB,OAAO,CAAC,aAAa,MAAM,OAAO,CAAC,YAAY,GAAG;QACrE,OAAO,CAAC,cAAc;YACrB,CAAC,CAAC,iBAAiB,OAAO,CAAC,cAAc,oBAAoB;YAC7D,CAAC,CAAC,SAAS;KACZ;SACC,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE,CAAC,IAAI,KAAK,SAAS,CAAC;SACpD,IAAI,CAAC,IAAI,CAAC,CAAC;AACd,CAAC;AAED,MAAM,UAAU,mBAAmB,CAClC,OAA4B;IAE5B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QACvB,OAAO,mCAAmC,CAAC;IAC5C,OAAO,OAAO;SACZ,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CACf;QACC,MAAM,MAAM,CAAC,SAAS,EAAE;QACxB,YAAY,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,WAAW,EAAE,YAAY,MAAM,CAAC,SAAS,EAAE;QACnF,SAAS,MAAM,CAAC,KAAK,WAAW,MAAM,CAAC,KAAK,WAAW
,MAAM,CAAC,WAAW,SAAS;QAClF,YAAY,MAAM,CAAC,YAAY,IAAI,QAAQ,EAAE;QAC7C,YAAY,MAAM,CAAC,UAAU,IAAI,QAAQ,EAAE;QAC3C,MAAM,CAAC,aAAa;YACnB,CAAC,CAAC,UAAU,MAAM,CAAC,aAAa,EAAE;YAClC,CAAC,CAAC,SAAS;QACZ,MAAM,CAAC,iBAAiB;YACvB,CAAC,CAAC,gBAAgB,MAAM,CAAC,iBAAiB,EAAE;YAC5C,CAAC,CAAC,SAAS;QACZ,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,SAAS;KACzD;SACC,MAAM,CAAC,OAAO,CAAC;SACf,IAAI,CAAC,IAAI,CAAC,CACZ;SACA,IAAI,CAAC,MAAM,CAAC,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,oBAAoB,CACnC,OAA4B;IAE5B,MAAM,OAAO,GAAG;QACf,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,aAAa,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS;QAChE,OAAO,CAAC,YAAY,KAAK,SAAS;YACjC,CAAC,CAAC,gBAAgB,OAAO,CAAC,YAAY,IAAI,QAAQ,EAAE;YACpD,CAAC,CAAC,SAAS;QACZ,OAAO,CAAC,UAAU,KAAK,SAAS;YAC/B,CAAC,CAAC,cAAc,OAAO,CAAC,UAAU,IAAI,QAAQ,EAAE;YAChD,CAAC,CAAC,SAAS;QACZ,OAAO,CAAC,eAAe,KAAK,SAAS;YACpC,CAAC,CAAC,mBAAmB,OAAO,CAAC,eAAe,EAAE;YAC9C,CAAC,CAAC,SAAS;KACZ;SACC,MAAM,CAAC,OAAO,CAAC;SACf,IAAI,CAAC,IAAI,CAAC,CAAC;IACb,OAAO,WAAW,OAAO,CAAC,OAAO,sBAAsB,OAAO,CAAC,CAAC,CAAC,aAAa,OAAO,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;AACjG,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,KAAmB;IAC/C,MAAM,MAAM,GAAG,KAAK,CAAC,kBAAkB,IAAI,KAAK,CAAC,gBAAgB,CAAC;IAClE,OAAO;QACN,0BAA0B;QAC1B,EAAE;QACF,cAAc,0BAA0B,EAAE,EAAE;QAC5C,MAAM;YACL,CAAC,CAAC,oBAAoB,KAAK,CAAC,kBAAkB,IAAI,QAAQ,aAAa,KAAK,CAAC,gBAAgB,IAAI,QAAQ,EAAE;YAC3G,CAAC,CAAC,iBAAiB;QACpB,qBAAqB,KAAK,CAAC,OAAO,EAAE;QACpC,oBAAoB,KAAK,CAAC,MAAM,EAAE;QAClC,8BAA8B,KAAK,CAAC,YAAY,EAAE;QAClD,qBAAqB,KAAK,CAAC,cAAc,EAAE;QAC3C,oBAAoB,KAAK,CAAC,aAAa,EAAE;QACzC,8BAA8B,KAAK,CAAC,mBAAmB,EAAE;QACzD,qBAAqB,KAAK,CAAC,cAAc,EAAE;QAC3C,kBAAkB,KAAK,CAAC,WAAW,EAAE;QACrC,gBAAgB,KAAK,CAAC,aAAa,GAAG;QACtC,eAAe,KAAK,CAAC,WAAW,EAAE;QAClC,2BAA2B,gBAAgB,CAAC,KAAK,CAAC,iBAAiB,CAAC,EAAE;QACtE,2BAA2B,gBAAgB,CAAC,KAAK,CAAC,iBAAiB,CAAC,EAAE;QACtE,2BAA2B,gBAAgB,CAAC,KAAK,CAAC,wBAAwB,CAAC,EAAE;QAC7E,2BAA2B,gBAAgB,CAAC,KAAK,CAAC,wBAAwB,CAAC,EAAE;QAC7E,qBAAqB,KAAK,CAAC,cAAc,IAAI,UAAU,EAAE;QACzD,wBAAwB,KAAK,CAAC,iBAAiB,EAAE;QACjD,kBAAkB,KAAK,CAAC,MAAM,KAAK,IAAI,CAAC
,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,MAAM,EAAE;KAC9E,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACd,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,SAAwB;IACxD,OAAO,SAAS,KAAK,IAAI;QACxB,CAAC,CAAC,QAAQ;QACV,CAAC,CAAC,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;AACtC,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,IAAmB;IAC9C,OAAO,IAAI,KAAK,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,IAAI,SAAS,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAAqB;IAClD,OAAO,MAAM,KAAK,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,MAAM,MAAM,CAAC;AACvD,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,KAAa;IACvC,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,mBAAmB,CAClC,KAAa,EACb,KAAa;IAEb,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,MAAM,KAAK,QAAQ,CAAC;AAChD,CAAC;AAED,MAAM,UAAU,8BAA8B,CAC7C,KAAmB;IAEnB,MAAM,KAAK,GAAG,4BAA4B,EAAE,CAAC;IAC7C,MAAM,cAAc,GAAG,0BAA0B,EAAE,CAAC;IACpD,MAAM,UAAU,GAAG,6BAA6B,EAAE,CAAC;IACnD,OAAO;QACN,6BAA6B;QAC7B,EAAE;QACF,kBAAkB,gCAAgC,EAAE,EAAE;QACtD,mBAAmB,KAAK,EAAE,MAAM,IAAI,iCAAiC,EAAE;QACvE,0BAA0B,WAAW,CAAC,KAAK,CAAC,cAAc,CAAC,EAAE;QAC7D,yBAAyB,aAAa,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE;QACtD,kCAAkC,KAAK,CAAC,iBAAiB,EAAE;QAC3D,uCAAuC,mBAAmB,CAAC,cAAc,CAAC,SAAS,EAAE,cAAc,CAAC,SAAS,CAAC,EAAE;QAChH,sCAAsC,mBAAmB,CAAC,UAAU,CAAC,SAAS,EAAE,UAAU,CAAC,SAAS,CAAC,EAAE;QACvG,EAAE;QACF,UAAU;QACV,GAAG,MAAM,CAAC,OAAO,CAAC,wBAAwB,CAAC,CAAC,GAAG,CAC9C,CAAC,CAAC,GAAG,EAAE,MAAM,CAAC,EAAE,EAAE,CAAC,KAAK,GAAG,KAAK,MAAM,CAAC,WAAW,EAAE,CACpD;QACD,EAAE;QACF,QAAQ;QACR,8BAA8B;QAC9B,qGAAqG;KACrG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { ContextStore } from '../store.js';
|
|
2
|
+
import type { EvalCaseResult, SeededSource } from './types.js';
|
|
3
|
+
export declare function run_search_evals(store: ContextStore, seeded: Map<string, SeededSource>, results: EvalCaseResult[]): void;
|
|
4
|
+
export declare function run_retrieval_evals(store: ContextStore, seeded: Map<string, SeededSource>, results: EvalCaseResult[]): void;
|
|
5
|
+
export declare function run_lifecycle_evals(store: ContextStore, seeded: Map<string, SeededSource>, results: EvalCaseResult[]): void;
|
|
6
|
+
export declare function run_capture_evals(results: EvalCaseResult[]): void;
|
|
7
|
+
export declare function run_retention_evals(store: ContextStore, results: EvalCaseResult[]): void;
|
|
8
|
+
export declare function run_cost_evals(store: ContextStore, seeded: Map<string, SeededSource>, results: EvalCaseResult[]): void;
|
|
9
|
+
export declare function run_dedupe_evals(results: EvalCaseResult[]): void;
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import { mkdtempSync, rmSync } from 'node:fs';
|
|
2
|
+
import { tmpdir } from 'node:os';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { format_get_result } from '../context-format.js';
|
|
5
|
+
import { ContextStore, should_index_text } from '../store.js';
|
|
6
|
+
import { PROJECT, SESSION, make_noise_lines, result_bytes, } from './fixtures.js';
|
|
7
|
+
import { get_scenarios, search_scenarios } from './scenarios.js';
|
|
8
|
+
/**
 * Run every search scenario against the seeded store and append one result
 * per scenario to `results`.
 *
 * A scenario passes when at least one returned hit's content contains the
 * scenario's expected text. On success the detail names the chunk that
 * actually matched, which is not necessarily the first hit returned.
 *
 * @param store Store to query; `search(query, options)` is called on it.
 * @param seeded Map from fixture name to the seeded source record.
 * @param results Array that eval case results are pushed onto (mutated).
 * @throws Error when a scenario refers to a fixture that was not seeded.
 */
export function run_search_evals(store, seeded, results) {
    for (const scenario of search_scenarios) {
        const source = seeded.get(scenario.source);
        if (!source)
            throw new Error(`Missing source ${scenario.source}`);
        const search = store.search(scenario.query, {
            source_id: source.source_id,
            limit: scenario.limit,
        });
        // Keep the matching hit itself so the report can identify it.
        const match = search.find((result) => result.content.includes(scenario.expect));
        const passed = match !== undefined;
        results.push({
            name: scenario.name,
            category: 'search',
            passed,
            description: scenario.description,
            detail: passed
                ? `matched ${match.chunk_id ?? 'unknown chunk'}`
                : `no expected match for query ${JSON.stringify(scenario.query)} (${search.length} result(s))`,
            returned_bytes: result_bytes(search),
            result_count: search.length,
        });
    }
}
|
|
31
|
+
/**
 * Run every retrieval scenario against the seeded store and append one
 * result per scenario to `results`.
 *
 * A scenario passes when at least one chunk returned by
 * `store.get(source_id, chunk_id)` contains the scenario's expected text.
 * On success the detail names the chunk that actually matched, which is not
 * necessarily the first chunk returned.
 *
 * @param store Store to query; `get(source_id, chunk_id)` is called on it.
 * @param seeded Map from fixture name to the seeded source record.
 * @param results Array that eval case results are pushed onto (mutated).
 * @throws Error when a scenario refers to a fixture that was not seeded.
 */
export function run_retrieval_evals(store, seeded, results) {
    for (const scenario of get_scenarios) {
        const source = seeded.get(scenario.source);
        if (!source)
            throw new Error(`Missing source ${scenario.source}`);
        const chunks = store.get(source.source_id, scenario.chunk_id);
        // Keep the matching chunk itself so the report can identify it.
        const match = chunks.find((chunk) => chunk.content.includes(scenario.expect));
        const passed = match !== undefined;
        results.push({
            name: scenario.name,
            category: 'retrieval',
            passed,
            description: scenario.description,
            detail: passed
                ? `retrieved ${match.id ?? scenario.chunk_id}`
                : `missing expected content via chunk_id ${scenario.chunk_id}`,
            returned_bytes: result_bytes(chunks),
            result_count: chunks.length,
        });
    }
}
|
|
51
|
+
/**
 * Lifecycle eval: purge the seeded `staging-help` source, then check that
 * fetching it yields no chunks and that the formatted message tells the
 * caller the source "was not found" and mentions "expired".
 *
 * Note that this removes the `staging-help` source from `store`.
 *
 * @param store Store under test; `purge`, `get` and `chunk_summary` are called.
 * @param seeded Map from fixture name to the seeded source record.
 * @param results Array that the eval case result is pushed onto (mutated).
 * @throws Error when the `staging-help` fixture was not seeded.
 */
export function run_lifecycle_evals(store, seeded, results) {
    const target = seeded.get('staging-help');
    if (!target) {
        throw new Error('Missing source staging-help');
    }
    const { source_id } = target;
    store.purge({ source_id, global: true });
    const remaining = store.get(source_id, undefined, { global: true });
    const summary = store.chunk_summary(source_id, { global: true });
    const rendered = format_get_result(source_id, undefined, remaining, summary);
    const passed = remaining.length === 0 &&
        rendered.includes('was not found') &&
        rendered.includes('expired');
    let detail;
    if (passed) {
        detail = 'missing source reports expiry/purge guidance';
    }
    else {
        detail = 'missing source is currently indistinguishable from no data';
    }
    results.push({
        name: 'missing-source-guidance',
        category: 'lifecycle',
        passed,
        description: 'Expired receipt retrieval should distinguish a missing backing source from an empty result and give guidance.',
        detail,
        returned_bytes: Buffer.byteLength(rendered, 'utf8'),
        result_count: remaining.length,
    });
}
|
|
78
|
+
/**
 * Capture eval: check `should_index_text` against three probes — a text of
 * 24 KiB + 1 characters, a text of 301 lines, and a short string. The first
 * two are expected to be indexed and the last one is not.
 *
 * @param results Array that the eval case result is pushed onto (mutated).
 */
export function run_capture_evals(results) {
    const oversized_bytes = 'x'.repeat(24 * 1024 + 1);
    const oversized_lines = [...Array(301).keys()].map((index) => String(index)).join('\n');
    const tiny = 'small output';
    const passed = should_index_text(oversized_bytes) &&
        should_index_text(oversized_lines) &&
        !should_index_text(tiny);
    let detail;
    if (passed) {
        detail = 'byte, line, and small-output thresholds behaved as expected';
    }
    else {
        detail = 'capture threshold mismatch';
    }
    results.push({
        name: 'capture-thresholds',
        category: 'capture',
        passed,
        description: 'Oversized byte and line-count outputs should be captured while small outputs stay inline.',
        detail,
        returned_bytes: 0,
        result_count: 3,
    });
}
|
|
97
|
+
/**
 * Read the `db` property off a store via reflection.
 *
 * @param store Object to read from; must be an object, as `Reflect.get`
 *   throws a TypeError for non-object targets.
 * @returns Whatever the store's `db` property holds.
 */
function store_db(store) {
    const handle = Reflect.get(store, 'db');
    return handle;
}
|
|
100
|
+
/**
 * Retention eval: store an "old" and a "fresh" source, back-date the old one
 * by ten days directly in the `context_sources` table, run cleanup with a
 * seven-day retention, and check that only the old source is gone.
 *
 * @param store Store under test; `store`, `cleanup` and `search` are called,
 *   and its `db` handle is used to rewrite `created_at`.
 * @param results Array that the eval case result is pushed onto (mutated).
 * @throws Error when either source could not be stored.
 */
export function run_retention_evals(store, results) {
    const old = store.store({
        text: `old-retention-token\n${'o '.repeat(500)}`,
        tool_name: 'bash',
        force: true,
    });
    const fresh = store.store({
        text: `fresh-retention-token\n${'f '.repeat(500)}`,
        tool_name: 'bash',
        force: true,
    });
    if (!old || !fresh) {
        throw new Error('Failed to seed retention eval');
    }
    // Back-date the old source so it falls outside the retention window.
    const backdate = store_db(store).prepare('UPDATE context_sources SET created_at = ? WHERE id = ?');
    const ten_days_ago = Date.now() - 10 * 24 * 60 * 60 * 1000;
    backdate.run(ten_days_ago, old.source_id);
    const policy = {
        retention_days: 7,
        purge_on_shutdown: false,
        max_mb: null,
        max_bytes: null,
    };
    const cleanup = store.cleanup(policy);
    const old_results = store.search('old-retention-token', {
        source_id: old.source_id,
        global: true,
    });
    const fresh_results = store.search('fresh-retention-token', {
        source_id: fresh.source_id,
        global: true,
    });
    const old_count = old_results.length;
    const fresh_count = fresh_results.length;
    const passed = cleanup.age_deleted >= 1 && old_count === 0 && fresh_count > 0;
    results.push({
        name: 'age-retention-cleanup',
        category: 'retention',
        passed,
        description: 'Age cleanup should delete expired sources without deleting fresh sources.',
        detail: `age_deleted=${cleanup.age_deleted}; old=${old_count}; fresh=${fresh_count}`,
        returned_bytes: result_bytes([...old_results, ...fresh_results]),
        result_count: old_count + fresh_count,
    });
}
|
|
143
|
+
/**
 * Cost eval: search the seeded `chunk-boundary` source for both of its tokens
 * and check that something is returned while the total returned content
 * stays within a 24 KiB budget.
 *
 * @param store Store under test; `search(query, options)` is called on it.
 * @param seeded Map from fixture name to the seeded source record.
 * @param results Array that the eval case result is pushed onto (mutated).
 * @throws Error when the `chunk-boundary` fixture was not seeded.
 */
export function run_cost_evals(store, seeded, results) {
    const target = seeded.get('chunk-boundary');
    if (!target) {
        throw new Error('Missing source chunk-boundary');
    }
    const hits = store.search('alpha-token omega-token', {
        source_id: target.source_id,
        limit: 25,
    });
    const bytes = result_bytes(hits);
    const hit_count = hits.length;
    const within_budget = bytes <= 24 * 1024;
    results.push({
        name: 'bounded-relaxed-search-cost',
        category: 'cost',
        passed: hit_count > 0 && within_budget,
        description: 'Relaxed search should recover useful chunks without dumping excessive context back inline.',
        detail: `results=${hit_count}; returned_bytes=${bytes}; budget=24576`,
        returned_bytes: bytes,
        result_count: hit_count,
    });
}
|
|
163
|
+
/**
 * Dedupe eval: in a throwaway database, store the same text from two
 * different sessions and check that the second store reuses the first
 * source (same `source_id`, `deduped === true`, exactly one source overall)
 * and that the second session can retrieve every chunk.
 *
 * The temporary directory is removed on every exit path, including when the
 * store cannot be constructed or fails to close.
 *
 * @param results Array that the eval case result is pushed onto (mutated).
 * @throws Error when either store call returns nothing.
 */
export function run_dedupe_evals(results) {
    const dir = mkdtempSync(join(tmpdir(), 'pi-context-dedupe-eval-'));
    try {
        // Constructed inside the outer try so a constructor failure still
        // cleans up the temp directory.
        const store = new ContextStore({
            db_path: join(dir, 'context.db'),
            project_path: PROJECT,
            session_id: `${SESSION}:a`,
        });
        try {
            const text = `shared-dedupe-token\n${make_noise_lines(40, 'dedupe')}`;
            const first = store.store({
                text,
                tool_name: 'read',
                session_id: `${SESSION}:a`,
                project_path: PROJECT,
                force: true,
            });
            const second = store.store({
                text,
                tool_name: 'read',
                session_id: `${SESSION}:b`,
                project_path: PROJECT,
                force: true,
            });
            if (!first || !second)
                throw new Error('Failed to seed dedupe eval');
            const retrieved = store.get(second.source_id, undefined, {
                session_id: `${SESSION}:b`,
                project_path: PROJECT,
            });
            const stats = store.stats({ global: true });
            const passed = first.source_id === second.source_id &&
                second.deduped === true &&
                stats.sources === 1 &&
                retrieved.length === first.chunk_count;
            results.push({
                name: 'cross-session-dedupe',
                category: 'dedupe',
                passed,
                description: 'Identical content across sessions should be content-address reused instead of stored twice.',
                detail: `first=${first.source_id}; second=${second.source_id}; sources=${stats.sources}; retrieved=${retrieved.length}`,
                returned_bytes: result_bytes(retrieved),
                result_count: stats.sources,
            });
        }
        finally {
            store.close();
        }
    }
    finally {
        // Runs even if close() throws, so the directory is never left behind.
        rmSync(dir, { recursive: true, force: true });
    }
}
|
|
212
|
+
//# sourceMappingURL=checks.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"checks.js","sourceRoot":"","sources":["../../src/eval/checks.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAC9C,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAAE,YAAY,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAC9D,OAAO,EACN,OAAO,EACP,OAAO,EACP,gBAAgB,EAChB,YAAY,GACZ,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAGjE,MAAM,UAAU,gBAAgB,CAC/B,KAAmB,EACnB,MAAiC,EACjC,OAAyB;IAEzB,KAAK,MAAM,QAAQ,IAAI,gBAAgB,EAAE,CAAC;QACzC,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC3C,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,KAAK,CAAC,kBAAkB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAClE,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE;YAC3C,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,KAAK,EAAE,QAAQ,CAAC,KAAK;SACrB,CAAC,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CACrC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CACxC,CAAC;QACF,OAAO,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,QAAQ,EAAE,QAAQ;YAClB,MAAM;YACN,WAAW,EAAE,QAAQ,CAAC,WAAW;YACjC,MAAM,EAAE,MAAM;gBACb,CAAC,CAAC,WAAW,MAAM,CAAC,CAAC,CAAC,EAAE,QAAQ,IAAI,eAAe,EAAE;gBACrD,CAAC,CAAC,+BAA+B,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,MAAM,CAAC,MAAM,aAAa;YAC/F,cAAc,EAAE,YAAY,CAAC,MAAM,CAAC;YACpC,YAAY,EAAE,MAAM,CAAC,MAAM;SAC3B,CAAC,CAAC;IACJ,CAAC;AACF,CAAC;AAED,MAAM,UAAU,mBAAmB,CAClC,KAAmB,EACnB,MAAiC,EACjC,OAAyB;IAEzB,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;QACtC,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC3C,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,KAAK,CAAC,kBAAkB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAClE,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC9D,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CACpC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CACvC,CAAC;QACF,OAAO,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,QAAQ,EAAE,WAAW;YACrB,MAAM;YACN,WAAW,EAAE,QAAQ,CAAC,WAAW;YACjC,MAAM,EAAE,MAAM;gBACb,CAAC,CAAC,aA
Aa,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,IAAI,QAAQ,CAAC,QAAQ,EAAE;gBACnD,CAAC,CAAC,yCAAyC,QAAQ,CAAC,QAAQ,EAAE;YAC/D,cAAc,EAAE,YAAY,CAAC,MAAM,CAAC;YACpC,YAAY,EAAE,MAAM,CAAC,MAAM;SAC3B,CAAC,CAAC;IACJ,CAAC;AACF,CAAC;AAED,MAAM,UAAU,mBAAmB,CAClC,KAAmB,EACnB,MAAiC,EACjC,OAAyB;IAEzB,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;IAC1C,IAAI,CAAC,MAAM;QAAE,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,CAAC;IAC5D,KAAK,CAAC,KAAK,CAAC,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3D,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,SAAS,EAAE;QACrD,MAAM,EAAE,IAAI;KACZ,CAAC,CAAC;IACH,MAAM,OAAO,GAAG,KAAK,CAAC,aAAa,CAAC,MAAM,CAAC,SAAS,EAAE;QACrD,MAAM,EAAE,IAAI;KACZ,CAAC,CAAC;IACH,MAAM,IAAI,GAAG,iBAAiB,CAC7B,MAAM,CAAC,SAAS,EAChB,SAAS,EACT,MAAM,EACN,OAAO,CACP,CAAC;IACF,MAAM,MAAM,GACX,MAAM,CAAC,MAAM,KAAK,CAAC;QACnB,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC;QAC9B,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;IAC1B,OAAO,CAAC,IAAI,CAAC;QACZ,IAAI,EAAE,yBAAyB;QAC/B,QAAQ,EAAE,WAAW;QACrB,MAAM;QACN,WAAW,EACV,+GAA+G;QAChH,MAAM,EAAE,MAAM;YACb,CAAC,CAAC,8CAA8C;YAChD,CAAC,CAAC,4DAA4D;QAC/D,cAAc,EAAE,MAAM,CAAC,UAAU,CAAC,IAAI,EAAE,MAAM,CAAC;QAC/C,YAAY,EAAE,MAAM,CAAC,MAAM;KAC3B,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,OAAyB;IAC1D,MAAM,UAAU,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC;IAC7C,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAC3D,MAAM,CAAC,KAAK,CAAC,CACb,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACb,MAAM,UAAU,GAAG,cAAc,CAAC;IAClC,MAAM,MAAM,GACX,iBAAiB,CAAC,UAAU,CAAC;QAC7B,iBAAiB,CAAC,UAAU,CAAC;QAC7B,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC;IAChC,OAAO,CAAC,IAAI,CAAC;QACZ,IAAI,EAAE,oBAAoB;QAC1B,QAAQ,EAAE,SAAS;QACnB,MAAM;QACN,WAAW,EACV,2FAA2F;QAC5F,MAAM,EAAE,MAAM;YACb,CAAC,CAAC,6DAA6D;YAC/D,CAAC,CAAC,4BAA4B;QAC/B,cAAc,EAAE,CAAC;QACjB,YAAY,EAAE,CAAC;KACf,CAAC,CAAC;AACJ,CAAC;AAED,SAAS,QAAQ,CAAC,KAAmB;IACpC,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAiB,CAAC;AACjD,CAAC;AAED,MAAM,UAAU,mBAAmB,CAClC,KAAmB,EACnB,OAAyB;IAEzB,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC;QACvB,
IAAI,EAAE,wBAAwB,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;QAChD,SAAS,EAAE,MAAM;QACjB,KAAK,EAAE,IAAI;KACX,CAAC,CAAC;IACH,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC;QACzB,IAAI,EAAE,0BAA0B,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;QAClD,SAAS,EAAE,MAAM;QACjB,KAAK,EAAE,IAAI;KACX,CAAC,CAAC;IACH,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK;QACjB,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;IAClD,QAAQ,CAAC,KAAK,CAAC;SACb,OAAO,CAAC,wDAAwD,CAAC;SACjE,GAAG,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC;IAC5D,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC;QAC7B,cAAc,EAAE,CAAC;QACjB,iBAAiB,EAAE,KAAK;QACxB,MAAM,EAAE,IAAI;QACZ,SAAS,EAAE,IAAI;KACf,CAAC,CAAC;IACH,MAAM,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,qBAAqB,EAAE;QACvD,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,MAAM,EAAE,IAAI;KACZ,CAAC,CAAC;IACH,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,uBAAuB,EAAE;QAC3D,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,MAAM,EAAE,IAAI;KACZ,CAAC,CAAC;IACH,MAAM,MAAM,GACX,OAAO,CAAC,WAAW,IAAI,CAAC;QACxB,WAAW,CAAC,MAAM,KAAK,CAAC;QACxB,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC;IAC1B,OAAO,CAAC,IAAI,CAAC;QACZ,IAAI,EAAE,uBAAuB;QAC7B,QAAQ,EAAE,WAAW;QACrB,MAAM;QACN,WAAW,EACV,2EAA2E;QAC5E,MAAM,EAAE,eAAe,OAAO,CAAC,WAAW,SAAS,WAAW,CAAC,MAAM,WAAW,aAAa,CAAC,MAAM,EAAE;QACtG,cAAc,EAAE,YAAY,CAAC,CAAC,GAAG,WAAW,EAAE,GAAG,aAAa,CAAC,CAAC;QAChE,YAAY,EAAE,WAAW,CAAC,MAAM,GAAG,aAAa,CAAC,MAAM;KACvD,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,cAAc,CAC7B,KAAmB,EACnB,MAAiC,EACjC,OAAyB;IAEzB,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAC5C,IAAI,CAAC,MAAM;QAAE,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,yBAAyB,EAAE;QACtD,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,KAAK,EAAE,EAAE;KACT,CAAC,CAAC;IACH,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;IACnC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,IAAI,EAAE,GAAG,IAAI,CAAC;IACvD,OAAO,CAAC,IAAI,CAAC;QACZ,IAAI,EAAE,6BAA6B;QACnC,QAAQ,EAAE,MAAM;QAChB,MAAM;QACN,WAAW,EACV,4FAA4F;QAC7F,MAAM,EAAE,WAAW,MAAM,CAAC,MAAM,oBAAoB,KAAK,gBAAgB;QACzE,cAAc,EAAE,KAAK;QACrB,YAAY,EAAE,MAAM,CAAC,MAAM;KAC3B,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU
,gBAAgB,CAAC,OAAyB;IACzD,MAAM,GAAG,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,yBAAyB,CAAC,CAAC,CAAC;IACnE,MAAM,KAAK,GAAG,IAAI,YAAY,CAAC;QAC9B,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,YAAY,CAAC;QAChC,YAAY,EAAE,OAAO;QACrB,UAAU,EAAE,GAAG,OAAO,IAAI;KAC1B,CAAC,CAAC;IACH,IAAI,CAAC;QACJ,MAAM,IAAI,GAAG,wBAAwB,gBAAgB,CAAC,EAAE,EAAE,QAAQ,CAAC,EAAE,CAAC;QACtE,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC;YACzB,IAAI;YACJ,SAAS,EAAE,MAAM;YACjB,UAAU,EAAE,GAAG,OAAO,IAAI;YAC1B,YAAY,EAAE,OAAO;YACrB,KAAK,EAAE,IAAI;SACX,CAAC,CAAC;QACH,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC;YAC1B,IAAI;YACJ,SAAS,EAAE,MAAM;YACjB,UAAU,EAAE,GAAG,OAAO,IAAI;YAC1B,YAAY,EAAE,OAAO;YACrB,KAAK,EAAE,IAAI;SACX,CAAC,CAAC;QACH,IAAI,CAAC,KAAK,IAAI,CAAC,MAAM;YACpB,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,SAAS,EAAE;YACxD,UAAU,EAAE,GAAG,OAAO,IAAI;YAC1B,YAAY,EAAE,OAAO;SACrB,CAAC,CAAC;QACH,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5C,MAAM,MAAM,GACX,KAAK,CAAC,SAAS,KAAK,MAAM,CAAC,SAAS;YACpC,MAAM,CAAC,OAAO,KAAK,IAAI;YACvB,KAAK,CAAC,OAAO,KAAK,CAAC;YACnB,SAAS,CAAC,MAAM,KAAK,KAAK,CAAC,WAAW,CAAC;QACxC,OAAO,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,sBAAsB;YAC5B,QAAQ,EAAE,QAAQ;YAClB,MAAM;YACN,WAAW,EACV,6FAA6F;YAC9F,MAAM,EAAE,SAAS,KAAK,CAAC,SAAS,YAAY,MAAM,CAAC,SAAS,aAAa,KAAK,CAAC,OAAO,eAAe,SAAS,CAAC,MAAM,EAAE;YACvH,cAAc,EAAE,YAAY,CAAC,SAAS,CAAC;YACvC,YAAY,EAAE,KAAK,CAAC,OAAO;SAC3B,CAAC,CAAC;IACJ,CAAC;YAAS,CAAC;QACV,KAAK,CAAC,KAAK,EAAE,CAAC;QACd,MAAM,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/C,CAAC;AACF,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { ContextStore } from '../store.js';
|
|
2
|
+
import type { SeededSource } from './types.js';
|
|
3
|
+
export declare const PROJECT = "/tmp/pi-context-eval-project";
|
|
4
|
+
export declare const SESSION = "/tmp/pi-context-eval-session.jsonl";
|
|
5
|
+
export declare function make_noise_lines(count: number, prefix: string): string;
|
|
6
|
+
export declare function source_texts(): Record<string, {
|
|
7
|
+
tool_name: string;
|
|
8
|
+
text: string;
|
|
9
|
+
}>;
|
|
10
|
+
export declare function seed(store: ContextStore): Map<string, SeededSource>;
|
|
11
|
+
export declare function result_bytes(values: Array<{
|
|
12
|
+
content: string;
|
|
13
|
+
}>): number;
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
export const PROJECT = '/tmp/pi-context-eval-project';
|
|
2
|
+
export const SESSION = '/tmp/pi-context-eval-session.jsonl';
|
|
3
|
+
/**
 * Build `count` filler lines of the form
 * `<prefix> noise line <index> xxxx…` (48 `x` characters), joined by newlines.
 *
 * @param count Number of lines to generate.
 * @param prefix Text placed at the start of every line.
 * @returns The generated lines as one newline-separated string.
 */
export function make_noise_lines(count, prefix) {
    const filler = 'x'.repeat(48);
    const render_line = (_unused, index) => `${prefix} noise line ${index} ${filler}`;
    const lines = Array.from({ length: count }, render_line);
    return lines.join('\n');
}
|
|
6
|
+
/**
 * Build the fixture texts used to seed the eval store.
 *
 * @returns An object keyed by fixture name (`needle-log`, `staging-help`,
 *   `skill-script`, `code-symbols`, `chunk-boundary`), each entry holding the
 *   `tool_name` to record and the `text` to store.
 */
export function source_texts() {
    // Every fixture body is a list of lines joined with newlines.
    const as_text = (...parts) => parts.join('\n');
    const needle_log = {
        tool_name: 'bash',
        text: as_text(make_noise_lines(80, 'before'), 'TARGET_VALUE appears on line 855 with surrounding diagnostic text', make_noise_lines(80, 'after')),
    };
    const staging_help = {
        tool_name: 'bash',
        text: as_text('Interactive git UI help', 'Press space to stage this hunk.', 'Press enter to open the selected file.', 'Use a to amend after review.', make_noise_lines(40, 'staging')),
    };
    const skill_script = {
        tool_name: 'read',
        text: as_text('case "$command" in', ' update) run_gh_skill_update "$repo" ;;', ' install) run_gh_skill_install "$repo" ;;', 'esac', make_noise_lines(40, 'script')),
    };
    const code_symbols = {
        tool_name: 'read',
        text: as_text("pi.registerCommand('context-stats', {", " description: 'Show context sidecar byte accounting',", '});', make_noise_lines(40, 'code')),
    };
    const chunk_boundary = {
        tool_name: 'mcp__mcp-omnisearch__web_extract',
        text: as_text(`alpha-token ${'a'.repeat(5000)}`, '', `omega-token ${'b'.repeat(5000)}`),
    };
    return {
        'needle-log': needle_log,
        'staging-help': staging_help,
        'skill-script': skill_script,
        'code-symbols': code_symbols,
        'chunk-boundary': chunk_boundary,
    };
}
|
|
55
|
+
/**
 * Store every fixture from `source_texts()` in `store` and record what was
 * stored.
 *
 * @param store Store to seed; `store(...)` is called once per fixture with
 *   `force: true`.
 * @returns A Map from fixture name to `{ name, source_id, first_chunk_id }`.
 * @throws Error when the store returns nothing for a fixture.
 */
export function seed(store) {
    const catalog = source_texts();
    const seeded = new Map();
    for (const name of Object.keys(catalog)) {
        const { text, tool_name } = catalog[name];
        const stored = store.store({
            text,
            tool_name,
            input_summary: name,
            project_path: PROJECT,
            session_id: SESSION,
            force: true,
        });
        if (!stored) {
            throw new Error(`Failed to seed ${name}`);
        }
        const { source_id, first_chunk_id } = stored;
        seeded.set(name, { name, source_id, first_chunk_id });
    }
    return seeded;
}
|
|
76
|
+
/**
 * Sum the UTF-8 encoded size of the `content` field of each value.
 *
 * @param values - array of objects carrying a string `content`.
 * @returns total byte length; 0 for an empty array.
 */
export function result_bytes(values) {
	let total = 0;
	for (const { content } of values) {
		total += Buffer.byteLength(content, 'utf8');
	}
	return total;
}
|
|
79
|
+
//# sourceMappingURL=fixtures.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fixtures.js","sourceRoot":"","sources":["../../src/eval/fixtures.ts"],"names":[],"mappings":"AAGA,MAAM,CAAC,MAAM,OAAO,GAAG,8BAA8B,CAAC;AACtD,MAAM,CAAC,MAAM,OAAO,GAAG,oCAAoC,CAAC;AAE5D,MAAM,UAAU,gBAAgB,CAC/B,KAAa,EACb,MAAc;IAEd,OAAO,KAAK,CAAC,IAAI,CAChB,EAAE,MAAM,EAAE,KAAK,EAAE,EACjB,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,MAAM,eAAe,KAAK,IAAI,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAC/D,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACd,CAAC;AAED,MAAM,UAAU,YAAY;IAI3B,OAAO;QACN,YAAY,EAAE;YACb,SAAS,EAAE,MAAM;YACjB,IAAI,EAAE;gBACL,gBAAgB,CAAC,EAAE,EAAE,QAAQ,CAAC;gBAC9B,mEAAmE;gBACnE,gBAAgB,CAAC,EAAE,EAAE,OAAO,CAAC;aAC7B,CAAC,IAAI,CAAC,IAAI,CAAC;SACZ;QACD,cAAc,EAAE;YACf,SAAS,EAAE,MAAM;YACjB,IAAI,EAAE;gBACL,yBAAyB;gBACzB,iCAAiC;gBACjC,wCAAwC;gBACxC,8BAA8B;gBAC9B,gBAAgB,CAAC,EAAE,EAAE,SAAS,CAAC;aAC/B,CAAC,IAAI,CAAC,IAAI,CAAC;SACZ;QACD,cAAc,EAAE;YACf,SAAS,EAAE,MAAM;YACjB,IAAI,EAAE;gBACL,oBAAoB;gBACpB,0CAA0C;gBAC1C,4CAA4C;gBAC5C,MAAM;gBACN,gBAAgB,CAAC,EAAE,EAAE,QAAQ,CAAC;aAC9B,CAAC,IAAI,CAAC,IAAI,CAAC;SACZ;QACD,cAAc,EAAE;YACf,SAAS,EAAE,MAAM;YACjB,IAAI,EAAE;gBACL,uCAAuC;gBACvC,wDAAwD;gBACxD,KAAK;gBACL,gBAAgB,CAAC,EAAE,EAAE,MAAM,CAAC;aAC5B,CAAC,IAAI,CAAC,IAAI,CAAC;SACZ;QACD,gBAAgB,EAAE;YACjB,SAAS,EAAE,kCAAkC;YAC7C,IAAI,EAAE;gBACL,eAAe,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE;gBACjC,EAAE;gBACF,eAAe,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE;aACjC,CAAC,IAAI,CAAC,IAAI,CAAC;SACZ;KACD,CAAC;AACH,CAAC;AAED,MAAM,UAAU,IAAI,CAAC,KAAmB;IACvC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAwB,CAAC;IAC/C,KAAK,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,EAAE,CAAC;QAC7D,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC;YAC1B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,aAAa,EAAE,IAAI;YACnB,YAAY,EAAE,OAAO;YACrB,UAAU,EAAE,OAAO;YACnB,KAAK,EAAE,IAAI;SACX,CAAC,CAAC;QACH,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,KAAK,CAAC,kBAAkB,IAAI,EAAE,CAAC,CAAC;QACvD,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE;YAChB,IAAI;YACJ,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,cAAc,EAAE,MAAM,CAAC,cAAc;SACrC,CAAC,CAAC;IACJ,CAAC;IACD,OAAO,MAAM,CAAC;AACf,CAAC;AAED
,MAAM,UAAU,YAAY,CAC3B,MAAkC;IAElC,OAAO,MAAM,CAAC,MAAM,CACnB,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,CAChB,KAAK,GAAG,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,EACjD,CAAC,CACD,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { mkdtempSync, rmSync } from 'node:fs';
|
|
2
|
+
import { tmpdir } from 'node:os';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { ContextStore } from '../store.js';
|
|
5
|
+
import { run_capture_evals, run_cost_evals, run_dedupe_evals, run_lifecycle_evals, run_retention_evals, run_retrieval_evals, run_search_evals, } from './checks.js';
|
|
6
|
+
import { PROJECT, SESSION, seed } from './fixtures.js';
|
|
7
|
+
import { build_sections, format_report } from './report.js';
|
|
8
|
+
/**
 * Run the full eval suite against a throwaway store and return the report.
 *
 * A temporary directory is created for the database, the fixtures are
 * seeded, every check group appends its results to a shared array, and the
 * results are aggregated into summary and per-section statistics.
 *
 * The store is closed and the temporary directory removed on every exit
 * path, including when store construction or `store.close()` itself throws.
 *
 * @returns report object with `version`, `summary`, `sections`, `results`.
 */
export function run_context_eval() {
	const dir = mkdtempSync(join(tmpdir(), 'pi-context-eval-'));
	let store;
	try {
		// Constructed inside the try so a failing constructor cannot leak `dir`.
		store = new ContextStore({
			db_path: join(dir, 'context.db'),
			project_path: PROJECT,
			session_id: SESSION,
		});
		const seeded = seed(store);
		const results = [];
		run_search_evals(store, seeded, results);
		run_retrieval_evals(store, seeded, results);
		run_lifecycle_evals(store, seeded, results);
		run_capture_evals(results);
		run_retention_evals(store, results);
		run_cost_evals(store, seeded, results);
		run_dedupe_evals(results);
		const passed = results.filter((result) => result.passed).length;
		const total = results.length;
		const returned_bytes = results.reduce((sum, result) => sum + result.returned_bytes, 0);
		return {
			version: 1,
			summary: {
				passed,
				failed: total - passed,
				total,
				// Guard the empty case (as build_sections does): 0 / 0 is NaN,
				// which JSON.stringify would emit as null.
				score_pct: total === 0 ? 0 : Math.round((passed / total) * 1000) / 10,
				returned_bytes,
			},
			sections: build_sections(results),
			results,
		};
	}
	finally {
		try {
			if (store)
				store.close();
		}
		finally {
			// Always remove the temp directory, even if close() throws.
			rmSync(dir, { recursive: true, force: true });
		}
	}
}
|
|
47
|
+
/**
 * Command-line entry point: run the eval and print the report to stdout.
 *
 * Prints pretty-printed JSON when `--json` is among the arguments,
 * otherwise the text rendering from `format_report`. Sets
 * `process.exitCode` to 1 when any case failed.
 *
 * @param args - argument list; defaults to `process.argv.slice(2)`.
 */
export async function run_context_eval_cli(args = process.argv.slice(2)) {
	const json = args.includes('--json');
	const report = run_context_eval();
	let output;
	if (json) {
		output = `${JSON.stringify(report, null, 2)}\n`;
	}
	else {
		output = `${format_report(report)}\n`;
	}
	process.stdout.write(output);
	if (report.summary.failed > 0) {
		process.exitCode = 1;
	}
}
|
|
56
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAC9C,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EACN,iBAAiB,EACjB,cAAc,EACd,gBAAgB,EAChB,mBAAmB,EACnB,mBAAmB,EACnB,mBAAmB,EACnB,gBAAgB,GAChB,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,eAAe,CAAC;AACvD,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAG5D,MAAM,UAAU,gBAAgB;IAC/B,MAAM,GAAG,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,kBAAkB,CAAC,CAAC,CAAC;IAC5D,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;IACxC,MAAM,KAAK,GAAG,IAAI,YAAY,CAAC;QAC9B,OAAO;QACP,YAAY,EAAE,OAAO;QACrB,UAAU,EAAE,OAAO;KACnB,CAAC,CAAC;IAEH,IAAI,CAAC;QACJ,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;QAC3B,MAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,gBAAgB,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QACzC,mBAAmB,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAC5C,mBAAmB,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAC5C,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAC3B,mBAAmB,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QACpC,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QACvC,gBAAgB,CAAC,OAAO,CAAC,CAAC;QAE1B,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QAChE,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;QAC7B,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CACpC,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,CAAC,GAAG,GAAG,MAAM,CAAC,cAAc,EAC5C,CAAC,CACD,CAAC;QACF,OAAO;YACN,OAAO,EAAE,CAAC;YACV,OAAO,EAAE;gBACR,MAAM;gBACN,MAAM,EAAE,KAAK,GAAG,MAAM;gBACtB,KAAK;gBACL,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,KAAK,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE;gBACnD,cAAc;aACd;YACD,QAAQ,EAAE,cAAc,CAAC,OAAO,CAAC;YACjC,OAAO;SACP,CAAC;IACH,CAAC;YAAS,CAAC;QACV,KAAK,CAAC,KAAK,EAAE,CAAC;QACd,MAAM,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/C,CAAC;AACF,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACzC,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;IAE5B,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA
C;IACrC,MAAM,MAAM,GAAG,gBAAgB,EAAE,CAAC;IAClC,OAAO,CAAC,MAAM,CAAC,KAAK,CACnB,IAAI;QACH,CAAC,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI;QACxC,CAAC,CAAC,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAC/B,CAAC;IACF,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;AACrD,CAAC"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
 * Aggregate case results into per-category pass/fail statistics.
 *
 * Sections are emitted in a fixed category order and categories with no
 * results are omitted. `score_pct` is the pass percentage rounded to one
 * decimal place.
 *
 * @param results - array of case results with `category` and `passed`.
 * @returns array of `{ category, passed, failed, total, score_pct }`.
 */
export function build_sections(results) {
	const ordered_categories = [
		'search',
		'retrieval',
		'lifecycle',
		'capture',
		'retention',
		'cost',
		'dedupe',
	];
	const sections = [];
	for (const category of ordered_categories) {
		let total = 0;
		let passed = 0;
		for (const result of results) {
			if (result.category !== category)
				continue;
			total += 1;
			if (result.passed)
				passed += 1;
		}
		// Categories without any results do not appear in the output.
		if (total === 0)
			continue;
		sections.push({
			category,
			passed,
			failed: total - passed,
			total,
			score_pct: Math.round((passed / total) * 1000) / 10,
		});
	}
	return sections;
}
|
|
26
|
+
/**
 * Render an eval report as newline-joined plain text.
 *
 * Layout: title, overall score and returned bytes, one bullet per section,
 * then three lines per case (status icon with name and category, the
 * description, and the detail with result count and byte count).
 *
 * @param report - object with `summary`, `sections` and `results`.
 * @returns the rendered text, without a trailing newline.
 */
export function format_report(report) {
	const lines = [
		'# pi-context eval',
		'',
		`Score: ${report.summary.passed}/${report.summary.total} (${report.summary.score_pct}%)`,
		`Returned bytes: ${report.summary.returned_bytes}`,
		'',
		'Sections:',
	];
	for (const section of report.sections) {
		lines.push(`- ${section.category}: ${section.passed}/${section.total} (${section.score_pct}%)`);
	}
	lines.push('');
	for (const result of report.results) {
		const icon = result.passed ? '✅' : '❌';
		lines.push(`${icon} ${result.name} (${result.category})`);
		lines.push(` ${result.description}`);
		lines.push(` ${result.detail}; results=${result.result_count}; bytes=${result.returned_bytes}`);
	}
	return lines.join('\n');
}
|
|
43
|
+
//# sourceMappingURL=report.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"report.js","sourceRoot":"","sources":["../../src/eval/report.ts"],"names":[],"mappings":"AAMA,MAAM,UAAU,cAAc,CAC7B,OAAyB;IAEzB,MAAM,UAAU,GAAmB;QAClC,QAAQ;QACR,WAAW;QACX,WAAW;QACX,SAAS;QACT,WAAW;QACX,MAAM;QACN,QAAQ;KACR,CAAC;IACF,OAAO,UAAU;SACf,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE;QACjB,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAC7B,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,KAAK,QAAQ,CACxC,CAAC;QACF,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QAChE,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;QAC7B,OAAO;YACN,QAAQ;YACR,MAAM;YACN,MAAM,EAAE,KAAK,GAAG,MAAM;YACtB,KAAK;YACL,SAAS,EACR,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,KAAK,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE;SAC3D,CAAC;IACH,CAAC,CAAC;SACD,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAAkB;IAC/C,OAAO;QACN,mBAAmB;QACnB,EAAE;QACF,UAAU,MAAM,CAAC,OAAO,CAAC,MAAM,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,KAAK,MAAM,CAAC,OAAO,CAAC,SAAS,IAAI;QACxF,mBAAmB,MAAM,CAAC,OAAO,CAAC,cAAc,EAAE;QAClD,EAAE;QACF,WAAW;QACX,GAAG,MAAM,CAAC,QAAQ,CAAC,GAAG,CACrB,CAAC,OAAO,EAAE,EAAE,CACX,KAAK,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,KAAK,KAAK,OAAO,CAAC,SAAS,IAAI,CACpF;QACD,EAAE;QACF,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAChC;YACC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,MAAM,CAAC,IAAI,KAAK,MAAM,CAAC,QAAQ,GAAG;YAClE,MAAM,MAAM,CAAC,WAAW,EAAE;YAC1B,MAAM,MAAM,CAAC,MAAM,aAAa,MAAM,CAAC,YAAY,WAAW,MAAM,CAAC,cAAc,EAAE;SACrF,CAAC,IAAI,CAAC,IAAI,CAAC,CACZ;KACD,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
 * Search eval scenarios.
 *
 * Rows are listed as [name, source, query, expect, description]; every
 * scenario uses a result limit of 5. `source` is the name of a seeded
 * fixture.
 */
export const search_scenarios = [
	[
		'exact-token-baseline',
		'needle-log',
		'TARGET_VALUE',
		'TARGET_VALUE appears on line 855',
		'Basic exact-token lookup should work before and after.',
	],
	[
		'over-specific-human-query',
		'staging-help',
		'stage specific chunk line commit checks amend author sign-off',
		'Press space to stage this hunk',
		'Human/agent query contains useful words plus absent words; strict AND search used to miss.',
	],
	[
		'literal-or-query',
		'skill-script',
		'run_gh_skill_update OR run_gh_skill_install OR case update',
		'run_gh_skill_update',
		'Queries copied from agent reasoning often include OR; search should treat this as alternatives.',
	],
	[
		'punctuation-symbol-query',
		'code-symbols',
		'registerCommand context-stats',
		"registerCommand('context-stats'",
		'Code and command names include punctuation that should not make retrieval brittle.',
	],
	[
		'chunk-boundary-query',
		'chunk-boundary',
		'alpha-token omega-token',
		'alpha-token',
		'Useful terms may be split across chunks; relaxed fallback should still recover likely chunks.',
	],
].map(([name, source, query, expect, description]) => ({
	name,
	source,
	query,
	expect,
	limit: 5,
	description,
}));
|
|
43
|
+
/**
 * Chunk-retrieval eval scenarios.
 *
 * Rows are listed as [name, source, chunk_id, expect, description];
 * `source` is the name of a seeded fixture.
 */
export const get_scenarios = [
	[
		'get-first-ordinal-alias',
		'needle-log',
		'1',
		'before noise line 0',
		'Receipt-led chunk retrieval via ordinal alias should stay reliable.',
	],
].map(([name, source, chunk_id, expect, description]) => ({
	name,
	source,
	chunk_id,
	expect,
	description,
}));
|
|
52
|
+
//# sourceMappingURL=scenarios.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scenarios.js","sourceRoot":"","sources":["../../src/eval/scenarios.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAqB;IACjD;QACC,IAAI,EAAE,sBAAsB;QAC5B,MAAM,EAAE,YAAY;QACpB,KAAK,EAAE,cAAc;QACrB,MAAM,EAAE,kCAAkC;QAC1C,KAAK,EAAE,CAAC;QACR,WAAW,EACV,wDAAwD;KACzD;IACD;QACC,IAAI,EAAE,2BAA2B;QACjC,MAAM,EAAE,cAAc;QACtB,KAAK,EACJ,+DAA+D;QAChE,MAAM,EAAE,gCAAgC;QACxC,KAAK,EAAE,CAAC;QACR,WAAW,EACV,4FAA4F;KAC7F;IACD;QACC,IAAI,EAAE,kBAAkB;QACxB,MAAM,EAAE,cAAc;QACtB,KAAK,EACJ,4DAA4D;QAC7D,MAAM,EAAE,qBAAqB;QAC7B,KAAK,EAAE,CAAC;QACR,WAAW,EACV,iGAAiG;KAClG;IACD;QACC,IAAI,EAAE,0BAA0B;QAChC,MAAM,EAAE,cAAc;QACtB,KAAK,EAAE,+BAA+B;QACtC,MAAM,EAAE,iCAAiC;QACzC,KAAK,EAAE,CAAC;QACR,WAAW,EACV,oFAAoF;KACrF;IACD;QACC,IAAI,EAAE,sBAAsB;QAC5B,MAAM,EAAE,gBAAgB;QACxB,KAAK,EAAE,yBAAyB;QAChC,MAAM,EAAE,aAAa;QACrB,KAAK,EAAE,CAAC;QACR,WAAW,EACV,+FAA+F;KAChG;CACD,CAAC;AAEF,MAAM,CAAC,MAAM,aAAa,GAAkB;IAC3C;QACC,IAAI,EAAE,yBAAyB;QAC/B,MAAM,EAAE,YAAY;QACpB,QAAQ,EAAE,GAAG;QACb,MAAM,EAAE,qBAAqB;QAC7B,WAAW,EACV,qEAAqE;KACtE;CACD,CAAC"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/** Identifiers recorded for one fixture after it has been seeded into a store. */
export interface SeededSource {
	/** Fixture name; also the key under which the entry is indexed. */
	name: string;
	/** Source identifier reported by the store for this fixture. */
	source_id: string;
	/** Identifier of the fixture's first chunk as reported by the store, or null. */
	first_chunk_id: string | null;
}
|
|
6
|
+
/** One search eval case: a query run against a named seeded fixture. */
export interface SearchScenario {
	/** Scenario identifier. */
	name: string;
	/** Search query text. */
	query: string;
	/** Name of the seeded fixture the scenario targets. */
	source: string;
	/** Text the scenario expects to find (matching rules live in the check runner — confirm there). */
	expect: string;
	/** Optional maximum number of results to request. */
	limit?: number;
	/** Human-readable explanation of what the scenario exercises. */
	description: string;
}
|
|
14
|
+
/** One chunk-retrieval eval case against a named seeded fixture. */
export interface GetScenario {
	/** Scenario identifier. */
	name: string;
	/** Name of the seeded fixture the scenario targets. */
	source: string;
	/** Chunk identifier to request (the bundled scenario uses the ordinal alias '1'). */
	chunk_id: string;
	/** Text the scenario expects in the retrieved chunk (matching rules live in the check runner — confirm there). */
	expect: string;
	/** Human-readable explanation of what the scenario exercises. */
	description: string;
}
|
|
21
|
+
/** Category label attached to every eval case and used to group report sections. */
export type EvalCategory =
	| 'search'
	| 'retrieval'
	| 'lifecycle'
	| 'capture'
	| 'retention'
	| 'cost'
	| 'dedupe';
|
|
22
|
+
/** Outcome of a single eval case. */
export interface EvalCaseResult {
	/** Case identifier shown in the report. */
	name: string;
	/** Category used to group the case into a report section. */
	category: EvalCategory;
	/** Whether the case passed. */
	passed: boolean;
	/** Human-readable explanation of what the case exercises. */
	description: string;
	/** Free-form detail line printed under the case in the text report. */
	detail: string;
	/** Byte count attributed to this case; summed into the report summary. */
	returned_bytes: number;
	/** Result count printed alongside the detail line. */
	result_count: number;
}
|
|
31
|
+
/** Complete eval report: overall summary, per-category sections and raw case results. */
export interface EvalReport {
	/** Report format version. */
	version: 1;
	/** Totals across every case. */
	summary: {
		/** Number of passing cases. */
		passed: number;
		/** Number of failing cases (`total - passed`). */
		failed: number;
		/** Number of cases run. */
		total: number;
		/** Pass percentage, rounded to one decimal place. */
		score_pct: number;
		/** Sum of `returned_bytes` over all cases. */
		returned_bytes: number;
	};
	/** Per-category totals; categories without any cases are omitted. */
	sections: {
		category: EvalCategory;
		passed: number;
		failed: number;
		total: number;
		/** Pass percentage for the category, rounded to one decimal place. */
		score_pct: number;
	}[];
	/** Every case result, in the order the checks appended them. */
	results: EvalCaseResult[];
}
|