daftari 1.15.0 → 1.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +72 -0
- package/README.md +8 -2
- package/dist/backfill/apply.d.ts +14 -0
- package/dist/backfill/apply.d.ts.map +1 -0
- package/dist/backfill/apply.js +111 -0
- package/dist/backfill/apply.js.map +1 -0
- package/dist/backfill/derive.d.ts +25 -0
- package/dist/backfill/derive.d.ts.map +1 -0
- package/dist/backfill/derive.js +142 -0
- package/dist/backfill/derive.js.map +1 -0
- package/dist/backfill/index.d.ts +2 -0
- package/dist/backfill/index.d.ts.map +1 -0
- package/dist/backfill/index.js +232 -0
- package/dist/backfill/index.js.map +1 -0
- package/dist/backfill/plan.d.ts +19 -0
- package/dist/backfill/plan.d.ts.map +1 -0
- package/dist/backfill/plan.js +157 -0
- package/dist/backfill/plan.js.map +1 -0
- package/dist/backfill/types.d.ts +19 -0
- package/dist/backfill/types.d.ts.map +1 -0
- package/dist/backfill/types.js +10 -0
- package/dist/backfill/types.js.map +1 -0
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +15 -0
- package/dist/cli.js.map +1 -1
- package/dist/curation/lint.d.ts +3 -0
- package/dist/curation/lint.d.ts.map +1 -1
- package/dist/curation/lint.js +5 -0
- package/dist/curation/lint.js.map +1 -1
- package/dist/curation/staged-actions.d.ts +68 -0
- package/dist/curation/staged-actions.d.ts.map +1 -0
- package/dist/curation/staged-actions.js +394 -0
- package/dist/curation/staged-actions.js.map +1 -0
- package/dist/eval/generate.d.ts +12 -0
- package/dist/eval/generate.d.ts.map +1 -0
- package/dist/eval/generate.js +221 -0
- package/dist/eval/generate.js.map +1 -0
- package/dist/eval/index.d.ts +2 -0
- package/dist/eval/index.d.ts.map +1 -0
- package/dist/eval/index.js +311 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/llm.d.ts +47 -0
- package/dist/eval/llm.d.ts.map +1 -0
- package/dist/eval/llm.js +165 -0
- package/dist/eval/llm.js.map +1 -0
- package/dist/eval/prompts.d.ts +5 -0
- package/dist/eval/prompts.d.ts.map +1 -0
- package/dist/eval/prompts.js +44 -0
- package/dist/eval/prompts.js.map +1 -0
- package/dist/eval/run.d.ts +13 -0
- package/dist/eval/run.d.ts.map +1 -0
- package/dist/eval/run.js +78 -0
- package/dist/eval/run.js.map +1 -0
- package/dist/eval/score.d.ts +12 -0
- package/dist/eval/score.d.ts.map +1 -0
- package/dist/eval/score.js +154 -0
- package/dist/eval/score.js.map +1 -0
- package/dist/eval/storage.d.ts +10 -0
- package/dist/eval/storage.d.ts.map +1 -0
- package/dist/eval/storage.js +69 -0
- package/dist/eval/storage.js.map +1 -0
- package/dist/eval/subgraph.d.ts +17 -0
- package/dist/eval/subgraph.d.ts.map +1 -0
- package/dist/eval/subgraph.js +214 -0
- package/dist/eval/subgraph.js.map +1 -0
- package/dist/eval/tool-surface.d.ts +7 -0
- package/dist/eval/tool-surface.d.ts.map +1 -0
- package/dist/eval/tool-surface.js +160 -0
- package/dist/eval/tool-surface.js.map +1 -0
- package/dist/eval/types.d.ts +173 -0
- package/dist/eval/types.d.ts.map +1 -0
- package/dist/eval/types.js +44 -0
- package/dist/eval/types.js.map +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -1
- package/dist/index.js.map +1 -1
- package/dist/search/reindex.d.ts.map +1 -1
- package/dist/search/reindex.js +6 -0
- package/dist/search/reindex.js.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +2 -0
- package/dist/server.js.map +1 -1
- package/dist/storage/index-db.d.ts +19 -0
- package/dist/storage/index-db.d.ts.map +1 -1
- package/dist/storage/index-db.js +56 -0
- package/dist/storage/index-db.js.map +1 -1
- package/dist/tools/curation.d.ts +2 -1
- package/dist/tools/curation.d.ts.map +1 -1
- package/dist/tools/curation.js +18 -4
- package/dist/tools/curation.js.map +1 -1
- package/dist/tools/staged-actions.d.ts +18 -0
- package/dist/tools/staged-actions.d.ts.map +1 -0
- package/dist/tools/staged-actions.js +275 -0
- package/dist/tools/staged-actions.js.map +1 -0
- package/dist/utils/config.d.ts +1 -0
- package/dist/utils/config.d.ts.map +1 -1
- package/dist/utils/config.js +32 -0
- package/dist/utils/config.js.map +1 -1
- package/dist/utils/git.d.ts +6 -0
- package/dist/utils/git.d.ts.map +1 -1
- package/dist/utils/git.js +34 -0
- package/dist/utils/git.js.map +1 -1
- package/package.json +2 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"subgraph.d.ts","sourceRoot":"","sources":["../../src/eval/subgraph.ts"],"names":[],"mappings":"AAmBA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAE/D,OAAO,KAAK,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAEhE,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACtC;AAED,MAAM,WAAW,QAAQ;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,YAAY,EAAE,CAAC;IACtB,KAAK,EAAE,YAAY,EAAE,CAAC;CACvB;AAED,wBAAsB,cAAc,CAClC,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,EACZ,IAAI,GAAE,eAAoB,GACzB,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,eAAe,CAAC,CAAC,CA6D5C"}
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
// Deterministic subgraph sampling for the cortex quality metric.
|
|
2
|
+
//
|
|
3
|
+
// Given a vault and a seed string, derive a small connected subgraph rooted at
|
|
4
|
+
// one "seed doc". The seed→doc mapping is stratified by collection so a vault
|
|
5
|
+
// with one dense collection cannot dominate every sample. From the seed we hop
|
|
6
|
+
// along four edge kinds — frontmatter `sources`, in-body markdown links, logged
|
|
7
|
+
// tensions, and `superseded_by` revision links — collecting neighbours until a
|
|
8
|
+
// node cap is reached. In Daftari's data model `sources:` holds external
|
|
9
|
+
// citation slugs (not in-vault paths), so the real in-vault doc→doc revision
|
|
10
|
+
// edge lives in `superseded_by:`, walked bidirectionally below.
|
|
11
|
+
//
|
|
12
|
+
// Pure function of (vault state, seed): the same inputs always yield the same
|
|
13
|
+
// subgraph. All randomness is replaced by SHA-256 indexing over sorted inputs.
|
|
14
|
+
import { createHash } from "node:crypto";
|
|
15
|
+
import { readFile } from "node:fs/promises";
|
|
16
|
+
import { resolve, sep } from "node:path";
|
|
17
|
+
import { listTensions } from "../curation/tension.js";
|
|
18
|
+
import { parseDocument } from "../frontmatter/parser.js";
|
|
19
|
+
import { err, ok } from "../frontmatter/types.js";
|
|
20
|
+
import { openIndexForActiveProvider } from "../tools/search.js";
|
|
21
|
+
// Samples a small, deterministic subgraph of the vault rooted at one seed doc.
//
// Parameters:
//   vaultRoot - vault root; used to open the index and to read documents.
//   seed      - seed string; hashed to choose the stratum and then the doc.
//   opts      - optional settings; `maxNodes` caps the node count (default 5).
//
// Returns a Result: ok({ seed_doc, nodes, edges }) on success, or
// err({ kind: "runtime", message }) when the index cannot be opened, holds no
// documents, or the chosen seed doc cannot be loaded from disk.
export async function sampleSubgraph(vaultRoot, seed, opts = {}) {
    const maxNodes = opts.maxNodes ?? 5;
    const indexRes = openIndexForActiveProvider(vaultRoot);
    if (!indexRes.ok) {
        return err({ kind: "runtime", message: `vault index unavailable: ${indexRes.error.message}` });
    }
    const db = indexRes.value;
    let docs;
    try {
        docs = db.prepare("SELECT path, superseded_by FROM documents").all();
    }
    finally {
        // Always release the index handle, even if the query throws.
        db.close();
    }
    if (docs.length === 0) {
        return err({ kind: "runtime", message: "vault has no indexed documents" });
    }
    // Pick the stratum first, then a doc inside it; both lists are sorted so
    // the hash-derived indices do not depend on SQL row order.
    const strata = stratifyByCollection(docs.map((d) => d.path));
    const stratumNames = [...strata.keys()].sort();
    const stratumIdx = hashToIndex(`${seed}:stratum`, stratumNames.length);
    const stratumName = stratumNames[stratumIdx];
    const stratumDocs = [...(strata.get(stratumName) ?? [])].sort();
    const seedIdx = hashToIndex(`${seed}:doc`, stratumDocs.length);
    const seedDoc = stratumDocs[seedIdx];
    const visited = new Map();
    const edges = [];
    await loadNode(vaultRoot, seedDoc, visited);
    if (!visited.has(seedDoc)) {
        // loadNode skips unreadable files silently. Without a seed node the
        // walk would produce an empty subgraph whose seed_doc is not among
        // its nodes, so report the problem instead of returning ok().
        return err({ kind: "runtime", message: `seed document could not be read: ${seedDoc}` });
    }
    const tensionsByDoc = await loadTensionEdges(vaultRoot);
    // Bidirectional supersede map, built from the already-materialized SQL rows
    // (no extra file reads). Bidirectional so a seed landing on EITHER the old or
    // the new doc reaches its counterpart.
    const supersededByDoc = new Map();
    for (const d of docs) {
        if (typeof d.superseded_by === "string" && d.superseded_by.length > 0) {
            pushTo(supersededByDoc, d.path, { other: d.superseded_by });
            pushTo(supersededByDoc, d.superseded_by, { other: d.path });
        }
    }
    // First hop from the seed, then one more hop from each first-hop
    // neighbour until the cap is reached.
    await walkHop(vaultRoot, seedDoc, visited, edges, tensionsByDoc, supersededByDoc);
    const firstHopNeighbors = [...visited.keys()].filter((p) => p !== seedDoc);
    for (const n of firstHopNeighbors) {
        if (visited.size >= maxNodes)
            break;
        await walkHop(vaultRoot, n, visited, edges, tensionsByDoc, supersededByDoc);
    }
    // A single hop can overshoot the cap, so trim afterwards and drop edges
    // whose endpoints did not survive.
    const nodes = trimToCap(seedDoc, visited, edges, maxNodes);
    const nodePaths = new Set(nodes.map((n) => n.path));
    return ok({
        seed_doc: seedDoc,
        nodes,
        edges: edges.filter((e) => keepEdge(e, nodePaths)),
    });
}
|
|
74
|
+
// Decides whether an edge belongs in the final (capped) subgraph.
//
// The edge's `from` must be a retained node. Beyond that:
//   - `sources` edges are always kept: by the vault's frontmatter convention
//     their `to` is an external citation slug, not an in-vault `.md` path, so
//     it is never expected to be a node itself.
//   - every other kind (`link`, `tension`, `superseded`) points at another
//     vault document, so the edge is only kept when `to` is retained as well;
//     otherwise the target was trimmed or never loaded.
function keepEdge(e, nodePaths) {
    const originRetained = nodePaths.has(e.from);
    if (!originRetained) {
        return false;
    }
    return e.kind === "sources" || nodePaths.has(e.to);
}
|
|
90
|
+
// Builds a doc -> [{ other }] adjacency map from the vault's logged tensions.
// Every tension is recorded in both directions (sourceA <-> sourceB). If
// listTensions reports a failure the map is returned empty, so the caller
// simply walks without tension edges.
async function loadTensionEdges(vaultRoot) {
    const byDoc = new Map();
    const listed = await listTensions(vaultRoot);
    if (listed.ok) {
        for (const tension of listed.value) {
            pushTo(byDoc, tension.sourceA, { other: tension.sourceB });
            pushTo(byDoc, tension.sourceB, { other: tension.sourceA });
        }
    }
    return byDoc;
}
|
|
101
|
+
// Groups document paths by collection, i.e. by their first path segment.
//
// Paths with no directory component (e.g. "README.md"), and paths with an
// empty first segment, all share the single "_root" stratum. Previously the
// whole filename of a root-level doc was used as its collection name, which
// turned every root-level doc into its own stratum and skewed the
// per-collection stratification of seed selection.
//
// Returns a Map from collection name to its paths, in input order.
function stratifyByCollection(paths) {
    const m = new Map();
    for (const p of paths) {
        const slash = p.indexOf("/");
        // slash === -1: no directory; slash === 0: leading "/" (empty segment).
        const collection = slash > 0 ? p.slice(0, slash) : "_root";
        pushTo(m, collection, p);
    }
    return m;
}
|
|
109
|
+
// Multimap append: adds `value` to the array stored under `key` in `m`,
// creating that array the first time the key is seen. Exists so callers avoid
// the `map.get(key)!.push(...)` non-null-assertion idiom that biome's
// lint/style/noNonNullAssertion rule forbids.
function pushTo(m, key, value) {
    const bucket = m.get(key);
    if (!bucket) {
        m.set(key, [value]);
        return;
    }
    bucket.push(value);
}
|
|
119
|
+
// Maps `input` to an index in [0, mod): hashes it with SHA-256, reads the
// first four digest bytes as a big-endian unsigned 32-bit integer, and reduces
// that modulo `mod`. Throws an Error when `mod` is zero or negative.
function hashToIndex(input, mod) {
    if (mod <= 0) {
        throw new Error("mod must be positive");
    }
    const digest = createHash("sha256").update(input).digest();
    const leadingWord = digest.readUInt32BE(0);
    return leadingWord % mod;
}
|
|
126
|
+
// Loads one document into `visited`, keyed by `path`, as
// { path, body, frontmatter }. No-op when `path` is already present.
//
// `path` is resolved against `vaultRoot`. Because paths come from document
// content (frontmatter `sources`, in-body links), anything that resolves
// outside the vault root — `../` traversal or an absolute path — is skipped
// rather than read.
//
// Uses the project's parser; when parsing fails, the raw file text is kept as
// the body with empty frontmatter so the walk continues. A file that cannot be
// read is silently skipped (best-effort by design).
async function loadNode(vaultRoot, path, visited) {
    if (visited.has(path))
        return;
    const root = resolve(vaultRoot);
    const target = resolve(root, path);
    // Append the separator so a sibling like "<root>-other" does not pass the
    // prefix test; a filesystem root already ends with it.
    const rootPrefix = root.endsWith(sep) ? root : root + sep;
    if (!target.startsWith(rootPrefix))
        return; // Escapes the vault (or is the root itself) — not a vault doc.
    let raw;
    try {
        raw = await readFile(target, "utf8");
    }
    catch {
        return; // Missing or unreadable doc — silently skip.
    }
    const parsed = parseDocument(raw);
    if (parsed.ok) {
        visited.set(path, { path, body: parsed.value.content, frontmatter: parsed.value.raw });
    }
    else {
        visited.set(path, { path, body: raw, frontmatter: {} });
    }
}
|
|
147
|
+
// Expands one hop outward from `from`, which must already be in `visited`
// (otherwise this is a no-op). For each outgoing reference it appends an edge
// to `edges` and then tries to load the target into `visited`, in this fixed
// order: frontmatter `sources` (string entries only), in-body markdown links,
// tensions, supersede links. Loads are awaited one at a time so the insertion
// order of `visited` is the same on every run.
async function walkHop(vaultRoot, from, visited, edges, tensionsByDoc, supersededByDoc) {
    const origin = visited.get(from);
    if (!origin) {
        return;
    }
    // Record the edge first, then attempt the load; edges whose target never
    // loads are filtered out later by the caller.
    const follow = async (to, kind) => {
        edges.push({ from, to, kind });
        await loadNode(vaultRoot, to, visited);
    };
    if (Array.isArray(origin.frontmatter.sources)) {
        for (const cited of origin.frontmatter.sources) {
            if (typeof cited === "string") {
                await follow(cited, "sources");
            }
        }
    }
    for (const href of extractInVaultLinks(origin.body)) {
        await follow(href, "link");
    }
    for (const tension of tensionsByDoc.get(from) ?? []) {
        await follow(tension.other, "tension");
    }
    for (const revision of supersededByDoc.get(from) ?? []) {
        await follow(revision.other, "superseded");
    }
}
|
|
176
|
+
// Reduces `visited` to at most `cap` nodes and returns them as an array.
//
// When the map already fits, its values are returned in insertion order.
// Otherwise nodes are ranked: the seed always first, then by descending degree
// (number of edge endpoints naming the node, counting both `from` and `to`),
// with ties broken by path.
//
// The path tiebreak compares UTF-16 code units rather than using
// localeCompare: locale-aware collation depends on the host's locale/ICU data
// and may treat distinct strings as equal, either of which would make the cut
// differ between machines. A negative `cap` is treated as 0 so that
// `slice(0, cap)` cannot fall into its "drop from the end" behaviour.
function trimToCap(seed, visited, edges, cap) {
    if (visited.size <= cap)
        return [...visited.values()];
    const degree = new Map();
    for (const e of edges) {
        degree.set(e.from, (degree.get(e.from) ?? 0) + 1);
        degree.set(e.to, (degree.get(e.to) ?? 0) + 1);
    }
    const comparePaths = (x, y) => (x < y ? -1 : x > y ? 1 : 0);
    const ranked = [...visited.entries()]
        .map(([path, node]) => ({ path, node, degree: degree.get(path) ?? 0 }))
        .sort((a, b) => {
            if (a.path === seed)
                return -1;
            if (b.path === seed)
                return 1;
            // Higher degree wins; the path tiebreak keeps the cut deterministic
            // independent of engine sort stability and host locale.
            return b.degree - a.degree || comparePaths(a.path, b.path);
        });
    return ranked.slice(0, Math.max(0, cap)).map((r) => r.node);
}
|
|
197
|
+
// Collects the hrefs of standard markdown links that point at `.md` files —
// `[text](path.md)` or `[text](path.md#anchor)`, with any anchor dropped.
// Hrefs starting with http:, https: or mailto: (case-insensitive) and hrefs
// starting with "/" are excluded. Wiki-style `[[links]]` are not a Daftari
// vault convention and are intentionally unsupported. Results keep document
// order and may contain duplicates.
function extractInVaultLinks(body) {
    const linkPattern = /\[[^\]]*\]\(([^)]+\.md)(?:#[^)]*)?\)/g;
    const pointsOutsideVault = (href) => /^https?:|^mailto:/i.test(href) || href.startsWith("/");
    return Array.from(body.matchAll(linkPattern), (match) => match[1])
        .filter((href) => !pointsOutsideVault(href));
}
|
|
214
|
+
//# sourceMappingURL=subgraph.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"subgraph.js","sourceRoot":"","sources":["../../src/eval/subgraph.ts"],"names":[],"mappings":"AAAA,iEAAiE;AACjE,EAAE;AACF,+EAA+E;AAC/E,8EAA8E;AAC9E,+EAA+E;AAC/E,gFAAgF;AAChF,+EAA+E;AAC/E,yEAAyE;AACzE,6EAA6E;AAC7E,gEAAgE;AAChE,EAAE;AACF,yEAAyE;AACzE,+EAA+E;AAE/E,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACzD,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,yBAAyB,CAAC;AAC/D,OAAO,EAAE,0BAA0B,EAAE,MAAM,oBAAoB,CAAC;AAmBhE,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,SAAiB,EACjB,IAAY,EACZ,OAAwB,EAAE;IAE1B,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;IACpC,MAAM,QAAQ,GAAG,0BAA0B,CAAC,SAAS,CAAC,CAAC;IACvD,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,4BAA4B,QAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IACjG,CAAC;IACD,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,CAAC;IAE1B,IAAI,IAAsD,CAAC;IAC3D,IAAI,CAAC;QACH,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,2CAA2C,CAAC,CAAC,GAAG,EAG/D,CAAC;IACN,CAAC;YAAS,CAAC;QACT,EAAE,CAAC,KAAK,EAAE,CAAC;IACb,CAAC;IACD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,GAAG,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,gCAAgC,EAAE,CAAC,CAAC;IAC7E,CAAC;IAED,MAAM,MAAM,GAAG,oBAAoB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7D,MAAM,YAAY,GAAG,CAAC,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAC/C,MAAM,UAAU,GAAG,WAAW,CAAC,GAAG,IAAI,UAAU,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;IACvE,MAAM,WAAW,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC;IAC7C,MAAM,WAAW,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAChE,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,IAAI,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;IAC/D,MAAM,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IAErC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAwB,CAAC;IAChD,MAAM,KAAK,GAAmB,EAAE,CAAC;IAEjC,MAAM,aAAa,GAAG,MAAM,gBAAgB,CAAC,SAAS,CAAC,CAAC;IAExD,4EAA4E;IAC5E,8EAA8E;IAC9E,uCAAuC;IACvC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAoC,CAAC;IACp
E,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACrB,IAAI,OAAO,CAAC,CAAC,aAAa,KAAK,QAAQ,IAAI,CAAC,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtE,MAAM,CAAC,eAAe,EAAE,CAAC,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC;YAC5D,MAAM,CAAC,eAAe,EAAE,CAAC,CAAC,aAAa,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,CAAC,SAAS,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAC5C,MAAM,OAAO,CAAC,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,EAAE,eAAe,CAAC,CAAC;IAClF,MAAM,iBAAiB,GAAG,CAAC,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,OAAO,CAAC,CAAC;IAC3E,KAAK,MAAM,CAAC,IAAI,iBAAiB,EAAE,CAAC;QAClC,IAAI,OAAO,CAAC,IAAI,IAAI,QAAQ;YAAE,MAAM;QACpC,MAAM,OAAO,CAAC,SAAS,EAAE,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,EAAE,eAAe,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;IAC3D,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAEpD,OAAO,EAAE,CAAC;QACR,QAAQ,EAAE,OAAO;QACjB,KAAK;QACL,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;KACnD,CAAC,CAAC;AACL,CAAC;AAED,8DAA8D;AAC9D,0EAA0E;AAC1E,0EAA0E;AAC1E,6EAA6E;AAC7E,kBAAkB;AAClB,4EAA4E;AAC5E,4EAA4E;AAC5E,4EAA4E;AAC5E,4EAA4E;AAC5E,SAAS,QAAQ,CAAC,CAAe,EAAE,SAAsB;IACvD,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IACzC,IAAI,CAAC,CAAC,IAAI,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC;IACtC,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AAC7B,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,SAAiB;IAC/C,MAAM,aAAa,GAAG,IAAI,GAAG,EAAoC,CAAC;IAClE,MAAM,WAAW,GAAG,MAAM,YAAY,CAAC,SAAS,CAAC,CAAC;IAClD,IAAI,CAAC,WAAW,CAAC,EAAE;QAAE,OAAO,aAAa,CAAC;IAC1C,KAAK,MAAM,CAAC,IAAI,WAAW,CAAC,KAAK,EAAE,CAAC;QAClC,MAAM,CAAC,aAAa,EAAE,CAAC,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;QACvD,MAAM,CAAC,aAAa,EAAE,CAAC,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;IACzD,CAAC;IACD,OAAO,aAAa,CAAC;AACvB,CAAC;AAED,SAAS,oBAAoB,CAAC,KAAe;IAC3C,MAAM,CAAC,GAAG,IAAI,GAA
G,EAAoB,CAAC;IACtC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,UAAU,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,OAAO,CAAC;QAC9C,MAAM,CAAC,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC;IAC3B,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,0EAA0E;AAC1E,+EAA+E;AAC/E,8CAA8C;AAC9C,SAAS,MAAM,CAAI,CAAmB,EAAE,GAAW,EAAE,KAAQ;IAC3D,MAAM,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACvB,IAAI,GAAG;QAAE,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;;QACpB,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC;AAC3B,CAAC;AAED,SAAS,WAAW,CAAC,KAAa,EAAE,GAAW;IAC7C,IAAI,GAAG,IAAI,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,sBAAsB,CAAC,CAAC;IACtD,MAAM,CAAC,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;IACtD,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC5B,OAAO,CAAC,GAAG,GAAG,CAAC;AACjB,CAAC;AAED,2EAA2E;AAC3E,gFAAgF;AAChF,sDAAsD;AACtD,KAAK,UAAU,QAAQ,CACrB,SAAiB,EACjB,IAAY,EACZ,OAAkC;IAElC,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO;IAC9B,IAAI,GAAW,CAAC;IAChB,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,CAAC;IACzD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,+BAA+B;IACzC,CAAC;IACD,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;IAClC,IAAI,MAAM,CAAC,EAAE,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,WAAW,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC;IACzF,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC,CAAC;IAC1D,CAAC;AACH,CAAC;AAED,KAAK,UAAU,OAAO,CACpB,SAAiB,EACjB,IAAY,EACZ,OAAkC,EAClC,KAAqB,EACrB,aAAoD,EACpD,eAAsD;IAEtD,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC/B,IAAI,CAAC,IAAI;QAAE,OAAO;IAElB,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC;QACrD,CAAC,CAAE,IAAI,CAAC,WAAW,CAAC,OAAqB;QACzC,CAAC,CAAC,EAAE,CAAC;IACP,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,OAAO,CAAC,KAAK,QAAQ;YAAE,SAAS;QACpC,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC;QAC7C,MAAM,QAAQ,CAAC,SAAS,EAAE,CAAC,EAAE,OAAO
,CAAC,CAAC;IACxC,CAAC;IAED,MAAM,KAAK,GAAG,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7C,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAC1C,MAAM,QAAQ,CAAC,SAAS,EAAE,CAAC,EAAE,OAAO,CAAC,CAAC;IACxC,CAAC;IAED,MAAM,QAAQ,GAAG,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IAC/C,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC;QACnD,MAAM,QAAQ,CAAC,SAAS,EAAE,CAAC,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAC9C,CAAC;IAED,MAAM,UAAU,GAAG,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IACnD,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC;QACtD,MAAM,QAAQ,CAAC,SAAS,EAAE,CAAC,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAC9C,CAAC;AACH,CAAC;AAED,SAAS,SAAS,CAChB,IAAY,EACZ,OAAkC,EAClC,KAAqB,EACrB,GAAW;IAEX,IAAI,OAAO,CAAC,IAAI,IAAI,GAAG;QAAE,OAAO,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACtD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClD,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAChD,CAAC;IACD,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;SAClC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;SACtE,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACb,IAAI,CAAC,CAAC,IAAI,KAAK,IAAI;YAAE,OAAO,CAAC,CAAC,CAAC;QAC/B,IAAI,CAAC,CAAC,IAAI,KAAK,IAAI;YAAE,OAAO,CAAC,CAAC;QAC9B,gEAAgE;QAChE,wCAAwC;QACxC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC7D,CAAC,CAAC,CAAC;IACL,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;AACjD,CAAC;A
AED,8EAA8E;AAC9E,wEAAwE;AACxE,wEAAwE;AACxE,gDAAgD;AAChD,SAAS,mBAAmB,CAAC,IAAY;IACvC,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,MAAM,EAAE,GAAG,uCAAuC,CAAC;IACnD,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;QAClC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAClB,IAAI,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,SAAS;QAC9C,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QACnC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACjB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { ToolDef } from "./llm.js";
|
|
2
|
+
/** A set of tool definitions together with the function that dispatches calls to them. */
export interface ToolSurface {
    /** Definitions of the tools available through this surface. */
    defs: ToolDef[];
    /** Invokes the tool called `name` with `input` and resolves with whatever it produced. */
    handler: (name: string, input: unknown) => Promise<unknown>;
}
|
|
6
|
+
/** Builds the tool surface whose handler operates on the vault at `vaultRoot`. */
export declare function buildToolSurface(vaultRoot: string): ToolSurface;
|
|
7
|
+
//# sourceMappingURL=tool-surface.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tool-surface.d.ts","sourceRoot":"","sources":["../../src/eval/tool-surface.ts"],"names":[],"mappings":"AAiBA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAExC,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,OAAO,EAAE,CAAC;IAChB,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;CAC7D;AAiID,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,WAAW,CAyB/D"}
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
// src/eval/tool-surface.ts
|
|
2
|
+
// In-process MCP tool surface for the answerer LLM. A thin adapter over the
|
|
3
|
+
// existing src/tools/* handlers — no MCP serialization, no transport, no
|
|
4
|
+
// stdio. The answerer calls these directly via the LlmClient tool loop.
|
|
5
|
+
//
|
|
6
|
+
// vault_tension_log is INTENTIONALLY EXCLUDED: it is a write tool, and the
|
|
7
|
+
// answerer is strictly read-only. Exposing a write tool to the answerer would
|
|
8
|
+
// let it mutate the vault mid-eval, which would corrupt the measurement.
|
|
9
|
+
//
|
|
10
|
+
// access is passed as `undefined` to every handler, which bypasses RBAC. This
|
|
11
|
+
// is intended: eval runs locally against a snapshot, there is no user identity.
|
|
12
|
+
import { vaultLint, vaultTensionBlast, vaultTensionClusters } from "../tools/curation.js";
|
|
13
|
+
import { vaultRead } from "../tools/read.js";
|
|
14
|
+
import { vaultSearch, vaultSearchRelated } from "../tools/search.js";
|
|
15
|
+
import { vaultThemes } from "../tools/themes.js";
|
|
16
|
+
// Flattens a tool handler's (possibly promised) Result for the answerer:
// resolves with `value` when the Result is ok, otherwise with a
// `{ tool_error }` envelope carrying the error message. Never rejects — a
// rejected promise, or any error raised while reading the Result, is also
// reported as a tool_error so the answerer can react instead of crashing the
// run.
async function unwrap(p) {
    try {
        const outcome = await p;
        if (outcome.ok) {
            return outcome.value;
        }
        return { tool_error: outcome.error.message };
    }
    catch (thrown) {
        const detail = thrown instanceof Error ? thrown.message : String(thrown);
        return { tool_error: detail };
    }
}
|
|
28
|
+
// Definitions of the tools exposed to the answerer. Each entry has a `name`
// (matched by the handler's dispatch in buildToolSurface), a natural-language
// `description`, and an `input_schema` describing the tool's JSON input.
// Every schema sets `additionalProperties: false`.
const TOOL_DEFS = [
    // Single-document read; `path` is required.
    {
        name: "vault_read",
        description: "Read a single vault document. Returns its markdown body, parsed " +
            "frontmatter, and metadata. Path is relative to the vault root.",
        input_schema: {
            type: "object",
            properties: {
                path: {
                    type: "string",
                    description: "Vault-relative path to the markdown file, e.g. competitive-intel/foo.md",
                },
            },
            required: ["path"],
            additionalProperties: false,
        },
    },
    // Free-text search; `query` is required, `limit` is optional.
    {
        name: "vault_search",
        description: "Hybrid search across the vault: BM25 lexical ranking combined with " +
            "vector semantic similarity. Returns ranked documents with snippets.",
        input_schema: {
            type: "object",
            properties: {
                query: { type: "string", description: "Free-text search query" },
                limit: { type: "number", description: "Maximum results to return (default 10, max 50)" },
            },
            required: ["query"],
            additionalProperties: false,
        },
    },
    // Related-document lookup; `path` is required, `limit` is optional.
    {
        name: "vault_search_related",
        description: "Find documents related to a given vault document. Uses that document's " +
            "own text and embeddings as the query; the document itself is excluded. " +
            "Path is relative to the vault root.",
        input_schema: {
            type: "object",
            properties: {
                path: { type: "string", description: "Vault-relative path of the reference document" },
                limit: { type: "number", description: "Maximum results to return (default 10, max 50)" },
            },
            required: ["path"],
            additionalProperties: false,
        },
    },
    // Thematic clustering; both inputs are optional.
    {
        name: "vault_themes",
        description: "Surface thematic clusters across the vault using k-means over " +
            "document-pooled embeddings. Each theme reports a label, coherence " +
            "score, representative documents, and frequent tags.",
        input_schema: {
            type: "object",
            properties: {
                k: { type: "integer", description: "Optional explicit cluster count.", minimum: 1 },
                collection: {
                    type: "string",
                    description: "Restrict clustering to documents in this collection.",
                },
            },
            additionalProperties: false,
        },
    },
    // Advisory curation report; `filter` is optional.
    {
        name: "vault_lint",
        description: "Run the advisory curation checks across the vault: stale files, " +
            "orphans, old drafts, stagnant low-confidence files, deprecated files " +
            "still linked, unanswered questions, and tension health. Reports " +
            "problems; never auto-fixes. Optionally filter to a single check.",
        input_schema: {
            type: "object",
            properties: {
                filter: { type: "string", description: "Restrict the report to a single check" },
            },
            additionalProperties: false,
        },
    },
    // Downstream impact of a contested document or cluster. The "exactly one
    // of" rule is stated only in the description; the schema itself marks
    // neither property as required.
    {
        name: "vault_tension_blast",
        description: "Compute the transitive closure of downstream documents that cite or " +
            "link a contested document — or the union over a contested cluster. " +
            "Accepts exactly one of 'document' (vault-relative path) or 'cluster_id'.",
        input_schema: {
            type: "object",
            properties: {
                document: { type: "string", description: "Vault-relative path of a contested document" },
                cluster_id: {
                    type: "string",
                    description: "A content-addressed cluster id from vault_tension_clusters",
                },
            },
            additionalProperties: false,
        },
    },
    // Tension-graph components; takes no input properties.
    {
        name: "vault_tension_clusters",
        description: "Compute connected components of the tension graph: groups of vault " +
            "documents joined transitively by unresolved tensions. Each cluster " +
            "reports its members, in-scope tension count, tally by kind, and age " +
            "range. Read-only.",
        input_schema: {
            type: "object",
            properties: {},
            additionalProperties: false,
        },
    },
];
|
|
135
|
+
// Builds the in-process tool surface for the vault at `vaultRoot`.
//
// Returns `{ defs, handler }`: `defs` is the shared TOOL_DEFS list and
// `handler(name, input)` dispatches to the matching tool function, passing
// `undefined` as the access argument. The handler always resolves: with the
// tool's value on success, or with a `{ tool_error }` envelope when the tool
// reports a failure, rejects, throws, or `name` is unknown.
export function buildToolSurface(vaultRoot) {
    const handler = async (name, input) => {
        // Tool input is structural JSON from the LLM; treat null/undefined as {}.
        const inp = input ?? {};
        try {
            switch (name) {
                case "vault_read":
                    return await unwrap(vaultRead(vaultRoot, String(inp.path ?? ""), undefined));
                case "vault_search":
                    return await unwrap(vaultSearch(vaultRoot, inp, undefined));
                case "vault_search_related":
                    return await unwrap(vaultSearchRelated(vaultRoot, inp, undefined));
                case "vault_themes":
                    return await unwrap(vaultThemes(vaultRoot, inp, undefined));
                case "vault_lint":
                    return await unwrap(vaultLint(vaultRoot, inp, undefined));
                case "vault_tension_blast":
                    return await unwrap(vaultTensionBlast(vaultRoot, inp, undefined));
                case "vault_tension_clusters":
                    return await unwrap(vaultTensionClusters(vaultRoot, inp, undefined));
                default:
                    return { tool_error: `unknown tool: ${name}` };
            }
        }
        catch (e) {
            // unwrap only guards the promise it is handed. A tool function that
            // throws synchronously does so while its call is being evaluated as
            // unwrap's argument, so it has to be caught here to keep the
            // handler from rejecting.
            return { tool_error: e instanceof Error ? e.message : String(e) };
        }
    };
    return { defs: TOOL_DEFS, handler };
}
|
|
160
|
+
//# sourceMappingURL=tool-surface.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tool-surface.js","sourceRoot":"","sources":["../../src/eval/tool-surface.ts"],"names":[],"mappings":"AAAA,2BAA2B;AAC3B,4EAA4E;AAC5E,yEAAyE;AACzE,wEAAwE;AACxE,EAAE;AACF,2EAA2E;AAC3E,8EAA8E;AAC9E,yEAAyE;AACzE,EAAE;AACF,8EAA8E;AAC9E,gFAAgF;AAGhF,OAAO,EAAE,SAAS,EAAE,iBAAiB,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAC;AAC1F,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACrE,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAQjD,0EAA0E;AAC1E,8EAA8E;AAC9E,yEAAyE;AACzE,KAAK,UAAU,MAAM,CAAI,CAA4B;IACnD,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC;QAClB,OAAO,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;IAC1D,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,EAAE,UAAU,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IACpE,CAAC;AACH,CAAC;AAED,MAAM,SAAS,GAAc;IAC3B;QACE,IAAI,EAAE,YAAY;QAClB,WAAW,EACT,kEAAkE;YAClE,gEAAgE;QAClE,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,IAAI,EAAE;oBACJ,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,yEAAyE;iBACvF;aACF;YACD,QAAQ,EAAE,CAAC,MAAM,CAAC;YAClB,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,cAAc;QACpB,WAAW,EACT,qEAAqE;YACrE,qEAAqE;QACvE,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,wBAAwB,EAAE;gBAChE,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gDAAgD,EAAE;aACzF;YACD,QAAQ,EAAE,CAAC,OAAO,CAAC;YACnB,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,sBAAsB;QAC5B,WAAW,EACT,yEAAyE;YACzE,yEAAyE;YACzE,qCAAqC;QACvC,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,+CAA+C,EAAE;gBACtF,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gDAAgD,EAAE;aACzF;YACD,QAAQ,EAAE,CAAC,MAAM,CAAC;YAClB,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,cAAc;QACpB,WAAW,EACT,gEAAgE;YAChE,oEAAoE;YACpE,qDAAqD;QACvD,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,kCAAkC,EAAE,OAAO,EAAE,CAAC,EAAE;gBACnF,UA
AU,EAAE;oBACV,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,sDAAsD;iBACpE;aACF;YACD,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,YAAY;QAClB,WAAW,EACT,kEAAkE;YAClE,uEAAuE;YACvE,kEAAkE;YAClE,kEAAkE;QACpE,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,uCAAuC,EAAE;aACjF;YACD,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,qBAAqB;QAC3B,WAAW,EACT,sEAAsE;YACtE,qEAAqE;YACrE,0EAA0E;QAC5E,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6CAA6C,EAAE;gBACxF,UAAU,EAAE;oBACV,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,4DAA4D;iBAC1E;aACF;YACD,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,wBAAwB;QAC9B,WAAW,EACT,qEAAqE;YACrE,qEAAqE;YACrE,sEAAsE;YACtE,mBAAmB;QACrB,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE,EAAE;YACd,oBAAoB,EAAE,KAAK;SAC5B;KACF;CACF,CAAC;AAEF,MAAM,UAAU,gBAAgB,CAAC,SAAiB;IAChD,2FAA2F;IAC3F,MAAM,OAAO,GAAG,KAAK,EAAE,IAAY,EAAE,KAAU,EAAoB,EAAE;QACnE,MAAM,GAAG,GAAI,KAAiC,IAAI,EAAE,CAAC;QACrD,QAAQ,IAAI,EAAE,CAAC;YACb,KAAK,YAAY;gBACf,OAAO,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,MAAM,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC;YACzE,KAAK,cAAc;gBACjB,OAAO,MAAM,CAAC,WAAW,CAAC,SAAS,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;YACxD,KAAK,sBAAsB;gBACzB,OAAO,MAAM,CAAC,kBAAkB,CAAC,SAAS,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;YAC/D,KAAK,cAAc;gBACjB,OAAO,MAAM,CAAC,WAAW,CAAC,SAAS,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;YACxD,KAAK,YAAY;gBACf,OAAO,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;YACtD,KAAK,qBAAqB;gBACxB,OAAO,MAAM,CAAC,iBAAiB,CAAC,SAAS,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;YAC9D,KAAK,wBAAwB;gBAC3B,OAAO,MAAM,CAAC,oBAAoB,CAAC,SAAS,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;YACjE;gBACE,OAAO,EAAE,UAAU,EAAE,iBAAiB,IAAI,EAAE,EAAE,CAAC;QACnD,CAAC;IACH,CAAC,CAAC;IAEF,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC;AACtC,CAAC"}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import type { Result } from "../frontmatter/types.js";
|
|
2
|
+
export declare const TIERS: readonly ["retrieval", "cross_reference", "contradiction"];
|
|
3
|
+
export type Tier = (typeof TIERS)[number];
|
|
4
|
+
export declare const TIER_WEIGHT: Record<Tier, number>;
|
|
5
|
+
export interface Question {
|
|
6
|
+
id: string;
|
|
7
|
+
tier: Tier;
|
|
8
|
+
question: string;
|
|
9
|
+
expected_answer: string;
|
|
10
|
+
expected_sources: string[];
|
|
11
|
+
origin: "generated" | "augmented";
|
|
12
|
+
}
|
|
13
|
+
export interface QuestionSet {
|
|
14
|
+
id: string;
|
|
15
|
+
vault_hash: string;
|
|
16
|
+
seed: string;
|
|
17
|
+
timestamp: string;
|
|
18
|
+
subgraph: {
|
|
19
|
+
seed_doc: string;
|
|
20
|
+
nodes: string[];
|
|
21
|
+
edges: SubgraphEdge[];
|
|
22
|
+
};
|
|
23
|
+
questions: Question[];
|
|
24
|
+
generator_model: string;
|
|
25
|
+
prompt_version: number;
|
|
26
|
+
tier_counts_requested: Record<Tier, number>;
|
|
27
|
+
tier_counts_produced: Record<Tier, number>;
|
|
28
|
+
}
|
|
29
|
+
export interface SubgraphEdge {
|
|
30
|
+
from: string;
|
|
31
|
+
to: string;
|
|
32
|
+
kind: "sources" | "link" | "tension" | "superseded";
|
|
33
|
+
}
|
|
34
|
+
export interface Trace {
|
|
35
|
+
tool_calls: ToolCall[];
|
|
36
|
+
final_answer: string;
|
|
37
|
+
total_tool_calls: number;
|
|
38
|
+
input_tokens: number;
|
|
39
|
+
output_tokens: number;
|
|
40
|
+
wall_ms: number;
|
|
41
|
+
stop_reason: string;
|
|
42
|
+
}
|
|
43
|
+
export interface ToolCall {
|
|
44
|
+
tool: string;
|
|
45
|
+
input: unknown;
|
|
46
|
+
output: unknown;
|
|
47
|
+
latency_ms: number;
|
|
48
|
+
}
|
|
49
|
+
export type RunStatus = "complete" | "incomplete";
|
|
50
|
+
interface PerRunResultBase {
|
|
51
|
+
question_id: string;
|
|
52
|
+
question_index: number;
|
|
53
|
+
k_index: number;
|
|
54
|
+
}
|
|
55
|
+
export type PerRunResult = (PerRunResultBase & {
|
|
56
|
+
status: "complete";
|
|
57
|
+
trace: Trace;
|
|
58
|
+
}) | (PerRunResultBase & {
|
|
59
|
+
status: "incomplete";
|
|
60
|
+
trace: null;
|
|
61
|
+
});
|
|
62
|
+
export interface EvalRun {
|
|
63
|
+
id: string;
|
|
64
|
+
questions_id: string;
|
|
65
|
+
answerer_model: string;
|
|
66
|
+
prompt_version: number;
|
|
67
|
+
timestamp: string;
|
|
68
|
+
k: number;
|
|
69
|
+
runs: Record<string, PerRunResult>;
|
|
70
|
+
}
|
|
71
|
+
export type GradeVerdict = "yes" | "partial" | "no" | "ungraded";
|
|
72
|
+
export interface Grade {
|
|
73
|
+
question_id: string;
|
|
74
|
+
question_index: number;
|
|
75
|
+
k_index: number;
|
|
76
|
+
verdict: GradeVerdict;
|
|
77
|
+
reasoning: string;
|
|
78
|
+
grader_model: string;
|
|
79
|
+
}
|
|
80
|
+
export interface TierScore {
|
|
81
|
+
mean: number;
|
|
82
|
+
std: number;
|
|
83
|
+
n: number;
|
|
84
|
+
trace_efficiency: number;
|
|
85
|
+
}
|
|
86
|
+
export interface Score {
|
|
87
|
+
score: number;
|
|
88
|
+
score_std: number;
|
|
89
|
+
by_tier: Record<Tier, TierScore>;
|
|
90
|
+
models: {
|
|
91
|
+
generator: string;
|
|
92
|
+
answerer: string;
|
|
93
|
+
grader: string;
|
|
94
|
+
};
|
|
95
|
+
prompt_version: number;
|
|
96
|
+
spec_version: number;
|
|
97
|
+
questions_id: string;
|
|
98
|
+
results_id: string;
|
|
99
|
+
vault_hash: string;
|
|
100
|
+
k: number;
|
|
101
|
+
n: number;
|
|
102
|
+
timestamp: string;
|
|
103
|
+
}
|
|
104
|
+
export interface HistoryEntry {
|
|
105
|
+
score_id: string;
|
|
106
|
+
score: number;
|
|
107
|
+
score_std: number;
|
|
108
|
+
by_tier: Record<Tier, number>;
|
|
109
|
+
vault_hash: string;
|
|
110
|
+
timestamp: string;
|
|
111
|
+
n: number;
|
|
112
|
+
k: number;
|
|
113
|
+
models: {
|
|
114
|
+
generator: string;
|
|
115
|
+
answerer: string;
|
|
116
|
+
grader: string;
|
|
117
|
+
};
|
|
118
|
+
prompt_version: number;
|
|
119
|
+
spec_version: number;
|
|
120
|
+
}
|
|
121
|
+
export interface HistoryFile {
|
|
122
|
+
version: 1;
|
|
123
|
+
runs: HistoryEntry[];
|
|
124
|
+
}
|
|
125
|
+
export declare const HISTORY_RETENTION = 50;
|
|
126
|
+
export declare const SPEC_VERSION = 1;
|
|
127
|
+
export type CortexEvalError = {
|
|
128
|
+
kind: "config";
|
|
129
|
+
message: string;
|
|
130
|
+
} | {
|
|
131
|
+
kind: "runtime";
|
|
132
|
+
message: string;
|
|
133
|
+
} | {
|
|
134
|
+
kind: "llm";
|
|
135
|
+
message: string;
|
|
136
|
+
retryable: boolean;
|
|
137
|
+
};
|
|
138
|
+
export declare const QuestionSetSchema: {
|
|
139
|
+
readonly type: "object";
|
|
140
|
+
readonly required: readonly ["questions"];
|
|
141
|
+
readonly properties: {
|
|
142
|
+
readonly questions: {
|
|
143
|
+
readonly type: "array";
|
|
144
|
+
readonly items: {
|
|
145
|
+
readonly type: "object";
|
|
146
|
+
readonly required: readonly ["tier", "question", "expected_answer", "expected_sources"];
|
|
147
|
+
readonly properties: {
|
|
148
|
+
readonly tier: {
|
|
149
|
+
readonly enum: readonly ["retrieval", "cross_reference", "contradiction"];
|
|
150
|
+
};
|
|
151
|
+
readonly question: {
|
|
152
|
+
readonly type: "string";
|
|
153
|
+
readonly minLength: 1;
|
|
154
|
+
};
|
|
155
|
+
readonly expected_answer: {
|
|
156
|
+
readonly type: "string";
|
|
157
|
+
readonly minLength: 1;
|
|
158
|
+
};
|
|
159
|
+
readonly expected_sources: {
|
|
160
|
+
readonly type: "array";
|
|
161
|
+
readonly items: {
|
|
162
|
+
readonly type: "string";
|
|
163
|
+
readonly minLength: 1;
|
|
164
|
+
};
|
|
165
|
+
readonly minItems: 1;
|
|
166
|
+
};
|
|
167
|
+
};
|
|
168
|
+
};
|
|
169
|
+
};
|
|
170
|
+
};
|
|
171
|
+
};
|
|
172
|
+
export type { Result };
|
|
173
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/eval/types.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAItD,eAAO,MAAM,KAAK,4DAA6D,CAAC;AAChF,MAAM,MAAM,IAAI,GAAG,CAAC,OAAO,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC;AAG1C,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAI5C,CAAC;AAIF,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,CAAC;IACxB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,MAAM,EAAE,WAAW,GAAG,WAAW,CAAC;CACnC;AAED,MAAM,WAAW,WAAW;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE;QACR,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,KAAK,EAAE,YAAY,EAAE,CAAC;KACvB,CAAC;IACF,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;IACvB,qBAAqB,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAC5C,oBAAoB,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;CAC5C;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,SAAS,GAAG,MAAM,GAAG,SAAS,GAAG,YAAY,CAAC;CACrD;AAID,MAAM,WAAW,KAAK;IACpB,UAAU,EAAE,QAAQ,EAAE,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,OAAO,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,MAAM,SAAS,GAAG,UAAU,GAAG,YAAY,CAAC;AAElD,UAAU,gBAAgB;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;CACjB;AAKD,MAAM,MAAM,YAAY,GACpB,CAAC,gBAAgB,GAAG;IAAE,MAAM,EAAE,UAAU,CAAC;IAAC,KAAK,EAAE,KAAK,CAAA;CAAE,CAAC,GACzD,CAAC,gBAAgB,GAAG;IAAE,MAAM,EAAE,YAAY,CAAC;IAAC,KAAK,EAAE,IAAI,CAAA;CAAE,CAAC,CAAC;AAE/D,MAAM,WAAW,OAAO;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,CAAC,EAAE,MAAM,CAAC;IAEV,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;CACpC;AAID,MAAM,
MAAM,YAAY,GAAG,KAAK,GAAG,SAAS,GAAG,IAAI,GAAG,UAAU,CAAC;AAEjE,MAAM,WAAW,KAAK;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,YAAY,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,CAAC,EAAE,MAAM,CAAC;IACV,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,KAAK;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IACjC,MAAM,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAChE,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,SAAS,EAAE,MAAM,CAAC;CACnB;AAID,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,MAAM,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAChE,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,YAAY,EAAE,CAAC;CACtB;AAED,eAAO,MAAM,iBAAiB,KAAK,CAAC;AACpC,eAAO,MAAM,YAAY,IAAI,CAAC;AAQ9B,MAAM,MAAM,eAAe,GACvB;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GACnC;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GACpC;IAAE,IAAI,EAAE,KAAK,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,OAAO,CAAA;CAAE,CAAC;AAUzD,eAAO,MAAM,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsBpB,CAAC;AAGX,YAAY,EAAE,MAAM,EAAE,CAAC"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
// src/eval/types.ts
|
|
2
|
+
// Shared types for the cortex quality metric. Pure data shapes; no logic.
|
|
3
|
+
// See docs/superpowers/specs/2026-05-31-cortex-quality-metric-design.md.
|
|
4
|
+
// --- Tiers ---
|
|
5
|
+
export const TIERS = ["retrieval", "cross_reference", "contradiction"];
|
|
6
|
+
// Tier weight for the aggregate score formula.
|
|
7
|
+
export const TIER_WEIGHT = {
|
|
8
|
+
retrieval: 1,
|
|
9
|
+
cross_reference: 2,
|
|
10
|
+
contradiction: 3,
|
|
11
|
+
};
|
|
12
|
+
export const HISTORY_RETENTION = 50;
|
|
13
|
+
export const SPEC_VERSION = 1;
|
|
14
|
+
// --- JSON Schema for generator output ---
|
|
15
|
+
// The generator LLM is asked to return JSON matching this schema. Embedded
|
|
16
|
+
// here so the prompt, runtime validator, and types share one source of truth.
|
|
17
|
+
// NOTE: kept in sync MANUALLY with the `Question` interface above — there is no
|
|
18
|
+
// codegen between them. When you add/rename a Question field that the generator
|
|
19
|
+
// produces, update both this schema and `Question` in the same edit. (`id` and
|
|
20
|
+
// `origin` are assigned post-generation, so they are intentionally absent here.)
|
|
21
|
+
export const QuestionSetSchema = {
|
|
22
|
+
type: "object",
|
|
23
|
+
required: ["questions"],
|
|
24
|
+
properties: {
|
|
25
|
+
questions: {
|
|
26
|
+
type: "array",
|
|
27
|
+
items: {
|
|
28
|
+
type: "object",
|
|
29
|
+
required: ["tier", "question", "expected_answer", "expected_sources"],
|
|
30
|
+
properties: {
|
|
31
|
+
tier: { enum: TIERS },
|
|
32
|
+
question: { type: "string", minLength: 1 },
|
|
33
|
+
expected_answer: { type: "string", minLength: 1 },
|
|
34
|
+
expected_sources: {
|
|
35
|
+
type: "array",
|
|
36
|
+
items: { type: "string", minLength: 1 },
|
|
37
|
+
minItems: 1,
|
|
38
|
+
},
|
|
39
|
+
},
|
|
40
|
+
},
|
|
41
|
+
},
|
|
42
|
+
},
|
|
43
|
+
};
|
|
44
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/eval/types.ts"],"names":[],"mappings":"AAAA,oBAAoB;AACpB,0EAA0E;AAC1E,yEAAyE;AAIzE,gBAAgB;AAEhB,MAAM,CAAC,MAAM,KAAK,GAAG,CAAC,WAAW,EAAE,iBAAiB,EAAE,eAAe,CAAU,CAAC;AAGhF,+CAA+C;AAC/C,MAAM,CAAC,MAAM,WAAW,GAAyB;IAC/C,SAAS,EAAE,CAAC;IACZ,eAAe,EAAE,CAAC;IAClB,aAAa,EAAE,CAAC;CACjB,CAAC;AAyIF,MAAM,CAAC,MAAM,iBAAiB,GAAG,EAAE,CAAC;AACpC,MAAM,CAAC,MAAM,YAAY,GAAG,CAAC,CAAC;AAa9B,2CAA2C;AAC3C,2EAA2E;AAC3E,8EAA8E;AAC9E,gFAAgF;AAChF,gFAAgF;AAChF,+EAA+E;AAC/E,iFAAiF;AAEjF,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,IAAI,EAAE,QAAQ;IACd,QAAQ,EAAE,CAAC,WAAW,CAAC;IACvB,UAAU,EAAE;QACV,SAAS,EAAE;YACT,IAAI,EAAE,OAAO;YACb,KAAK,EAAE;gBACL,IAAI,EAAE,QAAQ;gBACd,QAAQ,EAAE,CAAC,MAAM,EAAE,UAAU,EAAE,iBAAiB,EAAE,kBAAkB,CAAC;gBACrE,UAAU,EAAE;oBACV,IAAI,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE;oBACrB,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC,EAAE;oBAC1C,eAAe,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC,EAAE;oBACjD,gBAAgB,EAAE;wBAChB,IAAI,EAAE,OAAO;wBACb,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC,EAAE;wBACvC,QAAQ,EAAE,CAAC;qBACZ;iBACF;aACF;SACF;KACF;CACO,CAAC"}
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAwCA,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CASrE;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,GAAG,IAAI,CAE3D;AAED,wBAAsB,IAAI,CAAC,IAAI,GAAE,MAAM,EAA0B,GAAG,OAAO,CAAC,IAAI,CAAC,CAuIhF"}
|