daftari 1.15.0 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/CHANGELOG.md +20 -0
  2. package/README.md +8 -2
  3. package/dist/cli.d.ts.map +1 -1
  4. package/dist/cli.js +6 -0
  5. package/dist/cli.js.map +1 -1
  6. package/dist/eval/generate.d.ts +12 -0
  7. package/dist/eval/generate.d.ts.map +1 -0
  8. package/dist/eval/generate.js +221 -0
  9. package/dist/eval/generate.js.map +1 -0
  10. package/dist/eval/index.d.ts +2 -0
  11. package/dist/eval/index.d.ts.map +1 -0
  12. package/dist/eval/index.js +311 -0
  13. package/dist/eval/index.js.map +1 -0
  14. package/dist/eval/llm.d.ts +47 -0
  15. package/dist/eval/llm.d.ts.map +1 -0
  16. package/dist/eval/llm.js +165 -0
  17. package/dist/eval/llm.js.map +1 -0
  18. package/dist/eval/prompts.d.ts +5 -0
  19. package/dist/eval/prompts.d.ts.map +1 -0
  20. package/dist/eval/prompts.js +44 -0
  21. package/dist/eval/prompts.js.map +1 -0
  22. package/dist/eval/run.d.ts +13 -0
  23. package/dist/eval/run.d.ts.map +1 -0
  24. package/dist/eval/run.js +78 -0
  25. package/dist/eval/run.js.map +1 -0
  26. package/dist/eval/score.d.ts +12 -0
  27. package/dist/eval/score.d.ts.map +1 -0
  28. package/dist/eval/score.js +154 -0
  29. package/dist/eval/score.js.map +1 -0
  30. package/dist/eval/storage.d.ts +10 -0
  31. package/dist/eval/storage.d.ts.map +1 -0
  32. package/dist/eval/storage.js +69 -0
  33. package/dist/eval/storage.js.map +1 -0
  34. package/dist/eval/subgraph.d.ts +17 -0
  35. package/dist/eval/subgraph.d.ts.map +1 -0
  36. package/dist/eval/subgraph.js +214 -0
  37. package/dist/eval/subgraph.js.map +1 -0
  38. package/dist/eval/tool-surface.d.ts +7 -0
  39. package/dist/eval/tool-surface.d.ts.map +1 -0
  40. package/dist/eval/tool-surface.js +160 -0
  41. package/dist/eval/tool-surface.js.map +1 -0
  42. package/dist/eval/types.d.ts +173 -0
  43. package/dist/eval/types.d.ts.map +1 -0
  44. package/dist/eval/types.js +44 -0
  45. package/dist/eval/types.js.map +1 -0
  46. package/package.json +2 -1
@@ -0,0 +1,160 @@
1
+ // src/eval/tool-surface.ts
2
+ // In-process MCP tool surface for the answerer LLM. A thin adapter over the
3
+ // existing src/tools/* handlers — no MCP serialization, no transport, no
4
+ // stdio. The answerer calls these directly via the LlmClient tool loop.
5
+ //
6
+ // vault_tension_log is INTENTIONALLY EXCLUDED: it is a write tool, and the
7
+ // answerer is strictly read-only. Exposing a write tool to the answerer would
8
+ // let it mutate the vault mid-eval, which would corrupt the measurement.
9
+ //
10
+ // access is passed as `undefined` to every handler, which bypasses RBAC. This
11
+ // is intended: eval runs locally against a snapshot, there is no user identity.
12
+ import { vaultLint, vaultTensionBlast, vaultTensionClusters } from "../tools/curation.js";
13
+ import { vaultRead } from "../tools/read.js";
14
+ import { vaultSearch, vaultSearchRelated } from "../tools/search.js";
15
+ import { vaultThemes } from "../tools/themes.js";
16
+ // Awaits a tool handler's Result and flattens it to either the value or a
17
+ // `{ tool_error }` envelope. Never throws — a rejected promise still surfaces
18
+ // as a tool_error so the answerer can react instead of crashing the run.
19
+ async function unwrap(p) { // p: promise settling to a Result-shaped object ({ ok, value } or { ok, error })
20
+ try { // wraps both the await and the property reads, so neither can escape as a throw
21
+ const r = await p;
22
+ return r.ok ? r.value : { tool_error: r.error.message }; // truthy ok: bare payload; otherwise only the error's message is kept
23
+ }
24
+ catch (e) { // reached when p rejects or when reading r above throws (e.g. r is undefined)
25
+ return { tool_error: e instanceof Error ? e.message : String(e) }; // non-Error throwables are stringified
26
+ }
27
+ }
28
+ const TOOL_DEFS = [ // seven tool definitions; each entry has a name, a description and a JSON-Schema-style input_schema
29
+ {
30
+ name: "vault_read",
31
+ description: "Read a single vault document. Returns its markdown body, parsed " +
32
+ "frontmatter, and metadata. Path is relative to the vault root.",
33
+ input_schema: {
34
+ type: "object",
35
+ properties: {
36
+ path: {
37
+ type: "string",
38
+ description: "Vault-relative path to the markdown file, e.g. competitive-intel/foo.md",
39
+ },
40
+ },
41
+ required: ["path"],
42
+ additionalProperties: false, // unknown argument keys are disallowed by the schema
43
+ },
44
+ },
45
+ {
46
+ name: "vault_search",
47
+ description: "Hybrid search across the vault: BM25 lexical ranking combined with " +
48
+ "vector semantic similarity. Returns ranked documents with snippets.",
49
+ input_schema: {
50
+ type: "object",
51
+ properties: {
52
+ query: { type: "string", description: "Free-text search query" },
53
+ limit: { type: "number", description: "Maximum results to return (default 10, max 50)" }, // NOTE(review): typed "number" while vault_themes.k is "integer"; the stated default/max appear only in the description text, not as schema constraints
54
+ },
55
+ required: ["query"],
56
+ additionalProperties: false,
57
+ },
58
+ },
59
+ {
60
+ name: "vault_search_related",
61
+ description: "Find documents related to a given vault document. Uses that document's " +
62
+ "own text and embeddings as the query; the document itself is excluded. " +
63
+ "Path is relative to the vault root.",
64
+ input_schema: {
65
+ type: "object",
66
+ properties: {
67
+ path: { type: "string", description: "Vault-relative path of the reference document" },
68
+ limit: { type: "number", description: "Maximum results to return (default 10, max 50)" }, // same shape as vault_search.limit
69
+ },
70
+ required: ["path"],
71
+ additionalProperties: false,
72
+ },
73
+ },
74
+ {
75
+ name: "vault_themes",
76
+ description: "Surface thematic clusters across the vault using k-means over " +
77
+ "document-pooled embeddings. Each theme reports a label, coherence " +
78
+ "score, representative documents, and frequent tags.",
79
+ input_schema: {
80
+ type: "object",
81
+ properties: {
82
+ k: { type: "integer", description: "Optional explicit cluster count.", minimum: 1 },
83
+ collection: {
84
+ type: "string",
85
+ description: "Restrict clustering to documents in this collection.",
86
+ },
87
+ },
88
+ additionalProperties: false, // no `required` list: both k and collection are optional
89
+ },
90
+ },
91
+ {
92
+ name: "vault_lint",
93
+ description: "Run the advisory curation checks across the vault: stale files, " +
94
+ "orphans, old drafts, stagnant low-confidence files, deprecated files " +
95
+ "still linked, unanswered questions, and tension health. Reports " +
96
+ "problems; never auto-fixes. Optionally filter to a single check.",
97
+ input_schema: {
98
+ type: "object",
99
+ properties: {
100
+ filter: { type: "string", description: "Restrict the report to a single check" }, // free-form string; the accepted check names are not enumerated here
101
+ },
102
+ additionalProperties: false, // no `required` list: filter is optional
103
+ },
104
+ },
105
+ {
106
+ name: "vault_tension_blast",
107
+ description: "Compute the transitive closure of downstream documents that cite or " +
108
+ "link a contested document — or the union over a contested cluster. " +
109
+ "Accepts exactly one of 'document' (vault-relative path) or 'cluster_id'.",
110
+ input_schema: {
111
+ type: "object",
112
+ properties: {
113
+ document: { type: "string", description: "Vault-relative path of a contested document" },
114
+ cluster_id: {
115
+ type: "string",
116
+ description: "A content-addressed cluster id from vault_tension_clusters",
117
+ },
118
+ },
119
+ additionalProperties: false, // NOTE(review): the "exactly one of" rule lives only in the description; this schema has no required/oneOf, so both or neither would pass it
120
+ },
121
+ },
122
+ {
123
+ name: "vault_tension_clusters",
124
+ description: "Compute connected components of the tension graph: groups of vault " +
125
+ "documents joined transitively by unresolved tensions. Each cluster " +
126
+ "reports its members, in-scope tension count, tally by kind, and age " +
127
+ "range. Read-only.",
128
+ input_schema: {
129
+ type: "object",
130
+ properties: {}, // declares no arguments
131
+ additionalProperties: false,
132
+ },
133
+ },
134
+ ];
135
+ export function buildToolSurface(vaultRoot) { // returns { defs, handler }; handler closes over vaultRoot and dispatches one tool call by name
136
+ // biome-ignore lint/suspicious/noExplicitAny: tool inputs are structural JSON from the LLM
137
+ const handler = async (name, input) => { // NOTE(review): a tool function that throws synchronously (before returning its promise) would bypass unwrap and reject this handler — confirm the src/tools handlers are all async
138
+ const inp = input ?? {}; // a null/undefined input is treated as an empty argument object
139
+ switch (name) { // one case per TOOL_DEFS entry; every tool function receives `undefined` as its last argument
140
+ case "vault_read":
141
+ return unwrap(vaultRead(vaultRoot, String(inp.path ?? ""), undefined)); // only vault_read gets a bare string; a missing path becomes ""
142
+ case "vault_search":
143
+ return unwrap(vaultSearch(vaultRoot, inp, undefined)); // this and the remaining tools receive the whole input object unchanged
144
+ case "vault_search_related":
145
+ return unwrap(vaultSearchRelated(vaultRoot, inp, undefined));
146
+ case "vault_themes":
147
+ return unwrap(vaultThemes(vaultRoot, inp, undefined));
148
+ case "vault_lint":
149
+ return unwrap(vaultLint(vaultRoot, inp, undefined));
150
+ case "vault_tension_blast":
151
+ return unwrap(vaultTensionBlast(vaultRoot, inp, undefined));
152
+ case "vault_tension_clusters":
153
+ return unwrap(vaultTensionClusters(vaultRoot, inp, undefined));
154
+ default:
155
+ return { tool_error: `unknown tool: ${name}` }; // unrecognised names yield the same error envelope shape instead of throwing
156
+ }
157
+ };
158
+ return { defs: TOOL_DEFS, handler }; // defs is the shared module-level array, not a copy
159
+ }
160
+ //# sourceMappingURL=tool-surface.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tool-surface.js","sourceRoot":"","sources":["../../src/eval/tool-surface.ts"],"names":[],"mappings":"AAAA,2BAA2B;AAC3B,4EAA4E;AAC5E,yEAAyE;AACzE,wEAAwE;AACxE,EAAE;AACF,2EAA2E;AAC3E,8EAA8E;AAC9E,yEAAyE;AACzE,EAAE;AACF,8EAA8E;AAC9E,gFAAgF;AAGhF,OAAO,EAAE,SAAS,EAAE,iBAAiB,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAC;AAC1F,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACrE,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAQjD,0EAA0E;AAC1E,8EAA8E;AAC9E,yEAAyE;AACzE,KAAK,UAAU,MAAM,CAAI,CAA4B;IACnD,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC;QAClB,OAAO,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;IAC1D,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,EAAE,UAAU,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IACpE,CAAC;AACH,CAAC;AAED,MAAM,SAAS,GAAc;IAC3B;QACE,IAAI,EAAE,YAAY;QAClB,WAAW,EACT,kEAAkE;YAClE,gEAAgE;QAClE,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,IAAI,EAAE;oBACJ,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,yEAAyE;iBACvF;aACF;YACD,QAAQ,EAAE,CAAC,MAAM,CAAC;YAClB,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,cAAc;QACpB,WAAW,EACT,qEAAqE;YACrE,qEAAqE;QACvE,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,wBAAwB,EAAE;gBAChE,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gDAAgD,EAAE;aACzF;YACD,QAAQ,EAAE,CAAC,OAAO,CAAC;YACnB,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,sBAAsB;QAC5B,WAAW,EACT,yEAAyE;YACzE,yEAAyE;YACzE,qCAAqC;QACvC,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,+CAA+C,EAAE;gBACtF,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,gDAAgD,EAAE;aACzF;YACD,QAAQ,EAAE,CAAC,MAAM,CAAC;YAClB,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,cAAc;QACpB,WAAW,EACT,gEAAgE;YAChE,oEAAoE;YACpE,qDAAqD;QACvD,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,kCAAkC,EAAE,OAAO,EAAE,CAAC,EAAE;gBACnF,
UAAU,EAAE;oBACV,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,sDAAsD;iBACpE;aACF;YACD,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,YAAY;QAClB,WAAW,EACT,kEAAkE;YAClE,uEAAuE;YACvE,kEAAkE;YAClE,kEAAkE;QACpE,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,uCAAuC,EAAE;aACjF;YACD,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,qBAAqB;QAC3B,WAAW,EACT,sEAAsE;YACtE,qEAAqE;YACrE,0EAA0E;QAC5E,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6CAA6C,EAAE;gBACxF,UAAU,EAAE;oBACV,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,4DAA4D;iBAC1E;aACF;YACD,oBAAoB,EAAE,KAAK;SAC5B;KACF;IACD;QACE,IAAI,EAAE,wBAAwB;QAC9B,WAAW,EACT,qEAAqE;YACrE,qEAAqE;YACrE,sEAAsE;YACtE,mBAAmB;QACrB,YAAY,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE,EAAE;YACd,oBAAoB,EAAE,KAAK;SAC5B;KACF;CACF,CAAC;AAEF,MAAM,UAAU,gBAAgB,CAAC,SAAiB;IAChD,2FAA2F;IAC3F,MAAM,OAAO,GAAG,KAAK,EAAE,IAAY,EAAE,KAAU,EAAoB,EAAE;QACnE,MAAM,GAAG,GAAI,KAAiC,IAAI,EAAE,CAAC;QACrD,QAAQ,IAAI,EAAE,CAAC;YACb,KAAK,YAAY;gBACf,OAAO,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,MAAM,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC;YACzE,KAAK,cAAc;gBACjB,OAAO,MAAM,CAAC,WAAW,CAAC,SAAS,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;YACxD,KAAK,sBAAsB;gBACzB,OAAO,MAAM,CAAC,kBAAkB,CAAC,SAAS,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;YAC/D,KAAK,cAAc;gBACjB,OAAO,MAAM,CAAC,WAAW,CAAC,SAAS,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;YACxD,KAAK,YAAY;gBACf,OAAO,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;YACtD,KAAK,qBAAqB;gBACxB,OAAO,MAAM,CAAC,iBAAiB,CAAC,SAAS,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;YAC9D,KAAK,wBAAwB;gBAC3B,OAAO,MAAM,CAAC,oBAAoB,CAAC,SAAS,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;YACjE;gBACE,OAAO,EAAE,UAAU,EAAE,iBAAiB,IAAI,EAAE,EAAE,CAAC;QACnD,CAAC;IACH,CAAC,CAAC;IAEF,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC;AACtC,CAAC"}
@@ -0,0 +1,173 @@
1
+ import type { Result } from "../frontmatter/types.js";
2
+ export declare const TIERS: readonly ["retrieval", "cross_reference", "contradiction"]; // the three question tiers as a fixed-order readonly tuple
3
+ export type Tier = (typeof TIERS)[number]; // union of the three TIERS string literals
4
+ export declare const TIER_WEIGHT: Record<Tier, number>; // exactly one numeric weight per tier
5
+ export interface Question { // one eval question together with its reference answer and sources
6
+ id: string; // not among the properties listed in QuestionSetSchema
7
+ tier: Tier;
8
+ question: string;
9
+ expected_answer: string;
10
+ expected_sources: string[];
11
+ origin: "generated" | "augmented"; // not among the properties listed in QuestionSetSchema
12
+ }
13
+ export interface QuestionSet { // a set of questions plus the metadata recorded alongside it
14
+ id: string;
15
+ vault_hash: string;
16
+ seed: string;
17
+ timestamp: string; // a string; the exact format is not fixed by this type
18
+ subgraph: { // inline shape: a seed document, node list and typed edges
19
+ seed_doc: string;
20
+ nodes: string[];
21
+ edges: SubgraphEdge[];
22
+ };
23
+ questions: Question[];
24
+ generator_model: string;
25
+ prompt_version: number;
26
+ tier_counts_requested: Record<Tier, number>; // per-tier counts; requested and produced are stored separately
27
+ tier_counts_produced: Record<Tier, number>;
28
+ }
29
+ export interface SubgraphEdge { // a directed, typed edge (from -> to)
30
+ from: string; // NOTE(review): presumably vault-relative document paths, like QuestionSet.subgraph.nodes — confirm
31
+ to: string;
32
+ kind: "sources" | "link" | "tension" | "superseded"; // closed set of four edge kinds
33
+ }
34
+ export interface Trace { // record of one answer attempt: the tool calls made, the final answer and usage counters
35
+ tool_calls: ToolCall[];
36
+ final_answer: string;
37
+ total_tool_calls: number; // NOTE(review): presumably equals tool_calls.length — confirm against the code that builds the trace
38
+ input_tokens: number;
39
+ output_tokens: number;
40
+ wall_ms: number;
41
+ stop_reason: string; // plain string; the set of possible values is not constrained by this type
42
+ }
43
+ export interface ToolCall { // one tool invocation recorded inside a Trace
44
+ tool: string;
45
+ input: unknown; // payloads are deliberately untyped here
46
+ output: unknown;
47
+ latency_ms: number;
48
+ }
49
+ export type RunStatus = "complete" | "incomplete"; // the same two literals used as the `status` discriminant of PerRunResult
50
+ interface PerRunResultBase { // not exported: fields shared by both PerRunResult variants
51
+ question_id: string;
52
+ question_index: number;
53
+ k_index: number;
54
+ }
55
+ export type PerRunResult = (PerRunResultBase & { // union discriminated on `status`
56
+ status: "complete";
57
+ trace: Trace; // a complete run always carries a Trace
58
+ }) | (PerRunResultBase & {
59
+ status: "incomplete";
60
+ trace: null; // an incomplete run never carries one
61
+ });
62
+ export interface EvalRun { // the per-run results recorded for one question set
63
+ id: string;
64
+ questions_id: string; // NOTE(review): presumably the QuestionSet.id this run answered — confirm
65
+ answerer_model: string;
66
+ prompt_version: number;
67
+ timestamp: string;
68
+ k: number;
69
+ runs: Record<string, PerRunResult>; // keyed by string; the key format is not visible in this file
70
+ }
71
+ export type GradeVerdict = "yes" | "partial" | "no" | "ungraded"; // closed set of four verdicts
72
+ export interface Grade { // a verdict for one (question, k_index) run; it carries the same three identifying fields as PerRunResultBase
73
+ question_id: string;
74
+ question_index: number;
75
+ k_index: number;
76
+ verdict: GradeVerdict;
77
+ reasoning: string;
78
+ grader_model: string;
79
+ }
80
+ export interface TierScore { // aggregate numbers for a single tier
81
+ mean: number;
82
+ std: number;
83
+ n: number;
84
+ trace_efficiency: number; // how this is computed is not visible in this file
85
+ }
86
+ export interface Score { // overall score plus a per-tier breakdown and the metadata recorded with it
87
+ score: number;
88
+ score_std: number;
89
+ by_tier: Record<Tier, TierScore>; // a full TierScore per tier (HistoryEntry.by_tier stores only a number)
90
+ models: { // one model name for each of the three roles
91
+ generator: string;
92
+ answerer: string;
93
+ grader: string;
94
+ };
95
+ prompt_version: number;
96
+ spec_version: number; // NOTE(review): presumably set from SPEC_VERSION — confirm
97
+ questions_id: string;
98
+ results_id: string;
99
+ vault_hash: string;
100
+ k: number; // NOTE(review): presumably mirrors EvalRun.k; the meaning of n is not visible here — confirm both
101
+ n: number;
102
+ timestamp: string;
103
+ }
104
+ export interface HistoryEntry { // element type of HistoryFile.runs; repeats most Score fields in flattened form
105
+ score_id: string;
106
+ score: number;
107
+ score_std: number;
108
+ by_tier: Record<Tier, number>; // a single number per tier, unlike Score.by_tier which holds a TierScore
109
+ vault_hash: string;
110
+ timestamp: string;
111
+ n: number;
112
+ k: number;
113
+ models: { // same three-role shape as Score.models
114
+ generator: string;
115
+ answerer: string;
116
+ grader: string;
117
+ };
118
+ prompt_version: number;
119
+ spec_version: number;
120
+ }
121
+ export interface HistoryFile { // versioned container for an array of HistoryEntry
122
+ version: 1; // literal type: only the value 1 type-checks
123
+ runs: HistoryEntry[];
124
+ }
125
+ export declare const HISTORY_RETENTION = 50; // NOTE(review): presumably the cap on HistoryFile.runs length — usage is not visible here
126
+ export declare const SPEC_VERSION = 1; // literal-typed constant; Score and HistoryEntry each have a spec_version field
127
+ export type CortexEvalError = { // tagged union on `kind`; every variant carries a message
128
+ kind: "config";
129
+ message: string;
130
+ } | {
131
+ kind: "runtime";
132
+ message: string;
133
+ } | {
134
+ kind: "llm";
135
+ message: string;
136
+ retryable: boolean; // present only on the "llm" variant
137
+ };
138
+ export declare const QuestionSetSchema: { // fully literal, readonly type of a JSON-Schema-style object
139
+ readonly type: "object";
140
+ readonly required: readonly ["questions"];
141
+ readonly properties: {
142
+ readonly questions: {
143
+ readonly type: "array";
144
+ readonly items: {
145
+ readonly type: "object";
146
+ readonly required: readonly ["tier", "question", "expected_answer", "expected_sources"]; // four of the six Question fields; id and origin are not listed
147
+ readonly properties: {
148
+ readonly tier: {
149
+ readonly enum: readonly ["retrieval", "cross_reference", "contradiction"]; // same three literals as TIERS
150
+ };
151
+ readonly question: {
152
+ readonly type: "string";
153
+ readonly minLength: 1;
154
+ };
155
+ readonly expected_answer: {
156
+ readonly type: "string";
157
+ readonly minLength: 1;
158
+ };
159
+ readonly expected_sources: {
160
+ readonly type: "array";
161
+ readonly items: {
162
+ readonly type: "string";
163
+ readonly minLength: 1;
164
+ };
165
+ readonly minItems: 1;
166
+ };
167
+ };
168
+ };
169
+ };
170
+ };
171
+ };
172
+ export type { Result };
173
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/eval/types.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAItD,eAAO,MAAM,KAAK,4DAA6D,CAAC;AAChF,MAAM,MAAM,IAAI,GAAG,CAAC,OAAO,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC;AAG1C,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAI5C,CAAC;AAIF,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,CAAC;IACxB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,MAAM,EAAE,WAAW,GAAG,WAAW,CAAC;CACnC;AAED,MAAM,WAAW,WAAW;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE;QACR,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,KAAK,EAAE,YAAY,EAAE,CAAC;KACvB,CAAC;IACF,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;IACvB,qBAAqB,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAC5C,oBAAoB,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;CAC5C;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,SAAS,GAAG,MAAM,GAAG,SAAS,GAAG,YAAY,CAAC;CACrD;AAID,MAAM,WAAW,KAAK;IACpB,UAAU,EAAE,QAAQ,EAAE,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,OAAO,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,MAAM,SAAS,GAAG,UAAU,GAAG,YAAY,CAAC;AAElD,UAAU,gBAAgB;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;CACjB;AAKD,MAAM,MAAM,YAAY,GACpB,CAAC,gBAAgB,GAAG;IAAE,MAAM,EAAE,UAAU,CAAC;IAAC,KAAK,EAAE,KAAK,CAAA;CAAE,CAAC,GACzD,CAAC,gBAAgB,GAAG;IAAE,MAAM,EAAE,YAAY,CAAC;IAAC,KAAK,EAAE,IAAI,CAAA;CAAE,CAAC,CAAC;AAE/D,MAAM,WAAW,OAAO;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,CAAC,EAAE,MAAM,CAAC;IAEV,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;CACpC;AAID,MAA
M,MAAM,YAAY,GAAG,KAAK,GAAG,SAAS,GAAG,IAAI,GAAG,UAAU,CAAC;AAEjE,MAAM,WAAW,KAAK;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,YAAY,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,CAAC,EAAE,MAAM,CAAC;IACV,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,KAAK;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IACjC,MAAM,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAChE,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,SAAS,EAAE,MAAM,CAAC;CACnB;AAID,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,MAAM,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAChE,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,YAAY,EAAE,CAAC;CACtB;AAED,eAAO,MAAM,iBAAiB,KAAK,CAAC;AACpC,eAAO,MAAM,YAAY,IAAI,CAAC;AAQ9B,MAAM,MAAM,eAAe,GACvB;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GACnC;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GACpC;IAAE,IAAI,EAAE,KAAK,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,OAAO,CAAA;CAAE,CAAC;AAUzD,eAAO,MAAM,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsBpB,CAAC;AAGX,YAAY,EAAE,MAAM,EAAE,CAAC"}
@@ -0,0 +1,44 @@
1
+ // src/eval/types.ts
2
+ // Shared types for the cortex quality metric. Pure data shapes; no logic.
3
+ // See docs/superpowers/specs/2026-05-31-cortex-quality-metric-design.md.
4
+ // --- Tiers ---
5
+ export const TIERS = ["retrieval", "cross_reference", "contradiction"]; // also reused as the `enum` for `tier` in QuestionSetSchema
6
+ // Tier weight for the aggregate score formula.
7
+ export const TIER_WEIGHT = { // one entry per TIERS member; weights step 1, 2, 3 in TIERS order
8
+ retrieval: 1,
9
+ cross_reference: 2,
10
+ contradiction: 3,
11
+ };
12
+ export const HISTORY_RETENTION = 50; // NOTE(review): presumably the maximum number of history entries kept — usage is not visible in this file
13
+ export const SPEC_VERSION = 1; // NOTE(review): presumably the value written to the `spec_version` fields declared in types.d.ts — confirm
14
+ // --- JSON Schema for generator output ---
15
+ // The generator LLM is asked to return JSON matching this schema. Embedded
16
+ // here so the prompt, runtime validator, and types share one source of truth.
17
+ // NOTE: kept in sync MANUALLY with the `Question` interface above — there is no
18
+ // codegen between them. When you add/rename a Question field that the generator
19
+ // produces, update both this schema and `Question` in the same edit. (`id` and
20
+ // `origin` are assigned post-generation, so they are intentionally absent here.)
21
+ export const QuestionSetSchema = { // top level: an object that must contain a `questions` array
22
+ type: "object",
23
+ required: ["questions"],
24
+ properties: {
25
+ questions: {
26
+ type: "array", // no minItems here, so an empty questions array passes this schema
27
+ items: {
28
+ type: "object",
29
+ required: ["tier", "question", "expected_answer", "expected_sources"], // all four listed properties are mandatory
30
+ properties: { // no additionalProperties constraint, so extra keys are not rejected by this schema
31
+ tier: { enum: TIERS }, // reuses the TIERS array instead of repeating the literals
32
+ question: { type: "string", minLength: 1 }, // empty strings are rejected
33
+ expected_answer: { type: "string", minLength: 1 },
34
+ expected_sources: {
35
+ type: "array",
36
+ items: { type: "string", minLength: 1 },
37
+ minItems: 1, // at least one source per question
38
+ },
39
+ },
40
+ },
41
+ },
42
+ },
43
+ };
44
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/eval/types.ts"],"names":[],"mappings":"AAAA,oBAAoB;AACpB,0EAA0E;AAC1E,yEAAyE;AAIzE,gBAAgB;AAEhB,MAAM,CAAC,MAAM,KAAK,GAAG,CAAC,WAAW,EAAE,iBAAiB,EAAE,eAAe,CAAU,CAAC;AAGhF,+CAA+C;AAC/C,MAAM,CAAC,MAAM,WAAW,GAAyB;IAC/C,SAAS,EAAE,CAAC;IACZ,eAAe,EAAE,CAAC;IAClB,aAAa,EAAE,CAAC;CACjB,CAAC;AAyIF,MAAM,CAAC,MAAM,iBAAiB,GAAG,EAAE,CAAC;AACpC,MAAM,CAAC,MAAM,YAAY,GAAG,CAAC,CAAC;AAa9B,2CAA2C;AAC3C,2EAA2E;AAC3E,8EAA8E;AAC9E,gFAAgF;AAChF,gFAAgF;AAChF,+EAA+E;AAC/E,iFAAiF;AAEjF,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,IAAI,EAAE,QAAQ;IACd,QAAQ,EAAE,CAAC,WAAW,CAAC;IACvB,UAAU,EAAE;QACV,SAAS,EAAE;YACT,IAAI,EAAE,OAAO;YACb,KAAK,EAAE;gBACL,IAAI,EAAE,QAAQ;gBACd,QAAQ,EAAE,CAAC,MAAM,EAAE,UAAU,EAAE,iBAAiB,EAAE,kBAAkB,CAAC;gBACrE,UAAU,EAAE;oBACV,IAAI,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE;oBACrB,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC,EAAE;oBAC1C,eAAe,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC,EAAE;oBACjD,gBAAgB,EAAE;wBAChB,IAAI,EAAE,OAAO;wBACb,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC,EAAE;wBACvC,QAAQ,EAAE,CAAC;qBACZ;iBACF;aACF;SACF;KACF;CACO,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "daftari",
3
- "version": "1.15.0",
3
+ "version": "1.16.0",
4
4
  "description": "An open-source, multi-user knowledge vault exposed to AI agents via an MCP server.",
5
5
  "license": "MIT",
6
6
  "author": "Mihir Wagle / mavaali",
@@ -53,6 +53,7 @@
53
53
  ]
54
54
  },
55
55
  "dependencies": {
56
+ "@anthropic-ai/sdk": "^0.100.1",
56
57
  "@huggingface/transformers": "^4.2.0",
57
58
  "@modelcontextprotocol/sdk": "^1.29.0",
58
59
  "better-sqlite3": "^12.10.0",