@kweaver-ai/kweaver-sdk 0.8.3 → 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/agent-chat.d.ts +10 -2
- package/dist/api/agent-chat.js +19 -5
- package/dist/api/datasources.d.ts +14 -0
- package/dist/api/datasources.js +14 -0
- package/dist/cli.js +2 -14
- package/dist/client.d.ts +7 -1
- package/dist/client.js +7 -1
- package/dist/commands/bkn-ops.d.ts +1 -1
- package/dist/commands/bkn-ops.js +42 -21
- package/dist/commands/bkn.js +6 -3
- package/dist/commands/ds.d.ts +0 -31
- package/dist/commands/ds.js +18 -448
- package/dist/commands/explore-bkn.d.ts +7 -1
- package/dist/commands/explore-bkn.js +32 -3
- package/dist/resources/datasources.d.ts +7 -0
- package/dist/resources/datasources.js +7 -0
- package/dist/templates/explorer/bkn.js +860 -9
- package/dist/templates/explorer/index.html +1 -0
- package/dist/templates/explorer/style.css +225 -0
- package/dist/templates/explorer/vendor/g6.min.js +68 -0
- package/dist/trace-ai/eval-set/schemas.d.ts +1 -0
- package/dist/trace-ai/eval-set/schemas.js +4 -0
- package/dist/trace-ai/eval-set/types.d.ts +2 -0
- package/dist/trace-ai/exp/capture-fingerprint.d.ts +10 -0
- package/dist/trace-ai/exp/capture-fingerprint.js +12 -0
- package/dist/trace-ai/exp/context/context-assembler.d.ts +18 -0
- package/dist/trace-ai/exp/context/context-assembler.js +42 -0
- package/dist/trace-ai/exp/context/failure-analyzer.d.ts +22 -0
- package/dist/trace-ai/exp/context/failure-analyzer.js +59 -0
- package/dist/trace-ai/exp/context/kn-data-prober.d.ts +13 -0
- package/dist/trace-ai/exp/context/kn-data-prober.js +38 -0
- package/dist/trace-ai/exp/context/kn-schema-client.d.ts +14 -0
- package/dist/trace-ai/exp/context/kn-schema-client.js +41 -0
- package/dist/trace-ai/exp/context/retrieval-health.d.ts +32 -0
- package/dist/trace-ai/exp/context/retrieval-health.js +138 -0
- package/dist/trace-ai/exp/context/vega-catalog-client.d.ts +14 -0
- package/dist/trace-ai/exp/context/vega-catalog-client.js +15 -0
- package/dist/trace-ai/exp/coordinator.d.ts +34 -21
- package/dist/trace-ai/exp/coordinator.js +246 -24
- package/dist/trace-ai/exp/eval-runner.js +4 -2
- package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +1 -0
- package/dist/trace-ai/exp/exp-store/events-jsonl.js +18 -0
- package/dist/trace-ai/exp/exp-store/expected-fingerprint.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/expected-fingerprint.js +31 -0
- package/dist/trace-ai/exp/exp-store/index.d.ts +63 -2
- package/dist/trace-ai/exp/exp-store/index.js +2 -1
- package/dist/trace-ai/exp/exp-store/rollback-yaml.d.ts +12 -0
- package/dist/trace-ai/exp/exp-store/rollback-yaml.js +29 -0
- package/dist/trace-ai/exp/index.d.ts +2 -0
- package/dist/trace-ai/exp/index.js +68 -3
- package/dist/trace-ai/exp/info.js +1 -1
- package/dist/trace-ai/exp/patch/index.d.ts +13 -2
- package/dist/trace-ai/exp/patch/index.js +65 -10
- package/dist/trace-ai/exp/patch/kn-api-client.d.ts +40 -0
- package/dist/trace-ai/exp/patch/kn-api-client.js +14 -0
- package/dist/trace-ai/exp/patch/kn.d.ts +8 -0
- package/dist/trace-ai/exp/patch/kn.js +36 -0
- package/dist/trace-ai/exp/patch/skill-api-client.d.ts +17 -0
- package/dist/trace-ai/exp/patch/skill-api-client.js +14 -0
- package/dist/trace-ai/exp/patch/skill-content.d.ts +9 -0
- package/dist/trace-ai/exp/patch/skill-content.js +12 -0
- package/dist/trace-ai/exp/preflight.d.ts +77 -0
- package/dist/trace-ai/exp/preflight.js +148 -0
- package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +3 -14
- package/dist/trace-ai/exp/providers/synthesizer-client.js +53 -35
- package/dist/trace-ai/exp/providers/triage-client.d.ts +15 -2
- package/dist/trace-ai/exp/providers/triage-client.js +143 -28
- package/dist/trace-ai/exp/run-preflight.d.ts +19 -0
- package/dist/trace-ai/exp/run-preflight.js +56 -0
- package/dist/trace-ai/exp/schemas.d.ts +402 -44
- package/dist/trace-ai/exp/schemas.js +131 -18
- package/dist/utils/deprecation.d.ts +1 -0
- package/dist/utils/deprecation.js +18 -0
- package/package.json +2 -1
|
@@ -1,39 +1,57 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
1
|
+
/**
 * Build the knowledge-network (KN) context section of an LLM prompt.
 *
 * The section lists the KN's existing object types and relation types, the
 * dataviews that are available, optional data-probe record counts, and the
 * instructions the model should follow when generating a KN patch.
 * Every list that turns out empty is rendered as " (none)" so the model never
 * sees a heading followed by nothing.
 *
 * @param {object} ctx
 * @param {*} ctx.kn_id - Interpolated into the section heading and instruction 5.
 * @param {{object_types: Array, relation_types: Array}} ctx.existing_schema
 *   object_types entries are read as {concept_name, fields: [{name, type}]};
 *   relation_types entries as {concept_name, source, target, join_key}.
 * @param {Array} ctx.available_dataviews - Entries read as {id, name, columns: [{name, type}]}.
 * @param {Array} [ctx.data_probes] - Entries read as {concept_name, data_view_id, total_records};
 *   the "Data Probes" sub-section is emitted only when this array is non-empty.
 * @returns {string} The prompt section with surrounding whitespace trimmed.
 */
export function buildKnContextPrompt(ctx) {
    const NONE = " (none)";
    // Shared "name(type), name(type)" rendering for object-type fields and dataview columns.
    const describeColumns = (columns) => columns.map((c) => `${c.name}(${c.type})`).join(", ");
    const existingTypes = ctx.existing_schema.object_types
        .map((t) => ` - ${t.concept_name}: fields=[${describeColumns(t.fields)}]`)
        .join("\n") || NONE;
    const existingRelations = ctx.existing_schema.relation_types
        .map((r) => ` - ${r.concept_name}: ${r.source} → ${r.target}, join_key="${r.join_key}"`)
        .join("\n") || NONE;
    // Same placeholder as the two lists above; previously an empty list left the section blank.
    const dataviews = ctx.available_dataviews
        .map((dv) => ` - id="${dv.id}" name="${dv.name}"\n columns=[${describeColumns(dv.columns)}]`)
        .join("\n") || NONE;
    const probes = ctx.data_probes && ctx.data_probes.length > 0
        ? "\n## Data Probes (live record counts from KN)\n" + ctx.data_probes
            .map((p) => ` - ${p.concept_name} (data_view: ${p.data_view_id}) → ${p.total_records} records`)
            .join("\n")
        : "";
    return `
## Existing KN Schema (kn_id: ${ctx.kn_id})
Object types:
${existingTypes}
Relation types:
${existingRelations}

## Available Vega Dataviews
${dataviews}${probes}

## Instructions for generating the KN patch:
1. Find the dataview whose name contains the missing concept_name as a suffix substring
(e.g. concept "vehicle_sales" matches "ht_data_513_vehicle_sales" because name.endsWith("vehicle_sales") after split by "_" heuristic).
If multiple candidates exist, pick the closest suffix match.
2. Extract data_properties from that dataview's columns.
3. Infer primary_keys: look for a column named "{concept_name}_id". If not found, use "id".
Include only the primary key column(s), not all columns.
4. For relation_type inference:
- Normalize field names: lowercase + remove underscores (e.g. "VEHICLEID" → "vehicleid", "vehicle_id" → "vehicleid").
- If any existing KN object_type has a field whose normalized name equals any column in the new dataview:
propose a relation_type with join_key = "{existing_field_name} → {new_column_name}".
5. Set kn_id to "${ctx.kn_id}".
`.trim();
}
|
|
40
|
+
/**
 * Build the skill context section of an LLM prompt.
 *
 * Each bound skill is rendered as a "### skill_id" heading followed by the
 * skill's content; skills are separated by a "---" rule. The section ends with
 * the instructions for generating a skill.content patch.
 *
 * @param {object} ctx
 * @param {Array} ctx.bound_skills - Entries read as {id, version, content}.
 *   An empty list is rendered as " (none)" instead of a blank section.
 * @returns {string} The prompt section with surrounding whitespace trimmed.
 */
export function buildSkillContextPrompt(ctx) {
    const skillDocs = ctx.bound_skills
        .map((s) => `### skill_id: "${s.id}" (version: ${s.version})\n${s.content}`)
        .join("\n\n---\n\n") || " (none)";
    return `
## Currently Bound Skills
${skillDocs}

## Instructions for generating the skill.content patch:
1. Read the failure evidence carefully. It names a specific tool (e.g. "query_object_instance").
2. Search each skill's content for that tool name. The skill that documents the tool is the one to patch.
Set skill_id to that skill's id.
3. Generate append_section that fixes the capability gap described in the failure evidence.
- Match the existing document style (heading level, bullet format, code examples if any).
- Be specific: if the issue is "no sort_by", show exactly how to pass sort_by.
- If the issue is "no pagination", show the search_after loop pattern.
`.trim();
}
|
|
@@ -1,19 +1,32 @@
|
|
|
1
|
-
import type { RoundData } from "../schemas.js";
|
|
1
|
+
import type { RoundData, FailureAttribution, QueryFailureAnalysis, Mission, NextChange, KnContext, SkillContext } from "../schemas.js";
|
|
2
2
|
/** Everything a triage call needs to build its prompt for one round. */
export interface TriageInput {
    /** The round being judged; its scores and per-query results feed the prompt. */
    currentRound: RoundData;
    /** Earlier rounds, rendered as the "PREVIOUS ROUND HISTORY" lines. */
    prevRounds: Array<RoundData>;
    /** Candidate configuration, dumped as YAML into the prompt. */
    candidateConfig: Record<string, unknown>;
    /** Optional text shown to the model as "CROSS-ROUND CONTEXT". */
    crossRoundMemoryRef?: string;
    /** Optional per-query failure analysis; when non-empty it replaces the plain failed-queries list. */
    failureAnalysis?: Array<QueryFailureAnalysis>;
    /** Mission definition; its goal and enabled_targets are read by the prompt builder. */
    mission: Mission;
    /** Optional KN context appended to the prompt. */
    kn_context?: KnContext;
    /** Optional skill context appended to the prompt. */
    skill_context?: SkillContext;
}
|
|
8
12
|
/** Structured outcome of one triage call. */
export interface TriageResult {
    /** Decision for the experiment loop. */
    verdict: "continue" | "publish" | "abort";
    /** Summary text taken from the LLM response (empty string when the response has none). */
    summary: string;
    /** Root-cause attributions extracted from the LLM response. */
    failure_attribution: Array<FailureAttribution>;
    /** Next change to apply. Present when verdict === "continue"; absent otherwise. */
    next_change?: NextChange;
    /** Diagnoses extracted from the LLM response (defaults to [summary]). */
    diagnoses: Array<string>;
    /** Hints extracted from the LLM response. */
    hints: Array<string>;
    /** Opaque memory token passed across rounds. */
    new_memory_token: string;
}
|
|
14
25
|
/** Contract for anything that can turn a round's data into a triage decision. */
export interface TriageClient {
    /** Analyze the given round and resolve with the structured triage result. */
    triage(input: TriageInput): Promise<TriageResult>;
}
|
|
28
|
+
/**
 * Parse the raw text of a triage LLM response into a TriageResult.
 * Throws an Error when the text is not valid JSON, the verdict is not one of
 * "continue" | "publish" | "abort", a failure_attribution item is invalid, or
 * verdict is "continue" without a valid next_change.
 */
export declare function parseTriageOutput(raw: string): TriageResult;
|
|
29
|
+
/** Build the full triage prompt text (goal, candidate config, scores, failures, history, output contract) for the given input. */
export declare function buildTriagePrompt(input: TriageInput): string;
|
|
17
30
|
/** TriageClient implementation that sends the triage prompt to the "claude-code" agent provider. */
export declare class ClaudeCodeTriageClient implements TriageClient {
    /** Build the prompt for `input`, invoke the provider, and parse its raw text reply. */
    triage(input: TriageInput): Promise<TriageResult>;
}
|
|
@@ -1,51 +1,166 @@
|
|
|
1
1
|
// src/trace-ai/exp/providers/triage-client.ts
|
|
2
|
+
import yaml from "js-yaml";
|
|
2
3
|
import { z } from "zod";
|
|
3
4
|
import { defaultRegistry } from "../../../agent-providers/registry.js";
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
5
|
+
import { FailureAttributionSchema, NextChangeSchema } from "../schemas.js";
|
|
6
|
+
import { buildKnContextPrompt, buildSkillContextPrompt } from "./synthesizer-client.js";
|
|
7
|
+
/**
 * Extract the JSON payload from raw LLM text.
 *
 * Candidates are tried in order and the first one that parses as JSON wins:
 *   1. the body of the first Markdown code fence (```json … ``` or bare ``` … ```);
 *   2. the span from the first fence opener to the LAST fence closer, which
 *      recovers payloads whose string values themselves contain ``` fences;
 *   3. the whole trimmed text;
 *   4. the span from the first "{" to the last "}", for JSON wrapped in prose
 *      without any fence.
 * When no candidate parses, the result is the first fence body if a fence
 * exists, otherwise the trimmed input, so the caller still sees the text it
 * would have reported before.
 *
 * @param {string} raw - Raw model output.
 * @returns {string} Trimmed text that is most likely the JSON document.
 */
function unwrapJson(raw) {
    const text = raw.trim();
    const firstFence = raw.match(/```(?:json)?\s*\n?([\s\S]*?)```/);
    const widestFence = raw.match(/```(?:json)?\s*\n?([\s\S]*)```/);
    const candidates = [];
    if (firstFence) {
        candidates.push(firstFence[1].trim());
    }
    if (widestFence) {
        candidates.push(widestFence[1].trim());
    }
    candidates.push(text);
    const open = text.indexOf("{");
    const close = text.lastIndexOf("}");
    if (open !== -1 && close > open) {
        candidates.push(text.slice(open, close + 1));
    }
    for (const candidate of candidates) {
        try {
            JSON.parse(candidate);
            return candidate;
        }
        catch {
            // Not JSON — fall through to the next, wider candidate.
        }
    }
    return firstFence ? firstFence[1].trim() : text;
}
|
|
16
|
+
/**
 * Parse the raw text of a triage LLM response into a structured result.
 *
 * @param {string} raw - Raw model output; a Markdown fence around the JSON is tolerated.
 * @returns {{verdict: string, summary: string, failure_attribution: Array, next_change: (object|undefined),
 *   diagnoses: string[], hints: string[], new_memory_token: string}}
 *   - summary falls back to "" when missing or not a string;
 *   - diagnoses falls back to [summary] when the response has no array;
 *   - hints falls back to [];
 *   - new_memory_token falls back to summary;
 *   - next_change is set only when verdict is "continue".
 * @throws {Error} When the text is not valid JSON, the JSON is not an object,
 *   the verdict is not "continue" | "publish" | "abort", a failure_attribution
 *   item fails FailureAttributionSchema, or verdict is "continue" without a
 *   next_change that passes NextChangeSchema.
 */
export function parseTriageOutput(raw) {
    // String(raw) keeps the error message usable even if a non-string slips through.
    const preview = () => String(raw).slice(0, 200);
    let data;
    try {
        data = JSON.parse(unwrapJson(raw));
    }
    catch (cause) {
        throw new Error(`Triage output is not valid JSON: ${preview()}`, { cause });
    }
    // JSON.parse can legally return null, a primitive or an array; reading
    // data["verdict"] on null would otherwise crash with an opaque TypeError.
    if (data === null || typeof data !== "object" || Array.isArray(data)) {
        throw new Error(`Triage output is not a JSON object: ${preview()}`);
    }
    const verdict = data["verdict"];
    if (verdict !== "continue" && verdict !== "publish" && verdict !== "abort") {
        throw new Error(`Invalid verdict in triage output: ${String(verdict)}`);
    }
    const summary = typeof data["summary"] === "string" ? data["summary"] : "";
    const rawAttribution = Array.isArray(data["failure_attribution"]) ? data["failure_attribution"] : [];
    const failure_attribution = rawAttribution.map((item) => {
        const parsed = FailureAttributionSchema.safeParse(item);
        if (!parsed.success) {
            throw new Error(`Invalid failure_attribution item: ${JSON.stringify(item)}`);
        }
        return parsed.data;
    });
    let next_change;
    if (verdict === "continue") {
        if (data["next_change"] === undefined || data["next_change"] === null) {
            throw new Error(`verdict=continue requires next_change in triage output`);
        }
        const parsed = NextChangeSchema.safeParse(data["next_change"]);
        if (!parsed.success) {
            throw new Error(`Invalid next_change in triage output: ${parsed.error.message}`);
        }
        next_change = parsed.data;
    }
    const diagnoses = Array.isArray(data["diagnoses"])
        ? data["diagnoses"].filter((d) => typeof d === "string")
        : [summary];
    const hints = Array.isArray(data["hints"])
        ? data["hints"].filter((h) => typeof h === "string")
        : [];
    const new_memory_token = typeof data["new_memory_token"] === "string" ? data["new_memory_token"] : summary;
    return { verdict, summary, failure_attribution, next_change, diagnoses, hints, new_memory_token };
}
|
|
57
|
+
/**
 * Build the full triage prompt for one evaluation round.
 *
 * The prompt contains, in order: the mission goal, the candidate config as
 * YAML, the round's scores, a failure section, trajectory issues (queries with
 * more than one retry), the previous-round history, optional cross-round and
 * KN/skill context, the JSON output contract, the attribution rules, and one
 * next_change example per enabled target.
 *
 * @param {object} input - See TriageInput.
 *   input.failureAnalysis, when non-empty, is rendered as "FAILURE ANALYSIS";
 *   otherwise the failed assertions of input.currentRound.per_query_results are
 *   listed under "FAILED QUERIES".
 *   input.mission.enabled_targets restricts the suggested_target choices and
 *   the examples; when it is empty or missing, "agent.system_prompt" is used.
 * @returns {string} The prompt text.
 */
export function buildTriagePrompt(input) {
    const r = input.currentRound;
    const queryResults = r.per_query_results ?? [];
    const scoresSummary = r.scores
        ? `outcome=${r.scores.outcome.toFixed(2)}, trajectory=${r.scores.trajectory.toFixed(2)}, guardrail=${r.scores.guardrail.toFixed(2)}`
        : "no scores";
    let failureSection;
    if (input.failureAnalysis && input.failureAnalysis.length > 0) {
        const lines = input.failureAnalysis.map((fa) => {
            let entry = `${fa.query_id} [${fa.verdict}]: ${fa.assertion_reason}`;
            if (fa.tool_call_summary.length > 0) {
                entry += `\n Tools: ${fa.tool_call_summary.join("; ")}`;
            }
            return entry;
        }).join("\n");
        failureSection = `FAILURE ANALYSIS:\n${lines}`;
    }
    else {
        const failedQueries = queryResults
            .filter((q) => q.assertion_results.some((a) => a.verdict === "fail"))
            .map((q) => `${q.query_id}: ${q.assertion_results.filter((a) => a.verdict === "fail").map((a) => a.type).join(", ")}`)
            .join("\n");
        failureSection = `FAILED QUERIES:\n${failedQueries || "None"}`;
    }
    let contextSection = "";
    if (input.kn_context) {
        contextSection += "\n\n" + buildKnContextPrompt(input.kn_context);
    }
    if (input.skill_context) {
        contextSection += "\n\n" + buildSkillContextPrompt(input.skill_context);
    }
    // Restrict suggested_target choices + output examples to layers this mission opted into.
    // MissionSchema is expected to default empty / missing enabled_targets to
    // ["agent.system_prompt"]; the same default is applied here so that a mission
    // object that bypassed the schema cannot produce an empty enum and no examples.
    const enabled = new Set(input.mission.enabled_targets);
    if (enabled.size === 0) {
        enabled.add("agent.system_prompt");
    }
    const targetEnumLine = [...enabled].map((t) => `"${t}"`).join(" | ");
    const allExamples = [
        ["agent.system_prompt", `# agent.system_prompt — patch is a JSON string (escaped) or object with {agent:{system_prompt}}
{"target":"agent.system_prompt","hypothesis":"Add explicit stop condition","patch":"{\\"agent\\":{\\"system_prompt\\":\\"New prompt here\\"}}"}`],
        ["agent.skills", `# agent.skills — patch is structured {unbind:[skill_id...], bind:[{id,version}...]}
{"target":"agent.skills","hypothesis":"Swap retrieval skill to v2","patch":{"unbind":["retrieval-v1"],"bind":[{"id":"retrieval-v2","version":"v2"}]}}`],
        ["kn.object_type", `# kn.object_type — patch is {kn_id, add_object_types:[{concept_name, dataview_id, primary_keys, data_properties}], add_relation_types:[]}
{"target":"kn.object_type","hypothesis":"Add vehicle_sales concept","patch":{"kn_id":"kn-x","add_object_types":[{"concept_name":"vehicle_sales","dataview_id":"dv-001","primary_keys":["vehicle_sales_id"],"data_properties":[{"name":"sales","type":"integer"},{"name":"month","type":"string"}]}],"add_relation_types":[]}}`],
        ["kn.relation_type", `# kn.relation_type — patch is {kn_id, add_object_types:[], add_relation_types:[{concept_name, source_object_type, target_object_type, join_key}]}
{"target":"kn.relation_type","hypothesis":"Link sales to vehicle","patch":{"kn_id":"kn-x","add_object_types":[],"add_relation_types":[{"concept_name":"sold_for","source_object_type":"vehicle_sales","target_object_type":"vehicle","join_key":"vehicle_id"}]}}`],
        ["skill.content", `# skill.content — patch is {skill_id, append_section}
{"target":"skill.content","hypothesis":"Document sort_by usage","patch":{"skill_id":"query-sop","append_section":"## Sort_by usage\\nPass sort_by=[{field, order}] to query_object_instance for ordering."}}`],
    ];
    const examplesBlock = allExamples
        .filter(([target]) => enabled.has(target))
        .map(([, body]) => body)
        .join("\n\n");
    return `You are an agent evaluation planner. Analyze the current round results, decide whether to continue/publish/abort, and (if continuing) propose the next change to try in one pass.

GOAL: ${input.mission.goal}

CURRENT CANDIDATE CONFIG:
${yaml.dump(input.candidateConfig, { lineWidth: 80 })}

ROUND ${r.round} SCORES: ${scoresSummary}

${failureSection}

TRAJECTORY ISSUES:
${queryResults.filter((q) => q.trajectory_summary.retry_count > 1).map((q) => `${q.query_id}: ${q.trajectory_summary.retry_count} retries`).join("\n") || "None"}

PREVIOUS ROUND HISTORY:
${input.prevRounds.map((pr) => `Round ${pr.round}: outcome=${pr.scores?.outcome.toFixed(2) ?? "?"}, verdict=${pr.triage_conclusion?.verdict ?? "?"}, hints=${pr.triage_conclusion?.hints?.join("; ") || "none"}`).join("\n") || "None"}

${input.crossRoundMemoryRef ? `CROSS-ROUND CONTEXT: ${input.crossRoundMemoryRef}` : ""}${contextSection}

Respond with a single JSON object containing these fields:
- "verdict": "continue" | "publish" | "abort"
* continue = more rounds needed, you must also provide next_change
* publish = current candidate is good enough
* abort = experiment cannot improve further
- "summary": one-sentence summary of key findings
- "failure_attribution": array of root-cause attributions (sorted by affected_queries count desc),
empty array when verdict is publish or abort.
Each entry shape:
{
"layer": "kn" | "skill" | "agent",
"evidence": "<one sentence citing specific tool call or return value>",
"affected_queries": ["<query_id>", ...],
"suggested_target": ${targetEnumLine}
}
- "next_change": REQUIRED when verdict=continue, omit/null otherwise. Use failure_attribution[0].suggested_target.
This mission has enabled_targets = [${[...enabled].join(", ")}]; do NOT propose any other target.
- "hints": array of short actionable hints for the next round (carried forward to future PREVIOUS ROUND HISTORY); use [] if none.
- "new_memory_token": short string (≤ 200 chars) summarizing what to remember across rounds; will appear as CROSS-ROUND CONTEXT next round.

Attribution rules:
- "kn": agent queried KN but concept/relation was missing or returned empty unexpectedly
- "skill": agent had the right intent but the tool usage pattern was wrong (no pagination, no sort_by, wrong filter)
- "agent": agent misidentified the concept to query, or made an orchestration error

NEXT_CHANGE OUTPUT EXAMPLES (pick the one matching your chosen target):

${examplesBlock}`;
}
|
|
153
|
+
/**
 * Triage client backed by the "claude-code" agent provider from the default
 * provider registry.
 */
export class ClaudeCodeTriageClient {
    /**
     * Build the triage prompt, send it to the provider, and parse the raw text reply.
     *
     * @param {object} input - See TriageInput.
     * @returns {Promise<object>} The parsed triage result.
     * @throws {Error} When the "claude-code" provider cannot be resolved, or
     *   when the reply fails parseTriageOutput's validation.
     */
    async triage(input) {
        const claudeCode = defaultRegistry.resolve({ preferred: "claude-code" });
        if (!claudeCode) {
            throw new Error("claude-code provider not available");
        }
        // The reply is validated by parseTriageOutput, so the provider-side
        // schema is deliberately left open (z.unknown()).
        const { rawText } = await claudeCode.invoke({
            prompt: buildTriagePrompt(input),
            outputSchema: z.unknown(),
            correlationId: `triage-${Date.now()}`,
        });
        return parseTriageOutput(rawText);
    }
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { type AgentConfigFetcher } from "./capture-fingerprint.js";
|
|
2
|
+
/** Options accepted by runPreflight. */
export interface RunPreflightOpts {
    /** Experiment directory the expected fingerprint is written to. */
    expDir: string;
    /** Id of the agent whose live configuration is fingerprinted. */
    agentId: string;
    /** Fetcher used to obtain the agent's configuration. */
    fetchConfig: AgentConfigFetcher;
    /** Eval set directories; each one's index.yaml may declare a target_kn. */
    evalSetPaths: Array<string>;
}
|
|
8
|
+
/**
 * Run the preflight reconciliation. Throws PreflightMismatchError if the live
 * agent's KN binding does not match the eval set's target_kn.
 *
 * The expected fingerprint is re-captured every round: without an executor that
 * deploys patches, the loop cannot tell an intentional agent change from
 * unintended drift, so it does not gate on config drift. It records the live
 * config as the per-round provenance fingerprint and lets preflightCheck
 * enforce the load-bearing invariant — the KN binding. (When an executor
 * exists, pass the deployed fingerprint as `expected` to also gate on drift.)
 *
 * @param opts - Experiment directory, agent id, config fetcher and eval set paths.
 * @returns A promise that resolves once the fingerprint is written and the check has passed.
 */
export declare function runPreflight(opts: RunPreflightOpts): Promise<void>;
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
// src/trace-ai/exp/run-preflight.ts
|
|
2
|
+
//
|
|
3
|
+
// Preflight orchestration: before an eval round runs, reconcile the live agent
// against expectation. Every round it re-captures the live agent's fingerprint and
// stores it as the expected fingerprint (config drift is recorded, not gated on),
// then verifies that the agent's KN binding matches the eval sets' declared target_kn.
|
|
7
|
+
import fs from "node:fs/promises";
|
|
8
|
+
import path from "node:path";
|
|
9
|
+
import yaml from "js-yaml";
|
|
10
|
+
import { EvalSetIndexSchema } from "../eval-set/schemas.js";
|
|
11
|
+
import { captureAgentFingerprint } from "./capture-fingerprint.js";
|
|
12
|
+
import { preflightCheck } from "./preflight.js";
|
|
13
|
+
import { writeExpectedFingerprint } from "./exp-store/expected-fingerprint.js";
|
|
14
|
+
/**
 * Resolve the single KN the eval sets expect the agent to be bound to.
 *
 * Each path is treated as a directory containing an index.yaml. A missing
 * index is skipped (the eval runner surfaces that, not preflight), and so is
 * an index that does not pass EvalSetIndexSchema or declares no target_kn.
 * Any other read failure (permissions, index.yaml being a directory, I/O
 * errors) is rethrown: swallowing it would silently skip the KN-binding check.
 *
 * @param {string[]} evalSetPaths - Eval set directories to inspect.
 * @returns {Promise<string|undefined>} The declared target_kn, or undefined
 *   when no eval set declares one (invariant 4 is then skipped).
 * @throws {Error} When eval sets declare conflicting target_kn values, or when
 *   an index.yaml exists but cannot be read.
 */
async function resolveEvalTargetKn(evalSetPaths) {
    const found = new Set();
    for (const p of evalSetPaths) {
        let raw;
        try {
            raw = await fs.readFile(path.join(p, "index.yaml"), "utf8");
        }
        catch (err) {
            // Only "there is no index here" is expected; everything else is a real failure.
            if (err?.code === "ENOENT" || err?.code === "ENOTDIR") {
                continue;
            }
            throw err;
        }
        const parsed = EvalSetIndexSchema.safeParse(yaml.load(raw));
        if (parsed.success && parsed.data.target_kn) {
            found.add(parsed.data.target_kn);
        }
    }
    if (found.size === 0) {
        return undefined;
    }
    if (found.size > 1) {
        throw new Error(`eval sets declare conflicting target_kn: ${[...found].sort().join(", ")}`);
    }
    return [...found][0];
}
|
|
40
|
+
/**
 * Preflight reconciliation for one eval round.
 *
 * Captures the live agent's fingerprint ("latest"), stores it as the
 * expected fingerprint for the experiment, resolves the KN the eval sets
 * target, and hands all three to preflightCheck. Because the freshly captured
 * fingerprint is passed as both `expected` and `actual`, config drift is
 * recorded but never gated on; the check that can fail here is the KN binding
 * against the eval sets' target_kn (reported by preflightCheck as a
 * PreflightMismatchError, per the original contract of this function).
 * Once an executor deploys patches, its deployed fingerprint can be passed as
 * `expected` to gate on drift as well.
 *
 * @param {object} opts - See RunPreflightOpts: expDir, agentId, fetchConfig, evalSetPaths.
 * @returns {Promise<void>}
 */
export async function runPreflight(opts) {
    const { expDir, agentId, fetchConfig, evalSetPaths } = opts;
    const liveFingerprint = await captureAgentFingerprint(fetchConfig, agentId, "latest");
    await writeExpectedFingerprint(expDir, liveFingerprint);
    const expectedKn = await resolveEvalTargetKn(evalSetPaths);
    preflightCheck(liveFingerprint, liveFingerprint, expectedKn);
}
|