@kweaver-ai/kweaver-sdk 0.8.3 → 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/agent-chat.d.ts +10 -2
- package/dist/api/agent-chat.js +19 -5
- package/dist/api/datasources.d.ts +14 -0
- package/dist/api/datasources.js +14 -0
- package/dist/cli.js +2 -14
- package/dist/client.d.ts +7 -1
- package/dist/client.js +7 -1
- package/dist/commands/bkn-ops.d.ts +1 -1
- package/dist/commands/bkn-ops.js +42 -21
- package/dist/commands/bkn.js +6 -3
- package/dist/commands/ds.d.ts +0 -31
- package/dist/commands/ds.js +18 -448
- package/dist/commands/explore-bkn.d.ts +7 -1
- package/dist/commands/explore-bkn.js +32 -3
- package/dist/resources/datasources.d.ts +7 -0
- package/dist/resources/datasources.js +7 -0
- package/dist/templates/explorer/bkn.js +860 -9
- package/dist/templates/explorer/index.html +1 -0
- package/dist/templates/explorer/style.css +225 -0
- package/dist/templates/explorer/vendor/g6.min.js +68 -0
- package/dist/trace-ai/eval-set/schemas.d.ts +1 -0
- package/dist/trace-ai/eval-set/schemas.js +4 -0
- package/dist/trace-ai/eval-set/types.d.ts +2 -0
- package/dist/trace-ai/exp/capture-fingerprint.d.ts +10 -0
- package/dist/trace-ai/exp/capture-fingerprint.js +12 -0
- package/dist/trace-ai/exp/context/context-assembler.d.ts +18 -0
- package/dist/trace-ai/exp/context/context-assembler.js +42 -0
- package/dist/trace-ai/exp/context/failure-analyzer.d.ts +22 -0
- package/dist/trace-ai/exp/context/failure-analyzer.js +59 -0
- package/dist/trace-ai/exp/context/kn-data-prober.d.ts +13 -0
- package/dist/trace-ai/exp/context/kn-data-prober.js +38 -0
- package/dist/trace-ai/exp/context/kn-schema-client.d.ts +14 -0
- package/dist/trace-ai/exp/context/kn-schema-client.js +41 -0
- package/dist/trace-ai/exp/context/retrieval-health.d.ts +32 -0
- package/dist/trace-ai/exp/context/retrieval-health.js +138 -0
- package/dist/trace-ai/exp/context/vega-catalog-client.d.ts +14 -0
- package/dist/trace-ai/exp/context/vega-catalog-client.js +15 -0
- package/dist/trace-ai/exp/coordinator.d.ts +34 -21
- package/dist/trace-ai/exp/coordinator.js +246 -24
- package/dist/trace-ai/exp/eval-runner.js +4 -2
- package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +1 -0
- package/dist/trace-ai/exp/exp-store/events-jsonl.js +18 -0
- package/dist/trace-ai/exp/exp-store/expected-fingerprint.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/expected-fingerprint.js +31 -0
- package/dist/trace-ai/exp/exp-store/index.d.ts +63 -2
- package/dist/trace-ai/exp/exp-store/index.js +2 -1
- package/dist/trace-ai/exp/exp-store/rollback-yaml.d.ts +12 -0
- package/dist/trace-ai/exp/exp-store/rollback-yaml.js +29 -0
- package/dist/trace-ai/exp/index.d.ts +2 -0
- package/dist/trace-ai/exp/index.js +68 -3
- package/dist/trace-ai/exp/info.js +1 -1
- package/dist/trace-ai/exp/patch/index.d.ts +13 -2
- package/dist/trace-ai/exp/patch/index.js +65 -10
- package/dist/trace-ai/exp/patch/kn-api-client.d.ts +40 -0
- package/dist/trace-ai/exp/patch/kn-api-client.js +14 -0
- package/dist/trace-ai/exp/patch/kn.d.ts +8 -0
- package/dist/trace-ai/exp/patch/kn.js +36 -0
- package/dist/trace-ai/exp/patch/skill-api-client.d.ts +17 -0
- package/dist/trace-ai/exp/patch/skill-api-client.js +14 -0
- package/dist/trace-ai/exp/patch/skill-content.d.ts +9 -0
- package/dist/trace-ai/exp/patch/skill-content.js +12 -0
- package/dist/trace-ai/exp/preflight.d.ts +77 -0
- package/dist/trace-ai/exp/preflight.js +148 -0
- package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +3 -14
- package/dist/trace-ai/exp/providers/synthesizer-client.js +53 -35
- package/dist/trace-ai/exp/providers/triage-client.d.ts +15 -2
- package/dist/trace-ai/exp/providers/triage-client.js +143 -28
- package/dist/trace-ai/exp/run-preflight.d.ts +19 -0
- package/dist/trace-ai/exp/run-preflight.js +56 -0
- package/dist/trace-ai/exp/schemas.d.ts +402 -44
- package/dist/trace-ai/exp/schemas.js +131 -18
- package/dist/utils/deprecation.d.ts +1 -0
- package/dist/utils/deprecation.js +18 -0
- package/package.json +2 -1
|
@@ -4,7 +4,6 @@ import fs from "node:fs/promises";
|
|
|
4
4
|
import { fileURLToPath } from "node:url";
|
|
5
5
|
import { ExpStore } from "./exp-store/index.js";
|
|
6
6
|
import { ExperimentCoordinator } from "./coordinator.js";
|
|
7
|
-
import { ClaudeCodeSynthesizer } from "./providers/synthesizer-client.js";
|
|
8
7
|
import { ClaudeCodeTriageClient } from "./providers/triage-client.js";
|
|
9
8
|
import { runEval } from "./eval-runner.js";
|
|
10
9
|
import { defaultRegistry } from "../../agent-providers/registry.js";
|
|
@@ -13,17 +12,40 @@ import { PromptTemplateRegistry } from "../../agent-providers/prompt-template.js
|
|
|
13
12
|
import { createBuiltinSemanticMatchProvider } from "../eval-set/semantic-match-provider.js";
|
|
14
13
|
import { ensureValidToken } from "../../auth/oauth.js";
|
|
15
14
|
import { fetchAgentInfo, sendChatRequest } from "../../api/agent-chat.js";
|
|
15
|
+
import { getAgent } from "../../api/agent-list.js";
|
|
16
16
|
import { getTracesByConversation } from "../../api/conversations.js";
|
|
17
17
|
import { upsertRegistry, listRegistry } from "./exp-store/exp-registry.js";
|
|
18
18
|
import { runInfo, runList, getHealthChecks } from "./info.js";
|
|
19
19
|
import { resolveClaudeBinary } from "./claude-binary.js";
|
|
20
|
+
import { KweaverKnSchemaClient } from "./context/kn-schema-client.js";
|
|
21
|
+
import { ContextAssembler } from "./context/context-assembler.js";
|
|
22
|
+
import { probeObjectTypes } from "./context/kn-data-prober.js";
|
|
23
|
+
import { queryResource } from "../../api/resources.js";
|
|
24
|
+
import { KweaverVegaCatalogClient } from "./context/vega-catalog-client.js";
|
|
25
|
+
import { KweaverKnApiClient } from "./patch/kn-api-client.js";
|
|
26
|
+
import { KweaverSkillApiClient } from "./patch/skill-api-client.js";
|
|
27
|
+
const MCP_PATH = "/api/agent-retrieval/v1/mcp";
|
|
20
28
|
const __expIndexDir = path.dirname(fileURLToPath(import.meta.url));
|
|
29
|
+
/**
 * Render a list of failure-attribution entries as a fixed-width text table.
 *
 * Each entry becomes one row: a bracketed layer tag padded to 7 characters, the
 * evidence text padded (or truncated with "...") to 55 characters, the suggested
 * target, and the comma-separated affected queries in parentheses.
 *
 * The entries are read from stored events that are not schema-validated, so the
 * function tolerates malformed rows instead of throwing: a missing or non-array
 * `affected_queries` renders as an empty list, and a missing `evidence` renders
 * as an empty cell.
 *
 * @param attribution Array of `{ layer, evidence, suggested_target, affected_queries }`.
 * @returns The formatted table prefixed with "Failure attribution:", or "" when
 *          there is nothing to show.
 */
export function formatFailureAttribution(attribution) {
    if (!Array.isArray(attribution) || attribution.length === 0)
        return "";
    const lines = attribution.map((a) => {
        const entry = a ?? {};
        // Guard every field: one malformed event must not abort the whole report.
        const queries = Array.isArray(entry.affected_queries) ? entry.affected_queries.join(", ") : "";
        const rawEvidence = typeof entry.evidence === "string" ? entry.evidence : String(entry.evidence ?? "");
        // Keep the evidence column at exactly 55 characters (52 + "...").
        const evidence = rawEvidence.length > 55 ? rawEvidence.slice(0, 52) + "..." : rawEvidence;
        const layerTag = `[${entry.layer}]`.padEnd(7);
        return ` ${layerTag} ${evidence.padEnd(55)} → ${entry.suggested_target} (${queries})`;
    });
    return "Failure attribution:\n" + lines.join("\n");
}
|
|
21
40
|
const EVAL_SET_RUBRIC_DIR = path.join(__expIndexDir, "..", "eval-set", "rubric-templates");
|
|
22
41
|
/**
 * Make sure a "claude-code" provider is present in the default registry.
 * Does nothing when one is already registered; otherwise registers a
 * subprocess-backed provider and marks it as the default.
 */
function ensureProvider() {
    const alreadyRegistered = defaultRegistry.has("claude-code");
    if (alreadyRegistered) {
        return;
    }
    const provider = new ClaudeCodeSubprocessProvider({
        binary: resolveClaudeBinary(),
        // 10-minute timeout: the merged Triage+Synthesizer call carries a full
        // round of eval context (per-query failures, KN schema, skill content),
        // and the semantic-match evaluator runs on every case. Both timed out
        // under the earlier 120 s limit.
        defaultTimeoutMs: 600_000,
    });
    defaultRegistry.register(provider, { setAsDefault: true });
}
|
|
@@ -110,6 +132,7 @@ export async function runExpCommand(argv) {
|
|
|
110
132
|
const rounds = await store.readAllRounds();
|
|
111
133
|
const lineage = await store.readLineage();
|
|
112
134
|
const mission = await store.readMission().catch(() => null);
|
|
135
|
+
const events = await store.readAllEvents().catch(() => []);
|
|
113
136
|
process.stdout.write(`State: ${replayed.currentState} Round: ${replayed.currentRound}\n`);
|
|
114
137
|
if (mission?.next_change) {
|
|
115
138
|
process.stdout.write(`Suggested next change:\n target: ${mission.next_change.target}\n hypothesis: ${mission.next_change.hypothesis}\n`);
|
|
@@ -121,6 +144,15 @@ export async function runExpCommand(argv) {
|
|
|
121
144
|
process.stdout.write(`Triage: ${last.triage_conclusion.diagnoses.join("; ")}\n`);
|
|
122
145
|
}
|
|
123
146
|
}
|
|
147
|
+
// Read last TriageComplete event for failure_attribution
|
|
148
|
+
const lastTriage = events.filter((e) => e["type"] === "TriageComplete").at(-1);
|
|
149
|
+
const attribution = Array.isArray(lastTriage?.["failure_attribution"])
|
|
150
|
+
? lastTriage["failure_attribution"]
|
|
151
|
+
: [];
|
|
152
|
+
const attrText = formatFailureAttribution(attribution);
|
|
153
|
+
if (attrText) {
|
|
154
|
+
process.stdout.write(`${attrText}\n`);
|
|
155
|
+
}
|
|
124
156
|
process.stdout.write(`Lineage: ${lineage.length} versions\n`);
|
|
125
157
|
return 0;
|
|
126
158
|
}
|
|
@@ -202,10 +234,43 @@ async function makeCoordinator(expDir) {
|
|
|
202
234
|
catch {
|
|
203
235
|
process.stderr.write("warn: could not create semantic-match provider — semantic_match assertions will be skipped\n");
|
|
204
236
|
}
|
|
237
|
+
// Read mission upfront so KN/Skill clients are only constructed when the
|
|
238
|
+
// experiment actually enables those layers. Avoids exposing stub clients
|
|
239
|
+
// (KweaverKnApiClient, KweaverSkillApiClient — both throw "not yet implemented")
|
|
240
|
+
// to missions that don't need them.
|
|
241
|
+
const mission = await new ExpStore(expDir).readMission();
|
|
242
|
+
const enabled = new Set(mission.enabled_targets);
|
|
243
|
+
const needsKn = enabled.has("kn.object_type") || enabled.has("kn.relation_type");
|
|
244
|
+
const needsSkill = enabled.has("skill.content");
|
|
245
|
+
const mcpUrl = baseUrl.replace(/\/+$/, "") + MCP_PATH;
|
|
246
|
+
const knSchemaClient = new KweaverKnSchemaClient(mcpUrl, token);
|
|
247
|
+
const vegaCatalogClient = new KweaverVegaCatalogClient(baseUrl, token);
|
|
248
|
+
// Wire probeObjectTypes with auth + businessDomain
|
|
249
|
+
const boundProbe = (schema, failures) => probeObjectTypes(schema, failures, queryResource, { baseUrl, accessToken: token });
|
|
250
|
+
// No-op SkillApiClient lets ContextAssembler pre-fetch bound_skill stubs even when
|
|
251
|
+
// skill.content isn't enabled (the bound list is informational for the planner).
|
|
252
|
+
const noopSkillContextClient = {
|
|
253
|
+
async getSkillContent(_id) { return ""; },
|
|
254
|
+
async publishSkillVersion(_id, _content) { return { version: "noop", content: "" }; },
|
|
255
|
+
};
|
|
256
|
+
const contextAssembler = new ContextAssembler(knSchemaClient, vegaCatalogClient, noopSkillContextClient, boundProbe);
|
|
205
257
|
return new ExperimentCoordinator({
|
|
206
258
|
expDir,
|
|
207
|
-
synthesizer: new ClaudeCodeSynthesizer(),
|
|
208
259
|
triage: new ClaudeCodeTriageClient(),
|
|
260
|
+
contextAssembler,
|
|
261
|
+
// Full agent config (system_prompt / llms / skills.tools) comes from
|
|
262
|
+
// GET /v3/agent/{id}, which wraps the config object under a `config` key.
|
|
263
|
+
fetchAgentConfig: async (agentId) => {
|
|
264
|
+
const body = await getAgent({ baseUrl, accessToken: token, agentId, businessDomain: bd });
|
|
265
|
+
const parsed = JSON.parse(body);
|
|
266
|
+
return (parsed["config"] ?? parsed);
|
|
267
|
+
},
|
|
268
|
+
knClient: needsKn ? new KweaverKnApiClient(baseUrl, token) : undefined,
|
|
269
|
+
skillClient: needsSkill ? new KweaverSkillApiClient(baseUrl, token) : undefined,
|
|
270
|
+
fetchTrace: async (conversationId) => {
|
|
271
|
+
const r = await getTracesByConversation({ baseUrl, accessToken: token, conversationId, businessDomain: bd });
|
|
272
|
+
return { spans: r.spans };
|
|
273
|
+
},
|
|
209
274
|
runEval: ({ evalSetPaths, candidatePath, round }) => runEval({
|
|
210
275
|
evalSetPaths,
|
|
211
276
|
candidatePath,
|
|
@@ -57,7 +57,7 @@ export async function buildExpSnapshot(expDir) {
|
|
|
57
57
|
const scores = lastRound?.scores ?? null;
|
|
58
58
|
const triage_summary = lastRound?.triage_conclusion?.diagnoses.join("; ") ?? null;
|
|
59
59
|
const suggested_next = mission?.next_change
|
|
60
|
-
? { target: mission.next_change.target, hypothesis: mission.next_change.hypothesis }
|
|
60
|
+
? { target: mission.next_change.target, hypothesis: mission.next_change.hypothesis ?? "" }
|
|
61
61
|
: null;
|
|
62
62
|
return {
|
|
63
63
|
workspace: expDir,
|
|
@@ -1,2 +1,13 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
|
|
1
|
+
import type { KnApiClient } from "./kn-api-client.js";
|
|
2
|
+
import type { SkillApiClient } from "./skill-api-client.js";
|
|
3
|
+
/** Outcome of `PatchApplier.apply`. */
export interface ApplyResult {
    /** The candidate configuration after the patch has been applied. */
    candidate: { [key: string]: unknown };
    /** Version of the newly published skill; set only by `skill.content` patches. */
    skillVersion?: string;
}
|
|
7
|
+
/**
 * Applies a single "next change" to a candidate configuration, dispatching on
 * the change's target (agent prompt, agent skills, KN types, or skill content).
 */
export declare class PatchApplier {
    private workDir;
    private knClient?;
    private skillClient?;
    /**
     * @param workDir     Working directory handed to the KN patcher.
     * @param knClient    Required only for `kn.*` targets; `apply` throws without it.
     * @param skillClient Required only for the `skill.content` target; `apply` throws without it.
     */
    constructor(
        workDir: string,
        knClient?: KnApiClient | undefined,
        skillClient?: SkillApiClient | undefined
    );
    /**
     * Validate `rawNextChange` and apply it to a clone of `candidate`.
     *
     * @param candidate     Current candidate configuration (not mutated).
     * @param rawNextChange Unvalidated next-change payload.
     * @returns The patched candidate, plus the new skill version when applicable.
     */
    apply(
        candidate: Record<string, unknown>,
        rawNextChange: unknown
    ): Promise<ApplyResult>;
}
|
|
@@ -1,13 +1,68 @@
|
|
|
1
|
+
import { NextChangeSchema } from "../schemas.js";
|
|
1
2
|
import { applyAgentConfigPatch } from "./agent-config.js";
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
3
|
+
import { KnPatcher } from "./kn.js";
|
|
4
|
+
import { SkillContentPatcher } from "./skill-content.js";
|
|
5
|
+
/**
 * Applies one validated "next change" to a candidate configuration.
 * The KN and skill API clients are optional and only needed for the targets
 * that talk to those services.
 */
export class PatchApplier {
    workDir;
    knClient;
    skillClient;
    /**
     * @param workDir     Working directory passed on to KnPatcher.
     * @param knClient    Client used for kn.* patches (may be undefined).
     * @param skillClient Client used for skill.content patches (may be undefined).
     */
    constructor(workDir, knClient, skillClient) {
        this.workDir = workDir;
        this.knClient = knClient;
        this.skillClient = skillClient;
    }
    /**
     * Parse `rawNextChange` with NextChangeSchema, deep-clone `candidate`, and
     * apply the patch for the parsed target to the clone.
     *
     * @returns `{ candidate }`, with `skillVersion` added for skill.content.
     * @throws Error when the client required by the target was not provided, or
     *         when the target is not one of the handled values.
     */
    async apply(candidate, rawNextChange) {
        const change = NextChangeSchema.parse(rawNextChange);
        const draft = structuredClone(candidate);
        const target = change.target;
        if (target === "agent.system_prompt") {
            // Non-string patches are serialized so the config patcher always gets text.
            const text = typeof change.patch === "string" ? change.patch : JSON.stringify(change.patch);
            return { candidate: applyAgentConfigPatch(draft, text) };
        }
        if (target === "agent.skills") {
            return { candidate: applyAgentSkillsPatch(draft, change.patch) };
        }
        if (target === "kn.object_type" || target === "kn.relation_type") {
            if (!this.knClient) {
                throw new Error("KnApiClient not provided for kn.* patch");
            }
            const patcher = new KnPatcher(this.knClient, this.workDir);
            await patcher.apply(change.patch);
            // Mirror the applied additions in the candidate's own `kn` record.
            const currentKn = draft["kn"]
                ?? { id: change.patch.kn_id, object_types: [], relation_types: [] };
            const objectTypes = [...(currentKn["object_types"] ?? []), ...change.patch.add_object_types];
            const relationTypes = [...(currentKn["relation_types"] ?? []), ...change.patch.add_relation_types];
            draft["kn"] = { ...currentKn, object_types: objectTypes, relation_types: relationTypes };
            return { candidate: draft };
        }
        if (target === "skill.content") {
            if (!this.skillClient) {
                throw new Error("SkillApiClient not provided for skill.content patch");
            }
            const patcher = new SkillContentPatcher(this.skillClient);
            const { newVersion } = await patcher.apply(change.patch);
            const agent = draft["agent"] ?? {};
            const boundSkills = agent["skills"] ?? [];
            // Bump the version on the skill entry whose id matches the patched skill.
            agent["skills"] = boundSkills.map((skill) => {
                if (skill.id === change.patch.skill_id) {
                    return { ...skill, version: newVersion };
                }
                return skill;
            });
            draft["agent"] = agent;
            return { candidate: draft, skillVersion: newVersion };
        }
        throw new Error(`Unhandled patch target: ${String(change.target)}`);
    }
}
|
|
53
|
+
function applyAgentSkillsPatch(candidate, patch) {
|
|
54
|
+
const agent = candidate["agent"] ?? {};
|
|
55
|
+
const skills = (agent["skills"] ?? []).slice();
|
|
56
|
+
const unbindSet = new Set(patch.unbind);
|
|
57
|
+
let updated = skills.filter(s => !unbindSet.has(s.id));
|
|
58
|
+
for (const bind of patch.bind) {
|
|
59
|
+
const idx = updated.findIndex(s => s.id === bind.id);
|
|
60
|
+
if (idx >= 0)
|
|
61
|
+
updated[idx] = bind;
|
|
62
|
+
else
|
|
63
|
+
updated.push(bind);
|
|
64
|
+
}
|
|
65
|
+
agent["skills"] = updated;
|
|
66
|
+
candidate["agent"] = agent;
|
|
67
|
+
return candidate;
|
|
13
68
|
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import type { KnObjectTypeDef, KnRelationTypeDef } from "../schemas.js";
|
|
2
|
+
/**
 * Operations the KN patcher needs from a knowledge-network backend:
 * dry-run validation, creation, and existence checks for object types and
 * relation types.
 */
export interface KnApiClient {
    /** Dry-run check of an object-type spec; `error` carries the reason when `valid` is false. */
    validateObjectType(knId: string, spec: KnObjectTypeDef): Promise<{ valid: boolean; error?: string }>;
    /** Create an object type in the given KN. */
    addObjectType(knId: string, spec: KnObjectTypeDef): Promise<{ concept_id: string }>;
    /** Dry-run check of a relation-type spec; `error` carries the reason when `valid` is false. */
    validateRelationType(knId: string, spec: KnRelationTypeDef): Promise<{ valid: boolean; error?: string }>;
    /** Create a relation type in the given KN. */
    addRelationType(knId: string, spec: KnRelationTypeDef): Promise<{ relation_id: string }>;
    /** Whether an object type with this concept name already exists in the KN. */
    objectTypeExists(knId: string, conceptName: string): Promise<boolean>;
    /** Whether a relation type with this concept name already exists in the KN. */
    relationTypeExists(knId: string, conceptName: string): Promise<boolean>;
}
|
|
20
|
+
/**
 * KnApiClient bound to a KWeaver base URL and access token.
 * In this release every method is a stub that rejects with a
 * "not yet implemented" error.
 */
export declare class KweaverKnApiClient implements KnApiClient {
    private baseUrl;
    private token;
    constructor(baseUrl: string, token: string);
    /** Stub: rejects with "not yet implemented". */
    validateObjectType(_knId: string, _spec: KnObjectTypeDef): Promise<{ valid: boolean; error?: string }>;
    /** Stub: rejects with "not yet implemented". */
    addObjectType(_knId: string, _spec: KnObjectTypeDef): Promise<{ concept_id: string }>;
    /** Stub: rejects with "not yet implemented". */
    validateRelationType(_knId: string, _spec: KnRelationTypeDef): Promise<{ valid: boolean; error?: string }>;
    /** Stub: rejects with "not yet implemented". */
    addRelationType(_knId: string, _spec: KnRelationTypeDef): Promise<{ relation_id: string }>;
    /** Stub: rejects with "not yet implemented". */
    objectTypeExists(_knId: string, _name: string): Promise<boolean>;
    /** Stub: rejects with "not yet implemented". */
    relationTypeExists(_knId: string, _name: string): Promise<boolean>;
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
export class KweaverKnApiClient {
|
|
2
|
+
baseUrl;
|
|
3
|
+
token;
|
|
4
|
+
constructor(baseUrl, token) {
|
|
5
|
+
this.baseUrl = baseUrl;
|
|
6
|
+
this.token = token;
|
|
7
|
+
}
|
|
8
|
+
async validateObjectType(_knId, _spec) { throw new Error("KweaverKnApiClient not yet implemented"); }
|
|
9
|
+
async addObjectType(_knId, _spec) { throw new Error("KweaverKnApiClient not yet implemented"); }
|
|
10
|
+
async validateRelationType(_knId, _spec) { throw new Error("KweaverKnApiClient not yet implemented"); }
|
|
11
|
+
async addRelationType(_knId, _spec) { throw new Error("KweaverKnApiClient not yet implemented"); }
|
|
12
|
+
async objectTypeExists(_knId, _name) { throw new Error("KweaverKnApiClient not yet implemented"); }
|
|
13
|
+
async relationTypeExists(_knId, _name) { throw new Error("KweaverKnApiClient not yet implemented"); }
|
|
14
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { KnPatch } from "../schemas.js";
|
|
2
|
+
import type { KnApiClient } from "./kn-api-client.js";
|
|
3
|
+
/**
 * Applies a KN patch (new object types and relation types) through a
 * KnApiClient, recording a rollback step before each addition.
 */
export declare class KnPatcher {
    private client;
    private rollback;
    /**
     * @param client  Backend used for validation, existence checks and additions.
     * @param workDir Directory handed to the rollback recorder.
     */
    constructor(client: KnApiClient, workDir: string);
    /**
     * Dry-run every spec in the patch, then add the ones that do not exist yet.
     * Rejects when a dry-run reports an invalid spec.
     */
    apply(patch: KnPatch): Promise<void>;
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { RollbackYaml } from "../exp-store/rollback-yaml.js";
|
|
2
|
+
/**
 * Applies a KN patch in three sequential phases: validate everything, add
 * object types, add relation types. A rollback step is appended before each
 * addition so that a failed run leaves a record of what to undo.
 */
export class KnPatcher {
    client;
    rollback;
    /**
     * @param client  KN API client used for all remote calls.
     * @param workDir Directory passed to RollbackYaml.
     */
    constructor(client, workDir) {
        this.client = client;
        this.rollback = new RollbackYaml(workDir);
    }
    /**
     * @param patch `{ kn_id, add_object_types, add_relation_types }`.
     * @throws Error when any spec fails its dry-run validation; nothing has
     *         been added at that point.
     */
    async apply(patch) {
        const knId = patch.kn_id;
        const objectTypes = patch.add_object_types;
        const relationTypes = patch.add_relation_types;
        // Phase 1: dry-run every spec before touching the KN.
        for (const objectType of objectTypes) {
            const verdict = await this.client.validateObjectType(knId, objectType);
            if (verdict.valid) {
                continue;
            }
            throw new Error(`KN dry-run failed for '${objectType.concept_name}': ${verdict.error ?? "unknown"}`);
        }
        for (const relationType of relationTypes) {
            const verdict = await this.client.validateRelationType(knId, relationType);
            if (verdict.valid) {
                continue;
            }
            throw new Error(`KN dry-run failed for relation '${relationType.concept_name}': ${verdict.error ?? "unknown"}`);
        }
        // Phase 2: add missing object types; the rollback step is written
        // BEFORE the KN call.
        for (const objectType of objectTypes) {
            const alreadyThere = await this.client.objectTypeExists(knId, objectType.concept_name);
            if (!alreadyThere) {
                await this.rollback.appendStep({ op: "remove_object_type", kn_id: knId, concept_name: objectType.concept_name });
                await this.client.addObjectType(knId, objectType);
            }
        }
        // Phase 3: add missing relation types, same ordering rule.
        for (const relationType of relationTypes) {
            const alreadyThere = await this.client.relationTypeExists(knId, relationType.concept_name);
            if (!alreadyThere) {
                await this.rollback.appendStep({ op: "remove_relation_type", kn_id: knId, concept_name: relationType.concept_name });
                await this.client.addRelationType(knId, relationType);
            }
        }
    }
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/** Operations needed to read a skill's content and publish a new version of it. */
export interface SkillApiClient {
    /** Fetch the current content of the skill. */
    getSkillContent(skillId: string): Promise<string>;
    /** Publish `content` as a new version of the skill. */
    publishSkillVersion(skillId: string, content: string): Promise<{ version: string; content: string }>;
}
|
|
8
|
+
/**
 * SkillApiClient bound to a KWeaver base URL and access token.
 * In this release both methods are stubs that reject with a
 * "not yet implemented" error.
 */
export declare class KweaverSkillApiClient implements SkillApiClient {
    private baseUrl;
    private token;
    constructor(baseUrl: string, token: string);
    /** Stub: rejects with "not yet implemented". */
    getSkillContent(_skillId: string): Promise<string>;
    /** Stub: rejects with "not yet implemented". */
    publishSkillVersion(_skillId: string, _content: string): Promise<{ version: string; content: string }>;
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
export class KweaverSkillApiClient {
|
|
2
|
+
baseUrl;
|
|
3
|
+
token;
|
|
4
|
+
constructor(baseUrl, token) {
|
|
5
|
+
this.baseUrl = baseUrl;
|
|
6
|
+
this.token = token;
|
|
7
|
+
}
|
|
8
|
+
async getSkillContent(_skillId) {
|
|
9
|
+
throw new Error("KweaverSkillApiClient not yet implemented — use mock in tests");
|
|
10
|
+
}
|
|
11
|
+
async publishSkillVersion(_skillId, _content) {
|
|
12
|
+
throw new Error("KweaverSkillApiClient not yet implemented");
|
|
13
|
+
}
|
|
14
|
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { SkillContentPatch } from "../schemas.js";
|
|
2
|
+
import type { SkillApiClient } from "./skill-api-client.js";
|
|
3
|
+
/** Appends a section to a skill's content and publishes the result as a new version. */
export declare class SkillContentPatcher {
    private client;
    /** @param client Backend used to read the skill and publish the new version. */
    constructor(client: SkillApiClient);
    /**
     * Apply the patch to the skill it names.
     *
     * @returns The version identifier reported by the publish call.
     */
    apply(patch: SkillContentPatch): Promise<{ newVersion: string }>;
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export class SkillContentPatcher {
|
|
2
|
+
client;
|
|
3
|
+
constructor(client) {
|
|
4
|
+
this.client = client;
|
|
5
|
+
}
|
|
6
|
+
async apply(patch) {
|
|
7
|
+
const existing = await this.client.getSkillContent(patch.skill_id);
|
|
8
|
+
const updated = existing.trimEnd() + "\n\n" + patch.append_section;
|
|
9
|
+
const result = await this.client.publishSkillVersion(patch.skill_id, updated);
|
|
10
|
+
return { newVersion: result.version };
|
|
11
|
+
}
|
|
12
|
+
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/** Reference to one tool bound to the agent: the tool id plus the id of its toolbox. */
export interface ToolRef {
    /** Identifier of the tool. */
    tool_id: string;
    /** Identifier of the toolbox the tool belongs to. */
    tool_box_id: string;
}
|
|
6
|
+
/**
 * A `kn_id`/`knId` tool input whose `map_type` is something other than
 * `"fixedValue"`, meaning the KN id is not pinned by the configuration.
 * With `map_type: "auto"` the LLM produces the value at runtime, and
 * `"model"`/`"var"` are resolved dynamically as well. In each of these cases
 * the runtime does not query the configured `map_value`, so any check that
 * trusted `map_value` would report a false positive.
 */
export interface NonFixedKnBinding {
    /** Name of the tool input. */
    input_name: string;
    /** The non-fixed mapping type as configured. */
    map_type: string;
    /** Configured value, kept for diagnostics only; the runtime ignores it. */
    map_value: string;
}
|
|
19
|
+
/**
 * The subset of an agent's configuration that affects eval outcomes.
 * The loop-owned "expected" record and the per-round "actual" read-back are
 * both normalized into this shape so they can be compared field by field.
 */
export interface AgentFingerprint {
    agent_id: string;
    version: string;
    system_prompt: string;
    model: string;
    temperature: number;
    /** Tools bound to the agent, sorted by `tool_id`. */
    tools: Array<ToolRef>;
    /**
     * KN ids that the agent's tools query deterministically. Only
     * `kn_id`/`knId` inputs bound with `map_type: "fixedValue"` contribute
     * (or inputs with no `map_type`, as in older configs). Deduplicated and
     * sorted.
     */
    kn_ids: Array<string>;
    /**
     * `kn_id`/`knId` inputs bound with a non-fixed `map_type` such as `"auto"`.
     * Empty when every KN binding is fixed. When non-empty, the agent does NOT
     * deterministically query a known KN, because the value is resolved at
     * runtime.
     */
    non_fixed_kn_bindings: Array<NonFixedKnBinding>;
}
|
|
45
|
+
/** A single field whose expected and actual values differ during preflight reconciliation. */
export interface PreflightMismatch {
    /** Name of the field that differs. */
    field: string;
    /** Rendered expected value. */
    expected: string;
    /** Rendered actual value. */
    actual: string;
}
|
|
51
|
+
/**
 * Error raised by `preflightCheck` when the agent under test differs from
 * what the experiment expects. The individual differences are available on
 * `mismatches`.
 */
export declare class PreflightMismatchError extends Error {
    /** Every field that failed reconciliation. */
    readonly mismatches: PreflightMismatch[];
    constructor(mismatches: PreflightMismatch[]);
}
|
|
56
|
+
/**
 * Normalize a raw agent-factory config object (the JSON that `agent get`
 * returns) into an AgentFingerprint. All format-specific extraction is kept
 * here, so the rest of the loop only handles the normalized shape.
 *
 * @param agentId Agent id recorded in the fingerprint.
 * @param version Version recorded in the fingerprint.
 * @param raw     Raw config object to extract from.
 */
export declare function fingerprintFromAgentConfig(
    agentId: string,
    version: string,
    raw: Record<string, unknown>
): AgentFingerprint;
|
|
62
|
+
/**
 * Compare the agent under test with the expected fingerprint and throw a
 * PreflightMismatchError that lists every failed invariant.
 *
 * Four invariants are checked:
 *   1. identity: `agent_id` matches.
 *   2. version: the pinned version matches.
 *   3. config: `system_prompt`, `model`, `temperature` and `tools` match.
 *   4. question↔patient: the agent's KN binding is exactly the eval set's
 *      `target_kn` AND that binding is deterministic (`map_type`
 *      "fixedValue"). A non-fixed binding (e.g. "auto", model-generated)
 *      fails this invariant even when `map_value` holds the right id, because
 *      the runtime does not use `map_value`.
 *
 * Invariant 4 is skipped when `evalTargetKn` is undefined, i.e. the eval set
 * has not declared a `target_kn` yet, so there is nothing to check against.
 *
 * @param expected     Fingerprint the experiment expects.
 * @param actual       Fingerprint read back from the agent under test.
 * @param evalTargetKn KN id declared by the eval set, if any.
 */
export declare function preflightCheck(
    expected: AgentFingerprint,
    actual: AgentFingerprint,
    evalTargetKn?: string
): void;
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
// src/trace-ai/exp/preflight.ts
|
|
2
|
+
//
|
|
3
|
+
// Preflight reconciliation: before an eval round measures an agent, verify the
|
|
4
|
+
// agent under test actually matches what the experiment expects. The exp loop
|
|
5
|
+
// has no actuator/read-back of its own, so without this guard a whole round can
|
|
6
|
+
// silently measure the wrong agent, the wrong version, or an agent bound to the
|
|
7
|
+
// wrong KN. This turns those silent failures into a fail-fast at ~1s cost.
|
|
8
|
+
/** Tool-input names that carry a KN id. The platform uses both spellings. */
|
|
9
|
+
const KN_INPUT_NAMES = new Set(["kn_id", "knId"]);
|
|
10
|
+
/** Thrown by preflightCheck when the agent under test does not match expectation. */
|
|
11
|
+
export class PreflightMismatchError extends Error {
|
|
12
|
+
mismatches;
|
|
13
|
+
constructor(mismatches) {
|
|
14
|
+
const lines = mismatches.map(m => ` - ${m.field}: expected ${m.expected}, actual ${m.actual}`);
|
|
15
|
+
super(`Preflight check failed — agent under test does not match expectation:\n${lines.join("\n")}`);
|
|
16
|
+
this.name = "PreflightMismatchError";
|
|
17
|
+
this.mismatches = mismatches;
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
/**
 * Build an AgentFingerprint from a raw agent-factory config object (the JSON
 * returned by `agent get`). Format-specific extraction lives here so the rest of
 * the loop only ever deals with the normalized fingerprint.
 *
 * Extraction rules:
 *   - `system_prompt`: the string field, or "" when absent.
 *   - `model` / `temperature`: taken from `llm_config` of the LLM flagged
 *     `is_default`, falling back to the first LLM; "" / NaN when absent.
 *   - `tools`: `skills.tools` reduced to `{ tool_id, tool_box_id }`, sorted by
 *     `tool_id` and then `tool_box_id`.
 *   - `kn_ids` / `non_fixed_kn_bindings`: derived from `kn_id`/`knId` tool inputs.
 *
 * The config is unvalidated JSON, so entries of `llms`, `skills.tools` and
 * `tool_input` that are not plain objects are ignored rather than dereferenced.
 * Both sorts use tie-breakers, so the result does not depend on the order in
 * which the platform happens to list tools or inputs.
 *
 * @param agentId Agent id to record in the fingerprint.
 * @param version Version to record in the fingerprint.
 * @param raw     Raw config object.
 * @returns The normalized fingerprint.
 */
export function fingerprintFromAgentConfig(agentId, version, raw) {
    const isRecord = (v) => typeof v === "object" && v !== null && !Array.isArray(v);
    const source = isRecord(raw) ? raw : {};
    const system_prompt = typeof source["system_prompt"] === "string" ? source["system_prompt"] : "";
    const llms = Array.isArray(source["llms"]) ? source["llms"].filter(isRecord) : [];
    const defaultLlm = llms.find(l => l["is_default"] === true) ?? llms[0];
    const llmConfig = isRecord(defaultLlm?.["llm_config"]) ? defaultLlm["llm_config"] : {};
    const model = typeof llmConfig["name"] === "string" ? llmConfig["name"] : "";
    const temperature = typeof llmConfig["temperature"] === "number" ? llmConfig["temperature"] : NaN;
    const skills = isRecord(source["skills"]) ? source["skills"] : {};
    const rawTools = Array.isArray(skills["tools"]) ? skills["tools"].filter(isRecord) : [];
    const tools = rawTools
        .map(t => ({ tool_id: String(t["tool_id"] ?? ""), tool_box_id: String(t["tool_box_id"] ?? "") }))
        // tool_box_id breaks ties so tools sharing a tool_id still get a fixed order.
        .sort((a, b) => a.tool_id.localeCompare(b.tool_id) || a.tool_box_id.localeCompare(b.tool_box_id));
    // Separate KN bindings by whether they are deterministic. A kn_id/knId input
    // contributes to kn_ids only when bound with map_type "fixedValue" (or no
    // map_type at all — older configs predate the field). Anything else (notably
    // "auto" = model-generated) is recorded as a non-fixed binding instead, so a
    // downstream check does not mistake the configured map_value for a real binding.
    const knSet = new Set();
    const nonFixed = new Map();
    for (const t of rawTools) {
        const inputs = Array.isArray(t["tool_input"]) ? t["tool_input"].filter(isRecord) : [];
        for (const inp of inputs) {
            const inputName = String(inp["input_name"]);
            if (!KN_INPUT_NAMES.has(inputName))
                continue;
            const mapValue = typeof inp["map_value"] === "string" ? inp["map_value"] : "";
            const mapType = typeof inp["map_type"] === "string" ? inp["map_type"] : "";
            const isFixed = mapType === "" || mapType === "fixedValue";
            if (isFixed) {
                if (mapValue !== "")
                    knSet.add(mapValue);
            }
            else {
                // Keyed on all three fields so identical bindings collapse into one.
                nonFixed.set(`${inputName}|${mapType}|${mapValue}`, { input_name: inputName, map_type: mapType, map_value: mapValue });
            }
        }
    }
    const kn_ids = [...knSet].sort();
    const non_fixed_kn_bindings = [...nonFixed.values()].sort((a, b) => `${a.input_name}|${a.map_type}`.localeCompare(`${b.input_name}|${b.map_type}`)
        // map_value breaks ties between bindings that share a name and map_type.
        || a.map_value.localeCompare(b.map_value));
    return { agent_id: agentId, version, system_prompt, model, temperature, tools, kn_ids, non_fixed_kn_bindings };
}
|
|
66
|
+
/**
 * Render a string for a mismatch message.
 *
 * Strings of up to 80 characters are returned JSON-quoted. Longer strings are
 * cut to their first 60 characters, JSON-quoted with a trailing "…", and
 * followed by the original length, e.g. `"abc…"(312 chars)`.
 *
 * The truncated form is JSON-escaped as well, so embedded newlines or quotes
 * (common in system prompts) cannot break the one-line-per-mismatch layout.
 * The cut never ends on the first half of a surrogate pair.
 *
 * @param value String to render.
 * @returns A single-line, quoted representation.
 */
function repr(value) {
    if (value.length > 80) {
        let head = value.slice(0, 60);
        const lastUnit = head.charCodeAt(head.length - 1);
        // Drop a dangling high surrogate left behind by the cut.
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
            head = head.slice(0, -1);
        return `${JSON.stringify(`${head}…`)}(${value.length} chars)`;
    }
    return JSON.stringify(value);
}
|
|
72
|
+
/**
 * Position-wise comparison of two tool lists.
 *
 * @param a First list of `{ tool_id, tool_box_id }`.
 * @param b Second list of `{ tool_id, tool_box_id }`.
 * @returns true when both lists have the same length and every pair at the
 *          same index has equal `tool_id` and `tool_box_id`.
 */
function toolsEqual(a, b) {
    const sameLength = a.length === b.length;
    if (!sameLength) {
        return false;
    }
    const hasDifference = a.some((tool, index) => {
        const other = b[index];
        return other === undefined
            || tool.tool_id !== other.tool_id
            || tool.tool_box_id !== other.tool_box_id;
    });
    return !hasDifference;
}
|
|
80
|
+
/**
 * Render a tool list for a mismatch message.
 *
 * Each tool is shown as its `tool_id`, followed by `@tool_box_id` when a
 * non-empty tool box id is present. The tool box id is included because
 * toolsEqual compares it too: without it, two lists that differ only in
 * `tool_box_id` would be reported with identical expected/actual text.
 *
 * @param tools - Tools to render.
 * @returns "(none)" for an empty list, otherwise the comma-joined tool labels.
 */
function reprTools(tools) {
    if (tools.length === 0)
        return "(none)";
    return tools
        .map(t => (t.tool_box_id != null && t.tool_box_id !== "" ? `${t.tool_id}@${t.tool_box_id}` : t.tool_id))
        .join(",");
}
|
|
83
|
+
/**
 * Compare the agent under test with the expected fingerprint and throw a
 * PreflightMismatchError that lists every failed check.
 *
 * Checks, in reporting order:
 *   1. identity   — agent_id
 *   2. version    — version
 *   3. config     — system_prompt, model, temperature, tools
 *   4. KN binding — the agent is bound to exactly one KN id, equal to
 *                   evalTargetKn, and that binding is deterministic. Any
 *                   non-fixed binding (e.g. map_type "auto") fails this check
 *                   even when its map_value holds the right id, because the
 *                   id is then resolved at runtime.
 *
 * Check 4 is skipped when evalTargetKn is undefined: there is no declared
 * target to compare against.
 *
 * @param expected - Fingerprint the run is supposed to match.
 * @param actual - Fingerprint captured from the agent under test.
 * @param evalTargetKn - KN id declared by the eval set, or undefined.
 * @throws PreflightMismatchError when at least one check fails.
 */
export function preflightCheck(expected, actual, evalTargetKn) {
    const mismatches = [];
    // Plain fields compared with strict equality, in reporting order.
    for (const field of ["agent_id", "version", "system_prompt", "model"]) {
        if (expected[field] !== actual[field]) {
            mismatches.push({ field, expected: repr(expected[field]), actual: repr(actual[field]) });
        }
    }
    // NaN stands for "no temperature"; NaN !== NaN, so treat two NaNs as equal.
    const bothAbsent = Number.isNaN(expected.temperature) && Number.isNaN(actual.temperature);
    if (expected.temperature !== actual.temperature && !bothAbsent) {
        mismatches.push({ field: "temperature", expected: String(expected.temperature), actual: String(actual.temperature) });
    }
    if (!toolsEqual(expected.tools, actual.tools)) {
        mismatches.push({ field: "tools", expected: reprTools(expected.tools), actual: reprTools(actual.tools) });
    }
    if (evalTargetKn !== undefined) {
        const runtimeBindings = actual.non_fixed_kn_bindings ?? [];
        if (runtimeBindings.length > 0) {
            // A runtime-resolved binding never deterministically targets
            // evalTargetKn, whatever map_value says, so report it as such
            // rather than comparing ids.
            const detail = runtimeBindings
                .map(binding => `${binding.input_name}(map_type=${binding.map_type})`)
                .join(", ");
            mismatches.push({
                field: "kn_binding",
                expected: repr(evalTargetKn),
                actual: `non-deterministic — ${detail}: the KN id is resolved at runtime, the configured value is ignored. Set map_type to "fixedValue".`,
            });
        }
        else {
            const boundKn = actual.kn_ids;
            const bindsTargetOnly = boundKn.length === 1 && boundKn[0] === evalTargetKn;
            if (!bindsTargetOnly) {
                mismatches.push({
                    field: "kn_binding",
                    expected: repr(evalTargetKn),
                    actual: boundKn.length === 0 ? "(none)" : boundKn.map(repr).join(","),
                });
            }
        }
    }
    if (mismatches.length !== 0) {
        throw new PreflightMismatchError(mismatches);
    }
}
|
|
@@ -1,14 +1,3 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
export
|
|
3
|
-
|
|
4
|
-
candidateConfig: Record<string, unknown>;
|
|
5
|
-
prevRound?: RoundData;
|
|
6
|
-
prevRounds: RoundData[];
|
|
7
|
-
crossRoundMemoryRef?: string;
|
|
8
|
-
}
|
|
9
|
-
export interface SynthesizerClient {
|
|
10
|
-
generate(input: SynthesizerInput): Promise<NextChange>;
|
|
11
|
-
}
|
|
12
|
-
export declare class ClaudeCodeSynthesizer implements SynthesizerClient {
|
|
13
|
-
generate(input: SynthesizerInput): Promise<NextChange>;
|
|
14
|
-
}
|
|
1
|
+
import type { KnContext, SkillContext } from "../schemas.js";
|
|
2
|
+
/**
 * Builds a prompt string from a KN context.
 * NOTE(review): ambient declaration only — the prompt's wording and layout are
 * not visible here; confirm against the implementation.
 *
 * @param ctx - The KN context to render.
 * @returns The prompt text.
 */
export declare function buildKnContextPrompt(ctx: KnContext): string;
|
|
3
|
+
/**
 * Builds a prompt string from a skill context.
 * NOTE(review): ambient declaration only — the prompt's wording and layout are
 * not visible here; confirm against the implementation.
 *
 * @param ctx - The skill context to render.
 * @returns The prompt text.
 */
export declare function buildSkillContextPrompt(ctx: SkillContext): string;
|