@kweaver-ai/kweaver-sdk 0.8.3 → 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/dist/api/agent-chat.d.ts +10 -2
  2. package/dist/api/agent-chat.js +19 -5
  3. package/dist/api/datasources.d.ts +14 -0
  4. package/dist/api/datasources.js +14 -0
  5. package/dist/cli.js +2 -14
  6. package/dist/client.d.ts +7 -1
  7. package/dist/client.js +7 -1
  8. package/dist/commands/bkn-ops.d.ts +1 -1
  9. package/dist/commands/bkn-ops.js +42 -21
  10. package/dist/commands/bkn.js +6 -3
  11. package/dist/commands/ds.d.ts +0 -31
  12. package/dist/commands/ds.js +18 -448
  13. package/dist/commands/explore-bkn.d.ts +7 -1
  14. package/dist/commands/explore-bkn.js +32 -3
  15. package/dist/resources/datasources.d.ts +7 -0
  16. package/dist/resources/datasources.js +7 -0
  17. package/dist/templates/explorer/bkn.js +860 -9
  18. package/dist/templates/explorer/index.html +1 -0
  19. package/dist/templates/explorer/style.css +225 -0
  20. package/dist/templates/explorer/vendor/g6.min.js +68 -0
  21. package/dist/trace-ai/eval-set/schemas.d.ts +1 -0
  22. package/dist/trace-ai/eval-set/schemas.js +4 -0
  23. package/dist/trace-ai/eval-set/types.d.ts +2 -0
  24. package/dist/trace-ai/exp/capture-fingerprint.d.ts +10 -0
  25. package/dist/trace-ai/exp/capture-fingerprint.js +12 -0
  26. package/dist/trace-ai/exp/context/context-assembler.d.ts +18 -0
  27. package/dist/trace-ai/exp/context/context-assembler.js +42 -0
  28. package/dist/trace-ai/exp/context/failure-analyzer.d.ts +22 -0
  29. package/dist/trace-ai/exp/context/failure-analyzer.js +59 -0
  30. package/dist/trace-ai/exp/context/kn-data-prober.d.ts +13 -0
  31. package/dist/trace-ai/exp/context/kn-data-prober.js +38 -0
  32. package/dist/trace-ai/exp/context/kn-schema-client.d.ts +14 -0
  33. package/dist/trace-ai/exp/context/kn-schema-client.js +41 -0
  34. package/dist/trace-ai/exp/context/retrieval-health.d.ts +32 -0
  35. package/dist/trace-ai/exp/context/retrieval-health.js +138 -0
  36. package/dist/trace-ai/exp/context/vega-catalog-client.d.ts +14 -0
  37. package/dist/trace-ai/exp/context/vega-catalog-client.js +15 -0
  38. package/dist/trace-ai/exp/coordinator.d.ts +34 -21
  39. package/dist/trace-ai/exp/coordinator.js +246 -24
  40. package/dist/trace-ai/exp/eval-runner.js +4 -2
  41. package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +1 -0
  42. package/dist/trace-ai/exp/exp-store/events-jsonl.js +18 -0
  43. package/dist/trace-ai/exp/exp-store/expected-fingerprint.d.ts +3 -0
  44. package/dist/trace-ai/exp/exp-store/expected-fingerprint.js +31 -0
  45. package/dist/trace-ai/exp/exp-store/index.d.ts +63 -2
  46. package/dist/trace-ai/exp/exp-store/index.js +2 -1
  47. package/dist/trace-ai/exp/exp-store/rollback-yaml.d.ts +12 -0
  48. package/dist/trace-ai/exp/exp-store/rollback-yaml.js +29 -0
  49. package/dist/trace-ai/exp/index.d.ts +2 -0
  50. package/dist/trace-ai/exp/index.js +68 -3
  51. package/dist/trace-ai/exp/info.js +1 -1
  52. package/dist/trace-ai/exp/patch/index.d.ts +13 -2
  53. package/dist/trace-ai/exp/patch/index.js +65 -10
  54. package/dist/trace-ai/exp/patch/kn-api-client.d.ts +40 -0
  55. package/dist/trace-ai/exp/patch/kn-api-client.js +14 -0
  56. package/dist/trace-ai/exp/patch/kn.d.ts +8 -0
  57. package/dist/trace-ai/exp/patch/kn.js +36 -0
  58. package/dist/trace-ai/exp/patch/skill-api-client.d.ts +17 -0
  59. package/dist/trace-ai/exp/patch/skill-api-client.js +14 -0
  60. package/dist/trace-ai/exp/patch/skill-content.d.ts +9 -0
  61. package/dist/trace-ai/exp/patch/skill-content.js +12 -0
  62. package/dist/trace-ai/exp/preflight.d.ts +77 -0
  63. package/dist/trace-ai/exp/preflight.js +148 -0
  64. package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +3 -14
  65. package/dist/trace-ai/exp/providers/synthesizer-client.js +53 -35
  66. package/dist/trace-ai/exp/providers/triage-client.d.ts +15 -2
  67. package/dist/trace-ai/exp/providers/triage-client.js +143 -28
  68. package/dist/trace-ai/exp/run-preflight.d.ts +19 -0
  69. package/dist/trace-ai/exp/run-preflight.js +56 -0
  70. package/dist/trace-ai/exp/schemas.d.ts +402 -44
  71. package/dist/trace-ai/exp/schemas.js +131 -18
  72. package/dist/utils/deprecation.d.ts +1 -0
  73. package/dist/utils/deprecation.js +18 -0
  74. package/package.json +2 -1
@@ -26,6 +26,7 @@ export declare const EvalSetIndexSchema: z.ZodObject<{
26
26
  holdout: "holdout";
27
27
  }>>;
28
28
  }, z.core.$strip>>;
29
+ target_kn: z.ZodOptional<z.ZodString>;
29
30
  }, z.core.$strip>;
30
31
  export declare const EvalSetShardSchema: z.ZodObject<{
31
32
  schema_version: z.ZodLiteral<"trace-eval-set/v1">;
@@ -46,6 +46,10 @@ export const EvalSetIndexSchema = z.object({
46
46
  schema_version: z.literal("trace-eval-set-index/v1"),
47
47
  eval_set_id: z.string().min(1),
48
48
  shards: z.array(ShardRefSchema).min(1),
49
+ // KN id the reference answers were authored against. Optional for backward
50
+ // compatibility; when present, the exp loop's preflight check verifies the
51
+ // agent under test is bound to exactly this KN before running the round.
52
+ target_kn: z.string().min(1).optional(),
49
53
  });
50
54
  // ── trace-eval-set/v1 ────────────────────────────────────────────────────
51
55
  const refineCase = (data, ctx) => {
@@ -31,6 +31,8 @@ export interface EvalSetIndex {
31
31
  schema_version: "trace-eval-set-index/v1";
32
32
  eval_set_id: string;
33
33
  shards: EvalSetIndexShard[];
34
+ /** KN id the reference answers were authored against (see EvalSetIndexSchema). */
35
+ target_kn?: string;
34
36
  }
35
37
  export interface BuildResult {
36
38
  cases_written: number;
@@ -0,0 +1,10 @@
1
+ import type { AgentFingerprint } from "./preflight.js";
2
+ /** Fetches the full raw config object of an agent at a given version. */
3
+ export type AgentConfigFetcher = (agentId: string, version: string) => Promise<Record<string, unknown>>;
4
+ /**
5
+ * Capture the live agent's material configuration as an AgentFingerprint.
6
+ * The version is resolved from the returned config body (so a "latest" request
7
+ * records the concrete version actually fetched), falling back to the requested
8
+ * version when the body omits it.
9
+ */
10
+ export declare function captureAgentFingerprint(fetchConfig: AgentConfigFetcher, agentId: string, version: string): Promise<AgentFingerprint>;
@@ -0,0 +1,12 @@
1
+ import { fingerprintFromAgentConfig } from "./preflight.js";
2
+ /**
3
+ * Capture the live agent's material configuration as an AgentFingerprint.
4
+ * The version is resolved from the returned config body (so a "latest" request
5
+ * records the concrete version actually fetched), falling back to the requested
6
+ * version when the body omits it.
7
+ */
8
+ export async function captureAgentFingerprint(fetchConfig, agentId, version) {
9
+ const config = await fetchConfig(agentId, version);
10
+ const resolvedVersion = typeof config["version"] === "string" ? config["version"] : version;
11
+ return fingerprintFromAgentConfig(agentId, resolvedVersion, config);
12
+ }
@@ -0,0 +1,18 @@
1
+ import type { PatchTarget, KnContext, SkillContext, SkillBinding, QueryFailureAnalysis, KnSchemaSnapshot } from "../schemas.js";
2
+ import type { VegaCatalogClient } from "./vega-catalog-client.js";
3
+ import type { KnSchemaClient } from "./kn-schema-client.js";
4
+ import type { SkillApiClient } from "../patch/skill-api-client.js";
5
+ import type { DataProbe } from "./kn-data-prober.js";
6
+ type ProbeFn = (schema: KnSchemaSnapshot, failures: QueryFailureAnalysis[]) => Promise<DataProbe[]>;
7
+ export declare class ContextAssembler {
8
+ private knSchemaClient;
9
+ private vegaCatalogClient;
10
+ private skillApiClient;
11
+ private probeFn?;
12
+ constructor(knSchemaClient: KnSchemaClient, vegaCatalogClient: VegaCatalogClient, skillApiClient: SkillApiClient, probeFn?: ProbeFn | undefined);
13
+ assemble(suggestedTarget: PatchTarget, knId: string | undefined, boundSkills: SkillBinding[], failureAnalysis?: QueryFailureAnalysis[]): Promise<{
14
+ kn_context?: KnContext;
15
+ skill_context?: SkillContext;
16
+ }>;
17
+ }
18
+ export {};
@@ -0,0 +1,42 @@
1
+ export class ContextAssembler {
2
+ knSchemaClient;
3
+ vegaCatalogClient;
4
+ skillApiClient;
5
+ probeFn;
6
+ constructor(knSchemaClient, vegaCatalogClient, skillApiClient, probeFn) {
7
+ this.knSchemaClient = knSchemaClient;
8
+ this.vegaCatalogClient = vegaCatalogClient;
9
+ this.skillApiClient = skillApiClient;
10
+ this.probeFn = probeFn;
11
+ }
12
+ async assemble(suggestedTarget, knId, boundSkills, failureAnalysis) {
13
+ if (suggestedTarget === "kn.object_type" || suggestedTarget === "kn.relation_type") {
14
+ if (!knId)
15
+ throw new Error("kn_id is required for kn.* patch target but was not found in candidate.yaml");
16
+ const [existing_schema, available_dataviews] = await Promise.all([
17
+ this.knSchemaClient.getSchema(knId),
18
+ this.vegaCatalogClient.listDataviews({ knId }),
19
+ ]);
20
+ let data_probes;
21
+ if (this.probeFn && failureAnalysis && failureAnalysis.length > 0) {
22
+ try {
23
+ data_probes = await this.probeFn(existing_schema, failureAnalysis);
24
+ }
25
+ catch {
26
+ // probe is best-effort
27
+ }
28
+ }
29
+ return { kn_context: { kn_id: knId, existing_schema, available_dataviews, data_probes } };
30
+ }
31
+ if (suggestedTarget === "skill.content") {
32
+ const bound_skills = await Promise.all(boundSkills.map(async (s) => ({
33
+ id: s.id,
34
+ version: s.version,
35
+ content: await this.skillApiClient.getSkillContent(s.id),
36
+ })));
37
+ return { skill_context: { bound_skills } };
38
+ }
39
+ // agent.system_prompt / agent.skills: no platform data needed
40
+ return {};
41
+ }
42
+ }
@@ -0,0 +1,22 @@
1
+ import type { QueryResult, QueryFailureAnalysis } from "../schemas.js";
2
+ import type { TraceSpan } from "../../../api/conversations.js";
3
+ type FetchTraceFn = (conversationId: string) => Promise<{
4
+ spans: TraceSpan[];
5
+ }>;
6
+ /**
7
+ * Per failing query, pair the assertion failure with what the trace says the
8
+ * agent actually did — the tool calls it made and, crucially, whether it
9
+ * retrieved any KN data (`retrieval_health`). The retrieval-health signal lets
10
+ * triage tell a mechanism failure (agent never retrieved data) apart from a
11
+ * reasoning failure (retrieved data, answered wrong).
12
+ */
13
+ export declare function analyzeFailures(results: QueryResult[], fetchTrace?: FetchTraceFn): Promise<QueryFailureAnalysis[]>;
14
+ /**
15
+ * Did ANY query in the round show the agent retrieving KN data? Used to veto a
16
+ * mechanism-failure verdict: diagnoseMechanism only sees failing queries, so a
17
+ * mostly-healthy round (passing queries retrieved fine) that happens to have a
18
+ * few failing no-data queries must not be mistaken for a global wiring failure.
19
+ * Short-circuits on the first retrieval, so a healthy round costs ~one fetch.
20
+ */
21
+ export declare function roundRetrievedAnyData(results: QueryResult[], fetchTrace?: FetchTraceFn): Promise<boolean>;
22
+ export {};
@@ -0,0 +1,59 @@
1
+ import { extractToolCalls, healthFromToolCalls, summarizeToolCalls, } from "./retrieval-health.js";
2
+ const MAX_REASON_LEN = 200;
3
+ /**
4
+ * Per failing query, pair the assertion failure with what the trace says the
5
+ * agent actually did — the tool calls it made and, crucially, whether it
6
+ * retrieved any KN data (`retrieval_health`). The retrieval-health signal lets
7
+ * triage tell a mechanism failure (agent never retrieved data) apart from a
8
+ * reasoning failure (retrieved data, answered wrong).
9
+ */
10
+ export async function analyzeFailures(results, fetchTrace) {
11
+ const failing = results.filter(r => r.assertion_results.some(a => a.verdict === "fail" || a.verdict === "skip"));
12
+ return Promise.all(failing.map(async (r) => {
13
+ const worstAssertion = r.assertion_results.find(a => a.verdict === "fail")
14
+ ?? r.assertion_results.find(a => a.verdict === "skip");
15
+ const verdict = worstAssertion?.verdict === "fail" ? "fail" : "skip";
16
+ const rawReason = worstAssertion?.reason ?? "";
17
+ const assertion_reason = rawReason.slice(0, MAX_REASON_LEN);
18
+ // "no_trace" until a trace is fetched and parsed — covers both an absent
19
+ // fetcher/conversation_id and a fetch that throws.
20
+ let tool_call_summary = [];
21
+ let retrieval_health = "no_trace";
22
+ if (fetchTrace && r.conversation_id) {
23
+ try {
24
+ const { spans } = await fetchTrace(r.conversation_id);
25
+ const calls = extractToolCalls(spans);
26
+ tool_call_summary = summarizeToolCalls(calls);
27
+ retrieval_health = healthFromToolCalls(calls);
28
+ }
29
+ catch {
30
+ // trace fetch is best-effort; retrieval_health stays "no_trace"
31
+ }
32
+ }
33
+ return { query_id: r.query_id, verdict, assertion_reason, tool_call_summary, retrieval_health };
34
+ }));
35
+ }
36
+ /**
37
+ * Did ANY query in the round show the agent retrieving KN data? Used to veto a
38
+ * mechanism-failure verdict: diagnoseMechanism only sees failing queries, so a
39
+ * mostly-healthy round (passing queries retrieved fine) that happens to have a
40
+ * few failing no-data queries must not be mistaken for a global wiring failure.
41
+ * Short-circuits on the first retrieval, so a healthy round costs ~one fetch.
42
+ */
43
+ export async function roundRetrievedAnyData(results, fetchTrace) {
44
+ if (!fetchTrace)
45
+ return false;
46
+ for (const r of results) {
47
+ if (!r.conversation_id)
48
+ continue;
49
+ try {
50
+ const { spans } = await fetchTrace(r.conversation_id);
51
+ if (healthFromToolCalls(extractToolCalls(spans)) === "retrieved")
52
+ return true;
53
+ }
54
+ catch {
55
+ // trace fetch is best-effort
56
+ }
57
+ }
58
+ return false;
59
+ }
@@ -0,0 +1,13 @@
1
+ import type { KnSchemaSnapshot, QueryFailureAnalysis } from "../schemas.js";
2
+ import type { QueryResourceOptions, ResourceQueryResult } from "../../../api/resources.js";
3
+ type QueryResourceFn = (opts: Pick<QueryResourceOptions, "baseUrl" | "accessToken" | "id" | "needTotal" | "limit">) => Promise<ResourceQueryResult>;
4
+ export interface DataProbe {
5
+ concept_name: string;
6
+ data_view_id: string;
7
+ total_records: number;
8
+ }
9
+ export declare function probeObjectTypes(schema: KnSchemaSnapshot, failures: QueryFailureAnalysis[], queryResource: QueryResourceFn, opts?: {
10
+ baseUrl?: string;
11
+ accessToken?: string;
12
+ }): Promise<DataProbe[]>;
13
+ export {};
@@ -0,0 +1,38 @@
1
+ function extractConceptNames(failures) {
2
+ const names = new Set();
3
+ for (const f of failures) {
4
+ for (const call of f.tool_call_summary) {
5
+ const match = call.match(/kn_search\(([^)]+)\)/);
6
+ if (match)
7
+ names.add(match[1].trim());
8
+ }
9
+ }
10
+ return names;
11
+ }
12
+ export async function probeObjectTypes(schema, failures, queryResource, opts = {}) {
13
+ const mentionedConcepts = extractConceptNames(failures);
14
+ const toProbe = schema.object_types.filter(ot => ot.data_view_id && mentionedConcepts.has(ot.concept_name));
15
+ const seen = new Set();
16
+ const unique = toProbe.filter(ot => {
17
+ if (seen.has(ot.data_view_id))
18
+ return false;
19
+ seen.add(ot.data_view_id);
20
+ return true;
21
+ });
22
+ const results = await Promise.all(unique.map(async (ot) => {
23
+ try {
24
+ const result = await queryResource({
25
+ baseUrl: opts.baseUrl ?? "",
26
+ accessToken: opts.accessToken ?? "",
27
+ id: ot.data_view_id,
28
+ needTotal: true,
29
+ limit: 1,
30
+ });
31
+ return { concept_name: ot.concept_name, data_view_id: ot.data_view_id, total_records: result.total_count ?? 0 };
32
+ }
33
+ catch {
34
+ return null;
35
+ }
36
+ }));
37
+ return results.filter((r) => r !== null);
38
+ }
@@ -0,0 +1,14 @@
1
+ import type { KnSchemaSnapshot } from "../schemas.js";
2
+ import type { ContextLoaderCallOptions, SearchSchemaArgs, SearchSchemaResult } from "../../../api/context-loader.js";
3
+ export interface KnSchemaClient {
4
+ getSchema(knId: string): Promise<KnSchemaSnapshot>;
5
+ }
6
+ type SearchSchemaFn = (opts: ContextLoaderCallOptions, args: SearchSchemaArgs) => Promise<SearchSchemaResult>;
7
+ export declare class KweaverKnSchemaClient implements KnSchemaClient {
8
+ private mcpUrl;
9
+ private token;
10
+ private searchSchemaFn;
11
+ constructor(mcpUrl: string, token: string, searchSchemaFn?: SearchSchemaFn);
12
+ getSchema(knId: string): Promise<KnSchemaSnapshot>;
13
+ }
14
+ export {};
@@ -0,0 +1,41 @@
1
+ import { searchSchema } from "../../../api/context-loader.js";
2
+ export class KweaverKnSchemaClient {
3
+ mcpUrl;
4
+ token;
5
+ searchSchemaFn;
6
+ constructor(mcpUrl, token, searchSchemaFn = searchSchema) {
7
+ this.mcpUrl = mcpUrl;
8
+ this.token = token;
9
+ this.searchSchemaFn = searchSchemaFn;
10
+ }
11
+ async getSchema(knId) {
12
+ const opts = {
13
+ mcpUrl: this.mcpUrl,
14
+ accessToken: this.token,
15
+ knId,
16
+ };
17
+ const result = await this.searchSchemaFn(opts, {
18
+ query: "*",
19
+ response_format: "json",
20
+ schema_brief: true,
21
+ });
22
+ const rawObjectTypes = (result.object_types ?? []);
23
+ const object_types = rawObjectTypes.map(ot => {
24
+ const ds = ot["data_source"];
25
+ const props = ot["properties"] ?? [];
26
+ return {
27
+ concept_name: String(ot["concept_name"] ?? ""),
28
+ data_view_id: typeof ds?.["id"] === "string" ? ds["id"] : undefined,
29
+ fields: props.map(p => ({ name: String(p["name"] ?? ""), type: String(p["type"] ?? "string") })),
30
+ };
31
+ });
32
+ const rawRelTypes = (result.relation_types ?? []);
33
+ const relation_types = rawRelTypes.map(rt => ({
34
+ concept_name: String(rt["concept_name"] ?? rt["name"] ?? ""),
35
+ source: String(rt["source"] ?? ""),
36
+ target: String(rt["target"] ?? ""),
37
+ join_key: String(rt["join_key"] ?? ""),
38
+ }));
39
+ return { object_types, relation_types };
40
+ }
41
+ }
@@ -0,0 +1,32 @@
1
+ import type { TraceSpan } from "../../../api/conversations.js";
2
+ import type { QueryFailureAnalysis } from "../schemas.js";
3
+ export type ToolCallOutcome = "data" | "empty" | "error";
4
+ export interface ToolCallRecord {
5
+ tool_name: string;
6
+ /** Raw gen_ai.tool.call.arguments JSON string (may be ""). */
7
+ arguments: string;
8
+ outcome: ToolCallOutcome;
9
+ }
10
+ export type RetrievalHealth = "retrieved" | "empty" | "errored" | "no_kn_calls" | "no_trace";
11
+ export interface MechanismDiagnosis {
12
+ broken: boolean;
13
+ /** Root-cause message, populated only when broken. */
14
+ reason: string;
15
+ }
16
+ /** Extract every `execute_tool` call from a conversation's trace spans. */
17
+ export declare function extractToolCalls(spans: TraceSpan[]): ToolCallRecord[];
18
+ /** Render tool calls as `tool_name→outcome` strings for the triage prompt, capped. */
19
+ export declare function summarizeToolCalls(calls: ToolCallRecord[], max?: number): string[];
20
+ /**
21
+ * Reduce a query's tool calls to one retrieval-health verdict. "retrieved" wins
22
+ * as soon as any KN call returned data — one good retrieval proves the mechanism
23
+ * works. Otherwise "errored" outranks "empty" (an error is the stronger signal).
24
+ */
25
+ export declare function healthFromToolCalls(calls: ToolCallRecord[]): RetrievalHealth;
26
+ /**
27
+ * Roll per-query retrieval health up to a round-level verdict. The mechanism is
28
+ * "broken" when enough failing queries exercised the KN yet none retrieved any
29
+ * data — a fail-fast signal that the round measured a wiring failure, not the
30
+ * prompt. no_kn_calls / no_trace queries carry no evidence and are ignored.
31
+ */
32
+ export declare function diagnoseMechanism(analyses: QueryFailureAnalysis[]): MechanismDiagnosis;
@@ -0,0 +1,138 @@
1
+ /** KN retrieval/navigation tools — calls to these are what "retrieved KN data" means. */
2
+ const KN_RETRIEVAL_TOOLS = new Set([
3
+ "query_object_instance",
4
+ "kn_search",
5
+ "semantic_search",
6
+ "search_schema",
7
+ "get_logic_properties_values",
8
+ "dv_query",
9
+ // SQL aggregation over the KN's Vega resources — the agent's main data path
10
+ // for COUNT/SUM/GROUP BY/TOP-N. Omitting it makes the mechanism check blind to
11
+ // a SQL-capable agent and false-flag a healthy round as "retrieved no data".
12
+ "vega_sql_query",
13
+ ]);
14
+ const TOOL_SPAN_PREFIX = "execute_tool ";
15
+ /**
16
+ * Minimum no-data failing queries before the mechanism may be blamed. A handful
17
+ * of no-data failures can be legitimate (a genuinely hard question, or the agent
18
+ * building a bad query) — the guard needs enough of them to be confident. An
19
+ * eval set smaller than this can never trip the guard on count alone; that is
20
+ * acceptable because the round-level retrieval veto (roundRetrievedAnyData in
21
+ * failure-analyzer) is the real safety net against a false positive.
22
+ */
23
+ const MIN_MECHANISM_EVIDENCE = 3;
24
+ /**
25
+ * Classify a tool's result `answer` payload. Biased toward "data" on ambiguous
26
+ * objects: a false "data" only means a mechanism failure goes undetected (the
27
+ * loop behaves as before), whereas a false "empty"/"error" could wrongly fail a
28
+ * healthy round.
29
+ */
30
+ function classifyAnswer(answer) {
31
+ if (answer === null || answer === undefined)
32
+ return "empty";
33
+ // The error payload comes back as an SSE string carrying error_code.
34
+ if (typeof answer === "string")
35
+ return /error_code/.test(answer) ? "error" : "empty";
36
+ if (Array.isArray(answer))
37
+ return answer.length > 0 ? "data" : "empty";
38
+ if (typeof answer === "object") {
39
+ const obj = answer;
40
+ if (obj["error_code"])
41
+ return "error";
42
+ // Any non-empty array property counts as data — deliberately permissive per
43
+ // the bias-toward-"data" rationale above. A metadata array (e.g. warnings)
44
+ // could trip this; that is the acceptable direction to err.
45
+ for (const v of Object.values(obj)) {
46
+ if (Array.isArray(v) && v.length > 0)
47
+ return "data";
48
+ }
49
+ return "empty";
50
+ }
51
+ return "empty";
52
+ }
53
+ /** Classify a `gen_ai.tool.call.result` payload string. Defensive — never throws. */
54
+ function classifyResult(resultStr) {
55
+ if (typeof resultStr !== "string" || resultStr.trim() === "")
56
+ return "empty";
57
+ let parsed;
58
+ try {
59
+ parsed = JSON.parse(resultStr);
60
+ }
61
+ catch {
62
+ // The error payload is itself valid JSON, so a parse failure means an
63
+ // opaque payload — flag it as an error only if it carries an error marker.
64
+ return /error_code/.test(resultStr) ? "error" : "empty";
65
+ }
66
+ return classifyAnswer(parsed?.answer);
67
+ }
68
+ /** Extract every `execute_tool` call from a conversation's trace spans. */
69
+ export function extractToolCalls(spans) {
70
+ const calls = [];
71
+ for (const span of spans) {
72
+ const attrs = span.attributes ?? {};
73
+ const byAttr = attrs["gen_ai.operation.name"] === "execute_tool";
74
+ const byName = typeof span.name === "string" && span.name.startsWith(TOOL_SPAN_PREFIX);
75
+ if (!byAttr && !byName)
76
+ continue;
77
+ const toolName = typeof attrs["gen_ai.tool.name"] === "string" && attrs["gen_ai.tool.name"]
78
+ ? attrs["gen_ai.tool.name"]
79
+ : byName
80
+ ? span.name.slice(TOOL_SPAN_PREFIX.length)
81
+ : "";
82
+ if (!toolName)
83
+ continue;
84
+ const args = typeof attrs["gen_ai.tool.call.arguments"] === "string" ? attrs["gen_ai.tool.call.arguments"] : "";
85
+ const result = typeof attrs["gen_ai.tool.call.result"] === "string" ? attrs["gen_ai.tool.call.result"] : "";
86
+ calls.push({ tool_name: toolName, arguments: args, outcome: classifyResult(result) });
87
+ }
88
+ return calls;
89
+ }
90
+ /** Render tool calls as `tool_name→outcome` strings for the triage prompt, capped. */
91
+ export function summarizeToolCalls(calls, max = 8) {
92
+ return calls.slice(0, max).map(c => `${c.tool_name}→${c.outcome}`);
93
+ }
94
+ /**
95
+ * Reduce a query's tool calls to one retrieval-health verdict. "retrieved" wins
96
+ * as soon as any KN call returned data — one good retrieval proves the mechanism
97
+ * works. Otherwise "errored" outranks "empty" (an error is the stronger signal).
98
+ */
99
+ export function healthFromToolCalls(calls) {
100
+ const knCalls = calls.filter(c => KN_RETRIEVAL_TOOLS.has(c.tool_name));
101
+ if (knCalls.length === 0)
102
+ return "no_kn_calls";
103
+ if (knCalls.some(c => c.outcome === "data"))
104
+ return "retrieved";
105
+ if (knCalls.some(c => c.outcome === "error"))
106
+ return "errored";
107
+ return "empty";
108
+ }
109
+ /**
110
+ * Roll per-query retrieval health up to a round-level verdict. The mechanism is
111
+ * "broken" when enough failing queries exercised the KN yet none retrieved any
112
+ * data — a fail-fast signal that the round measured a wiring failure, not the
113
+ * prompt. no_kn_calls / no_trace queries carry no evidence and are ignored.
114
+ */
115
+ export function diagnoseMechanism(analyses) {
116
+ let retrieved = 0;
117
+ let errored = 0;
118
+ let empty = 0;
119
+ for (const a of analyses) {
120
+ if (a.retrieval_health === "retrieved")
121
+ retrieved++;
122
+ else if (a.retrieval_health === "errored")
123
+ errored++;
124
+ else if (a.retrieval_health === "empty")
125
+ empty++;
126
+ }
127
+ const noData = errored + empty;
128
+ if (retrieved > 0 || noData < MIN_MECHANISM_EVIDENCE) {
129
+ return { broken: false, reason: "" };
130
+ }
131
+ return {
132
+ broken: true,
133
+ reason: `Mechanism failure: ${noData} failing queries exercised the KN but none retrieved data ` +
134
+ `(${errored} errored, ${empty} empty), and no failing query retrieved any KN data. ` +
135
+ `The agent is not retrieving from the knowledge network — this is not a prompt problem. ` +
136
+ `Check the agent's KN binding (kn_id map_type must be "fixedValue") and that the bound KN holds data.`,
137
+ };
138
+ }
@@ -0,0 +1,14 @@
1
+ import type { VegaCatalogEntry } from "../schemas.js";
2
+ export interface VegaCatalogClient {
3
+ listDataviews(filter?: {
4
+ knId?: string;
5
+ }): Promise<VegaCatalogEntry[]>;
6
+ }
7
+ export declare class KweaverVegaCatalogClient implements VegaCatalogClient {
8
+ private baseUrl;
9
+ private token;
10
+ constructor(baseUrl: string, token: string);
11
+ listDataviews(_filter?: {
12
+ knId?: string;
13
+ }): Promise<VegaCatalogEntry[]>;
14
+ }
@@ -0,0 +1,15 @@
1
+ // Stub: replace body with real Vega API calls when endpoint is confirmed
2
+ export class KweaverVegaCatalogClient {
3
+ baseUrl;
4
+ token;
5
+ constructor(baseUrl, token) {
6
+ this.baseUrl = baseUrl;
7
+ this.token = token;
8
+ }
9
+ async listDataviews(_filter) {
10
+ // TODO: GET {baseUrl}/api/vega/v1/dataviews?kn_id={filter.knId}
11
+ // Response shape: [{ id, name, columns: [{ name, type }] }]
12
+ // Intentionally returns empty — data_probes from KnDataProber is the primary enrichment path
13
+ return [];
14
+ }
15
+ }
@@ -1,26 +1,13 @@
1
- import type { Mission, NextChange, QueryResult, RoundData } from "./schemas.js";
2
- export interface SynthesizerClient {
3
- generate(input: {
4
- mission: Mission;
5
- candidateConfig: Record<string, unknown>;
6
- prevRound?: RoundData;
7
- prevRounds: RoundData[];
8
- crossRoundMemoryRef?: string;
9
- }): Promise<NextChange>;
10
- }
11
- export interface TriageClient {
12
- triage(input: {
13
- currentRound: RoundData;
14
- prevRounds: RoundData[];
15
- candidateConfig: Record<string, unknown>;
16
- crossRoundMemoryRef?: string;
17
- }): Promise<RoundData["triage_conclusion"] & {
18
- new_memory_token: string;
19
- }>;
20
- }
1
+ import type { KnApiClient } from "./patch/kn-api-client.js";
2
+ import type { SkillApiClient } from "./patch/skill-api-client.js";
3
+ import { ContextAssembler } from "./context/context-assembler.js";
4
+ import type { TriageClient, TriageResult } from "./providers/triage-client.js";
5
+ import type { AgentConfigFetcher } from "./capture-fingerprint.js";
6
+ import type { QueryResult } from "./schemas.js";
7
+ import type { TraceSpan } from "../../api/conversations.js";
8
+ export type { TriageClient, TriageResult };
21
9
  export interface CoordinatorOpts {
22
10
  expDir: string;
23
- synthesizer: SynthesizerClient;
24
11
  triage: TriageClient;
25
12
  runEval: (opts: {
26
13
  evalSetPaths: string[];
@@ -31,6 +18,18 @@ export interface CoordinatorOpts {
31
18
  queryResults: QueryResult[];
32
19
  }>;
33
20
  experimentId?: string;
21
+ contextAssembler?: ContextAssembler;
22
+ fetchTrace?: (conversationId: string) => Promise<{
23
+ spans: TraceSpan[];
24
+ }>;
25
+ knClient?: KnApiClient;
26
+ skillClient?: SkillApiClient;
27
+ /**
28
+ * When provided, a preflight reconciliation runs before each eval round —
29
+ * verifying the live agent matches expectation and is bound to the eval set's
30
+ * target KN. A mismatch fails the round fast, before any eval chat is sent.
31
+ */
32
+ fetchAgentConfig?: AgentConfigFetcher;
34
33
  }
35
34
  export declare class ExperimentCoordinator {
36
35
  private opts;
@@ -39,6 +38,20 @@ export declare class ExperimentCoordinator {
39
38
  constructor(opts: CoordinatorOpts);
40
39
  run(): Promise<void>;
41
40
  resume(): Promise<void>;
41
+ /**
42
+ * Install SIGINT/SIGHUP/SIGTERM handlers that flush a final event and release
43
+ * the lock before exit. Returns an uninstaller that MUST be called in the
44
+ * caller's finally block (otherwise normal exit would still fire the handler).
45
+ *
46
+ * Semantics:
47
+ * SIGINT → user-intent abort → emit `aborted` event (terminal)
48
+ * SIGHUP → terminal closed → emit `step_failed` retryable
49
+ * SIGTERM → external kill (ambig.) → emit `step_failed` retryable
50
+ *
51
+ * SIGKILL / OOM / power loss can't be caught here — Layer 2 auto-recovery in
52
+ * run() handles that case on the next start.
53
+ */
54
+ private installSignalHandlers;
42
55
  private runLoop;
43
56
  private checkAbort;
44
57
  private withRetry;