@kweaver-ai/kweaver-sdk 0.8.3 → 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/agent-chat.d.ts +10 -2
- package/dist/api/agent-chat.js +19 -5
- package/dist/api/datasources.d.ts +14 -0
- package/dist/api/datasources.js +14 -0
- package/dist/cli.js +2 -14
- package/dist/client.d.ts +7 -1
- package/dist/client.js +7 -1
- package/dist/commands/bkn-ops.d.ts +1 -1
- package/dist/commands/bkn-ops.js +42 -21
- package/dist/commands/bkn.js +6 -3
- package/dist/commands/ds.d.ts +0 -31
- package/dist/commands/ds.js +18 -448
- package/dist/commands/explore-bkn.d.ts +7 -1
- package/dist/commands/explore-bkn.js +32 -3
- package/dist/resources/datasources.d.ts +7 -0
- package/dist/resources/datasources.js +7 -0
- package/dist/templates/explorer/bkn.js +860 -9
- package/dist/templates/explorer/index.html +1 -0
- package/dist/templates/explorer/style.css +225 -0
- package/dist/templates/explorer/vendor/g6.min.js +68 -0
- package/dist/trace-ai/eval-set/schemas.d.ts +1 -0
- package/dist/trace-ai/eval-set/schemas.js +4 -0
- package/dist/trace-ai/eval-set/types.d.ts +2 -0
- package/dist/trace-ai/exp/capture-fingerprint.d.ts +10 -0
- package/dist/trace-ai/exp/capture-fingerprint.js +12 -0
- package/dist/trace-ai/exp/context/context-assembler.d.ts +18 -0
- package/dist/trace-ai/exp/context/context-assembler.js +42 -0
- package/dist/trace-ai/exp/context/failure-analyzer.d.ts +22 -0
- package/dist/trace-ai/exp/context/failure-analyzer.js +59 -0
- package/dist/trace-ai/exp/context/kn-data-prober.d.ts +13 -0
- package/dist/trace-ai/exp/context/kn-data-prober.js +38 -0
- package/dist/trace-ai/exp/context/kn-schema-client.d.ts +14 -0
- package/dist/trace-ai/exp/context/kn-schema-client.js +41 -0
- package/dist/trace-ai/exp/context/retrieval-health.d.ts +32 -0
- package/dist/trace-ai/exp/context/retrieval-health.js +138 -0
- package/dist/trace-ai/exp/context/vega-catalog-client.d.ts +14 -0
- package/dist/trace-ai/exp/context/vega-catalog-client.js +15 -0
- package/dist/trace-ai/exp/coordinator.d.ts +34 -21
- package/dist/trace-ai/exp/coordinator.js +246 -24
- package/dist/trace-ai/exp/eval-runner.js +4 -2
- package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +1 -0
- package/dist/trace-ai/exp/exp-store/events-jsonl.js +18 -0
- package/dist/trace-ai/exp/exp-store/expected-fingerprint.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/expected-fingerprint.js +31 -0
- package/dist/trace-ai/exp/exp-store/index.d.ts +63 -2
- package/dist/trace-ai/exp/exp-store/index.js +2 -1
- package/dist/trace-ai/exp/exp-store/rollback-yaml.d.ts +12 -0
- package/dist/trace-ai/exp/exp-store/rollback-yaml.js +29 -0
- package/dist/trace-ai/exp/index.d.ts +2 -0
- package/dist/trace-ai/exp/index.js +68 -3
- package/dist/trace-ai/exp/info.js +1 -1
- package/dist/trace-ai/exp/patch/index.d.ts +13 -2
- package/dist/trace-ai/exp/patch/index.js +65 -10
- package/dist/trace-ai/exp/patch/kn-api-client.d.ts +40 -0
- package/dist/trace-ai/exp/patch/kn-api-client.js +14 -0
- package/dist/trace-ai/exp/patch/kn.d.ts +8 -0
- package/dist/trace-ai/exp/patch/kn.js +36 -0
- package/dist/trace-ai/exp/patch/skill-api-client.d.ts +17 -0
- package/dist/trace-ai/exp/patch/skill-api-client.js +14 -0
- package/dist/trace-ai/exp/patch/skill-content.d.ts +9 -0
- package/dist/trace-ai/exp/patch/skill-content.js +12 -0
- package/dist/trace-ai/exp/preflight.d.ts +77 -0
- package/dist/trace-ai/exp/preflight.js +148 -0
- package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +3 -14
- package/dist/trace-ai/exp/providers/synthesizer-client.js +53 -35
- package/dist/trace-ai/exp/providers/triage-client.d.ts +15 -2
- package/dist/trace-ai/exp/providers/triage-client.js +143 -28
- package/dist/trace-ai/exp/run-preflight.d.ts +19 -0
- package/dist/trace-ai/exp/run-preflight.js +56 -0
- package/dist/trace-ai/exp/schemas.d.ts +402 -44
- package/dist/trace-ai/exp/schemas.js +131 -18
- package/dist/utils/deprecation.d.ts +1 -0
- package/dist/utils/deprecation.js +18 -0
- package/package.json +2 -1
|
@@ -26,6 +26,7 @@ export declare const EvalSetIndexSchema: z.ZodObject<{
|
|
|
26
26
|
holdout: "holdout";
|
|
27
27
|
}>>;
|
|
28
28
|
}, z.core.$strip>>;
|
|
29
|
+
target_kn: z.ZodOptional<z.ZodString>;
|
|
29
30
|
}, z.core.$strip>;
|
|
30
31
|
export declare const EvalSetShardSchema: z.ZodObject<{
|
|
31
32
|
schema_version: z.ZodLiteral<"trace-eval-set/v1">;
|
|
@@ -46,6 +46,10 @@ export const EvalSetIndexSchema = z.object({
|
|
|
46
46
|
schema_version: z.literal("trace-eval-set-index/v1"),
|
|
47
47
|
eval_set_id: z.string().min(1),
|
|
48
48
|
shards: z.array(ShardRefSchema).min(1),
|
|
49
|
+
// KN id the reference answers were authored against. Optional for backward
|
|
50
|
+
// compatibility; when present, the exp loop's preflight check verifies the
|
|
51
|
+
// agent under test is bound to exactly this KN before running the round.
|
|
52
|
+
target_kn: z.string().min(1).optional(),
|
|
49
53
|
});
|
|
50
54
|
// ── trace-eval-set/v1 ────────────────────────────────────────────────────
|
|
51
55
|
const refineCase = (data, ctx) => {
|
|
@@ -31,6 +31,8 @@ export interface EvalSetIndex {
|
|
|
31
31
|
schema_version: "trace-eval-set-index/v1";
|
|
32
32
|
eval_set_id: string;
|
|
33
33
|
shards: EvalSetIndexShard[];
|
|
34
|
+
/** KN id the reference answers were authored against (see EvalSetIndexSchema). */
|
|
35
|
+
target_kn?: string;
|
|
34
36
|
}
|
|
35
37
|
export interface BuildResult {
|
|
36
38
|
cases_written: number;
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { AgentFingerprint } from "./preflight.js";
|
|
2
|
+
/** Fetches the full raw config object of an agent at a given version. */
|
|
3
|
+
export type AgentConfigFetcher = (agentId: string, version: string) => Promise<Record<string, unknown>>;
|
|
4
|
+
/**
|
|
5
|
+
* Capture the live agent's material configuration as an AgentFingerprint.
|
|
6
|
+
* The version is resolved from the returned config body (so a "latest" request
|
|
7
|
+
* records the concrete version actually fetched), falling back to the requested
|
|
8
|
+
* version when the body omits it.
|
|
9
|
+
*/
|
|
10
|
+
export declare function captureAgentFingerprint(fetchConfig: AgentConfigFetcher, agentId: string, version: string): Promise<AgentFingerprint>;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { fingerprintFromAgentConfig } from "./preflight.js";
|
|
2
|
+
/**
 * Snapshot a live agent's material configuration as an AgentFingerprint.
 *
 * The recorded version prefers the string `version` field of the fetched
 * config body, so asking for e.g. "latest" records the concrete version that
 * was actually returned. When the body carries no string `version`, the
 * requested version is recorded instead.
 *
 * @param fetchConfig Async loader called as `fetchConfig(agentId, version)`; resolves to the raw config object.
 * @param agentId Agent identifier, forwarded to the loader and to the fingerprint builder.
 * @param version Requested version, also the fallback recorded version.
 * @returns The result of `fingerprintFromAgentConfig(agentId, <resolved version>, <config>)`.
 */
export async function captureAgentFingerprint(fetchConfig, agentId, version) {
    const rawConfig = await fetchConfig(agentId, version);
    const reportedVersion = rawConfig["version"];
    let effectiveVersion = version;
    if (typeof reportedVersion === "string") {
        effectiveVersion = reportedVersion;
    }
    return fingerprintFromAgentConfig(agentId, effectiveVersion, rawConfig);
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { PatchTarget, KnContext, SkillContext, SkillBinding, QueryFailureAnalysis, KnSchemaSnapshot } from "../schemas.js";
|
|
2
|
+
import type { VegaCatalogClient } from "./vega-catalog-client.js";
|
|
3
|
+
import type { KnSchemaClient } from "./kn-schema-client.js";
|
|
4
|
+
import type { SkillApiClient } from "../patch/skill-api-client.js";
|
|
5
|
+
import type { DataProbe } from "./kn-data-prober.js";
|
|
6
|
+
type ProbeFn = (schema: KnSchemaSnapshot, failures: QueryFailureAnalysis[]) => Promise<DataProbe[]>;
|
|
7
|
+
export declare class ContextAssembler {
|
|
8
|
+
private knSchemaClient;
|
|
9
|
+
private vegaCatalogClient;
|
|
10
|
+
private skillApiClient;
|
|
11
|
+
private probeFn?;
|
|
12
|
+
constructor(knSchemaClient: KnSchemaClient, vegaCatalogClient: VegaCatalogClient, skillApiClient: SkillApiClient, probeFn?: ProbeFn | undefined);
|
|
13
|
+
assemble(suggestedTarget: PatchTarget, knId: string | undefined, boundSkills: SkillBinding[], failureAnalysis?: QueryFailureAnalysis[]): Promise<{
|
|
14
|
+
kn_context?: KnContext;
|
|
15
|
+
skill_context?: SkillContext;
|
|
16
|
+
}>;
|
|
17
|
+
}
|
|
18
|
+
export {};
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
 * Collects the platform-side context needed for a suggested patch target.
 *
 * - `kn.object_type` / `kn.relation_type`: the KN schema, the dataview listing
 *   and, when a probe function and failure analyses are available, data probes.
 * - `skill.content`: the content of every bound skill.
 * - any other target: no context (empty object).
 */
export class ContextAssembler {
    knSchemaClient;
    vegaCatalogClient;
    skillApiClient;
    probeFn;
    /**
     * @param knSchemaClient Object exposing `getSchema(knId)`.
     * @param vegaCatalogClient Object exposing `listDataviews({ knId })`.
     * @param skillApiClient Object exposing `getSkillContent(skillId)`.
     * @param probeFn Optional `(schema, failures) => Promise<probes>` used for best-effort data probing.
     */
    constructor(knSchemaClient, vegaCatalogClient, skillApiClient, probeFn) {
        Object.assign(this, { knSchemaClient, vegaCatalogClient, skillApiClient, probeFn });
    }
    /**
     * Build the context object for `suggestedTarget`.
     *
     * @param suggestedTarget Patch target string that selects which context is gathered.
     * @param knId KN id; mandatory for the `kn.*` targets.
     * @param boundSkills Skill bindings (`id`, `version`) whose content is loaded for `skill.content`.
     * @param failureAnalysis Optional failure analyses handed to the probe function.
     * @returns `{ kn_context }`, `{ skill_context }` or `{}` depending on the target.
     * @throws Error when a `kn.*` target is requested without a `knId`.
     */
    async assemble(suggestedTarget, knId, boundSkills, failureAnalysis) {
        switch (suggestedTarget) {
            case "kn.object_type":
            case "kn.relation_type": {
                if (!knId) {
                    throw new Error("kn_id is required for kn.* patch target but was not found in candidate.yaml");
                }
                // Both lookups are started before either is awaited.
                const schemaRequest = this.knSchemaClient.getSchema(knId);
                const dataviewsRequest = this.vegaCatalogClient.listDataviews({ knId });
                const [existing_schema, available_dataviews] = await Promise.all([schemaRequest, dataviewsRequest]);
                let data_probes;
                const hasFailures = Boolean(failureAnalysis) && failureAnalysis.length > 0;
                if (this.probeFn && hasFailures) {
                    try {
                        data_probes = await this.probeFn(existing_schema, failureAnalysis);
                    }
                    catch {
                        // Probing is best-effort: on failure data_probes simply stays undefined.
                    }
                }
                const kn_context = { kn_id: knId, existing_schema, available_dataviews, data_probes };
                return { kn_context };
            }
            case "skill.content": {
                const loadSkill = async (binding) => {
                    const { id, version } = binding;
                    const content = await this.skillApiClient.getSkillContent(id);
                    return { id, version, content };
                };
                const bound_skills = await Promise.all(boundSkills.map((binding) => loadSkill(binding)));
                return { skill_context: { bound_skills } };
            }
            default:
                // agent.system_prompt / agent.skills: no platform data needed
                return {};
        }
    }
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { QueryResult, QueryFailureAnalysis } from "../schemas.js";
|
|
2
|
+
import type { TraceSpan } from "../../../api/conversations.js";
|
|
3
|
+
type FetchTraceFn = (conversationId: string) => Promise<{
|
|
4
|
+
spans: TraceSpan[];
|
|
5
|
+
}>;
|
|
6
|
+
/**
|
|
7
|
+
* Per failing query, pair the assertion failure with what the trace says the
|
|
8
|
+
* agent actually did — the tool calls it made and, crucially, whether it
|
|
9
|
+
* retrieved any KN data (`retrieval_health`). The retrieval-health signal lets
|
|
10
|
+
* triage tell a mechanism failure (agent never retrieved data) apart from a
|
|
11
|
+
* reasoning failure (retrieved data, answered wrong).
|
|
12
|
+
*/
|
|
13
|
+
export declare function analyzeFailures(results: QueryResult[], fetchTrace?: FetchTraceFn): Promise<QueryFailureAnalysis[]>;
|
|
14
|
+
/**
|
|
15
|
+
* Did ANY query in the round show the agent retrieving KN data? Used to veto a
|
|
16
|
+
* mechanism-failure verdict: diagnoseMechanism only sees failing queries, so a
|
|
17
|
+
* mostly-healthy round (passing queries retrieved fine) that happens to have a
|
|
18
|
+
* few failing no-data queries must not be mistaken for a global wiring failure.
|
|
19
|
+
* Short-circuits on the first retrieval, so a healthy round costs ~one fetch.
|
|
20
|
+
*/
|
|
21
|
+
export declare function roundRetrievedAnyData(results: QueryResult[], fetchTrace?: FetchTraceFn): Promise<boolean>;
|
|
22
|
+
export {};
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { extractToolCalls, healthFromToolCalls, summarizeToolCalls, } from "./retrieval-health.js";
|
|
2
|
+
/** Maximum number of characters of an assertion reason kept in a failure analysis. */
const MAX_REASON_LEN = 200;
/**
 * Build one failure analysis per query that has at least one "fail" or "skip"
 * assertion.
 *
 * Each analysis pairs the worst assertion (the first "fail", otherwise the
 * first "skip") with what the conversation trace shows: a summary of the tool
 * calls and a `retrieval_health` verdict. When no trace can be used — no
 * fetcher, no conversation id, or the fetch/parsing throws — the summary stays
 * empty and `retrieval_health` stays "no_trace".
 *
 * @param results Query results carrying `query_id`, `assertion_results` and optionally `conversation_id`.
 * @param fetchTrace Optional async loader resolving to `{ spans }` for a conversation id.
 * @returns Promise of the analyses, in the order of the failing queries.
 */
export async function analyzeFailures(results, fetchTrace) {
    const needsAnalysis = (assertion) => assertion.verdict === "fail" || assertion.verdict === "skip";
    const describe = async (result) => {
        const firstWithVerdict = (wanted) => result.assertion_results.find((a) => a.verdict === wanted);
        const worst = firstWithVerdict("fail") ?? firstWithVerdict("skip");
        const verdict = worst?.verdict === "fail" ? "fail" : "skip";
        const assertion_reason = (worst?.reason ?? "").slice(0, MAX_REASON_LEN);
        // Defaults describe the "no usable trace" situation and are only
        // overwritten step by step as the trace is fetched and parsed.
        let tool_call_summary = [];
        let retrieval_health = "no_trace";
        const traceable = Boolean(fetchTrace) && Boolean(result.conversation_id);
        if (traceable) {
            try {
                const trace = await fetchTrace(result.conversation_id);
                const calls = extractToolCalls(trace.spans);
                tool_call_summary = summarizeToolCalls(calls);
                retrieval_health = healthFromToolCalls(calls);
            }
            catch {
                // Trace inspection is best-effort; keep whatever was established so far.
            }
        }
        return { query_id: result.query_id, verdict, assertion_reason, tool_call_summary, retrieval_health };
    };
    const failing = results.filter((r) => r.assertion_results.some(needsAnalysis));
    return Promise.all(failing.map((r) => describe(r)));
}
|
|
36
|
+
/**
 * Report whether at least one query of the round shows a "retrieved" trace.
 *
 * Queries are inspected one after another and the scan stops at the first
 * conversation whose tool calls reduce to "retrieved". Queries without a
 * conversation id are skipped, and a trace that cannot be fetched or parsed is
 * treated as "no evidence" rather than as an error.
 *
 * @param results Query results; only `conversation_id` is read.
 * @param fetchTrace Optional async loader resolving to `{ spans }`; without it the answer is always false.
 * @returns true as soon as one conversation is classified "retrieved", otherwise false.
 */
export async function roundRetrievedAnyData(results, fetchTrace) {
    if (fetchTrace) {
        for (const { conversation_id: conversationId } of results) {
            if (conversationId) {
                try {
                    const trace = await fetchTrace(conversationId);
                    const health = healthFromToolCalls(extractToolCalls(trace.spans));
                    if (health === "retrieved") {
                        return true;
                    }
                }
                catch {
                    // Best-effort: a failing trace lookup just moves on to the next query.
                }
            }
        }
    }
    return false;
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { KnSchemaSnapshot, QueryFailureAnalysis } from "../schemas.js";
|
|
2
|
+
import type { QueryResourceOptions, ResourceQueryResult } from "../../../api/resources.js";
|
|
3
|
+
type QueryResourceFn = (opts: Pick<QueryResourceOptions, "baseUrl" | "accessToken" | "id" | "needTotal" | "limit">) => Promise<ResourceQueryResult>;
|
|
4
|
+
export interface DataProbe {
|
|
5
|
+
concept_name: string;
|
|
6
|
+
data_view_id: string;
|
|
7
|
+
total_records: number;
|
|
8
|
+
}
|
|
9
|
+
export declare function probeObjectTypes(schema: KnSchemaSnapshot, failures: QueryFailureAnalysis[], queryResource: QueryResourceFn, opts?: {
|
|
10
|
+
baseUrl?: string;
|
|
11
|
+
accessToken?: string;
|
|
12
|
+
}): Promise<DataProbe[]>;
|
|
13
|
+
export {};
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
 * Gather the distinct concept names found in `kn_search(<name>)` entries of the
 * failures' tool-call summaries. Only the first `kn_search(...)` occurrence of
 * each entry is considered and the captured name is trimmed.
 *
 * NOTE(review): `summarizeToolCalls` in retrieval-health renders entries as
 * `tool_name→outcome`, which this pattern does not match — confirm which
 * producer emits `kn_search(...)`-shaped summaries.
 *
 * @param failures Failure analyses; each must expose an iterable `tool_call_summary` of strings.
 * @returns Set of concept names in first-seen order.
 */
function extractConceptNames(failures) {
    const knSearchPattern = /kn_search\(([^)]+)\)/;
    const conceptNames = new Set();
    for (const { tool_call_summary: summaries } of failures) {
        for (const entry of summaries) {
            const [, captured] = entry.match(knSearchPattern) ?? [];
            if (captured !== undefined) {
                conceptNames.add(captured.trim());
            }
        }
    }
    return conceptNames;
}
|
|
12
|
+
/**
 * Count the records behind the object types that the failing queries mention.
 *
 * An object type is probed when it has a `data_view_id` and its `concept_name`
 * is among the names extracted from the failures; each data view is probed at
 * most once (the first matching object type wins). Every probe asks the
 * resource query for the total with `limit: 1`. A probe that throws is dropped
 * from the result instead of failing the whole call.
 *
 * @param schema Schema snapshot; `object_types` is scanned.
 * @param failures Failure analyses used to find the mentioned concept names.
 * @param queryResource Async resource query taking `{ baseUrl, accessToken, id, needTotal, limit }`.
 * @param opts Optional `baseUrl` / `accessToken`; each defaults to "".
 * @returns Promise of `{ concept_name, data_view_id, total_records }` entries (a missing total counts as 0).
 */
export async function probeObjectTypes(schema, failures, queryResource, opts = {}) {
    const wantedConcepts = extractConceptNames(failures);
    const claimedViews = new Set();
    const targets = [];
    for (const objectType of schema.object_types) {
        const viewId = objectType.data_view_id;
        if (!viewId || !wantedConcepts.has(objectType.concept_name) || claimedViews.has(viewId)) {
            continue;
        }
        claimedViews.add(viewId);
        targets.push(objectType);
    }
    const probeOne = async (objectType) => {
        try {
            const { total_count } = await queryResource({
                baseUrl: opts.baseUrl ?? "",
                accessToken: opts.accessToken ?? "",
                id: objectType.data_view_id,
                needTotal: true,
                limit: 1,
            });
            return {
                concept_name: objectType.concept_name,
                data_view_id: objectType.data_view_id,
                total_records: total_count ?? 0,
            };
        }
        catch {
            // A failed probe is reported as "no probe" rather than as an error.
            return null;
        }
    };
    const probes = await Promise.all(targets.map((objectType) => probeOne(objectType)));
    return probes.filter((probe) => probe !== null);
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { KnSchemaSnapshot } from "../schemas.js";
|
|
2
|
+
import type { ContextLoaderCallOptions, SearchSchemaArgs, SearchSchemaResult } from "../../../api/context-loader.js";
|
|
3
|
+
export interface KnSchemaClient {
|
|
4
|
+
getSchema(knId: string): Promise<KnSchemaSnapshot>;
|
|
5
|
+
}
|
|
6
|
+
type SearchSchemaFn = (opts: ContextLoaderCallOptions, args: SearchSchemaArgs) => Promise<SearchSchemaResult>;
|
|
7
|
+
export declare class KweaverKnSchemaClient implements KnSchemaClient {
|
|
8
|
+
private mcpUrl;
|
|
9
|
+
private token;
|
|
10
|
+
private searchSchemaFn;
|
|
11
|
+
constructor(mcpUrl: string, token: string, searchSchemaFn?: SearchSchemaFn);
|
|
12
|
+
getSchema(knId: string): Promise<KnSchemaSnapshot>;
|
|
13
|
+
}
|
|
14
|
+
export {};
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { searchSchema } from "../../../api/context-loader.js";
|
|
2
|
+
/**
 * Normalize a value taken from the schema-search response into an array of
 * plain records: anything that is not an array becomes `[]`, and entries that
 * are not objects (including `null`) are dropped.
 */
function toRecordList(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    return value.filter((entry) => entry !== null && typeof entry === "object");
}
/**
 * KnSchemaClient that reads a KN's schema through the context-loader
 * schema search and reshapes the response into a schema snapshot.
 */
export class KweaverKnSchemaClient {
    mcpUrl;
    token;
    searchSchemaFn;
    /**
     * @param mcpUrl MCP endpoint URL passed to every search call.
     * @param token Access token passed to every search call.
     * @param searchSchemaFn Search implementation `(opts, args) => Promise<result>`; defaults to the context-loader `searchSchema`.
     */
    constructor(mcpUrl, token, searchSchemaFn = searchSchema) {
        this.mcpUrl = mcpUrl;
        this.token = token;
        this.searchSchemaFn = searchSchemaFn;
    }
    /**
     * Fetch the brief JSON schema of `knId` (query "*") and map it to
     * `{ object_types, relation_types }`.
     *
     * The response is treated as untrusted: a missing or non-array
     * `object_types` / `relation_types` / `properties` yields an empty list and
     * non-object entries are skipped, so a malformed payload produces a smaller
     * snapshot instead of a TypeError. Missing string fields default to ""
     * (field `type` defaults to "string"); `data_view_id` is only set when
     * `data_source.id` is a string.
     *
     * @param knId Knowledge-network id to look up.
     * @returns Promise of the normalized schema snapshot.
     */
    async getSchema(knId) {
        const opts = {
            mcpUrl: this.mcpUrl,
            accessToken: this.token,
            knId,
        };
        const result = await this.searchSchemaFn(opts, {
            query: "*",
            response_format: "json",
            schema_brief: true,
        });
        const object_types = toRecordList(result?.object_types).map((ot) => {
            const ds = ot["data_source"];
            return {
                concept_name: String(ot["concept_name"] ?? ""),
                data_view_id: typeof ds?.["id"] === "string" ? ds["id"] : undefined,
                fields: toRecordList(ot["properties"]).map((p) => ({
                    name: String(p["name"] ?? ""),
                    type: String(p["type"] ?? "string"),
                })),
            };
        });
        const relation_types = toRecordList(result?.relation_types).map((rt) => ({
            // Relation types may carry their name under either key.
            concept_name: String(rt["concept_name"] ?? rt["name"] ?? ""),
            source: String(rt["source"] ?? ""),
            target: String(rt["target"] ?? ""),
            join_key: String(rt["join_key"] ?? ""),
        }));
        return { object_types, relation_types };
    }
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import type { TraceSpan } from "../../../api/conversations.js";
|
|
2
|
+
import type { QueryFailureAnalysis } from "../schemas.js";
|
|
3
|
+
export type ToolCallOutcome = "data" | "empty" | "error";
|
|
4
|
+
export interface ToolCallRecord {
|
|
5
|
+
tool_name: string;
|
|
6
|
+
/** Raw gen_ai.tool.call.arguments JSON string (may be ""). */
|
|
7
|
+
arguments: string;
|
|
8
|
+
outcome: ToolCallOutcome;
|
|
9
|
+
}
|
|
10
|
+
export type RetrievalHealth = "retrieved" | "empty" | "errored" | "no_kn_calls" | "no_trace";
|
|
11
|
+
export interface MechanismDiagnosis {
|
|
12
|
+
broken: boolean;
|
|
13
|
+
/** Root-cause message, populated only when broken. */
|
|
14
|
+
reason: string;
|
|
15
|
+
}
|
|
16
|
+
/** Extract every `execute_tool` call from a conversation's trace spans. */
|
|
17
|
+
export declare function extractToolCalls(spans: TraceSpan[]): ToolCallRecord[];
|
|
18
|
+
/** Render tool calls as `tool_name→outcome` strings for the triage prompt, capped. */
|
|
19
|
+
export declare function summarizeToolCalls(calls: ToolCallRecord[], max?: number): string[];
|
|
20
|
+
/**
|
|
21
|
+
* Reduce a query's tool calls to one retrieval-health verdict. "retrieved" wins
|
|
22
|
+
* as soon as any KN call returned data — one good retrieval proves the mechanism
|
|
23
|
+
* works. Otherwise "errored" outranks "empty" (an error is the stronger signal).
|
|
24
|
+
*/
|
|
25
|
+
export declare function healthFromToolCalls(calls: ToolCallRecord[]): RetrievalHealth;
|
|
26
|
+
/**
|
|
27
|
+
* Roll per-query retrieval health up to a round-level verdict. The mechanism is
|
|
28
|
+
* "broken" when enough failing queries exercised the KN yet none retrieved any
|
|
29
|
+
* data — a fail-fast signal that the round measured a wiring failure, not the
|
|
30
|
+
* prompt. no_kn_calls / no_trace queries carry no evidence and are ignored.
|
|
31
|
+
*/
|
|
32
|
+
export declare function diagnoseMechanism(analyses: QueryFailureAnalysis[]): MechanismDiagnosis;
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
 * Tool names treated as KN retrieval/navigation. A call to one of these is
 * what the health checks in this module mean by "retrieved KN data".
 */
const KN_RETRIEVAL_TOOLS = new Set()
    .add("query_object_instance")
    .add("kn_search")
    .add("semantic_search")
    .add("search_schema")
    .add("get_logic_properties_values")
    .add("dv_query")
    // SQL aggregation over the KN's Vega resources (COUNT/SUM/GROUP BY/TOP-N).
    // Leaving it out would blind the mechanism check to SQL-capable agents and
    // misreport a healthy round as having retrieved no data.
    .add("vega_sql_query");
/** Span-name prefix that marks a tool execution span; the tool name follows it. */
const TOOL_SPAN_PREFIX = "execute_tool ";
/**
 * Number of no-data failing queries required before the mechanism may be
 * blamed. A few no-data failures can be legitimate (a hard question, or a bad
 * query built by the agent), so the guard waits for enough of them. An eval
 * set smaller than this can never trip the guard on count alone; that is
 * accepted because the round-level retrieval veto (roundRetrievedAnyData in
 * failure-analyzer) is the actual protection against a false positive.
 */
const MIN_MECHANISM_EVIDENCE = 3;
|
|
24
|
+
/**
 * Classify the `answer` part of a tool result as "data", "empty" or "error".
 *
 * Rules, in order:
 * - null / undefined → "empty"
 * - string → "error" when it contains `error_code` (error payloads arrive as an
 *   SSE string carrying that marker), otherwise "empty"
 * - array → "data" when non-empty, otherwise "empty"
 * - other object → "error" when it has a truthy `error_code`; otherwise "data"
 *   when any property holds a non-empty array, else "empty"
 * - anything else → "empty"
 *
 * The object rule is deliberately permissive: a wrongly reported "data" only
 * hides a mechanism failure, while a wrong "empty"/"error" could fail a
 * healthy round. A metadata array (e.g. warnings) can therefore count as data.
 *
 * @param answer Parsed answer payload of any type.
 * @returns One of "data", "empty", "error".
 */
function classifyAnswer(answer) {
    if (answer == null) {
        return "empty";
    }
    switch (typeof answer) {
        case "string":
            return answer.includes("error_code") ? "error" : "empty";
        case "object": {
            if (Array.isArray(answer)) {
                return answer.length > 0 ? "data" : "empty";
            }
            if (answer["error_code"]) {
                return "error";
            }
            const holdsRows = Object.values(answer).some((value) => Array.isArray(value) && value.length > 0);
            return holdsRows ? "data" : "empty";
        }
        default:
            return "empty";
    }
}
|
|
53
|
+
/**
 * Classify a raw `gen_ai.tool.call.result` payload.
 *
 * A non-string or blank payload is "empty". A payload that is not valid JSON
 * is "error" only when it contains the `error_code` marker, otherwise "empty".
 * A valid JSON payload is judged by its `answer` member via `classifyAnswer`.
 *
 * @param resultStr Raw result attribute value.
 * @returns One of "data", "empty", "error".
 */
function classifyResult(resultStr) {
    const isBlank = typeof resultStr !== "string" || resultStr.trim() === "";
    if (isBlank) {
        return "empty";
    }
    let payload;
    try {
        payload = JSON.parse(resultStr);
    }
    catch {
        // Error payloads are themselves valid JSON, so an unparsable payload is
        // opaque: only call it an error when it carries the error marker.
        return resultStr.includes("error_code") ? "error" : "empty";
    }
    const answer = payload == null ? undefined : payload.answer;
    return classifyAnswer(answer);
}
|
|
68
|
+
/**
 * Pull every tool execution out of a conversation's trace spans.
 *
 * A span counts as a tool execution when its `gen_ai.operation.name` attribute
 * is "execute_tool" or its name starts with the tool-span prefix. The tool
 * name comes from the non-empty string attribute `gen_ai.tool.name`, falling
 * back to the remainder of a prefixed span name; spans without a resolvable
 * tool name are skipped. Non-string arguments/result attributes are read as "".
 *
 * @param spans Trace spans (`name`, optional `attributes`).
 * @returns One `{ tool_name, arguments, outcome }` record per tool span, in span order.
 */
export function extractToolCalls(spans) {
    const asText = (value) => (typeof value === "string" ? value : "");
    const records = [];
    for (const { name: spanName, attributes } of spans) {
        const attrs = attributes ?? {};
        const isToolOperation = attrs["gen_ai.operation.name"] === "execute_tool";
        const hasToolPrefix = typeof spanName === "string" && spanName.startsWith(TOOL_SPAN_PREFIX);
        if (!(isToolOperation || hasToolPrefix)) {
            continue;
        }
        const nameFromSpan = hasToolPrefix ? spanName.slice(TOOL_SPAN_PREFIX.length) : "";
        const toolName = asText(attrs["gen_ai.tool.name"]) || nameFromSpan;
        if (toolName === "") {
            continue;
        }
        records.push({
            tool_name: toolName,
            arguments: asText(attrs["gen_ai.tool.call.arguments"]),
            outcome: classifyResult(asText(attrs["gen_ai.tool.call.result"])),
        });
    }
    return records;
}
|
|
90
|
+
/**
 * Render the first `max` tool calls as `tool_name→outcome` strings (used for
 * the triage prompt).
 *
 * @param calls Tool call records (`tool_name`, `outcome`).
 * @param max Maximum number of calls rendered; defaults to 8.
 * @returns The rendered strings, in call order.
 */
export function summarizeToolCalls(calls, max = 8) {
    const rendered = [];
    for (const { tool_name, outcome } of calls.slice(0, max)) {
        rendered.push(`${tool_name}→${outcome}`);
    }
    return rendered;
}
|
|
94
|
+
/**
 * Collapse one query's tool calls into a single retrieval-health verdict.
 *
 * Only calls to KN retrieval tools are considered:
 * - none at all → "no_kn_calls"
 * - any of them returned data → "retrieved" (one good retrieval is enough)
 * - otherwise any of them errored → "errored" (outranks "empty")
 * - otherwise → "empty"
 *
 * @param calls Tool call records (`tool_name`, `outcome`).
 * @returns "retrieved", "errored", "empty" or "no_kn_calls".
 */
export function healthFromToolCalls(calls) {
    let sawKnCall = false;
    let sawError = false;
    for (const call of calls) {
        if (!KN_RETRIEVAL_TOOLS.has(call.tool_name)) {
            continue;
        }
        if (call.outcome === "data") {
            return "retrieved";
        }
        sawKnCall = true;
        if (call.outcome === "error") {
            sawError = true;
        }
    }
    if (!sawKnCall) {
        return "no_kn_calls";
    }
    return sawError ? "errored" : "empty";
}
|
|
109
|
+
/**
 * Aggregate per-query retrieval health into a round-level mechanism verdict.
 *
 * The mechanism is reported broken only when no analysed query is "retrieved"
 * and the number of "errored" plus "empty" queries reaches the minimum
 * evidence threshold. Queries with any other health value (no_kn_calls,
 * no_trace) are not counted either way.
 *
 * @param analyses Failure analyses; only `retrieval_health` is read.
 * @returns `{ broken, reason }` — `reason` is the root-cause message when broken, "" otherwise.
 */
export function diagnoseMechanism(analyses) {
    const tally = new Map([["retrieved", 0], ["errored", 0], ["empty", 0]]);
    for (const { retrieval_health: health } of analyses) {
        if (tally.has(health)) {
            tally.set(health, tally.get(health) + 1);
        }
    }
    const errored = tally.get("errored");
    const empty = tally.get("empty");
    const noData = errored + empty;
    const healthy = tally.get("retrieved") > 0 || noData < MIN_MECHANISM_EVIDENCE;
    if (healthy) {
        return { broken: false, reason: "" };
    }
    const reason = [
        `Mechanism failure: ${noData} failing queries exercised the KN but none retrieved data `,
        `(${errored} errored, ${empty} empty), and no failing query retrieved any KN data. `,
        `The agent is not retrieving from the knowledge network — this is not a prompt problem. `,
        `Check the agent's KN binding (kn_id map_type must be "fixedValue") and that the bound KN holds data.`,
    ].join("");
    return { broken: true, reason };
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { VegaCatalogEntry } from "../schemas.js";
|
|
2
|
+
export interface VegaCatalogClient {
|
|
3
|
+
listDataviews(filter?: {
|
|
4
|
+
knId?: string;
|
|
5
|
+
}): Promise<VegaCatalogEntry[]>;
|
|
6
|
+
}
|
|
7
|
+
export declare class KweaverVegaCatalogClient implements VegaCatalogClient {
|
|
8
|
+
private baseUrl;
|
|
9
|
+
private token;
|
|
10
|
+
constructor(baseUrl: string, token: string);
|
|
11
|
+
listDataviews(_filter?: {
|
|
12
|
+
knId?: string;
|
|
13
|
+
}): Promise<VegaCatalogEntry[]>;
|
|
14
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
// Placeholder client: swap the method body for real Vega API calls once the endpoint is confirmed.
export class KweaverVegaCatalogClient {
    baseUrl;
    token;
    /**
     * @param baseUrl Base URL stored for the future Vega API calls.
     * @param token Access token stored for the future Vega API calls.
     */
    constructor(baseUrl, token) {
        Object.assign(this, { baseUrl, token });
    }
    /**
     * List the dataviews of the catalog. Currently always resolves to an empty
     * list and ignores the filter.
     *
     * @param _filter Optional `{ knId }` filter; unused by the stub.
     * @returns Promise of an empty array.
     */
    async listDataviews(_filter) {
        // TODO: GET {baseUrl}/api/vega/v1/dataviews?kn_id={filter.knId}
        // Response shape: [{ id, name, columns: [{ name, type }] }]
        // Intentionally empty — data_probes from KnDataProber is the primary enrichment path
        const noDataviews = [];
        return noDataviews;
    }
}
|
|
@@ -1,26 +1,13 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
}): Promise<NextChange>;
|
|
10
|
-
}
|
|
11
|
-
export interface TriageClient {
|
|
12
|
-
triage(input: {
|
|
13
|
-
currentRound: RoundData;
|
|
14
|
-
prevRounds: RoundData[];
|
|
15
|
-
candidateConfig: Record<string, unknown>;
|
|
16
|
-
crossRoundMemoryRef?: string;
|
|
17
|
-
}): Promise<RoundData["triage_conclusion"] & {
|
|
18
|
-
new_memory_token: string;
|
|
19
|
-
}>;
|
|
20
|
-
}
|
|
1
|
+
import type { KnApiClient } from "./patch/kn-api-client.js";
|
|
2
|
+
import type { SkillApiClient } from "./patch/skill-api-client.js";
|
|
3
|
+
import { ContextAssembler } from "./context/context-assembler.js";
|
|
4
|
+
import type { TriageClient, TriageResult } from "./providers/triage-client.js";
|
|
5
|
+
import type { AgentConfigFetcher } from "./capture-fingerprint.js";
|
|
6
|
+
import type { QueryResult } from "./schemas.js";
|
|
7
|
+
import type { TraceSpan } from "../../api/conversations.js";
|
|
8
|
+
export type { TriageClient, TriageResult };
|
|
21
9
|
export interface CoordinatorOpts {
|
|
22
10
|
expDir: string;
|
|
23
|
-
synthesizer: SynthesizerClient;
|
|
24
11
|
triage: TriageClient;
|
|
25
12
|
runEval: (opts: {
|
|
26
13
|
evalSetPaths: string[];
|
|
@@ -31,6 +18,18 @@ export interface CoordinatorOpts {
|
|
|
31
18
|
queryResults: QueryResult[];
|
|
32
19
|
}>;
|
|
33
20
|
experimentId?: string;
|
|
21
|
+
contextAssembler?: ContextAssembler;
|
|
22
|
+
fetchTrace?: (conversationId: string) => Promise<{
|
|
23
|
+
spans: TraceSpan[];
|
|
24
|
+
}>;
|
|
25
|
+
knClient?: KnApiClient;
|
|
26
|
+
skillClient?: SkillApiClient;
|
|
27
|
+
/**
|
|
28
|
+
* When provided, a preflight reconciliation runs before each eval round —
|
|
29
|
+
* verifying the live agent matches expectation and is bound to the eval set's
|
|
30
|
+
* target KN. A mismatch fails the round fast, before any eval chat is sent.
|
|
31
|
+
*/
|
|
32
|
+
fetchAgentConfig?: AgentConfigFetcher;
|
|
34
33
|
}
|
|
35
34
|
export declare class ExperimentCoordinator {
|
|
36
35
|
private opts;
|
|
@@ -39,6 +38,20 @@ export declare class ExperimentCoordinator {
|
|
|
39
38
|
constructor(opts: CoordinatorOpts);
|
|
40
39
|
run(): Promise<void>;
|
|
41
40
|
resume(): Promise<void>;
|
|
41
|
+
/**
|
|
42
|
+
* Install SIGINT/SIGHUP/SIGTERM handlers that flush a final event and release
|
|
43
|
+
* the lock before exit. Returns an uninstaller that MUST be called in the
|
|
44
|
+
* caller's finally block (otherwise normal exit would still fire the handler).
|
|
45
|
+
*
|
|
46
|
+
* Semantics:
|
|
47
|
+
* SIGINT → user-intent abort → emit `aborted` event (terminal)
|
|
48
|
+
* SIGHUP → terminal closed → emit `step_failed` retryable
|
|
49
|
+
* SIGTERM → external kill (ambig.) → emit `step_failed` retryable
|
|
50
|
+
*
|
|
51
|
+
* SIGKILL / OOM / power loss can't be caught here — Layer 2 auto-recovery in
|
|
52
|
+
* run() handles that case on the next start.
|
|
53
|
+
*/
|
|
54
|
+
private installSignalHandlers;
|
|
42
55
|
private runLoop;
|
|
43
56
|
private checkAbort;
|
|
44
57
|
private withRetry;
|