veryfront 0.1.523 → 0.1.525
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/deno.js +1 -1
- package/esm/src/agent/testing/durable-run-canaries/index.d.ts +2 -0
- package/esm/src/agent/testing/durable-run-canaries/index.d.ts.map +1 -0
- package/esm/src/agent/testing/durable-run-canaries/index.js +1 -0
- package/esm/src/agent/testing/durable-run-canaries/runner.d.ts +102 -0
- package/esm/src/agent/testing/durable-run-canaries/runner.d.ts.map +1 -0
- package/esm/src/agent/testing/durable-run-canaries/runner.js +372 -0
- package/esm/src/agent/testing/index.d.ts +1 -0
- package/esm/src/agent/testing/index.d.ts.map +1 -1
- package/esm/src/agent/testing/index.js +1 -0
- package/esm/src/integrations/_data.js +11 -11
- package/esm/src/server/handlers/request/agent-stream.handler.d.ts.map +1 -1
- package/esm/src/server/handlers/request/agent-stream.handler.js +10 -1
- package/esm/src/utils/version-constant.d.ts +1 -1
- package/esm/src/utils/version-constant.js +1 -1
- package/package.json +1 -1
- package/src/deno.js +1 -1
- package/src/src/agent/testing/durable-run-canaries/index.ts +18 -0
- package/src/src/agent/testing/durable-run-canaries/runner.ts +582 -0
- package/src/src/agent/testing/index.ts +19 -0
- package/src/src/integrations/_data.ts +11 -11
- package/src/src/server/handlers/request/agent-stream.handler.ts +18 -1
- package/src/src/utils/version-constant.ts +1 -1
package/esm/deno.js
CHANGED
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
export { createDurableRunCanaryApiClient, createDurableRunCanaryRunner, type DurableRunCanaryApiClient, type DurableRunCanaryApiConfig, type DurableRunCanaryCase, type DurableRunCanaryCreateRootRunInput, type DurableRunCanaryMessage, type DurableRunCanaryPreparedCase, type DurableRunCanaryResult, type DurableRunCanaryRunnerConfig, durableRunCanaryRunnerInternals, type DurableRunCanaryRunSummary, type DurableRunCanarySendUserMessageInput, type DurableRunCanaryStartRunInput, getDurableRunCanaryMessageSchema, parseDurableRunCanaryRunSummary, } from "./runner.js";
|
|
2
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/src/agent/testing/durable-run-canaries/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,+BAA+B,EAC/B,4BAA4B,EAC5B,KAAK,yBAAyB,EAC9B,KAAK,yBAAyB,EAC9B,KAAK,oBAAoB,EACzB,KAAK,kCAAkC,EACvC,KAAK,uBAAuB,EAC5B,KAAK,4BAA4B,EACjC,KAAK,sBAAsB,EAC3B,KAAK,4BAA4B,EACjC,+BAA+B,EAC/B,KAAK,0BAA0B,EAC/B,KAAK,oCAAoC,EACzC,KAAK,6BAA6B,EAClC,gCAAgC,EAChC,+BAA+B,GAChC,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { createDurableRunCanaryApiClient, createDurableRunCanaryRunner, durableRunCanaryRunnerInternals, getDurableRunCanaryMessageSchema, parseDurableRunCanaryRunSummary, } from "./runner.js";
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import type { InferSchema } from "../../../extensions/schema/index.js";
|
|
2
|
+
/** Connection settings consumed by the durable-run canary API client. */
export interface DurableRunCanaryApiConfig {
    /** Base URL that request paths are resolved against; a trailing slash is optional. */
    apiUrl: string;
    /** Token sent as `Authorization: Bearer <authToken>` on every request. */
    authToken: string;
    /** Agent id placed in the `agent_id` field of run requests. */
    agentId: string;
    /**
     * Project id sent as `project_id` in the start-run context. When truthy, root-run
     * creation also includes the project/production target fields.
     */
    projectId: string | null;
    /** Optional branch id; sent as `null` when omitted. */
    branchId?: string | null;
    /**
     * Abort timeout (ms) applied to each request. The runner also uses this value as the
     * deadline while polling for a terminal run status.
     */
    requestTimeoutMs: number;
    /** Optional `fetch` replacement; the global `fetch` is used when omitted. */
    fetch?: (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
}
|
|
11
|
+
/** Identifies a run by its owning conversation and its public run id. */
export interface DurableRunCanaryCreateRootRunInput {
    /** Conversation that owns the run. */
    conversationId: string;
    /** Public id of the run. */
    runId: string;
}
|
|
15
|
+
/** Input for posting a user message into a conversation. */
export interface DurableRunCanarySendUserMessageInput {
    /** Conversation that receives the message. */
    conversationId: string;
    /** Text sent as the single text part of the user message. */
    prompt: string;
}
|
|
19
|
+
/** Input for starting a previously created durable root run. */
export interface DurableRunCanaryStartRunInput extends DurableRunCanaryCreateRootRunInput {
    /** Message id sent as `durable_root_run.message_id`. */
    messageId: string;
    /** Text of the user message included in the run input. */
    prompt: string;
    /** Id assigned to the user message included in the run input. */
    userMessageId: string;
}
|
|
24
|
+
/**
 * Factory for the schema describing one conversation message: a string `id`, a `role`,
 * a `parts` array whose entries carry at least a string `type`, and an optional `status`.
 * The `Record<string, unknown>` intersections allow additional keys on messages and parts.
 */
export declare const getDurableRunCanaryMessageSchema: () => import("../../../internal-agents/schema.js").Schema<{
    id: string;
    role: "tool" | "user" | "assistant" | "system";
    parts: ({
        type: string;
    } & {} & Record<string, unknown>)[];
} & {
    status?: string | undefined;
} & Record<string, unknown>>;
|
|
33
|
+
/** Message shape inferred from the schema returned by `getDurableRunCanaryMessageSchema`. */
export type DurableRunCanaryMessage = InferSchema<ReturnType<typeof getDurableRunCanaryMessageSchema>>;
|
|
34
|
+
/**
 * Normalized, camelCase view of a run summary as returned by
 * `parseDurableRunCanaryRunSummary`. Every `| null` field is `null` when the API
 * payload omitted it.
 */
export interface DurableRunCanaryRunSummary {
    runId: string;
    conversationId: string;
    messageId: string;
    agentId: string;
    /** Run status; `completed`, `failed` and `cancelled` are treated as terminal by the runner. */
    status: string;
    latestEventId: number;
    latestExternalEventSequence: number | null;
    waitingToolCallId: string | null;
    waitingToolName: string | null;
    terminalErrorCode: string | null;
    terminalErrorMessage: string | null;
    startedAt: string | null;
    finishedAt: string | null;
}
|
|
49
|
+
/**
 * Validates a run-summary payload in either snake_case or camelCase form and returns it
 * in normalized camelCase form. Throws when the payload matches neither form.
 */
export declare function parseDurableRunCanaryRunSummary(value: unknown): DurableRunCanaryRunSummary;
|
|
50
|
+
/** Operations the canary runner needs from the API; can be replaced by a test double. */
export interface DurableRunCanaryApiClient {
    /** Creates the root run record for a conversation. */
    createDurableRootRun: (input: DurableRunCanaryCreateRootRunInput) => Promise<void>;
    /** Fetches and normalizes the summary of one run. */
    getRunSummary: (input: DurableRunCanaryCreateRootRunInput) => Promise<DurableRunCanaryRunSummary>;
    /** Lists the messages of a conversation. */
    listMessagesForCanary: (input: {
        conversationId: string;
    }) => Promise<DurableRunCanaryMessage[]>;
    /** Posts a user message and resolves with the message returned by the API. */
    sendUserMessageForCanary: (input: DurableRunCanarySendUserMessageInput) => Promise<DurableRunCanaryMessage>;
    /** Starts execution of a previously created root run. */
    startDurableRun: (input: DurableRunCanaryStartRunInput) => Promise<void>;
}
|
|
59
|
+
/** Builds an HTTP-backed `DurableRunCanaryApiClient` from the given connection settings. */
export declare function createDurableRunCanaryApiClient(config: DurableRunCanaryApiConfig): DurableRunCanaryApiClient;
|
|
60
|
+
/** Outcome of running a single canary case. */
export interface DurableRunCanaryResult {
    /** Id copied from the case definition. */
    id: string;
    /** Label copied from the case definition. */
    label: string;
    status: "pass" | "fail";
    /** `"OK"` on success; otherwise the message of the error that failed the case. */
    details: string;
    /** Wall-clock duration of the case in milliseconds. */
    durationMs: number;
    conversationId: string;
    /** Run id used for the case; `"unknown"` when the case failed before a run id was generated. */
    runId: string;
    /** Present only when the prepared case yielded at least one artifact path. */
    artifactPaths?: string[];
}
|
|
70
|
+
/** Everything the runner needs to execute one prepared canary case. */
export interface DurableRunCanaryPreparedCase {
    /** Artifact paths for the result, either fixed or derived from the run id. */
    artifactPaths?: string[] | ((runId: string) => string[]);
    /**
     * Invoked by the runner with the run id after successful validation, unless
     * `keepSuccessfulEvidence` is set.
     */
    cleanup: (input?: {
        runId: string;
    }) => Promise<void>;
    /** Conversation the case runs in. */
    conversationId: string;
    /** Prompt posted as the user message and passed as the run input. */
    prompt: string;
    /**
     * Optional hook invoked before the first API call. When it resolves to a function,
     * the runner awaits that function once the case has finished.
     */
    startSidecar?: () => Promise<(() => Promise<void>) | void>;
    title: string;
    /**
     * Receives the terminal run summary and the conversation messages (including messages
     * of referenced child conversations). Throwing or rejecting fails the case.
     */
    validate: (input: {
        messages: DurableRunCanaryMessage[];
        run: DurableRunCanaryRunSummary;
    }) => Promise<void> | void;
}
|
|
84
|
+
/** Definition of a canary case. */
export interface DurableRunCanaryCase {
    /** Id copied into the result. */
    id: string;
    /** Label copied into the result. */
    label: string;
    /** Produces the prepared case; invoked once at the start of `runCase`. */
    prepare: () => Promise<DurableRunCanaryPreparedCase>;
}
|
|
89
|
+
/** API settings plus runner-specific options. */
export interface DurableRunCanaryRunnerConfig extends DurableRunCanaryApiConfig {
    /** When `true`, the prepared case's `cleanup` is skipped after a passing run. */
    keepSuccessfulEvidence: boolean;
}
|
|
92
|
+
/**
 * Returns the distinct UUID-shaped `childConversationId` / `child_conversation_id`
 * values found in the `output` of tool-result parts of the given messages.
 */
declare function collectReferencedChildConversationIds(messages: DurableRunCanaryMessage[]): string[];
|
|
93
|
+
/** Returns `true` when the status is `completed`, `failed` or `cancelled`. */
declare function isTerminalRunStatus(status: string): boolean;
|
|
94
|
+
/**
 * Creates a canary runner. When `apiClient` is omitted, a client is built from `config`.
 * `runCase` resolves with a pass/fail result for the given case.
 */
export declare function createDurableRunCanaryRunner(config: DurableRunCanaryRunnerConfig, apiClient?: DurableRunCanaryApiClient): {
    runCase: (testCase: DurableRunCanaryCase) => Promise<DurableRunCanaryResult>;
};
|
|
97
|
+
/** Module-private helpers exposed as a single object. */
export declare const durableRunCanaryRunnerInternals: {
    collectReferencedChildConversationIds: typeof collectReferencedChildConversationIds;
    isTerminalRunStatus: typeof isTerminalRunStatus;
};
|
|
101
|
+
export {};
|
|
102
|
+
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../../../../src/src/agent/testing/durable-run-canaries/runner.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,qCAAqC,CAAC;AAEvE,MAAM,WAAW,yBAAyB;IACxC,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,GAAG,GAAG,GAAG,OAAO,EAAE,IAAI,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;CAClF;AAED,MAAM,WAAW,kCAAkC;IACjD,cAAc,EAAE,MAAM,CAAC;IACvB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,oCAAoC;IACnD,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,6BAA8B,SAAQ,kCAAkC;IACvF,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,eAAO,MAAM,gCAAgC;;;;;;;;4BAO5C,CAAC;AAEF,MAAM,MAAM,uBAAuB,GAAG,WAAW,CAC/C,UAAU,CAAC,OAAO,gCAAgC,CAAC,CACpD,CAAC;AAEF,MAAM,WAAW,0BAA0B;IACzC,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,aAAa,EAAE,MAAM,CAAC;IACtB,2BAA2B,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AA4CD,wBAAgB,+BAA+B,CAAC,KAAK,EAAE,OAAO,GAAG,0BAA0B,CAoC1F;AAmFD,MAAM,WAAW,yBAAyB;IACxC,oBAAoB,EAAE,CAAC,KAAK,EAAE,kCAAkC,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACnF,aAAa,EAAE,CAAC,KAAK,EAAE,kCAAkC,KAAK,OAAO,CAAC,0BAA0B,CAAC,CAAC;IAClG,qBAAqB,EAAE,CAAC,KAAK,EAAE;QAAE,cAAc,EAAE,MAAM,CAAA;KAAE,KAAK,OAAO,CAAC,uBAAuB,EAAE,CAAC,CAAC;IACjG,wBAAwB,EAAE,CACxB,KAAK,EAAE,oCAAoC,KACxC,OAAO,CAAC,uBAAuB,CAAC,CAAC;IACtC,eAAe,EAAE,CAAC,KAAK,EAAE,6BAA6B,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1E;AAED,wBAAgB,+BAA+B,CAC7C,MAAM,EAAE,yBAAyB,GAChC,yBAAyB,CAgF3B;AAED,MAAM,WAAW,sBAAsB;IACrC,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,GAAG,MAAM,C
AAC;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,EAAE,MAAM,CAAC;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,4BAA4B;IAC3C,aAAa,CAAC,EAAE,MAAM,EAAE,GAAG,CAAC,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC,CAAC;IACzD,OAAO,EAAE,CAAC,KAAK,CAAC,EAAE;QAAE,KAAK,EAAE,MAAM,CAAA;KAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACtD,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,MAAM,OAAO,CAAC,CAAC,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;IAC3D,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,CAAC,KAAK,EAAE;QAChB,QAAQ,EAAE,uBAAuB,EAAE,CAAC;QACpC,GAAG,EAAE,0BAA0B,CAAC;KACjC,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;CAC5B;AAED,MAAM,WAAW,oBAAoB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,OAAO,CAAC,4BAA4B,CAAC,CAAC;CACtD;AAED,MAAM,WAAW,4BAA6B,SAAQ,yBAAyB;IAC7E,sBAAsB,EAAE,OAAO,CAAC;CACjC;AA0DD,iBAAS,qCAAqC,CAAC,QAAQ,EAAE,uBAAuB,EAAE,GAAG,MAAM,EAAE,CAc5F;AAQD,iBAAS,mBAAmB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAEpD;AA2CD,wBAAgB,4BAA4B,CAC1C,MAAM,EAAE,4BAA4B,EACpC,SAAS,GAAE,yBAAmE;wBAkB7C,oBAAoB,KAAG,OAAO,CAAC,sBAAsB,CAAC;EAqFxF;AAED,eAAO,MAAM,+BAA+B;;;CAG3C,CAAC"}
|
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
import * as dntShim from "../../../../_dnt.shims.js";
|
|
2
|
+
import { defineSchema } from "../../../schemas/index.js";
|
|
3
|
+
/**
 * Schema factory for a single conversation message.
 * Unknown keys are passed through on both the message and each of its parts, and a
 * missing `parts` array defaults to an empty one.
 */
export const getDurableRunCanaryMessageSchema = defineSchema((v) => {
    // A part only has to carry a string `type`; everything else is kept as-is.
    const partSchema = v.object({ type: v.string() }).passthrough();
    const messageSchema = v.object({
        id: v.string(),
        role: v.enum(["user", "assistant", "system", "tool"]),
        status: v.string().optional(),
        parts: v.array(partSchema).default([]),
    });
    return messageSchema.passthrough();
});
|
|
9
|
+
/**
 * Schema factory for the snake_case form of a run summary. Unknown keys are passed through.
 */
const getSnakeRunSummarySchema = defineSchema((v) => {
    // Small builders so each field still gets its own schema instance.
    const uuid = () => v.string().uuid();
    const counter = () => v.number().int().nonnegative();
    const optionalText = () => v.string().nullable().optional();
    return v.object({
        run_id: v.string(),
        conversation_id: uuid(),
        message_id: uuid(),
        agent_id: v.string(),
        status: v.string(),
        latest_event_id: counter(),
        // NOTE(review): unlike the other optional fields, this one is not declared
        // nullable — confirm the API never sends `null` here.
        latest_external_event_sequence: counter().optional(),
        waiting_tool_call_id: optionalText(),
        waiting_tool_name: optionalText(),
        terminal_error_code: optionalText(),
        terminal_error_message: optionalText(),
        started_at: optionalText(),
        finished_at: optionalText(),
    }).passthrough();
});
|
|
24
|
+
/**
 * Schema factory for the camelCase form of a run summary. Unknown keys are passed through.
 */
const getCamelRunSummarySchema = defineSchema((v) => {
    // Small builders so each field still gets its own schema instance.
    const uuid = () => v.string().uuid();
    const counter = () => v.number().int().nonnegative();
    const optionalText = () => v.string().nullable().optional();
    return v.object({
        runId: v.string(),
        conversationId: uuid(),
        messageId: uuid(),
        agentId: v.string(),
        status: v.string(),
        latestEventId: counter(),
        // NOTE(review): unlike the other optional fields, this one is not declared
        // nullable — confirm the API never sends `null` here.
        latestExternalEventSequence: counter().optional(),
        waitingToolCallId: optionalText(),
        waitingToolName: optionalText(),
        terminalErrorCode: optionalText(),
        terminalErrorMessage: optionalText(),
        startedAt: optionalText(),
        finishedAt: optionalText(),
    }).passthrough();
});
|
|
39
|
+
/** Schema factory for the message-list response envelope: `{ data: Message[] }`. */
const getDurableRunCanaryMessageListSchema = defineSchema((v) => {
    const messageSchema = getDurableRunCanaryMessageSchema();
    return v.object({
        data: v.array(messageSchema),
    });
});
|
|
42
|
+
/**
 * Normalizes a run-summary payload into camelCase form.
 *
 * The snake_case schema is tried first; if it does not match, the payload is parsed with
 * the camelCase schema, whose validation error propagates to the caller. Optional fields
 * that are missing or `null` come back as `null`.
 *
 * @param {unknown} value Raw payload.
 * @returns The normalized run summary.
 */
export function parseDurableRunCanaryRunSummary(value) {
    const snakeAttempt = getSnakeRunSummarySchema().safeParse(value);
    if (!snakeAttempt.success) {
        // Not snake_case: fall back to camelCase and let its error surface on mismatch.
        const c = getCamelRunSummarySchema().parse(value);
        return {
            runId: c.runId,
            conversationId: c.conversationId,
            messageId: c.messageId,
            agentId: c.agentId,
            status: c.status,
            latestEventId: c.latestEventId,
            latestExternalEventSequence: c.latestExternalEventSequence ?? null,
            waitingToolCallId: c.waitingToolCallId ?? null,
            waitingToolName: c.waitingToolName ?? null,
            terminalErrorCode: c.terminalErrorCode ?? null,
            terminalErrorMessage: c.terminalErrorMessage ?? null,
            startedAt: c.startedAt ?? null,
            finishedAt: c.finishedAt ?? null,
        };
    }
    const s = snakeAttempt.data;
    return {
        runId: s.run_id,
        conversationId: s.conversation_id,
        messageId: s.message_id,
        agentId: s.agent_id,
        status: s.status,
        latestEventId: s.latest_event_id,
        latestExternalEventSequence: s.latest_external_event_sequence ?? null,
        waitingToolCallId: s.waiting_tool_call_id ?? null,
        waitingToolName: s.waiting_tool_name ?? null,
        terminalErrorCode: s.terminal_error_code ?? null,
        terminalErrorMessage: s.terminal_error_message ?? null,
        startedAt: s.started_at ?? null,
        finishedAt: s.finished_at ?? null,
    };
}
|
|
78
|
+
/**
 * Builds the request headers for an API call.
 *
 * Starts from the caller's headers, adds `Content-Type: application/json` only when no
 * content type is present, and always overwrites `Authorization` with the bearer token.
 *
 * @param config Object providing `authToken`.
 * @param headers Optional caller-supplied headers.
 * @returns {Headers} A new `Headers` instance.
 */
function createJsonHeaders(config, headers) {
    const merged = new Headers(headers);
    const callerSetContentType = merged.has("Content-Type");
    if (callerSetContentType === false) {
        merged.set("Content-Type", "application/json");
    }
    // The configured token always wins over any caller-supplied Authorization header.
    merged.set("Authorization", `Bearer ${config.authToken}`);
    return merged;
}
|
|
86
|
+
/**
 * Selects the fetch implementation: the one supplied in `config.fetch`, or the global
 * `fetch` when none (or `null`) was supplied.
 */
function createFetch(config) {
    const customFetch = config.fetch;
    return customFetch == null ? fetch : customFetch;
}
|
|
89
|
+
/**
 * Resolves an API path against `config.apiUrl`.
 *
 * The base is given a trailing slash and a single leading slash is removed from the
 * path, so the path is appended to the base path rather than replacing it.
 *
 * @param config Object providing `apiUrl`.
 * @param {string} path API path, with or without a leading slash.
 * @returns {URL} The resolved URL.
 */
function createApiUrl(config, path) {
    let base = config.apiUrl;
    if (!base.endsWith("/")) {
        base = `${base}/`;
    }
    const relative = path.startsWith("/") ? path.slice(1) : path;
    return new URL(relative, base);
}
|
|
94
|
+
function buildCreateRootRunBody(config, input) {
|
|
95
|
+
return {
|
|
96
|
+
kind: "agent",
|
|
97
|
+
owner: {
|
|
98
|
+
kind: "conversation",
|
|
99
|
+
id: input.conversationId,
|
|
100
|
+
},
|
|
101
|
+
public_id: input.runId,
|
|
102
|
+
request: {
|
|
103
|
+
mode: "default_chat",
|
|
104
|
+
agent_id: config.agentId,
|
|
105
|
+
initial_status: "pending",
|
|
106
|
+
...(config.projectId
|
|
107
|
+
? {
|
|
108
|
+
source_target_kind: "project",
|
|
109
|
+
runtime_target_kind: "production",
|
|
110
|
+
runtime_target_branch_id: config.branchId ?? null,
|
|
111
|
+
}
|
|
112
|
+
: {}),
|
|
113
|
+
},
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
/**
 * Builds the JSON body used to start a durable root run.
 *
 * The run input contains the user message (with the given id and prompt), the
 * conversation/project/branch context, and the reference to the root run and its message.
 *
 * @param config Provides `agentId`, `projectId` and optionally `branchId`.
 * @param input Provides `conversationId`, `runId`, `messageId`, `prompt` and `userMessageId`.
 * @returns The request body object.
 */
function buildStartRunBody(config, input) {
    const userMessage = {
        id: input.userMessageId,
        role: "user",
        parts: [{ type: "text", text: input.prompt }],
    };
    const context = {
        conversation_id: input.conversationId,
        project_id: config.projectId,
        branch_id: config.branchId ?? null,
    };
    const durableRootRun = {
        run_id: input.runId,
        message_id: input.messageId,
    };
    return {
        kind: "agent",
        owner: { kind: "conversation", id: input.conversationId },
        public_id: input.runId,
        request: {
            mode: "default_chat",
            agent_id: config.agentId,
            input: {
                messages: [userMessage],
                context,
                durable_root_run: durableRootRun,
            },
        },
    };
}
|
|
148
|
+
/**
 * Creates the HTTP-backed API client used by the canary runner.
 *
 * Every request is resolved against `config.apiUrl`, carries the bearer token from
 * `config.authToken`, and is aborted after `config.requestTimeoutMs` milliseconds.
 *
 * @param config API connection settings.
 * @returns An object exposing `createDurableRootRun`, `getRunSummary`,
 *   `listMessagesForCanary`, `sendUserMessageForCanary` and `startDurableRun`.
 */
export function createDurableRunCanaryApiClient(config) {
    const request = createFetch(config);
    /**
     * Performs one API call.
     *
     * @param path API path relative to the configured base URL.
     * @param init Optional fetch init; its `headers` are merged, its `signal` is replaced
     *   by the timeout signal.
     * @param parse Optional validator applied to the decoded payload.
     * @returns The parsed payload, the raw decoded payload when no `parse` is given, or
     *   `undefined` when the response body is empty and no `parse` is given.
     * @throws {Error} When the response status is not OK. The message has the form
     *   `API <METHOD> <path> failed: <status> <body>`, which the visibility poller relies
     *   on to detect 404s.
     */
    async function apiFetch(path, init, parse) {
        const response = await request(createApiUrl(config, path), {
            ...init,
            headers: createJsonHeaders(config, init?.headers),
            signal: AbortSignal.timeout(config.requestTimeoutMs),
        });
        if (!response.ok) {
            throw new Error(`API ${init?.method ?? "GET"} ${path} failed: ${response.status} ${await response.text()}`);
        }
        // A successful response may legitimately have no body (e.g. 204). Decoding it with
        // response.json() would throw, so read the text and only parse non-empty bodies.
        const bodyText = await response.text();
        const payload = bodyText.trim() === "" ? undefined : JSON.parse(bodyText);
        return parse ? parse(payload) : payload;
    }
    /** Posts the prompt as a user message and returns the validated message. */
    async function sendUserMessageForCanary(input) {
        return apiFetch(`/conversations/${input.conversationId}/messages`, {
            method: "POST",
            body: JSON.stringify({
                role: "user",
                parts: [{ type: "text", text: input.prompt }],
            }),
        }, (value) => getDurableRunCanaryMessageSchema().parse(value));
    }
    /** Creates the root run record; the response payload is ignored. */
    async function createDurableRootRun(input) {
        await apiFetch("/runs", {
            method: "POST",
            body: JSON.stringify(buildCreateRootRunBody(config, input)),
        });
    }
    /** Starts the run; the response payload is ignored. */
    async function startDurableRun(input) {
        await apiFetch("/runs", {
            method: "POST",
            body: JSON.stringify(buildStartRunBody(config, input)),
        });
    }
    /** Fetches one run summary and normalizes it to camelCase. */
    async function getRunSummary(input) {
        const response = await apiFetch(`/conversations/${input.conversationId}/runs/${input.runId}`);
        return parseDurableRunCanaryRunSummary(response);
    }
    /** Lists up to 100 messages of a conversation (single request, no pagination). */
    async function listMessagesForCanary(input) {
        const payload = await apiFetch(`/conversations/${input.conversationId}/messages?limit=100`, undefined, (value) => getDurableRunCanaryMessageListSchema().parse(value));
        return payload.data;
    }
    return {
        createDurableRootRun,
        getRunSummary,
        listMessagesForCanary,
        sendUserMessageForCanary,
        startDurableRun,
    };
}
|
|
199
|
+
// Matches a full 8-4-4-4-12 hexadecimal UUID string (case-insensitive) whose version
// digit is 1-8 and whose variant digit is 8, 9, a or b.
const UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
|
|
200
|
+
/** Returns `true` for any non-null object that is not an array. */
function isRecord(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
|
|
203
|
+
/**
 * Recursively searches a value for child conversation ids and adds them to the set.
 *
 * - Strings are treated as possibly JSON-encoded: they are parsed and the result is
 *   searched; strings that are not valid JSON are ignored.
 * - Arrays and plain objects are traversed element by element / value by value.
 * - On objects, `childConversationId` and `child_conversation_id` are collected when
 *   they hold a UUID-shaped string.
 * - Traversal stops once `depth` exceeds 8.
 *
 * @param value Value to search.
 * @param {Set<string>} childConversationIds Accumulator that receives the ids found.
 * @param {number} [depth=0] Current recursion depth.
 */
function collectChildConversationIdsFromValue(value, childConversationIds, depth = 0) {
    if (depth > 8) {
        return;
    }
    const nextDepth = depth + 1;
    if (typeof value === "string") {
        try {
            const decoded = JSON.parse(value);
            collectChildConversationIdsFromValue(decoded, childConversationIds, nextDepth);
        }
        catch {
            // Not JSON (or traversal failed): nothing to collect from this string.
        }
        return;
    }
    if (Array.isArray(value)) {
        value.forEach((entry) => {
            collectChildConversationIdsFromValue(entry, childConversationIds, nextDepth);
        });
        return;
    }
    if (!isRecord(value)) {
        return;
    }
    const candidates = [value["childConversationId"], value["child_conversation_id"]];
    for (const candidate of candidates) {
        if (typeof candidate === "string" && UUID_PATTERN.test(candidate)) {
            childConversationIds.add(candidate);
        }
    }
    Object.values(value).forEach((nested) => {
        collectChildConversationIdsFromValue(nested, childConversationIds, nextDepth);
    });
}
|
|
235
|
+
/**
 * Collects the distinct child conversation ids referenced by tool results.
 *
 * Only parts whose `type` is `tool_result` or `tool-result` are inspected, and only
 * their `output` value is searched.
 *
 * @param messages Messages whose `parts` are scanned.
 * @returns {string[]} Unique ids in first-seen order.
 */
function collectReferencedChildConversationIds(messages) {
    const found = new Set();
    for (const { parts } of messages) {
        for (const part of parts) {
            const isToolResult = isRecord(part) && (part.type === "tool_result" || part.type === "tool-result");
            if (isToolResult) {
                collectChildConversationIdsFromValue(part.output, found);
            }
        }
    }
    return Array.from(found);
}
|
|
247
|
+
/** Returns a promise that resolves after `ms` milliseconds. */
function sleep(ms) {
    return new Promise((wake) => {
        dntShim.setTimeout(wake, ms);
    });
}
|
|
252
|
+
/** Returns `true` when the run status is `completed`, `failed` or `cancelled`. */
function isTerminalRunStatus(status) {
    switch (status) {
        case "completed":
        case "failed":
        case "cancelled":
            return true;
        default:
            return false;
    }
}
|
|
255
|
+
/** Generates a fresh run id of the form `run_<random UUID>`. */
function createDurableRunCanaryRunId() {
    const uuid = dntShim.crypto.randomUUID();
    return `run_${uuid}`;
}
|
|
258
|
+
/**
 * Polls for a run summary until the run can be fetched.
 *
 * Polls every 500 ms for up to 30 seconds. An error counts as "not visible yet" only
 * when it is an `Error` whose message contains ` 404 `; any other error is rethrown
 * immediately.
 *
 * @param input Provides `getRunSummary`, `conversationId` and `runId`; the whole object
 *   is passed to `getRunSummary`.
 * @returns The first run summary that could be fetched.
 * @throws {Error} When the run is still not visible once the deadline has passed.
 */
async function waitForRunSummaryVisibility(input) {
    const giveUpAt = Date.now() + 30_000;
    for (; Date.now() < giveUpAt; await sleep(500)) {
        try {
            return await input.getRunSummary(input);
        }
        catch (error) {
            const isNotFound = error instanceof Error && error.message.includes(" 404 ");
            if (!isNotFound) {
                throw error;
            }
        }
    }
    throw new Error(`Run ${input.runId} did not become visible in time`);
}
|
|
273
|
+
/**
 * Polls a run until its status is terminal.
 *
 * Polls every 1.5 seconds until `input.requestTimeoutMs` milliseconds have elapsed.
 * Errors thrown by `getRunSummary` propagate to the caller.
 *
 * @param input Provides `getRunSummary`, `requestTimeoutMs`, `conversationId` and
 *   `runId`; the whole object is passed to `getRunSummary`.
 * @returns The first run summary whose status is terminal.
 * @throws {Error} When no terminal status is observed before the deadline.
 */
async function waitForTerminalRun(input) {
    const giveUpAt = Date.now() + input.requestTimeoutMs;
    for (; Date.now() < giveUpAt; await sleep(1_500)) {
        const summary = await input.getRunSummary(input);
        if (isTerminalRunStatus(summary.status)) {
            return summary;
        }
    }
    throw new Error(`Timed out waiting for run ${input.runId} to reach a terminal state`);
}
|
|
284
|
+
/**
 * Creates a runner that executes durable-run canary cases against the API.
 *
 * @param config Runner configuration (API settings plus `keepSuccessfulEvidence`).
 * @param apiClient API client to use; defaults to an HTTP client built from `config`.
 * @returns An object exposing `runCase`.
 */
export function createDurableRunCanaryRunner(config, apiClient = createDurableRunCanaryApiClient(config)) {
    // Always call through the client object so injected clients that rely on `this`
    // behave the same as with every other client method used below.
    const getRunSummary = (input) => apiClient.getRunSummary(input);
    /**
     * Lists the messages of a conversation followed by the messages of every child
     * conversation referenced from its tool results (one level deep).
     */
    async function listMessagesWithReferencedChildren(conversationId) {
        const messages = await apiClient.listMessagesForCanary({ conversationId });
        const childConversationIds = collectReferencedChildConversationIds(messages);
        const childMessages = await Promise.all(childConversationIds.map((childConversationId) => apiClient.listMessagesForCanary({ conversationId: childConversationId })));
        return [...messages, ...childMessages.flat()];
    }
    /**
     * Runs one canary case end to end: prepare, optionally start a sidecar, post the user
     * message, create the root run, wait until it is visible, start it, wait for a
     * terminal status, then validate the run together with its messages.
     *
     * Failures after preparation and sidecar start are reported as a `fail` result rather
     * than a rejection. `cleanup` is invoked only after a passing run, and only when
     * `keepSuccessfulEvidence` is falsy. Errors thrown by `prepare` or `startSidecar`
     * still reject the returned promise.
     *
     * @param testCase Case definition providing `id`, `label` and `prepare`.
     * @returns The pass/fail result for the case.
     */
    async function runCase(testCase) {
        const startedAt = Date.now();
        const prepared = await testCase.prepare();
        // Placeholder reported when the case fails before a run id is generated.
        let runId = "unknown";
        const stopSidecar = await prepared.startSidecar?.();
        const resolveArtifactPaths = (currentRunId) => typeof prepared.artifactPaths === "function"
            ? prepared.artifactPaths(currentRunId)
            : prepared.artifactPaths;
        try {
            const userMessage = await apiClient.sendUserMessageForCanary({
                conversationId: prepared.conversationId,
                prompt: prepared.prompt,
            });
            runId = createDurableRunCanaryRunId();
            await apiClient.createDurableRootRun({
                conversationId: prepared.conversationId,
                runId,
            });
            // The run summary supplies the message id needed to start the run.
            const visibleRun = await waitForRunSummaryVisibility({
                conversationId: prepared.conversationId,
                getRunSummary,
                runId,
            });
            await apiClient.startDurableRun({
                conversationId: prepared.conversationId,
                messageId: visibleRun.messageId,
                prompt: prepared.prompt,
                runId,
                userMessageId: userMessage.id,
            });
            const terminalRun = await waitForTerminalRun({
                conversationId: prepared.conversationId,
                getRunSummary,
                requestTimeoutMs: config.requestTimeoutMs,
                runId,
            });
            const messages = await listMessagesWithReferencedChildren(prepared.conversationId);
            await prepared.validate({
                messages,
                run: terminalRun,
            });
            const artifactPaths = resolveArtifactPaths(runId);
            if (!config.keepSuccessfulEvidence) {
                await prepared.cleanup({ runId });
            }
            return {
                id: testCase.id,
                label: testCase.label,
                status: "pass",
                details: "OK",
                durationMs: Date.now() - startedAt,
                conversationId: prepared.conversationId,
                runId,
                ...(artifactPaths?.length ? { artifactPaths } : {}),
            };
        }
        catch (error) {
            // Resolving artifact paths is best-effort here: if the callback itself throws,
            // report the original failure without artifacts instead of masking it.
            let artifactPaths;
            try {
                artifactPaths = resolveArtifactPaths(runId);
            }
            catch {
                artifactPaths = undefined;
            }
            return {
                id: testCase.id,
                label: testCase.label,
                status: "fail",
                details: error instanceof Error ? error.message : String(error),
                durationMs: Date.now() - startedAt,
                conversationId: prepared.conversationId,
                runId,
                ...(artifactPaths?.length ? { artifactPaths } : {}),
            };
        }
        finally {
            await stopSidecar?.();
        }
    }
    return {
        runCase,
    };
}
|
|
369
|
+
/** Bundles two module-private helpers into one exported object. */
export const durableRunCanaryRunnerInternals = {
    collectReferencedChildConversationIds: collectReferencedChildConversationIds,
    isTerminalRunStatus: isTerminalRunStatus,
};
|
|
@@ -5,5 +5,6 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import "../../../_dnt.polyfills.js";
|
|
7
7
|
export { assertCompleted, assertContains, assertToolCalled, printTestResults, testAgent, type TestCase, type TestResult, type TestSuite, } from "./agent-tester.js";
|
|
8
|
+
export { createDurableRunCanaryApiClient, createDurableRunCanaryRunner, type DurableRunCanaryApiClient, type DurableRunCanaryApiConfig, type DurableRunCanaryCase, type DurableRunCanaryCreateRootRunInput, type DurableRunCanaryMessage, type DurableRunCanaryPreparedCase, type DurableRunCanaryResult, type DurableRunCanaryRunnerConfig, durableRunCanaryRunnerInternals, type DurableRunCanaryRunSummary, type DurableRunCanarySendUserMessageInput, type DurableRunCanaryStartRunInput, getDurableRunCanaryMessageSchema, parseDurableRunCanaryRunSummary, } from "./durable-run-canaries/index.js";
|
|
8
9
|
export { buildFailureSuffix, buildLiveEvalCaseTagSummary, buildLiveEvalRequestBody, type BuildLiveEvalRequestBodyInput, buildLiveEvalRuntimeSummary, buildLiveEvalStatusSummary, buildProgressLine, buildRuntimePerformanceSummary, containsOrderedSubsequence, containsSkillLoad, countStepStartedEvents, createFailedEvalResult, createLiveEvalCaseSupport, createPassedEvalResult, createPlainTextPdf, createSkippedEvalResult, hasEveryLiveEvalTag, hasFinished, type LiveEvalCase, type LiveEvalCaseMetadata, type LiveEvalCaseSelectionInput, type LiveEvalContext, type LiveEvalProjectFile, type LiveEvalProjectFileReaderInput, type LiveEvalRequestBody, type LiveEvalResultForPerformance, type LiveEvalResultForReport, type LiveEvalResultRecord, type LiveEvalRunnerConfig, liveEvalRunnerInternals, type LiveEvalRuntime, type PreparedLiveEvalInput, resolveLiveEvalRequestedCaseIds, type RuntimePerformanceSummary, selectLiveEvalCases, } from "./live-evals/index.js";
|
|
9
10
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/src/agent/testing/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,OAAO,4BAA4B,CAAC;AAGpC,OAAO,EACL,eAAe,EACf,cAAc,EACd,gBAAgB,EAChB,gBAAgB,EAChB,SAAS,EACT,KAAK,QAAQ,EACb,KAAK,UAAU,EACf,KAAK,SAAS,GACf,MAAM,mBAAmB,CAAC;AAE3B,OAAO,EACL,kBAAkB,EAClB,2BAA2B,EAC3B,wBAAwB,EACxB,KAAK,6BAA6B,EAClC,2BAA2B,EAC3B,0BAA0B,EAC1B,iBAAiB,EACjB,8BAA8B,EAC9B,0BAA0B,EAC1B,iBAAiB,EACjB,sBAAsB,EACtB,sBAAsB,EACtB,yBAAyB,EACzB,sBAAsB,EACtB,kBAAkB,EAClB,uBAAuB,EACvB,mBAAmB,EACnB,WAAW,EACX,KAAK,YAAY,EACjB,KAAK,oBAAoB,EACzB,KAAK,0BAA0B,EAC/B,KAAK,eAAe,EACpB,KAAK,mBAAmB,EACxB,KAAK,8BAA8B,EACnC,KAAK,mBAAmB,EACxB,KAAK,4BAA4B,EACjC,KAAK,uBAAuB,EAC5B,KAAK,oBAAoB,EACzB,KAAK,oBAAoB,EACzB,uBAAuB,EACvB,KAAK,eAAe,EACpB,KAAK,qBAAqB,EAC1B,+BAA+B,EAC/B,KAAK,yBAAyB,EAC9B,mBAAmB,GACpB,MAAM,uBAAuB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/src/agent/testing/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,OAAO,4BAA4B,CAAC;AAGpC,OAAO,EACL,eAAe,EACf,cAAc,EACd,gBAAgB,EAChB,gBAAgB,EAChB,SAAS,EACT,KAAK,QAAQ,EACb,KAAK,UAAU,EACf,KAAK,SAAS,GACf,MAAM,mBAAmB,CAAC;AAE3B,OAAO,EACL,+BAA+B,EAC/B,4BAA4B,EAC5B,KAAK,yBAAyB,EAC9B,KAAK,yBAAyB,EAC9B,KAAK,oBAAoB,EACzB,KAAK,kCAAkC,EACvC,KAAK,uBAAuB,EAC5B,KAAK,4BAA4B,EACjC,KAAK,sBAAsB,EAC3B,KAAK,4BAA4B,EACjC,+BAA+B,EAC/B,KAAK,0BAA0B,EAC/B,KAAK,oCAAoC,EACzC,KAAK,6BAA6B,EAClC,gCAAgC,EAChC,+BAA+B,GAChC,MAAM,iCAAiC,CAAC;AAEzC,OAAO,EACL,kBAAkB,EAClB,2BAA2B,EAC3B,wBAAwB,EACxB,KAAK,6BAA6B,EAClC,2BAA2B,EAC3B,0BAA0B,EAC1B,iBAAiB,EACjB,8BAA8B,EAC9B,0BAA0B,EAC1B,iBAAiB,EACjB,sBAAsB,EACtB,sBAAsB,EACtB,yBAAyB,EACzB,sBAAsB,EACtB,kBAAkB,EAClB,uBAAuB,EACvB,mBAAmB,EACnB,WAAW,EACX,KAAK,YAAY,EACjB,KAAK,oBAAoB,EACzB,KAAK,0BAA0B,EAC/B,KAAK,eAAe,EACpB,KAAK,mBAAmB,EACxB,KAAK,8BAA8B,EACnC,KAAK,mBAAmB,EACxB,KAAK,4BAA4B,EACjC,KAAK,uBAAuB,EAC5B,KAAK,oBAAoB,EACzB,KAAK,oBAAoB,EACzB,uBAAuB,EACvB,KAAK,eAAe,EACpB,KAAK,qBAAqB,EAC1B,+BAA+B,EAC/B,KAAK,yBAAyB,EAC9B,mBAAmB,GACpB,MAAM,uBAAuB,CAAC"}
|
|
@@ -5,4 +5,5 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import "../../../_dnt.polyfills.js";
|
|
7
7
|
export { assertCompleted, assertContains, assertToolCalled, printTestResults, testAgent, } from "./agent-tester.js";
|
|
8
|
+
export { createDurableRunCanaryApiClient, createDurableRunCanaryRunner, durableRunCanaryRunnerInternals, getDurableRunCanaryMessageSchema, parseDurableRunCanaryRunSummary, } from "./durable-run-canaries/index.js";
|
|
8
9
|
export { buildFailureSuffix, buildLiveEvalCaseTagSummary, buildLiveEvalRequestBody, buildLiveEvalRuntimeSummary, buildLiveEvalStatusSummary, buildProgressLine, buildRuntimePerformanceSummary, containsOrderedSubsequence, containsSkillLoad, countStepStartedEvents, createFailedEvalResult, createLiveEvalCaseSupport, createPassedEvalResult, createPlainTextPdf, createSkippedEvalResult, hasEveryLiveEvalTag, hasFinished, liveEvalRunnerInternals, resolveLiveEvalRequestedCaseIds, selectLiveEvalCases, } from "./live-evals/index.js";
|