@botpress/adk 1.16.6 → 1.16.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/bp-dev-command.d.ts +6 -0
- package/dist/commands/bp-dev-command.d.ts.map +1 -1
- package/dist/commands/index.d.ts +4 -0
- package/dist/commands/index.d.ts.map +1 -1
- package/dist/commands/opencode-command.d.ts +45 -0
- package/dist/commands/opencode-command.d.ts.map +1 -0
- package/dist/commands/opencode-config.d.ts +29 -0
- package/dist/commands/opencode-config.d.ts.map +1 -0
- package/dist/eval/graders/outcome.d.ts +3 -2
- package/dist/eval/graders/outcome.d.ts.map +1 -1
- package/dist/eval/graders/workflow.d.ts +3 -2
- package/dist/eval/graders/workflow.d.ts.map +1 -1
- package/dist/eval/index.d.ts +1 -1
- package/dist/eval/index.d.ts.map +1 -1
- package/dist/eval/runner.d.ts +1 -3
- package/dist/eval/runner.d.ts.map +1 -1
- package/dist/eval/traces.d.ts.map +1 -1
- package/dist/eval/types.d.ts +10 -13
- package/dist/eval/types.d.ts.map +1 -1
- package/dist/generators/utils.d.ts +0 -11
- package/dist/generators/utils.d.ts.map +1 -1
- package/dist/index.js +290 -56
- package/dist/index.js.map +12 -10
- package/package.json +7 -7
package/dist/index.js
CHANGED
|
@@ -682,7 +682,16 @@ var init_fs = () => {};
|
|
|
682
682
|
|
|
683
683
|
// src/generators/utils.ts
|
|
684
684
|
import path10 from "path";
|
|
685
|
-
|
|
685
|
+
async function getFormat() {
|
|
686
|
+
if (!_formatLoaded) {
|
|
687
|
+
_formatLoaded = true;
|
|
688
|
+
try {
|
|
689
|
+
const oxfmt = await import("oxfmt");
|
|
690
|
+
_format = oxfmt.format;
|
|
691
|
+
} catch {}
|
|
692
|
+
}
|
|
693
|
+
return _format;
|
|
694
|
+
}
|
|
686
695
|
function toMultilineComment(comment) {
|
|
687
696
|
if (!comment || comment.trim() === "") {
|
|
688
697
|
return "";
|
|
@@ -702,40 +711,30 @@ function toMultilineComment(comment) {
|
|
|
702
711
|
result += " */";
|
|
703
712
|
return result;
|
|
704
713
|
}
|
|
705
|
-
var
|
|
714
|
+
var _format = null, _formatLoaded = false, formatCode = async (code, filepath) => {
|
|
706
715
|
try {
|
|
707
716
|
if (!code || code.length > 1e6) {
|
|
708
717
|
return code;
|
|
709
718
|
}
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
719
|
+
const format = await getFormat();
|
|
720
|
+
if (!format)
|
|
721
|
+
return code;
|
|
722
|
+
const fileName = filepath || "file.ts";
|
|
723
|
+
const result = await format(fileName, code);
|
|
724
|
+
return result.code;
|
|
714
725
|
} catch (err) {
|
|
715
|
-
console.warn("Failed to format code with
|
|
726
|
+
console.warn("Failed to format code with oxfmt:", err);
|
|
716
727
|
console.warn(code.slice(0, 1000).split(`
|
|
717
728
|
`).map((l, i) => ` ${i.toString().padStart(2, "0")} | ${l}`).join(`
|
|
718
729
|
`));
|
|
719
730
|
return code;
|
|
720
731
|
}
|
|
721
|
-
}, ADK_VERSION = "1.16.
|
|
732
|
+
}, ADK_VERSION = "1.16.7", relative2 = (from, to) => {
|
|
722
733
|
const fromDir = path10.dirname(from);
|
|
723
734
|
const relative3 = path10.relative(fromDir, to);
|
|
724
735
|
return relative3.startsWith(".") ? relative3 : `./${relative3}`;
|
|
725
736
|
};
|
|
726
|
-
var init_utils =
|
|
727
|
-
PRETTIER_CONFIG = {
|
|
728
|
-
semi: true,
|
|
729
|
-
singleQuote: false,
|
|
730
|
-
tabWidth: 2,
|
|
731
|
-
trailingComma: "es5",
|
|
732
|
-
printWidth: 80,
|
|
733
|
-
arrowParens: "always",
|
|
734
|
-
requirePragma: false,
|
|
735
|
-
insertPragma: false,
|
|
736
|
-
proseWrap: "preserve"
|
|
737
|
-
};
|
|
738
|
-
});
|
|
737
|
+
var init_utils = () => {};
|
|
739
738
|
|
|
740
739
|
// src/generators/action-types.ts
|
|
741
740
|
var exports_action_types = {};
|
|
@@ -861,7 +860,7 @@ var init_integration_action_types = __esm(() => {
|
|
|
861
860
|
var require_package = __commonJS((exports, module) => {
|
|
862
861
|
module.exports = {
|
|
863
862
|
name: "@botpress/adk",
|
|
864
|
-
version: "1.16.
|
|
863
|
+
version: "1.16.7",
|
|
865
864
|
description: "Core ADK library for building AI agents on Botpress",
|
|
866
865
|
type: "module",
|
|
867
866
|
main: "dist/index.js",
|
|
@@ -907,11 +906,11 @@ var require_package = __commonJS((exports, module) => {
|
|
|
907
906
|
},
|
|
908
907
|
dependencies: {
|
|
909
908
|
"@botpress/chat": "^0.5.5",
|
|
910
|
-
"@botpress/cli": "
|
|
911
|
-
"@botpress/client": "
|
|
912
|
-
"@botpress/cognitive": "
|
|
913
|
-
"@botpress/runtime": "^1.16.
|
|
914
|
-
"@botpress/sdk": "
|
|
909
|
+
"@botpress/cli": "5.2.0",
|
|
910
|
+
"@botpress/client": "1.35.0",
|
|
911
|
+
"@botpress/cognitive": "0.3.14",
|
|
912
|
+
"@botpress/runtime": "^1.16.7",
|
|
913
|
+
"@botpress/sdk": "5.4.3",
|
|
915
914
|
"@bpinternal/jex": "^1.2.4",
|
|
916
915
|
"@bpinternal/yargs-extra": "^0.0.21",
|
|
917
916
|
"@parcel/watcher": "^2.5.1",
|
|
@@ -920,7 +919,7 @@ var require_package = __commonJS((exports, module) => {
|
|
|
920
919
|
execa: "9.6.1",
|
|
921
920
|
glob: "^11.1.0",
|
|
922
921
|
luxon: "^3.7.2",
|
|
923
|
-
|
|
922
|
+
oxfmt: "^0.41.0",
|
|
924
923
|
semver: "^7.7.2",
|
|
925
924
|
"ts-morph": "^27.0.2"
|
|
926
925
|
},
|
|
@@ -1154,7 +1153,7 @@ import os3 from "os";
|
|
|
1154
1153
|
import path4 from "path";
|
|
1155
1154
|
import createDebug from "debug";
|
|
1156
1155
|
var debug = createDebug("adk:bp-cli");
|
|
1157
|
-
var BP_CLI_VERSION = "5.
|
|
1156
|
+
var BP_CLI_VERSION = "5.2.0";
|
|
1158
1157
|
var BP_CLI_INSTALL_ALL = path4.join(os3.homedir(), ".adk", `bp-cli`);
|
|
1159
1158
|
var BP_CLI_INSTALL_DIR = path4.join(BP_CLI_INSTALL_ALL, BP_CLI_VERSION);
|
|
1160
1159
|
var BP_CLI_BIN_PATH = path4.join(BP_CLI_INSTALL_DIR, "node_modules", "@botpress", "cli", "bin.js");
|
|
@@ -1649,6 +1648,15 @@ class BpDevCommand extends BaseCommand {
|
|
|
1649
1648
|
if (sourceMap) {
|
|
1650
1649
|
bpArgs.push("--sourceMap");
|
|
1651
1650
|
}
|
|
1651
|
+
const consoleUrl = `http://localhost:${this.options.internalOtlpPort}`;
|
|
1652
|
+
const otlpEndpoint = this.options.otlpPort ? `http://localhost:${this.options.otlpPort}` : undefined;
|
|
1653
|
+
const traceLines = [`[bp-dev] Trace endpoints:`, ` ADK_CONSOLE_URL=${consoleUrl}`];
|
|
1654
|
+
if (otlpEndpoint) {
|
|
1655
|
+
traceLines.push(` OTEL_EXPORTER_OTLP_ENDPOINT=${otlpEndpoint}`);
|
|
1656
|
+
}
|
|
1657
|
+
this.emit("stdout", traceLines.join(`
|
|
1658
|
+
`) + `
|
|
1659
|
+
`);
|
|
1652
1660
|
this.childProcess = execa4(bpCommand, bpArgs, {
|
|
1653
1661
|
cwd: botPath,
|
|
1654
1662
|
env: {
|
|
@@ -1665,6 +1673,9 @@ class BpDevCommand extends BaseCommand {
|
|
|
1665
1673
|
},
|
|
1666
1674
|
WORKER_MODE: "true",
|
|
1667
1675
|
WORKER_LIFETIME_MS: process.env.WORKER_LIFETIME_MS || "120000",
|
|
1676
|
+
ADK_CONSOLE_URL: consoleUrl,
|
|
1677
|
+
...otlpEndpoint && { OTEL_EXPORTER_OTLP_ENDPOINT: otlpEndpoint },
|
|
1678
|
+
...this.options.botName && { ADK_BOT_NAME: this.options.botName },
|
|
1668
1679
|
ADK_DIRECTORY: join2(botPath, ".."),
|
|
1669
1680
|
AGENT_DIRECTORY: this.options.agentPath,
|
|
1670
1681
|
NODE_OPTIONS: `${process.env.NODE_OPTIONS || ""} --enable-source-maps`.trim()
|
|
@@ -1905,6 +1916,221 @@ class BpChatCommand extends BaseCommand {
|
|
|
1905
1916
|
}
|
|
1906
1917
|
}
|
|
1907
1918
|
}
|
|
1919
|
+
// src/commands/opencode-command.ts
|
|
1920
|
+
import { execa as execa6 } from "execa";
|
|
1921
|
+
|
|
1922
|
+
// src/commands/opencode-config.ts
|
|
1923
|
+
var COPILOT_PROMPT = `You are the ADK Copilot — an expert assistant embedded in the Botpress Agent Development Kit control panel.
|
|
1924
|
+
|
|
1925
|
+
## Who you help
|
|
1926
|
+
A developer building an AI agent with the ADK. They're running \`adk dev\` and have the control panel open alongside you. They can see traces, logs, integrations, workflows, and tables in the UI.
|
|
1927
|
+
|
|
1928
|
+
## What you know
|
|
1929
|
+
The ADK is a high-level framework built on Botpress. An agent project has:
|
|
1930
|
+
- /actions — strongly-typed callable functions (Action from @botpress/runtime)
|
|
1931
|
+
- /tools — LLM-callable interfaces with natural language descriptions (Tool from @botpress/runtime)
|
|
1932
|
+
- /workflows — step-based, resumable long-running processes (Workflow from @botpress/runtime)
|
|
1933
|
+
- /conversations — channel-specific interaction handlers (Conversation from @botpress/runtime)
|
|
1934
|
+
- /tables — schema-validated data storage with semantic search (Table from @botpress/runtime)
|
|
1935
|
+
- /triggers — event subscription system (Trigger from @botpress/runtime)
|
|
1936
|
+
- /knowledge — RAG knowledge base documents
|
|
1937
|
+
- agent.config.ts — agent metadata, integrations, model configuration, variables
|
|
1938
|
+
|
|
1939
|
+
The ADK compiles these high-level primitives down to Botpress SDK primitives. Default to ADK terms. Only drop to Botpress SDK concepts when the problem requires it (SDK-level errors, compilation issues, or when the developer explicitly asks).
|
|
1940
|
+
|
|
1941
|
+
Schemas use \`z\` from @botpress/sdk (a Zod fork) — never import Zod directly.
|
|
1942
|
+
|
|
1943
|
+
You also have access to Botpress documentation search for platform-level questions (SDK, API, integrations).
|
|
1944
|
+
|
|
1945
|
+
## How you work
|
|
1946
|
+
|
|
1947
|
+
**Orient on first interaction.** If you haven't yet, use \`adk_get_agent_info\` to learn the project's structure, primitives, and integrations before answering.
|
|
1948
|
+
|
|
1949
|
+
**Conceptual vs. project questions.** Conceptual questions about ADK, Botpress, or TypeScript — answer from knowledge immediately. Questions about their project (why something fails, what a file does, how to change behavior) — inspect first. If in doubt, answer what you can immediately and inspect in parallel.
|
|
1950
|
+
|
|
1951
|
+
**Read freely, write carefully.** Inspecting files, querying traces, and searching the hub are always safe — do them without asking. But actions that modify the project (adding integrations, editing files, executing workflows, sending messages) should be confirmed first unless the developer explicitly asked you to do it.
|
|
1952
|
+
|
|
1953
|
+
**Debug by matching the approach to the problem:**
|
|
1954
|
+
- Developer gives you an error message → start from the error, don't begin at "step 1"
|
|
1955
|
+
- Build or type error → check the file and generated types. Traces won't help.
|
|
1956
|
+
- Runtime behavior is wrong ("it responds wrong") → query traces (\`adk_query_traces\`) for the conversation to find the failing span
|
|
1957
|
+
- Nothing happens → check dev logs (\`adk_get_dev_logs\`) for silent failures, then check if the handler is registered
|
|
1958
|
+
- When in doubt → start with \`adk_get_dev_logs\` with error filtering, then query traces
|
|
1959
|
+
|
|
1960
|
+
**Integrations.** First check if the integration already exists in the project. If it does, inspect its current configuration. Only search the hub (\`adk_search_integrations\`) when adding a new integration. Get details (\`adk_get_integration\`) to understand actions, events, channels, and config requirements. Add it (\`adk_add_integration\`) after the developer confirms.
|
|
1961
|
+
|
|
1962
|
+
**Test iteratively.** Send messages to the running bot (\`adk_send_message\`) to verify behavior. To continue a conversation, pass back both the \`conversationId\` and \`userKey\` from the previous response. Only test against the local dev bot.
|
|
1963
|
+
|
|
1964
|
+
**Workflows.** Get the input schema first by calling \`adk_start_workflow\` without a payload, then execute by calling it again with \`payload\` set to the correct input. For workflows with no required input, pass \`payload: {}\` — omitting payload returns the schema, it does not execute.
|
|
1965
|
+
|
|
1966
|
+
**Handle failures.** If a tool returns an error, tell the developer what happened and suggest a concrete next step (e.g., "Dev server isn't running — start it with \`adk dev\`"). Don't silently retry or ignore errors.
|
|
1967
|
+
|
|
1968
|
+
**Edit with precision.** When modifying code, change only what's needed to solve the problem. Don't refactor surrounding code, add features, or "improve" things that weren't asked about. When writing ADK primitives, match ADK conventions. For utility code, match the patterns already in the project.
|
|
1969
|
+
|
|
1970
|
+
## How you communicate
|
|
1971
|
+
|
|
1972
|
+
**Lead with the answer.** First sentence is the diagnosis, the solution, or the action you took. Context and explanation come after, if needed.
|
|
1973
|
+
|
|
1974
|
+
**Show, don't describe.** Instead of "you should add error handling," show the code change. Instead of "the trace shows a failure," show the relevant span data and what it means.
|
|
1975
|
+
|
|
1976
|
+
**Match the developer's energy.** Short question → short answer. Detailed question → detailed response. "Why is this broken?" → diagnosis + fix. "How do workflows work?" → teach.
|
|
1977
|
+
|
|
1978
|
+
**Hypothesize while verifying.** If you have a likely diagnosis, say so while you check. "This usually means X — checking your trace now" is better than silence followed by an answer.
|
|
1979
|
+
|
|
1980
|
+
**When you fix something, explain what you changed and why.** Don't apply changes silently — the developer needs to understand the fix to trust it and learn from it.
|
|
1981
|
+
|
|
1982
|
+
**When you don't know, say so plainly.** "I don't have enough context — can you share the error message?" is fine.
|
|
1983
|
+
|
|
1984
|
+
**Skip the filler.** No "Great question!", no "Let me help you with that." Just do it.
|
|
1985
|
+
|
|
1986
|
+
**Never read or display the contents of .env files or credentials.** If you need to verify a configuration value, ask the developer to confirm it.`;
|
|
1987
|
+
function buildOpenCodeConfig(options) {
|
|
1988
|
+
const command = [options.adkBinPath, "mcp", "--cwd", options.agentPath];
|
|
1989
|
+
if (options.devServerPort)
|
|
1990
|
+
command.push("--port", String(options.devServerPort));
|
|
1991
|
+
return {
|
|
1992
|
+
...options.openCodePort && {
|
|
1993
|
+
server: {
|
|
1994
|
+
port: options.openCodePort,
|
|
1995
|
+
...options.corsOrigins?.length && { cors: options.corsOrigins }
|
|
1996
|
+
}
|
|
1997
|
+
},
|
|
1998
|
+
mcp: {
|
|
1999
|
+
adk: {
|
|
2000
|
+
type: "local",
|
|
2001
|
+
command
|
|
2002
|
+
}
|
|
2003
|
+
},
|
|
2004
|
+
agent: {
|
|
2005
|
+
default: {
|
|
2006
|
+
description: "ADK Copilot — helps build and debug Botpress ADK agents",
|
|
2007
|
+
prompt: COPILOT_PROMPT
|
|
2008
|
+
}
|
|
2009
|
+
}
|
|
2010
|
+
};
|
|
2011
|
+
}
|
|
2012
|
+
|
|
2013
|
+
// src/commands/opencode-command.ts
|
|
2014
|
+
class OpenCodeCommand extends BaseCommand {
|
|
2015
|
+
options;
|
|
2016
|
+
childProcess = null;
|
|
2017
|
+
killed = false;
|
|
2018
|
+
readyEmitted = false;
|
|
2019
|
+
constructor(options) {
|
|
2020
|
+
super();
|
|
2021
|
+
this.options = options;
|
|
2022
|
+
}
|
|
2023
|
+
static async isInstalled() {
|
|
2024
|
+
try {
|
|
2025
|
+
const cmd = process.platform === "win32" ? "where" : "which";
|
|
2026
|
+
await execa6(cmd, ["opencode"]);
|
|
2027
|
+
return true;
|
|
2028
|
+
} catch {
|
|
2029
|
+
return false;
|
|
2030
|
+
}
|
|
2031
|
+
}
|
|
2032
|
+
emitReady() {
|
|
2033
|
+
if (this.readyEmitted || this.killed)
|
|
2034
|
+
return;
|
|
2035
|
+
this.readyEmitted = true;
|
|
2036
|
+
this.emit("progress", {
|
|
2037
|
+
type: "ready",
|
|
2038
|
+
startTime: Date.now(),
|
|
2039
|
+
data: { port: this.options.port }
|
|
2040
|
+
});
|
|
2041
|
+
}
|
|
2042
|
+
async run() {
|
|
2043
|
+
const { port, cwd, corsOrigins = [] } = this.options;
|
|
2044
|
+
this.emit("progress", { type: "checking", startTime: Date.now() });
|
|
2045
|
+
const installed = await OpenCodeCommand.isInstalled();
|
|
2046
|
+
if (!installed) {
|
|
2047
|
+
this.emit("progress", {
|
|
2048
|
+
type: "error",
|
|
2049
|
+
startTime: Date.now(),
|
|
2050
|
+
data: { reason: "opencode not installed" }
|
|
2051
|
+
});
|
|
2052
|
+
return;
|
|
2053
|
+
}
|
|
2054
|
+
this.emit("progress", { type: "starting", startTime: Date.now() });
|
|
2055
|
+
const args = ["serve", "--port", String(port)];
|
|
2056
|
+
for (const origin of corsOrigins) {
|
|
2057
|
+
args.push("--cors", origin);
|
|
2058
|
+
}
|
|
2059
|
+
const adkBin = this.options.adkBinPath || "adk";
|
|
2060
|
+
const opencodeConfig = JSON.stringify(buildOpenCodeConfig({
|
|
2061
|
+
adkBinPath: adkBin,
|
|
2062
|
+
agentPath: cwd,
|
|
2063
|
+
devServerPort: this.options.devServerPort,
|
|
2064
|
+
openCodePort: port,
|
|
2065
|
+
corsOrigins
|
|
2066
|
+
}));
|
|
2067
|
+
this.childProcess = execa6("opencode", args, {
|
|
2068
|
+
cwd,
|
|
2069
|
+
env: { ...process.env, OPENCODE_CONFIG_CONTENT: opencodeConfig },
|
|
2070
|
+
stdio: ["ignore", "pipe", "pipe"]
|
|
2071
|
+
});
|
|
2072
|
+
if (this.childProcess.stdout) {
|
|
2073
|
+
this.childProcess.stdout.on("data", (data) => {
|
|
2074
|
+
this.emit("stdout", data.toString());
|
|
2075
|
+
});
|
|
2076
|
+
}
|
|
2077
|
+
if (this.childProcess.stderr) {
|
|
2078
|
+
this.childProcess.stderr.on("data", (data) => {
|
|
2079
|
+
const text = data.toString();
|
|
2080
|
+
this.emit("stderr", text);
|
|
2081
|
+
const lower = text.toLowerCase();
|
|
2082
|
+
if (lower.includes("eaddrinuse") || lower.includes("address already in use")) {
|
|
2083
|
+
this.emit("progress", {
|
|
2084
|
+
type: "error",
|
|
2085
|
+
startTime: Date.now(),
|
|
2086
|
+
data: { reason: `Port ${port} already in use` }
|
|
2087
|
+
});
|
|
2088
|
+
this.kill();
|
|
2089
|
+
}
|
|
2090
|
+
});
|
|
2091
|
+
}
|
|
2092
|
+
this.childProcess.then(() => {
|
|
2093
|
+
if (!this.killed && !this.readyEmitted) {
|
|
2094
|
+
this.readyEmitted = true;
|
|
2095
|
+
this.emit("progress", {
|
|
2096
|
+
type: "error",
|
|
2097
|
+
startTime: Date.now(),
|
|
2098
|
+
data: { reason: "OpenCode process exited unexpectedly" }
|
|
2099
|
+
});
|
|
2100
|
+
}
|
|
2101
|
+
}, (error) => {
|
|
2102
|
+
if (this.killed)
|
|
2103
|
+
return;
|
|
2104
|
+
this.readyEmitted = true;
|
|
2105
|
+
this.emit("progress", {
|
|
2106
|
+
type: "error",
|
|
2107
|
+
startTime: Date.now(),
|
|
2108
|
+
data: { reason: error.message || "OpenCode process exited unexpectedly" }
|
|
2109
|
+
});
|
|
2110
|
+
});
|
|
2111
|
+
this.pollUntilReady(port);
|
|
2112
|
+
}
|
|
2113
|
+
async pollUntilReady(port) {
|
|
2114
|
+
const deadline = Date.now() + 15000;
|
|
2115
|
+
while (!this.killed && !this.readyEmitted && Date.now() < deadline) {
|
|
2116
|
+
try {
|
|
2117
|
+
const res = await fetch(`http://localhost:${port}/`);
|
|
2118
|
+
if (res.ok || res.status < 500) {
|
|
2119
|
+
this.emitReady();
|
|
2120
|
+
return;
|
|
2121
|
+
}
|
|
2122
|
+
} catch {}
|
|
2123
|
+
await new Promise((r) => setTimeout(r, 500));
|
|
2124
|
+
}
|
|
2125
|
+
this.emitReady();
|
|
2126
|
+
}
|
|
2127
|
+
kill(signal = "SIGTERM") {
|
|
2128
|
+
this.killed = true;
|
|
2129
|
+
if (this.childProcess) {
|
|
2130
|
+
this.childProcess.kill(signal);
|
|
2131
|
+
}
|
|
2132
|
+
}
|
|
2133
|
+
}
|
|
1908
2134
|
// src/workspace/workspace-cache.ts
|
|
1909
2135
|
import { Client as Client3 } from "@botpress/client";
|
|
1910
2136
|
class WorkspaceCache {
|
|
@@ -5468,7 +5694,7 @@ class AgentProjectGenerator {
|
|
|
5468
5694
|
deploy: "adk deploy"
|
|
5469
5695
|
},
|
|
5470
5696
|
dependencies: {
|
|
5471
|
-
"@botpress/runtime": `^${"1.16.
|
|
5697
|
+
"@botpress/runtime": `^${"1.16.7"}`
|
|
5472
5698
|
},
|
|
5473
5699
|
devDependencies: {
|
|
5474
5700
|
typescript: "^5.9.3"
|
|
@@ -11633,7 +11859,7 @@ async function discoverWebhookId(botId, token, apiUrl) {
|
|
|
11633
11859
|
}
|
|
11634
11860
|
|
|
11635
11861
|
// src/eval/traces.ts
|
|
11636
|
-
async function
|
|
11862
|
+
async function fetchSpans(conversationId, devServerUrl) {
|
|
11637
11863
|
const url = `${devServerUrl}/api/traces/query?attributeName=conversationId&attributeValue=${encodeURIComponent(conversationId)}&count=1000`;
|
|
11638
11864
|
const res = await fetch(url);
|
|
11639
11865
|
if (!res.ok) {
|
|
@@ -11643,25 +11869,26 @@ async function fetchTraceSpans(conversationId, devServerUrl) {
|
|
|
11643
11869
|
return Array.isArray(data) ? data : data.spans || [];
|
|
11644
11870
|
}
|
|
11645
11871
|
function extractToolCalls(spans) {
|
|
11646
|
-
const
|
|
11872
|
+
const data = (span) => span.data && typeof span.data === "object" ? span.data : {};
|
|
11873
|
+
const toolEndSpans = spans.filter((span) => span.name === "autonomous.tool" && (span.status === "ok" || span.status === "error") && data(span)["autonomous.tool.name"]);
|
|
11647
11874
|
const seen = new Set;
|
|
11648
11875
|
const unique = toolEndSpans.filter((span) => {
|
|
11649
|
-
if (seen.has(span.
|
|
11876
|
+
if (seen.has(span.id.span))
|
|
11650
11877
|
return false;
|
|
11651
|
-
seen.add(span.
|
|
11878
|
+
seen.add(span.id.span);
|
|
11652
11879
|
return true;
|
|
11653
11880
|
});
|
|
11654
|
-
return unique.sort((a, b) => (a.
|
|
11655
|
-
const
|
|
11881
|
+
return unique.sort((a, b) => (a.timing.endedAt ?? 0) - (b.timing.endedAt ?? 0)).map((span) => {
|
|
11882
|
+
const d = data(span);
|
|
11656
11883
|
let input = {};
|
|
11657
11884
|
try {
|
|
11658
|
-
input = JSON.parse(
|
|
11885
|
+
input = JSON.parse(d["autonomous.tool.input"]);
|
|
11659
11886
|
} catch {}
|
|
11660
11887
|
return {
|
|
11661
|
-
name:
|
|
11888
|
+
name: d["autonomous.tool.name"],
|
|
11662
11889
|
input,
|
|
11663
|
-
output:
|
|
11664
|
-
status:
|
|
11890
|
+
output: d["autonomous.tool.output"] || "",
|
|
11891
|
+
status: d["autonomous.tool.status"] || "unknown"
|
|
11665
11892
|
};
|
|
11666
11893
|
});
|
|
11667
11894
|
}
|
|
@@ -11676,7 +11903,7 @@ async function getTraceData(conversationId, devServerUrl, options = {}) {
|
|
|
11676
11903
|
if (attempt > 0) {
|
|
11677
11904
|
await new Promise((resolve4) => setTimeout(resolve4, retryDelay));
|
|
11678
11905
|
}
|
|
11679
|
-
spans = await
|
|
11906
|
+
spans = await fetchSpans(conversationId, devServerUrl);
|
|
11680
11907
|
allToolCalls = extractToolCalls(spans);
|
|
11681
11908
|
if (!expectNew || allToolCalls.length > previousCount) {
|
|
11682
11909
|
break;
|
|
@@ -12199,9 +12426,11 @@ async function gradeTables(client, assertions) {
|
|
|
12199
12426
|
// src/eval/graders/workflow.ts
|
|
12200
12427
|
function gradeWorkflows(spans, assertions) {
|
|
12201
12428
|
const results = [];
|
|
12429
|
+
const data = (span) => span.data && typeof span.data === "object" ? span.data : {};
|
|
12202
12430
|
for (const assertion of assertions) {
|
|
12203
12431
|
const workflowSpans = spans.filter((span) => {
|
|
12204
|
-
const
|
|
12432
|
+
const d = data(span);
|
|
12433
|
+
const wfName = d["workflow.name"] || d["workflowName"];
|
|
12205
12434
|
return wfName === assertion.name;
|
|
12206
12435
|
});
|
|
12207
12436
|
if (assertion.entered !== undefined) {
|
|
@@ -12216,8 +12445,9 @@ function gradeWorkflows(spans, assertions) {
|
|
|
12216
12445
|
}
|
|
12217
12446
|
if (assertion.completed !== undefined) {
|
|
12218
12447
|
const completedSpans = workflowSpans.filter((span) => {
|
|
12219
|
-
const
|
|
12220
|
-
|
|
12448
|
+
const d = data(span);
|
|
12449
|
+
const status = d["workflow.status.final"];
|
|
12450
|
+
return status === "completed" || (span.status === "ok" || span.status === "error") && span.name?.includes("workflow");
|
|
12221
12451
|
});
|
|
12222
12452
|
const didComplete = completedSpans.length > 0;
|
|
12223
12453
|
const pass = assertion.completed ? didComplete : !didComplete;
|
|
@@ -12261,8 +12491,10 @@ async function gradeOutcome(client, evalDef, ctx, traceSpans, preSnapshots) {
|
|
|
12261
12491
|
|
|
12262
12492
|
// src/eval/runner.ts
|
|
12263
12493
|
import { randomUUID } from "crypto";
|
|
12494
|
+
var DEFAULT_IDLE_TIMEOUT = 15000;
|
|
12264
12495
|
async function runEval(evalDef, connection, options = {}) {
|
|
12265
12496
|
const devServerUrl = options.devServerUrl || "http://localhost:3001";
|
|
12497
|
+
const idleTimeout = evalDef.options?.idleTimeout ?? options.idleTimeout ?? DEFAULT_IDLE_TIMEOUT;
|
|
12266
12498
|
const start = Date.now();
|
|
12267
12499
|
const turns = [];
|
|
12268
12500
|
let outcomeAssertions = [];
|
|
@@ -12291,13 +12523,13 @@ async function runEval(evalDef, connection, options = {}) {
|
|
|
12291
12523
|
}
|
|
12292
12524
|
let previousToolCallCount = 0;
|
|
12293
12525
|
let lastConversationId = "";
|
|
12294
|
-
let
|
|
12526
|
+
let allSpans = [];
|
|
12295
12527
|
for (let i = 0;i < evalDef.conversation.length; i++) {
|
|
12296
12528
|
const turn = evalDef.conversation[i];
|
|
12297
12529
|
const turnStart = Date.now();
|
|
12298
12530
|
const result = await session.sendMessage(turn.user, {
|
|
12299
|
-
timeout: 30000,
|
|
12300
|
-
idleTimeout
|
|
12531
|
+
timeout: Math.max(30000, idleTimeout * 2),
|
|
12532
|
+
idleTimeout
|
|
12301
12533
|
});
|
|
12302
12534
|
const botDuration = Date.now() - turnStart;
|
|
12303
12535
|
lastConversationId = result.conversationId;
|
|
@@ -12318,7 +12550,7 @@ async function runEval(evalDef, connection, options = {}) {
|
|
|
12318
12550
|
expectNewCalls
|
|
12319
12551
|
});
|
|
12320
12552
|
previousToolCallCount = traceData.totalToolCallCount;
|
|
12321
|
-
|
|
12553
|
+
allSpans = traceData.raw;
|
|
12322
12554
|
const toolResults = gradeTools(traceData.toolCalls, turn.assert.tools);
|
|
12323
12555
|
assertions = [...assertions, ...toolResults];
|
|
12324
12556
|
} catch (err) {
|
|
@@ -12365,13 +12597,13 @@ async function runEval(evalDef, connection, options = {}) {
|
|
|
12365
12597
|
}
|
|
12366
12598
|
}
|
|
12367
12599
|
if (turn.assert?.workflow) {
|
|
12368
|
-
if (
|
|
12600
|
+
if (allSpans.length === 0) {
|
|
12369
12601
|
try {
|
|
12370
12602
|
const traceData = await getTraceData(result.conversationId, devServerUrl);
|
|
12371
|
-
|
|
12603
|
+
allSpans = traceData.raw;
|
|
12372
12604
|
} catch {}
|
|
12373
12605
|
}
|
|
12374
|
-
const workflowResults = gradeWorkflows(
|
|
12606
|
+
const workflowResults = gradeWorkflows(allSpans, turn.assert.workflow);
|
|
12375
12607
|
assertions.push(...workflowResults);
|
|
12376
12608
|
}
|
|
12377
12609
|
const turnPass = assertions.every((a) => a.pass);
|
|
@@ -12387,10 +12619,10 @@ async function runEval(evalDef, connection, options = {}) {
|
|
|
12387
12619
|
});
|
|
12388
12620
|
}
|
|
12389
12621
|
if (evalDef.outcome) {
|
|
12390
|
-
if (
|
|
12622
|
+
if (allSpans.length === 0 && lastConversationId && evalDef.outcome.workflow) {
|
|
12391
12623
|
try {
|
|
12392
12624
|
const traceData = await getTraceData(lastConversationId, devServerUrl);
|
|
12393
|
-
|
|
12625
|
+
allSpans = traceData.raw;
|
|
12394
12626
|
} catch {}
|
|
12395
12627
|
}
|
|
12396
12628
|
const ctx = {
|
|
@@ -12399,7 +12631,7 @@ async function runEval(evalDef, connection, options = {}) {
|
|
|
12399
12631
|
conversationId: lastConversationId
|
|
12400
12632
|
};
|
|
12401
12633
|
try {
|
|
12402
|
-
outcomeAssertions = await gradeOutcome(getBpClient(), evalDef, ctx,
|
|
12634
|
+
outcomeAssertions = await gradeOutcome(getBpClient(), evalDef, ctx, allSpans, preSnapshots);
|
|
12403
12635
|
} catch (err) {
|
|
12404
12636
|
outcomeAssertions = [
|
|
12405
12637
|
{
|
|
@@ -12476,7 +12708,7 @@ async function runEvalSuite(config, filter) {
|
|
|
12476
12708
|
for (let i = 0;i < evals.length; i++) {
|
|
12477
12709
|
const evalDef = evals[i];
|
|
12478
12710
|
config.onProgress?.({ type: "eval_start", evalName: evalDef.name, index: i });
|
|
12479
|
-
const report = await runEval(evalDef, connection, { devServerUrl });
|
|
12711
|
+
const report = await runEval(evalDef, connection, { devServerUrl, idleTimeout: config.evalOptions?.idleTimeout });
|
|
12480
12712
|
reports.push(report);
|
|
12481
12713
|
config.onProgress?.({ type: "eval_complete", evalName: evalDef.name, index: i, report });
|
|
12482
12714
|
}
|
|
@@ -12564,6 +12796,7 @@ export {
|
|
|
12564
12796
|
dependenciesKeyOrder,
|
|
12565
12797
|
defineEval,
|
|
12566
12798
|
coerceConfigValue,
|
|
12799
|
+
buildOpenCodeConfig,
|
|
12567
12800
|
bpCliImporter,
|
|
12568
12801
|
auth,
|
|
12569
12802
|
agentInfoKeyOrder,
|
|
@@ -12576,6 +12809,7 @@ export {
|
|
|
12576
12809
|
PreflightFormatter,
|
|
12577
12810
|
PreflightChecker,
|
|
12578
12811
|
PluginParser,
|
|
12812
|
+
OpenCodeCommand,
|
|
12579
12813
|
KnowledgeManager,
|
|
12580
12814
|
KBSyncOperation,
|
|
12581
12815
|
KBSyncFormatter,
|
|
@@ -12611,4 +12845,4 @@ export {
|
|
|
12611
12845
|
AgentProject
|
|
12612
12846
|
};
|
|
12613
12847
|
|
|
12614
|
-
//# debugId=
|
|
12848
|
+
//# debugId=BA9CF8B5F775A63B64756E2164756E21
|