@empiricalrun/test-gen 0.71.2 → 0.72.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/dist/agent/chat/agent-loop.d.ts +11 -9
- package/dist/agent/chat/agent-loop.d.ts.map +1 -1
- package/dist/agent/chat/agent-loop.js +20 -10
- package/dist/agent/chat/exports.d.ts +2 -2
- package/dist/agent/chat/exports.d.ts.map +1 -1
- package/dist/agent/chat/exports.js +6 -1
- package/dist/agent/chat/index.d.ts.map +1 -1
- package/dist/agent/chat/index.js +48 -13
- package/dist/agent/chat/prompt/index.d.ts +2 -2
- package/dist/agent/chat/prompt/index.d.ts.map +1 -1
- package/dist/agent/chat/prompt/index.js +2 -2
- package/dist/agent/chat/prompt/repo.d.ts +2 -2
- package/dist/agent/chat/prompt/repo.d.ts.map +1 -1
- package/dist/agent/chat/prompt/repo.js +20 -11
- package/dist/agent/chat/state.d.ts +2 -2
- package/dist/agent/chat/state.d.ts.map +1 -1
- package/dist/agent/chat/types.d.ts +0 -6
- package/dist/agent/chat/types.d.ts.map +1 -1
- package/dist/agent/chat/utils/tool-calls.d.ts +21 -0
- package/dist/agent/chat/utils/tool-calls.d.ts.map +1 -0
- package/dist/agent/chat/utils/tool-calls.js +64 -0
- package/dist/agent/chat/utils.d.ts +2 -4
- package/dist/agent/chat/utils.d.ts.map +1 -1
- package/dist/agent/chat/utils.js +5 -11
- package/dist/agent/master/browser-tests/index.spec.js +1 -1
- package/dist/auth/api-client.d.ts.map +1 -1
- package/dist/auth/api-client.js +6 -2
- package/dist/bin/environments.d.ts +1 -1
- package/dist/bin/environments.d.ts.map +1 -1
- package/dist/bin/environments.js +70 -36
- package/dist/bin/index.js +1 -1
- package/dist/bin/setup.d.ts.map +1 -1
- package/dist/bin/setup.js +10 -10
- package/dist/dashboard/index.d.ts +21 -0
- package/dist/dashboard/index.d.ts.map +1 -0
- package/dist/dashboard/index.js +83 -0
- package/dist/dashboard/totp.d.ts +2 -0
- package/dist/dashboard/totp.d.ts.map +1 -0
- package/dist/dashboard/totp.js +18 -0
- package/dist/file-info/file-system.d.ts +3 -0
- package/dist/file-info/file-system.d.ts.map +1 -0
- package/dist/{utils/file-tree.js → file-info/file-system.js} +2 -8
- package/dist/file-info/github.d.ts +3 -0
- package/dist/file-info/github.d.ts.map +1 -0
- package/dist/file-info/github.js +108 -0
- package/dist/tools/commit-and-create-pr.js +2 -2
- package/dist/tools/definitions/index.d.ts +22 -0
- package/dist/tools/definitions/index.d.ts.map +1 -0
- package/dist/tools/definitions/index.js +61 -0
- package/dist/tools/definitions/run-test.d.ts +23 -0
- package/dist/tools/definitions/run-test.d.ts.map +1 -0
- package/dist/tools/definitions/run-test.js +28 -0
- package/dist/tools/definitions/str_replace_editor.d.ts +3 -0
- package/dist/tools/definitions/str_replace_editor.d.ts.map +1 -0
- package/dist/tools/definitions/str_replace_editor.js +74 -0
- package/dist/tools/definitions/test-gen-browser.d.ts +26 -0
- package/dist/tools/definitions/test-gen-browser.d.ts.map +1 -0
- package/dist/tools/definitions/test-gen-browser.js +88 -0
- package/dist/{tool-call-service/utils.d.ts → tools/definitions/utils/queue.d.ts} +6 -4
- package/dist/tools/definitions/utils/queue.d.ts.map +1 -0
- package/dist/{tool-call-service/utils.js → tools/definitions/utils/queue.js} +20 -19
- package/dist/tools/diagnosis-fetcher.js +1 -1
- package/dist/{tool-call-service → tools/executor}/index.d.ts +2 -7
- package/dist/tools/executor/index.d.ts.map +1 -0
- package/dist/{tool-call-service → tools/executor}/index.js +15 -29
- package/dist/tools/executor/utils/checkpoint.d.ts.map +1 -0
- package/dist/tools/executor/utils/git.d.ts.map +1 -0
- package/dist/{utils → tools/executor/utils}/git.js +5 -5
- package/dist/tools/executor/utils/index.d.ts.map +1 -0
- package/dist/tools/list-environments.js +1 -1
- package/dist/tools/run-test.d.ts.map +1 -1
- package/dist/tools/run-test.js +2 -23
- package/dist/tools/str_replace_editor.d.ts.map +1 -1
- package/dist/tools/str_replace_editor.js +6 -58
- package/dist/tools/test-gen-browser.d.ts.map +1 -1
- package/dist/tools/test-gen-browser.js +2 -83
- package/dist/tools/test-run-fetcher/index.js +1 -1
- package/dist/tools/upgrade-packages/index.js +2 -2
- package/dist/tools/upgrade-packages/utils.js +1 -1
- package/dist/types/index.d.ts +0 -8
- package/dist/types/index.d.ts.map +1 -1
- package/dist/utils/SQSClient.d.ts +14 -0
- package/dist/utils/SQSClient.d.ts.map +1 -0
- package/dist/utils/SQSClient.js +116 -0
- package/dist/utils/repo-tree.d.ts +1 -1
- package/dist/utils/repo-tree.d.ts.map +1 -1
- package/dist/utils/repo-tree.js +7 -7
- package/package.json +16 -8
- package/tsconfig.tsbuildinfo +1 -1
- package/dist/tool-call-service/index.d.ts.map +0 -1
- package/dist/tool-call-service/utils.d.ts.map +0 -1
- package/dist/tools/utils/index.d.ts.map +0 -1
- package/dist/utils/checkpoint.d.ts.map +0 -1
- package/dist/utils/file-tree.d.ts +0 -3
- package/dist/utils/file-tree.d.ts.map +0 -1
- package/dist/utils/git.d.ts.map +0 -1
- /package/dist/{utils → tools/executor/utils}/checkpoint.d.ts +0 -0
- /package/dist/{utils → tools/executor/utils}/checkpoint.js +0 -0
- /package/dist/{utils → tools/executor/utils}/git.d.ts +0 -0
- /package/dist/tools/{utils → executor/utils}/index.d.ts +0 -0
- /package/dist/tools/{utils → executor/utils}/index.js +0 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,43 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.72.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 6ac65ed: feat: add custom sqs client with bug fix and error boundary
|
|
8
|
+
- 23708d1: feat: add agent-worker app to run chat agent over durable objects
|
|
9
|
+
|
|
10
|
+
### Patch Changes
|
|
11
|
+
|
|
12
|
+
- 6b1d98c: feat: trigger impacted tests computation on tool-responses
|
|
13
|
+
- b6a04f5: feat: use websocket to get realtime updates on session
|
|
14
|
+
- 4cbc287: feat: add (de)compress layer in the worker message management
|
|
15
|
+
- a96a03c: chore: follow-ups from agent-worker first land
|
|
16
|
+
- 32905df: chore: move dashboard methods from tool-execute-service to test-gen
|
|
17
|
+
- 8e3c7a4: feat: working agent-worker for text messages
|
|
18
|
+
- 5b0d43a: chore: logging tool call flow between agent-worker and dashboard
|
|
19
|
+
- fc6f97c: fix: execSync handling for ubuntu and windows ci
|
|
20
|
+
- 440e851: feat: extended thinking in claude
|
|
21
|
+
- a23b38f: chore: add worker env in createChatModel & ToolCaller methods
|
|
22
|
+
- 12c69cc: fix: error handling for non-retryable errors
|
|
23
|
+
- 5ed01c4: fix: stop should reset askUserForInput state
|
|
24
|
+
- e1d01c8: feat: add file info builder for github in test-gen
|
|
25
|
+
- 450b79a: feat: show resolved projects for environments in cli
|
|
26
|
+
- 7009d67: feat: show tool result preview in chat ui
|
|
27
|
+
- d570c55: feat: add reporter function in agent worker
|
|
28
|
+
- 622aa35: chore: split tool call service into caller and executor
|
|
29
|
+
- 162e461: chore: file info clean up
|
|
30
|
+
- 349003e: feat: working tool calls on agent-worker
|
|
31
|
+
- d5c7696: test: add agent loop test harness
|
|
32
|
+
- 7afa5c1: feat: better error handling & state updates, add langfuse trace id to worker session state
|
|
33
|
+
- Updated dependencies [df226a5]
|
|
34
|
+
- Updated dependencies [440e851]
|
|
35
|
+
- Updated dependencies [a23b38f]
|
|
36
|
+
- Updated dependencies [450b79a]
|
|
37
|
+
- Updated dependencies [d5c7696]
|
|
38
|
+
- @empiricalrun/llm@0.20.0
|
|
39
|
+
- @empiricalrun/test-run@0.10.9
|
|
40
|
+
|
|
3
41
|
## 0.71.2
|
|
4
42
|
|
|
5
43
|
### Patch Changes
|
|
@@ -1,17 +1,19 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
2
|
import { IChatModel } from "@empiricalrun/llm/chat";
|
|
3
|
-
import { PendingToolCall, SupportedChatModels } from "@empiricalrun/shared-types";
|
|
4
|
-
import {
|
|
5
|
-
|
|
6
|
-
import { ReporterFunction } from "./types";
|
|
7
|
-
export declare function chatAgentLoop({ chatModel, selectedModel, reporter, trace, toolCallService, fileInfo, isToolExecutionRemote, onToolCallQueued, }: {
|
|
3
|
+
import { FileInfo, PendingToolCall, ReporterFunction, StreamingMessageReporterFunc, SupportedChatModels, ToolDefinition } from "@empiricalrun/shared-types";
|
|
4
|
+
import { ToolExecutor } from "../../tools/executor";
|
|
5
|
+
export declare function chatAgentLoop({ chatModel, selectedModel, reporter, streamingMessageReporter, trace, isToolExecutionRemote, toolExecutor, repoInfo, tools, onPendingToolCall, signal, featureFlags, }: {
|
|
8
6
|
chatModel: IChatModel<any>;
|
|
9
7
|
selectedModel: SupportedChatModels;
|
|
10
|
-
|
|
8
|
+
isToolExecutionRemote: boolean;
|
|
9
|
+
toolExecutor: ToolExecutor | undefined;
|
|
11
10
|
reporter: ReporterFunction;
|
|
11
|
+
streamingMessageReporter?: StreamingMessageReporterFunc;
|
|
12
12
|
trace?: TraceClient;
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
repoInfo: FileInfo;
|
|
14
|
+
tools: ToolDefinition[];
|
|
15
|
+
onPendingToolCall: (toolCalls: PendingToolCall[]) => Promise<void>;
|
|
16
|
+
signal?: AbortSignal;
|
|
17
|
+
featureFlags: string[];
|
|
16
18
|
}): Promise<void>;
|
|
17
19
|
//# sourceMappingURL=agent-loop.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,UAAU,EAAmB,MAAM,wBAAwB,CAAC;AACrE,OAAO,EACL,eAAe,EACf,
|
|
1
|
+
{"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,UAAU,EAAmB,MAAM,wBAAwB,CAAC;AACrE,OAAO,EACL,QAAQ,EACR,eAAe,EACf,gBAAgB,EAChB,4BAA4B,EAC5B,mBAAmB,EACnB,cAAc,EACf,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAKpD,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,wBAAwB,EACxB,KAAK,EACL,qBAAqB,EACrB,YAAY,EACZ,QAAQ,EACR,KAAK,EACL,iBAAiB,EACjB,MAAM,EACN,YAAY,GACb,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,qBAAqB,EAAE,OAAO,CAAC;IAC/B,YAAY,EAAE,YAAY,GAAG,SAAS,CAAC;IACvC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,wBAAwB,CAAC,EAAE,4BAA4B,CAAC;IACxD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,EAAE,cAAc,EAAE,CAAC;IACxB,iBAAiB,EAAE,CAAC,SAAS,EAAE,eAAe,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACnE,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,GAAG,OAAO,CAAC,IAAI,CAAC,CAiEhB"}
|
|
@@ -5,38 +5,49 @@ const chat_1 = require("@empiricalrun/llm/chat");
|
|
|
5
5
|
const prompt_1 = require("./prompt");
|
|
6
6
|
const state_1 = require("./state");
|
|
7
7
|
const utils_1 = require("./utils");
|
|
8
|
-
async function chatAgentLoop({ chatModel, selectedModel, reporter, trace,
|
|
9
|
-
const systemPrompt = await (0, prompt_1.buildSystemPrompt)(
|
|
8
|
+
async function chatAgentLoop({ chatModel, selectedModel, reporter, streamingMessageReporter, trace, isToolExecutionRemote, toolExecutor, repoInfo, tools, onPendingToolCall, signal, featureFlags, }) {
|
|
9
|
+
const systemPrompt = await (0, prompt_1.buildSystemPrompt)(repoInfo);
|
|
10
10
|
trace?.update({ input: { systemPrompt } });
|
|
11
11
|
while (!chatModel.askUserForInput) {
|
|
12
|
+
// Abort if signal is set
|
|
13
|
+
if (signal?.aborted) {
|
|
14
|
+
(0, utils_1.log)("[chatAgentLoop] Aborted by signal");
|
|
15
|
+
break;
|
|
16
|
+
}
|
|
12
17
|
try {
|
|
13
18
|
const toolCalls = chatModel.getPendingToolCalls();
|
|
14
19
|
if (toolCalls.length > 0) {
|
|
20
|
+
await onPendingToolCall(toolCalls);
|
|
15
21
|
if (isToolExecutionRemote) {
|
|
16
|
-
const requestId = toolCalls[0].id;
|
|
17
|
-
await toolCallService.sendToQueue(requestId, toolCalls);
|
|
18
|
-
onToolCallQueued?.(requestId, toolCalls);
|
|
19
22
|
(0, utils_1.log)(`Tool call remote execution in progress`);
|
|
20
23
|
break;
|
|
21
24
|
}
|
|
22
25
|
else {
|
|
23
|
-
|
|
26
|
+
if (!toolExecutor) {
|
|
27
|
+
throw new Error("ToolExecutor is required for local execution");
|
|
28
|
+
}
|
|
29
|
+
const toolResults = await toolExecutor.execute(toolCalls);
|
|
24
30
|
chatModel.pushToolResultsMessage(toolCalls, toolResults);
|
|
25
31
|
}
|
|
26
32
|
}
|
|
27
33
|
(0, utils_1.log)(`${(0, utils_1.getModelName)(selectedModel)} is working...`);
|
|
28
|
-
|
|
29
|
-
response = await chatModel.getLLMResponse({
|
|
34
|
+
const response = await chatModel.getLLMResponse({
|
|
30
35
|
systemPrompt,
|
|
31
|
-
tools:
|
|
36
|
+
tools: tools.map((tool) => (0, chat_1.zodToOpenAITool)(tool.schema)),
|
|
32
37
|
selectedModel,
|
|
33
38
|
trace,
|
|
39
|
+
streamingMessageReporter,
|
|
40
|
+
hasThinkingEnabled: featureFlags.includes("extendedThinkingClaude"),
|
|
34
41
|
});
|
|
35
42
|
if (!response) {
|
|
36
43
|
throw new Error("Error getting response from LLM");
|
|
37
44
|
}
|
|
38
45
|
chatModel.pushMessage(response);
|
|
39
46
|
const latest = chatModel.getHumanReadableLatestMessage();
|
|
47
|
+
if (signal?.aborted) {
|
|
48
|
+
(0, utils_1.log)("[chatAgentLoop] Aborted by signal");
|
|
49
|
+
break;
|
|
50
|
+
}
|
|
40
51
|
await reporter((0, state_1.chatStateFromModel)({
|
|
41
52
|
chatModel,
|
|
42
53
|
selectedModel,
|
|
@@ -45,7 +56,6 @@ async function chatAgentLoop({ chatModel, selectedModel, reporter, trace, toolCa
|
|
|
45
56
|
}
|
|
46
57
|
catch (error) {
|
|
47
58
|
await (0, utils_1.handleAgentError)({
|
|
48
|
-
context: "chatAgentLoop",
|
|
49
59
|
error,
|
|
50
60
|
chatModel,
|
|
51
61
|
selectedModel,
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
export
|
|
1
|
+
export { getFileInfoFromGitHub } from "../../file-info/github";
|
|
2
2
|
export { chatAgentLoop } from "./agent-loop";
|
|
3
3
|
export { defaultModel } from "./models";
|
|
4
4
|
export { CHAT_STATE_VERSIONS_MIGRATIONS_MAP, chatStateFromModel, createChatState, createChatStateForMessages, fetchToolCallAvailability, getLatestDownloadBuildUrl, LATEST_CHAT_STATE_VERSION, migrateChatState, processAttachments, } from "./state";
|
|
5
|
-
export type { ReporterFunction } from "./types";
|
|
6
5
|
export { extractAttachments } from "./utils";
|
|
6
|
+
export { processToolCallsAndUpdateChatState, rejectPendingToolCalls, } from "./utils/tool-calls";
|
|
7
7
|
export type { IChatModel } from "@empiricalrun/llm/chat";
|
|
8
8
|
export { createChatModel, SUPPORTED_CHAT_MODELS } from "@empiricalrun/llm/chat";
|
|
9
9
|
//# sourceMappingURL=exports.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"exports.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/exports.ts"],"names":[],"mappings":"AAAA,
|
|
1
|
+
{"version":3,"file":"exports.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/exports.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,wBAAwB,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EACL,kCAAkC,EAClC,kBAAkB,EAClB,eAAe,EACf,0BAA0B,EAC1B,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EAChB,kBAAkB,GACnB,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAC7C,OAAO,EACL,kCAAkC,EAClC,sBAAsB,GACvB,MAAM,oBAAoB,CAAC;AAC5B,YAAY,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACzD,OAAO,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,wBAAwB,CAAC"}
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.SUPPORTED_CHAT_MODELS = exports.createChatModel = exports.extractAttachments = exports.processAttachments = exports.migrateChatState = exports.LATEST_CHAT_STATE_VERSION = exports.getLatestDownloadBuildUrl = exports.fetchToolCallAvailability = exports.createChatStateForMessages = exports.createChatState = exports.chatStateFromModel = exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP = exports.defaultModel = exports.chatAgentLoop = void 0;
|
|
3
|
+
exports.SUPPORTED_CHAT_MODELS = exports.createChatModel = exports.rejectPendingToolCalls = exports.processToolCallsAndUpdateChatState = exports.extractAttachments = exports.processAttachments = exports.migrateChatState = exports.LATEST_CHAT_STATE_VERSION = exports.getLatestDownloadBuildUrl = exports.fetchToolCallAvailability = exports.createChatStateForMessages = exports.createChatState = exports.chatStateFromModel = exports.CHAT_STATE_VERSIONS_MIGRATIONS_MAP = exports.defaultModel = exports.chatAgentLoop = exports.getFileInfoFromGitHub = void 0;
|
|
4
|
+
var github_1 = require("../../file-info/github");
|
|
5
|
+
Object.defineProperty(exports, "getFileInfoFromGitHub", { enumerable: true, get: function () { return github_1.getFileInfoFromGitHub; } });
|
|
4
6
|
var agent_loop_1 = require("./agent-loop");
|
|
5
7
|
Object.defineProperty(exports, "chatAgentLoop", { enumerable: true, get: function () { return agent_loop_1.chatAgentLoop; } });
|
|
6
8
|
var models_1 = require("./models");
|
|
@@ -17,6 +19,9 @@ Object.defineProperty(exports, "migrateChatState", { enumerable: true, get: func
|
|
|
17
19
|
Object.defineProperty(exports, "processAttachments", { enumerable: true, get: function () { return state_1.processAttachments; } });
|
|
18
20
|
var utils_1 = require("./utils");
|
|
19
21
|
Object.defineProperty(exports, "extractAttachments", { enumerable: true, get: function () { return utils_1.extractAttachments; } });
|
|
22
|
+
var tool_calls_1 = require("./utils/tool-calls");
|
|
23
|
+
Object.defineProperty(exports, "processToolCallsAndUpdateChatState", { enumerable: true, get: function () { return tool_calls_1.processToolCallsAndUpdateChatState; } });
|
|
24
|
+
Object.defineProperty(exports, "rejectPendingToolCalls", { enumerable: true, get: function () { return tool_calls_1.rejectPendingToolCalls; } });
|
|
20
25
|
var chat_1 = require("@empiricalrun/llm/chat");
|
|
21
26
|
Object.defineProperty(exports, "createChatModel", { enumerable: true, get: function () { return chat_1.createChatModel; } });
|
|
22
27
|
Object.defineProperty(exports, "SUPPORTED_CHAT_MODELS", { enumerable: true, get: function () { return chat_1.SUPPORTED_CHAT_MODELS; } });
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAEA,OAAO,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAIL,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AAoCpC,wBAAsB,yBAAyB,IAAI,OAAO,CACxD,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CACvB,CAwBA;AAED,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,GACrB,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1C,iBAmKA"}
|
package/dist/agent/chat/index.js
CHANGED
|
@@ -5,10 +5,11 @@ exports.runChatAgentForCLI = runChatAgentForCLI;
|
|
|
5
5
|
const llm_1 = require("@empiricalrun/llm");
|
|
6
6
|
const chat_1 = require("@empiricalrun/llm/chat");
|
|
7
7
|
const picocolors_1 = require("picocolors");
|
|
8
|
+
const file_system_1 = require("../../file-info/file-system");
|
|
8
9
|
const human_in_the_loop_1 = require("../../human-in-the-loop");
|
|
9
|
-
const
|
|
10
|
-
const
|
|
11
|
-
const git_1 = require("../../utils/git");
|
|
10
|
+
const definitions_1 = require("../../tools/definitions");
|
|
11
|
+
const executor_1 = require("../../tools/executor");
|
|
12
|
+
const git_1 = require("../../tools/executor/utils/git");
|
|
12
13
|
const agent_loop_1 = require("./agent-loop");
|
|
13
14
|
const state_1 = require("./state");
|
|
14
15
|
const utils_1 = require("./utils");
|
|
@@ -43,6 +44,7 @@ async function fetchEnvironmentVariables() {
|
|
|
43
44
|
}
|
|
44
45
|
async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }) {
|
|
45
46
|
let chatState;
|
|
47
|
+
let enableStreaming = true;
|
|
46
48
|
if (useDiskForChatState) {
|
|
47
49
|
chatState = (0, state_1.loadChatState)();
|
|
48
50
|
}
|
|
@@ -83,7 +85,12 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
83
85
|
(0, state_1.saveToDisk)(chatState.messages, selectedModel, chatState.askUserForInput, chatState.error);
|
|
84
86
|
}
|
|
85
87
|
if (latest) {
|
|
86
|
-
|
|
88
|
+
if (!enableStreaming) {
|
|
89
|
+
console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
|
|
90
|
+
}
|
|
91
|
+
else {
|
|
92
|
+
process.stdout.write(`\n`);
|
|
93
|
+
}
|
|
87
94
|
}
|
|
88
95
|
};
|
|
89
96
|
const trace = (0, llm_1.createLangfuseTrace)({
|
|
@@ -127,7 +134,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
127
134
|
if (!process.env.EMPIRICALRUN_API_KEY) {
|
|
128
135
|
throw new Error("EMPIRICALRUN_API_KEY is not set");
|
|
129
136
|
}
|
|
130
|
-
const
|
|
137
|
+
const toolExecutor = new executor_1.ToolExecutor({
|
|
131
138
|
chatSessionId: null,
|
|
132
139
|
selectedModel,
|
|
133
140
|
branchName,
|
|
@@ -137,23 +144,51 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
137
144
|
featureFlags: [],
|
|
138
145
|
environmentOverrides: await fetchEnvironmentVariables(),
|
|
139
146
|
});
|
|
140
|
-
const
|
|
147
|
+
const toolCaller = new definitions_1.ToolCaller({
|
|
148
|
+
chatSessionId: null,
|
|
149
|
+
selectedModel,
|
|
150
|
+
branchName,
|
|
151
|
+
});
|
|
152
|
+
const fileInfo = await (0, file_system_1.getFileInfoFromFS)(process.cwd());
|
|
141
153
|
await (0, agent_loop_1.chatAgentLoop)({
|
|
142
154
|
chatModel,
|
|
143
155
|
selectedModel,
|
|
144
156
|
reporter: reporterFunc,
|
|
157
|
+
streamingMessageReporter: (() => {
|
|
158
|
+
if (!enableStreaming) {
|
|
159
|
+
return;
|
|
160
|
+
}
|
|
161
|
+
let hasStarted = false;
|
|
162
|
+
let startedRole = undefined;
|
|
163
|
+
return async (delta, snapshot, thinking) => {
|
|
164
|
+
if (delta) {
|
|
165
|
+
const role = thinking ? "Thinking" : "Assistant";
|
|
166
|
+
if (!hasStarted) {
|
|
167
|
+
process.stdout.write(`${(0, picocolors_1.blue)(role)}: `);
|
|
168
|
+
hasStarted = true;
|
|
169
|
+
startedRole = role;
|
|
170
|
+
}
|
|
171
|
+
else if (hasStarted && role !== startedRole) {
|
|
172
|
+
// Changing from thinking -> text block
|
|
173
|
+
process.stdout.write("\n");
|
|
174
|
+
process.stdout.write(`${(0, picocolors_1.blue)(role)}: `);
|
|
175
|
+
startedRole = role;
|
|
176
|
+
}
|
|
177
|
+
process.stdout.write(delta);
|
|
178
|
+
}
|
|
179
|
+
};
|
|
180
|
+
})(),
|
|
145
181
|
trace,
|
|
146
|
-
|
|
147
|
-
fileInfo,
|
|
182
|
+
repoInfo: fileInfo,
|
|
148
183
|
isToolExecutionRemote: false,
|
|
184
|
+
toolExecutor,
|
|
185
|
+
tools: toolCaller.tools,
|
|
186
|
+
onPendingToolCall: async () => { },
|
|
187
|
+
featureFlags: ["extendedThinkingClaude"],
|
|
149
188
|
});
|
|
150
189
|
}
|
|
151
190
|
}
|
|
152
|
-
trace?.update({
|
|
153
|
-
output: {
|
|
154
|
-
messages: chatModel.messages,
|
|
155
|
-
},
|
|
156
|
-
});
|
|
191
|
+
trace?.update({ output: { messages: chatModel.messages } });
|
|
157
192
|
await llm_1.langfuseInstance?.flushAsync();
|
|
158
193
|
const usageSummary = chatModel.getUsageSummary();
|
|
159
194
|
console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { FileInfo } from "
|
|
2
|
-
export declare function buildSystemPrompt(
|
|
1
|
+
import { FileInfo } from "@empiricalrun/shared-types";
|
|
2
|
+
export declare function buildSystemPrompt(repoInfo: FileInfo): Promise<string>;
|
|
3
3
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/agent/chat/prompt/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/agent/chat/prompt/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAKtD,wBAAsB,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,mBA4GzD"}
|
|
@@ -3,7 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.buildSystemPrompt = buildSystemPrompt;
|
|
4
4
|
const pw_utils_docs_1 = require("./pw-utils-docs");
|
|
5
5
|
const repo_1 = require("./repo");
|
|
6
|
-
async function buildSystemPrompt(
|
|
6
|
+
async function buildSystemPrompt(repoInfo) {
|
|
7
7
|
const preamble = `
|
|
8
8
|
You are a helpful assistant that can answer questions and help with tasks related to writing and maintaining Playwright tests.
|
|
9
9
|
|
|
@@ -101,7 +101,7 @@ if (await saveButton.isVisible()) {
|
|
|
101
101
|
\`\`\`
|
|
102
102
|
|
|
103
103
|
`;
|
|
104
|
-
const repoContext = await (0, repo_1.getRepoInfoPrompt)(
|
|
104
|
+
const repoContext = await (0, repo_1.getRepoInfoPrompt)(repoInfo);
|
|
105
105
|
return `${preamble}
|
|
106
106
|
|
|
107
107
|
# Recipes
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { FileInfo } from "
|
|
2
|
-
export declare function getRepoInfoPrompt(
|
|
1
|
+
import { FileInfo } from "@empiricalrun/shared-types";
|
|
2
|
+
export declare function getRepoInfoPrompt(repoInfo: FileInfo): Promise<string>;
|
|
3
3
|
//# sourceMappingURL=repo.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"repo.d.ts","sourceRoot":"","sources":["../../../../src/agent/chat/prompt/repo.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"repo.d.ts","sourceRoot":"","sources":["../../../../src/agent/chat/prompt/repo.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAqDtD,wBAAsB,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,mBA0CzD"}
|
|
@@ -7,24 +7,31 @@ exports.getRepoInfoPrompt = getRepoInfoPrompt;
|
|
|
7
7
|
const path_1 = __importDefault(require("path"));
|
|
8
8
|
const repo_tree_1 = require("../../../utils/repo-tree");
|
|
9
9
|
async function getAllMarkdownFiles(directory) {
|
|
10
|
-
if (
|
|
10
|
+
if (directory.type !== "directory") {
|
|
11
11
|
return [];
|
|
12
12
|
}
|
|
13
|
-
const files = directory.children
|
|
14
|
-
if (!files) {
|
|
13
|
+
const files = directory.children;
|
|
14
|
+
if (!files || files.length === 0) {
|
|
15
15
|
return [];
|
|
16
16
|
}
|
|
17
17
|
return Promise.all(files
|
|
18
|
-
.filter((file) => file.
|
|
18
|
+
.filter((file) => file.type === "file" && file.path.endsWith(".md"))
|
|
19
19
|
.map(async (file) => {
|
|
20
20
|
return {
|
|
21
21
|
name: path_1.default.basename(file.path),
|
|
22
|
-
content: await file.getContent(),
|
|
22
|
+
content: file.type === "file" ? await file.getContent() : "",
|
|
23
23
|
};
|
|
24
24
|
}));
|
|
25
25
|
}
|
|
26
|
-
async function knowledgeContext(
|
|
27
|
-
|
|
26
|
+
async function knowledgeContext(repoInfo) {
|
|
27
|
+
if (repoInfo.type !== "directory") {
|
|
28
|
+
throw new Error(`"${repoInfo.path}" is not a valid directory`);
|
|
29
|
+
}
|
|
30
|
+
const knowledgeDir = repoInfo.children.find((child) => child.type === "directory" && child.path === ".empiricalrun");
|
|
31
|
+
if (!knowledgeDir) {
|
|
32
|
+
return "";
|
|
33
|
+
}
|
|
34
|
+
const mdFiles = await getAllMarkdownFiles(knowledgeDir);
|
|
28
35
|
const knowledge = mdFiles.map((file) => {
|
|
29
36
|
return `
|
|
30
37
|
<knowledge_file>
|
|
@@ -37,17 +44,19 @@ async function knowledgeContext(directory) {
|
|
|
37
44
|
});
|
|
38
45
|
return knowledge.join("\n");
|
|
39
46
|
}
|
|
40
|
-
async function getRepoInfoPrompt(
|
|
47
|
+
async function getRepoInfoPrompt(repoInfo) {
|
|
48
|
+
if (!repoInfo)
|
|
49
|
+
return "";
|
|
41
50
|
let REPO_CONTEXT_PROMPT = `
|
|
42
51
|
You are running as a CLI tool inside the directory of the repo that has Playwright tests.
|
|
43
52
|
|
|
44
53
|
Here is the repo directory structure:
|
|
45
54
|
|
|
46
|
-
${(0, repo_tree_1.generateAsciiTree)(
|
|
55
|
+
${(0, repo_tree_1.generateAsciiTree)(repoInfo)}
|
|
47
56
|
|
|
48
57
|
While specifying paths to files, use relative paths from the current working directory. For example:
|
|
49
58
|
- Correct path: "tests/lesson.spec.ts"
|
|
50
|
-
- Incorrect path: "/repo/tests/lesson.spec.ts" or "${
|
|
59
|
+
- Incorrect path: "/repo/tests/lesson.spec.ts" or "${repoInfo.name}/tests/lesson.spec.ts"
|
|
51
60
|
`;
|
|
52
61
|
// try {
|
|
53
62
|
// const playwrightConfig = await readPlaywrightConfig(process.cwd());
|
|
@@ -62,7 +71,7 @@ While specifying paths to files, use relative paths from the current working dir
|
|
|
62
71
|
// } catch (error) {
|
|
63
72
|
// console.warn("Failed to read playwright config", error);
|
|
64
73
|
// }
|
|
65
|
-
const knowledge = await knowledgeContext(
|
|
74
|
+
const knowledge = await knowledgeContext(repoInfo);
|
|
66
75
|
if (knowledge.length > 0) {
|
|
67
76
|
REPO_CONTEXT_PROMPT += `
|
|
68
77
|
## Repo-specific knowledge
|
|
@@ -12,7 +12,7 @@ export declare function createChatState({ userPrompt, attachments, existingState
|
|
|
12
12
|
error: ChatStateError | null;
|
|
13
13
|
}): ChatState;
|
|
14
14
|
export declare function createChatStateForMessages({ messages, selectedModel, askUserForInput, error, }: {
|
|
15
|
-
messages:
|
|
15
|
+
messages: CanonicalMessage[];
|
|
16
16
|
selectedModel: SupportedChatModels;
|
|
17
17
|
askUserForInput: boolean;
|
|
18
18
|
error: ChatStateError | null;
|
|
@@ -28,7 +28,7 @@ export declare function loadChatState(): ChatState | undefined;
|
|
|
28
28
|
* Add migration logic for each version as needed.
|
|
29
29
|
*/
|
|
30
30
|
export declare function migrateChatState(oldState: any): ChatState;
|
|
31
|
-
export declare function saveToDisk
|
|
31
|
+
export declare function saveToDisk(messages: Array<CanonicalMessage>, selectedModel: SupportedChatModels, askUserForInput: boolean, error: ChatStateError | null): void;
|
|
32
32
|
export declare function getLatestDownloadBuildUrl(messages: CanonicalMessage[]): string | null;
|
|
33
33
|
export declare function fetchToolCallAvailability(toolCallId: String, messages: CanonicalMessage[]): {
|
|
34
34
|
hasToolRequest: boolean;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,EACX,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,UAAU,EACV,gBAAgB,EAChB,SAAS,EACT,cAAc,EACd,mBAAmB,EACnB,mBAAmB,EAEpB,MAAM,4BAA4B,CAAC;AA+BpC,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,wBAAsB,kBAAkB,CACtC,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,mBAAmB,GACjC,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAMhC;AAED,wBAAgB,eAAe,CAAC,EAC9B,UAAU,EACV,WAAW,EACX,aAAa,EACb,aAAa,EACb,KAAK,GACN,EAAE;IACD,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,WAAW,EAAE,mBAAmB,EAAE,CAAC;IACnC,aAAa,EAAE,SAAS,GAAG,SAAS,CAAC;IACrC,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAYA;AAED,wBAAgB,0BAA0B,CAAC,EACzC,QAAQ,EACR,aAAa,EACb,eAAe,EACf,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,
|
|
1
|
+
{"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,EACX,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,UAAU,EACV,gBAAgB,EAChB,SAAS,EACT,cAAc,EACd,mBAAmB,EACnB,mBAAmB,EAEpB,MAAM,4BAA4B,CAAC;AA+BpC,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,wBAAsB,kBAAkB,CACtC,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,mBAAmB,GACjC,OAAO,CAAC,mBAAmB,EAAE,CAAC,CAMhC;AAED,wBAAgB,eAAe,CAAC,EAC9B,UAAU,EACV,WAAW,EACX,aAAa,EACb,aAAa,EACb,KAAK,GACN,EAAE;IACD,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,WAAW,EAAE,mBAAmB,EAAE,CAAC;IACnC,aAAa,EAAE,SAAS,GAAG,SAAS,CAAC;IACrC,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAYA;AAED,wBAAgB,0BAA0B,CAAC,EACzC,QAAQ,EACR,aAAa,EACb,eAAe,EACf,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,gBAAgB,EAAE,CAAC;IAC7B,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,GAAG,SAAS,CASZ;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,EACpC,SAAS,EACT,aAAa,EACb,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAOA;AAED,wBAAgB,aAAa,IAAI,SAAS,GAAG,SAAS,CAarD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,GAAG,GAAG,SAAS,CAqBzD;AAED,wBAAgB,UAAU,CACxB,QAAQ,EAAE,KAAK,CAAC,gBAAgB,CAAC,EACjC,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,EACxB,KAAK,EAAE,cAAc,GAAG,IAAI,QAgB7B;AA2BD,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,MAAM,GAAG,IAAI,CAef;AAED,wBAAgB,yBAAyB,CACvC,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,gBAAgB,EAAE;;;;;EA2B7B"}
|
|
@@ -1,8 +1,2 @@
|
|
|
1
|
-
import { ChatState } from "@empiricalrun/shared-types";
|
|
2
|
-
type LatestMessage = {
|
|
3
|
-
role: string;
|
|
4
|
-
textMessage: string;
|
|
5
|
-
};
|
|
6
|
-
export type ReporterFunction = (state: ChatState, latestHumanReadableMessage: LatestMessage | undefined) => Promise<void>;
|
|
7
1
|
export {};
|
|
8
2
|
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { IChatModel } from "@empiricalrun/llm/chat";
|
|
2
|
+
import type { ChatState, PendingToolCall, SupportedChatModels, ToolResult } from "@empiricalrun/shared-types";
|
|
3
|
+
export declare function rejectPendingToolCalls({ chatState, chatModel, }: {
|
|
4
|
+
chatState: ChatState;
|
|
5
|
+
chatModel: IChatModel<any>;
|
|
6
|
+
}): ChatState;
|
|
7
|
+
export declare function filterPendingToolCalls({ toolCalls, toolResults, pendingToolCalls, }: {
|
|
8
|
+
toolCalls: PendingToolCall[];
|
|
9
|
+
toolResults: ToolResult[];
|
|
10
|
+
pendingToolCalls: PendingToolCall[];
|
|
11
|
+
}): {
|
|
12
|
+
filteredToolCalls: PendingToolCall[];
|
|
13
|
+
filteredToolResults: ToolResult[];
|
|
14
|
+
};
|
|
15
|
+
export declare function processToolCallsAndUpdateChatState({ toolCalls, toolResults, chatModel, selectedModel, }: {
|
|
16
|
+
toolCalls: PendingToolCall[];
|
|
17
|
+
toolResults: ToolResult[];
|
|
18
|
+
chatModel: IChatModel<any>;
|
|
19
|
+
selectedModel: SupportedChatModels;
|
|
20
|
+
}): ChatState | undefined;
|
|
21
|
+
//# sourceMappingURL=tool-calls.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tool-calls.d.ts","sourceRoot":"","sources":["../../../../src/agent/chat/utils/tool-calls.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,KAAK,EACV,SAAS,EACT,eAAe,EACf,mBAAmB,EACnB,UAAU,EACX,MAAM,4BAA4B,CAAC;AAOpC,wBAAgB,sBAAsB,CAAC,EACrC,SAAS,EACT,SAAS,GACV,EAAE;IACD,SAAS,EAAE,SAAS,CAAC;IACrB,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;CAC5B,GAAG,SAAS,CA8BZ;AAED,wBAAgB,sBAAsB,CAAC,EACrC,SAAS,EACT,WAAW,EACX,gBAAgB,GACjB,EAAE;IACD,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,WAAW,EAAE,UAAU,EAAE,CAAC;IAC1B,gBAAgB,EAAE,eAAe,EAAE,CAAC;CACrC;;;EAgBA;AAED,wBAAgB,kCAAkC,CAAC,EACjD,SAAS,EACT,WAAW,EACX,SAAS,EACT,aAAa,GACd,EAAE;IACD,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,WAAW,EAAE,UAAU,EAAE,CAAC;IAC1B,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;CACpC,GAAG,SAAS,GAAG,SAAS,CAkBxB"}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.rejectPendingToolCalls = rejectPendingToolCalls;
|
|
4
|
+
exports.filterPendingToolCalls = filterPendingToolCalls;
|
|
5
|
+
exports.processToolCallsAndUpdateChatState = processToolCallsAndUpdateChatState;
|
|
6
|
+
const state_1 = require("../state");
|
|
7
|
+
const TOOL_CALL_REJECTED_BY_USER_ERROR = "This tool call was rejected by the user.";
|
|
8
|
+
function rejectPendingToolCalls({ chatState, chatModel, }) {
|
|
9
|
+
const pendingToolCalls = chatModel.getPendingToolCalls();
|
|
10
|
+
let updatedChatState = {
|
|
11
|
+
...chatState,
|
|
12
|
+
// When tool calls are rejected, we reset askUserForInput to true
|
|
13
|
+
// so that the user can provide input for the next tool call. The agent
|
|
14
|
+
// loop is not run again until the user provides input.
|
|
15
|
+
askUserForInput: true,
|
|
16
|
+
};
|
|
17
|
+
if (pendingToolCalls.length > 0) {
|
|
18
|
+
const toolResults = pendingToolCalls.map((toolCall) => {
|
|
19
|
+
console.log(`Rejecting pending tool calls with toolCallId: ${toolCall.id} `);
|
|
20
|
+
return {
|
|
21
|
+
isError: true,
|
|
22
|
+
result: TOOL_CALL_REJECTED_BY_USER_ERROR,
|
|
23
|
+
};
|
|
24
|
+
});
|
|
25
|
+
chatModel.pushToolResultsMessage(pendingToolCalls, toolResults);
|
|
26
|
+
return {
|
|
27
|
+
...updatedChatState,
|
|
28
|
+
messages: chatModel.messages,
|
|
29
|
+
};
|
|
30
|
+
}
|
|
31
|
+
return updatedChatState;
|
|
32
|
+
}
|
|
33
|
+
function filterPendingToolCalls({ toolCalls, toolResults, pendingToolCalls, }) {
|
|
34
|
+
const filteredToolResults = [];
|
|
35
|
+
const filteredToolCalls = toolCalls.filter((call, index) => {
|
|
36
|
+
if (pendingToolCalls.some((p) => p.id === call.id)) {
|
|
37
|
+
filteredToolResults.push(toolResults[index]);
|
|
38
|
+
return true;
|
|
39
|
+
}
|
|
40
|
+
return false;
|
|
41
|
+
});
|
|
42
|
+
return {
|
|
43
|
+
filteredToolCalls,
|
|
44
|
+
filteredToolResults,
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
function processToolCallsAndUpdateChatState({ toolCalls, toolResults, chatModel, selectedModel, }) {
|
|
48
|
+
const pendingToolCalls = chatModel.getPendingToolCalls() || [];
|
|
49
|
+
const { filteredToolCalls, filteredToolResults } = filterPendingToolCalls({
|
|
50
|
+
toolCalls,
|
|
51
|
+
toolResults,
|
|
52
|
+
pendingToolCalls,
|
|
53
|
+
});
|
|
54
|
+
if (filteredToolCalls.length > 0) {
|
|
55
|
+
chatModel.pushToolResultsMessage(filteredToolCalls, filteredToolResults);
|
|
56
|
+
const newChatState = (0, state_1.createChatStateForMessages)({
|
|
57
|
+
messages: chatModel.messages || [],
|
|
58
|
+
selectedModel,
|
|
59
|
+
askUserForInput: chatModel.askUserForInput,
|
|
60
|
+
error: null,
|
|
61
|
+
});
|
|
62
|
+
return newChatState;
|
|
63
|
+
}
|
|
64
|
+
}
|
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
2
|
import { IChatModel } from "@empiricalrun/llm/chat";
|
|
3
|
-
import { Attachment, SupportedChatModels } from "@empiricalrun/shared-types";
|
|
4
|
-
import { ReporterFunction } from "./types";
|
|
3
|
+
import { Attachment, ReporterFunction, SupportedChatModels } from "@empiricalrun/shared-types";
|
|
5
4
|
export declare const log: (...args: any[]) => void;
|
|
6
5
|
export declare function getModelName(model: string): string;
|
|
7
|
-
export declare function handleAgentError({
|
|
8
|
-
context: string;
|
|
6
|
+
export declare function handleAgentError({ error, chatModel, selectedModel, reporter, trace, }: {
|
|
9
7
|
error: unknown;
|
|
10
8
|
chatModel: IChatModel<any>;
|
|
11
9
|
selectedModel: SupportedChatModels;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAuB,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACzE,OAAO,EACL,UAAU,EAEV,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAuB,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACzE,OAAO,EACL,UAAU,EAEV,gBAAgB,EAChB,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AAKpC,eAAO,MAAM,GAAG,GAAI,GAAG,MAAM,GAAG,EAAE,SAEjC,CAAC;AAcF,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED,wBAAsB,gBAAgB,CAAC,EACrC,KAAK,EACL,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAgBA;AAED,wBAAgB,kBAAkB,CAAC,UAAU,EAAE,MAAM,GAAG;IACtD,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,UAAU,EAAE,CAAC;CAC3B,CAsBA"}
|
package/dist/agent/chat/utils.js
CHANGED
|
@@ -11,8 +11,8 @@ const log = (...args) => {
|
|
|
11
11
|
console.log((0, picocolors_1.gray)(args.join(" ")));
|
|
12
12
|
};
|
|
13
13
|
exports.log = log;
|
|
14
|
-
function logError(
|
|
15
|
-
console.error((0, picocolors_1.gray)(`[Error
|
|
14
|
+
function logError(error, trace) {
|
|
15
|
+
console.error((0, picocolors_1.gray)(`[Error]:`), error instanceof Error ? error.stack || error.message : error);
|
|
16
16
|
trace?.update({
|
|
17
17
|
output: {
|
|
18
18
|
error: error instanceof Error ? error.message : String(error),
|
|
@@ -29,7 +29,7 @@ function getModelName(model) {
|
|
|
29
29
|
return "o4";
|
|
30
30
|
return "AI";
|
|
31
31
|
}
|
|
32
|
-
async function handleAgentError({
|
|
32
|
+
async function handleAgentError({ error, chatModel, selectedModel, reporter, trace, }) {
|
|
33
33
|
const errorObject = {
|
|
34
34
|
message: error.message,
|
|
35
35
|
stack: error.stack || "Stack trace not available",
|
|
@@ -40,14 +40,8 @@ async function handleAgentError({ context, error, chatModel, selectedModel, repo
|
|
|
40
40
|
selectedModel,
|
|
41
41
|
error: errorObject,
|
|
42
42
|
}), chatModel.getHumanReadableLatestMessage());
|
|
43
|
-
trace?.update({
|
|
44
|
-
|
|
45
|
-
error: errorObject,
|
|
46
|
-
},
|
|
47
|
-
});
|
|
48
|
-
if (context) {
|
|
49
|
-
logError(context, error, trace);
|
|
50
|
-
}
|
|
43
|
+
trace?.update({ output: { error: errorObject } });
|
|
44
|
+
logError(error, trace);
|
|
51
45
|
}
|
|
52
46
|
function extractAttachments(userPrompt) {
|
|
53
47
|
const attachments = [];
|
|
@@ -72,7 +72,7 @@ fixtures_1.test.skip("master agent can click icons accurately", async ({ page, s
|
|
|
72
72
|
// expect(icons.length).toBe(4); // 1 for each unique icon
|
|
73
73
|
fs_1.default.unlinkSync(iconsRegistryFile);
|
|
74
74
|
});
|
|
75
|
-
|
|
75
|
+
fixtures_1.test.skip("cua agent can click icons accurately", async ({ page, server }) => {
|
|
76
76
|
await page.goto(`${server.baseURL}/icons-navbar.html`);
|
|
77
77
|
await (0, fixtures_1.expect)(page.getByText("select an icon")).toBeVisible();
|
|
78
78
|
const response = await (0, run_1.createTestUsingComputerUseAgent)({
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"api-client.d.ts","sourceRoot":"","sources":["../../src/auth/api-client.ts"],"names":[],"mappings":"AAQA,cAAM,SAAS;IACb,OAAO,CAAC,MAAM,CAAS;;IAMjB,OAAO,CAAC,CAAC,EACb,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,WAAgB,GACxB,OAAO,CAAC,QAAQ,GAAG;QAAE,IAAI,IAAI,OAAO,CAAC,CAAC,CAAC,CAAA;KAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"api-client.d.ts","sourceRoot":"","sources":["../../src/auth/api-client.ts"],"names":[],"mappings":"AAQA,cAAM,SAAS;IACb,OAAO,CAAC,MAAM,CAAS;;IAMjB,OAAO,CAAC,CAAC,EACb,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,WAAgB,GACxB,OAAO,CAAC,QAAQ,GAAG;QAAE,IAAI,IAAI,OAAO,CAAC,CAAC,CAAC,CAAA;KAAE,CAAC;YA+B/B,WAAW;IAuBnB,mBAAmB,IAAI,OAAO,CAAC,IAAI,CAAC;YAiB5B,YAAY;CA4C3B;AAED,eAAO,MAAM,SAAS,WAAkB,CAAC"}
|