npm - @empiricalrun/test-gen - Versions diffs - 0.75.0 → 0.76.0 - Mend

@empiricalrun/test-gen 0.75.0 → 0.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (102) hide show

package/CHANGELOG.md +15 -0
package/dist/agent/base/index.d.ts +26 -19
package/dist/agent/base/index.d.ts.map +1 -1
package/dist/agent/base/index.js +88 -56
package/dist/agent/chat/agent-loop.d.ts +4 -3
package/dist/agent/chat/agent-loop.d.ts.map +1 -1
package/dist/agent/chat/agent-loop.js +4 -10
package/dist/agent/chat/exports.d.ts +4 -2
package/dist/agent/chat/exports.d.ts.map +1 -1
package/dist/agent/chat/exports.js +8 -7
package/dist/agent/chat/index.d.ts +6 -10
package/dist/agent/chat/index.d.ts.map +1 -1
package/dist/agent/chat/index.js +129 -196
package/dist/agent/chat/prompt/index.d.ts +5 -4
package/dist/agent/chat/prompt/index.d.ts.map +1 -1
package/dist/agent/chat/prompt/index.js +79 -68
package/dist/agent/chat/state.d.ts +1 -2
package/dist/agent/chat/state.d.ts.map +1 -1
package/dist/agent/chat/state.js +2 -2
package/dist/agent/chat/utils.d.ts +2 -3
package/dist/agent/chat/utils.d.ts.map +1 -1
package/dist/agent/chat/utils.js +1 -2
package/dist/agent/cli.d.ts +11 -0
package/dist/agent/cli.d.ts.map +1 -0
package/dist/agent/cli.js +209 -0
package/dist/agent/code-review/index.d.ts +7 -0
package/dist/agent/code-review/index.d.ts.map +1 -0
package/dist/agent/code-review/index.js +65 -0
package/dist/agent/code-review/prompt.d.ts +1 -1
package/dist/agent/code-review/prompt.d.ts.map +1 -1
package/dist/agent/code-review/prompt.js +52 -16
package/dist/agent/index.d.ts +10 -0
package/dist/agent/index.d.ts.map +1 -0
package/dist/agent/index.js +19 -0
package/dist/agent/triage/index.d.ts +7 -0
package/dist/agent/triage/index.d.ts.map +1 -0
package/dist/agent/triage/index.js +102 -0
package/dist/agent/video-analysis/index.d.ts +7 -0
package/dist/agent/video-analysis/index.d.ts.map +1 -0
package/dist/agent/video-analysis/index.js +35 -0
package/dist/bin/index.js +6 -6
package/dist/file-info/adapters/github/index.d.ts.map +1 -1
package/dist/file-info/adapters/github/index.js +1 -2
package/dist/file-info/adapters/github/reader.d.ts +4 -9
package/dist/file-info/adapters/github/reader.d.ts.map +1 -1
package/dist/file-info/adapters/github/reader.js +163 -134
package/dist/tools/create-pull-request/index.d.ts.map +1 -0
package/dist/tools/{definitions/commit-and-create-pr.js → create-pull-request/index.js} +30 -1
package/dist/tools/create-pull-request/utils.d.ts +21 -0
package/dist/tools/create-pull-request/utils.d.ts.map +1 -0
package/dist/tools/create-pull-request/utils.js +83 -0
package/dist/tools/definitions/extract-frames-from-video.d.ts +39 -0
package/dist/tools/definitions/extract-frames-from-video.d.ts.map +1 -0
package/dist/tools/definitions/extract-frames-from-video.js +60 -0
package/dist/tools/definitions/fetch-video-analysis.d.ts +4 -4
package/dist/tools/executor/index.d.ts +1 -1
package/dist/tools/executor/index.d.ts.map +1 -1
package/dist/tools/executor/index.js +18 -4
package/dist/tools/extract-frames-from-video/index.d.ts +7 -0
package/dist/tools/extract-frames-from-video/index.d.ts.map +1 -0
package/dist/tools/extract-frames-from-video/index.js +145 -0
package/dist/tools/fetch-session-diff/index.d.ts +3 -0
package/dist/tools/fetch-session-diff/index.d.ts.map +1 -0
package/dist/tools/fetch-session-diff/index.js +46 -0
package/dist/tools/fetch-video-analysis/index.d.ts.map +1 -1
package/dist/tools/fetch-video-analysis/index.js +18 -7
package/dist/tools/fetch-video-analysis/utils.d.ts +5 -2
package/dist/tools/fetch-video-analysis/utils.d.ts.map +1 -1
package/dist/tools/fetch-video-analysis/utils.js +34 -11
package/dist/tools/fetch-video-analysis/video-analysis.d.ts +2 -2
package/dist/tools/fetch-video-analysis/video-analysis.d.ts.map +1 -1
package/dist/tools/fetch-video-analysis/video-analysis.js +24 -8
package/dist/tools/index.d.ts +28 -2
package/dist/tools/index.d.ts.map +1 -1
package/dist/tools/index.js +46 -28
package/dist/tools/review-pull-request/index.d.ts +3 -0
package/dist/tools/review-pull-request/index.d.ts.map +1 -0
package/dist/tools/review-pull-request/index.js +103 -0
package/dist/tools/test-run-fetcher/index.d.ts.map +1 -1
package/dist/tools/test-run-fetcher/index.js +4 -14
package/dist/tools/utils/urls.d.ts +5 -0
package/dist/tools/utils/urls.d.ts.map +1 -0
package/dist/tools/utils/urls.js +19 -0
package/dist/tools/view-failed-test-run-report/index.d.ts.map +1 -1
package/dist/tools/view-failed-test-run-report/index.js +3 -15
package/dist/utils/file.d.ts +1 -0
package/dist/utils/file.d.ts.map +1 -1
package/dist/utils/file.js +45 -1
package/dist/{tools/fetch-video-analysis → utils}/local-ffmpeg-client.d.ts +4 -0
package/dist/utils/local-ffmpeg-client.d.ts.map +1 -0
package/dist/{tools/fetch-video-analysis → utils}/local-ffmpeg-client.js +63 -11
package/package.json +2 -2
package/tsconfig.tsbuildinfo +1 -1
package/dist/agent/chat/utils/tool-calls.d.ts +0 -21
package/dist/agent/chat/utils/tool-calls.d.ts.map +0 -1
package/dist/agent/chat/utils/tool-calls.js +0 -64
package/dist/tools/commit-and-create-pr/index.d.ts.map +0 -1
package/dist/tools/commit-and-create-pr/index.js +0 -83
package/dist/tools/definitions/commit-and-create-pr.d.ts +0 -3
package/dist/tools/definitions/commit-and-create-pr.d.ts.map +0 -1
package/dist/tools/fetch-video-analysis/local-ffmpeg-client.d.ts.map +0 -1
/package/dist/tools/{commit-and-create-pr → create-pull-request}/index.d.ts +0 -0

package/dist/agent/cli.js ADDED Viewed

@@ -0,0 +1,209 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.fetchEnvironmentVariables = fetchEnvironmentVariables;
+exports.runChatAgentForCLI = runChatAgentForCLI;
+const llm_1 = require("@empiricalrun/llm");
+const chat_1 = require("@empiricalrun/llm/chat");
+const picocolors_1 = require("picocolors");
+const client_1 = require("../dashboard/client");
+const reader_1 = require("../file-info/adapters/file-system/reader");
+const human_in_the_loop_1 = require("../human-in-the-loop");
+const validation_1 = require("../recorder/validation");
+const executor_1 = require("../tools/executor");
+const git_1 = require("../tools/executor/utils/git");
+const filesystem_cache_1 = require("./chat/filesystem-cache");
+const state_1 = require("./chat/state");
+const utils_1 = require("./chat/utils");
+const index_1 = require("./index");
+function stopCriteria(userPrompt) {
+    return userPrompt?.toLowerCase() === "stop";
+}
+function concludeAgent(chatModel, useDiskForChatState, selectedModel, error) {
+    console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + (0, state_1.getUsageSummary)(chatModel))}`);
+    if (useDiskForChatState) {
+        (0, state_1.saveToDisk)(chatModel.messages, selectedModel, chatModel.askUserForInput, error);
+    }
+}
+async function fetchEnvironmentVariables() {
+    const projectApiKey = process.env.EMPIRICALRUN_API_KEY;
+    let apiClient;
+    let queryParams = {};
+    if (projectApiKey) {
+        apiClient = new client_1.DashboardAPIClient({
+            authType: "project-api-key",
+            projectApiKey: process.env.EMPIRICALRUN_API_KEY,
+        });
+    }
+    else {
+        apiClient = new client_1.DashboardAPIClient({
+            authType: "user-access-token",
+        });
+        const repoName = await (0, validation_1.validatePackageJson)(process.cwd());
+        queryParams = { project_repo_name: repoName };
+    }
+    const data = await apiClient.request("/api/environment-variables", { method: "GET", params: queryParams });
+    if (!data.data || !data.data.environment_variables) {
+        console.error("Failed to fetch environment variables:", data);
+        throw new Error("Failed to fetch environment variables");
+    }
+    const envVars = data.data.environment_variables.reduce((acc, envVar) => {
+        acc[envVar.name] = envVar.value;
+        return acc;
+    }, {});
+    return envVars;
+}
+async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, agentMode, resetChat, useFSCache, }) {
+    let chatState;
+    const enableStreaming = !useFSCache;
+    const cache = useFSCache ? new filesystem_cache_1.FilesystemLLMCache() : undefined;
+    if (resetChat) {
+        (0, state_1.clearChatState)();
+    }
+    if (useDiskForChatState) {
+        chatState = (0, state_1.loadChatState)();
+    }
+    // TODO: Store branch name in chat state so that we don't recreate it every time
+    const randomId = crypto.randomUUID().substring(0, 8);
+    const branchName = `branch-${randomId}`;
+    await (0, git_1.checkoutBranch)(branchName, process.cwd());
+    let messagesLoadedFromDisk = chatState?.messages || [];
+    let chatModel = (0, chat_1.createChatModel)(messagesLoadedFromDisk, selectedModel, undefined, cache);
+    chatModel.validateEnvVarsForAuth();
+    if (initialPromptContent && chatModel.messages.length === 0) {
+        chatModel.pushUserMessage(initialPromptContent, []);
+    }
+    else if (initialPromptContent && chatModel.messages.length > 0) {
+        console.warn(`Ignoring initial prompt because we have existing messages.`);
+    }
+    if (chatModel.askUserForInput) {
+        // Show last message to the user for context when we loaded from disk
+        const latest = chatModel.getHumanReadableLatestMessage();
+        if (latest) {
+            console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
+        }
+    }
+    if (chatState && chatState.error) {
+        // Reset error state as we are attempting a retry
+        chatState.error = null;
+    }
+    const handleSigInt = () => {
+        concludeAgent(chatModel, useDiskForChatState, selectedModel, null);
+        process.exit(0);
+    };
+    process.once("SIGINT", handleSigInt);
+    process.once("SIGTERM", handleSigInt);
+    let userPrompt;
+    let reporterFunc = async (chatState, latest) => {
+        if (useDiskForChatState) {
+            (0, state_1.saveToDisk)(chatState.messages, selectedModel, chatState.askUserForInput, chatState.error);
+        }
+        if (latest) {
+            if (!enableStreaming) {
+                console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
+            }
+            else {
+                process.stdout.write(`\n`);
+            }
+        }
+    };
+    const trace = (0, llm_1.createLangfuseTrace)({
+        name: "chat_agent",
+        input: initialPromptContent || "",
+        tags: [selectedModel, "chat_agent"],
+    });
+    if (trace) {
+        const traceUrl = trace.getTraceUrl();
+        console.log(`Starting ${selectedModel}: ${traceUrl}`);
+    }
+    let authType = "user-access-token";
+    if (process.env.EMPIRICALRUN_API_KEY) {
+        authType = "project-api-key";
+    }
+    const apiClient = new client_1.DashboardAPIClient({
+        authType,
+    });
+    const fileInfoBuilder = () => (0, reader_1.getFileInfoFromFS)(process.cwd());
+    const agentParams = {
+        selectedModel,
+    };
+    const agent = index_1.MODE_TO_AGENT_MAP[agentMode](agentParams);
+    while (!stopCriteria(userPrompt)) {
+        if (chatModel.askUserForInput) {
+            try {
+                userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
+                    message: "User:",
+                });
+            }
+            catch (e) {
+                // https://github.com/SBoudrias/Inquirer.js/issues/1502#issuecomment-2275991680
+                if (e instanceof Error && e.name === "ExitPromptError") {
+                    concludeAgent(chatModel, useDiskForChatState, selectedModel, null);
+                    process.exit(0);
+                }
+                concludeAgent(chatModel, useDiskForChatState, selectedModel, {
+                    message: e.message,
+                    stack: e.stack || "Stack trace not available",
+                    timestamp: new Date().toISOString(),
+                });
+                throw e;
+            }
+            if (!stopCriteria(userPrompt)) {
+                const { text, attachments } = (0, utils_1.extractAttachments)(userPrompt);
+                chatModel.pushUserMessage(text, attachments);
+            }
+        }
+        else {
+            const toolExecutor = new executor_1.ToolExecutor({
+                chatSession: null,
+                branchName,
+                repoPath: process.cwd(),
+                apiClient,
+                trace,
+                featureFlags: [],
+                environmentOverrides: await fetchEnvironmentVariables(),
+            });
+            await agent.runLoop({
+                messages: chatModel.messages,
+                reporter: reporterFunc,
+                streamingMessageReporter: (() => {
+                    if (!enableStreaming) {
+                        return;
+                    }
+                    let hasStarted = false;
+                    let startedRole = undefined;
+                    return async (delta, snapshot, thinking) => {
+                        if (delta) {
+                            const role = thinking ? "Thinking" : "Assistant";
+                            if (!hasStarted) {
+                                process.stdout.write(`${(0, picocolors_1.blue)(role)}: `);
+                                hasStarted = true;
+                                startedRole = role;
+                            }
+                            else if (hasStarted && role !== startedRole) {
+                                // Changing from thinking -> text block
+                                process.stdout.write("\n");
+                                process.stdout.write(`${(0, picocolors_1.blue)(role)}: `);
+                                startedRole = role;
+                            }
+                            process.stdout.write(delta);
+                        }
+                    };
+                })(),
+                trace,
+                repoInfoBuilder: fileInfoBuilder,
+                onPendingToolCall: async (toolCalls) => {
+                    const toolResults = await toolExecutor.execute(toolCalls);
+                    chatModel.pushToolResultsMessage(toolCalls, toolResults);
+                },
+            });
+            // Update the chatModel with the agent's final state for next iteration
+            if (agent.messages) {
+                chatModel = (0, chat_1.createChatModel)(agent.messages, selectedModel, undefined, cache);
+            }
+        }
+    }
+    trace?.update({ output: { messages: chatModel.messages } });
+    await llm_1.langfuseInstance?.flushAsync();
+    const usageSummary = (0, state_1.getUsageSummary)(chatModel);
+    console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + usageSummary)}`);
+}

package/dist/agent/code-review/index.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+import type { ToolDefinition } from "@empiricalrun/shared-types";
+import { BaseAgent } from "../base";
+export declare class CodeReviewAgent extends BaseAgent {
+    protected getTools(): ToolDefinition[];
+    protected buildSystemPrompt(): Promise<string>;
+}
+//# sourceMappingURL=index.d.ts.map

package/dist/agent/code-review/index.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAGjE,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC,qBAAa,eAAgB,SAAQ,SAAS;IAC5C,SAAS,CAAC,QAAQ,IAAI,cAAc,EAAE;cAItB,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC;CAsDrD"}

package/dist/agent/code-review/index.js ADDED Viewed

@@ -0,0 +1,65 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.CodeReviewAgent = void 0;
+const tools_1 = require("../../tools");
+const base_1 = require("../base");
+class CodeReviewAgent extends base_1.BaseAgent {
+    getTools() {
+        return [tools_1.fetchSessionDiffTool];
+    }
+    async buildSystemPrompt() {
+        return `
+You are an expert code reviewer that specializes in reviewing Playwright test code. You are
+provided with tools to fetch diff for a code review, where a test has been added, test modified,
+or some configuration has changed.
+# Your goals
+- Identify code smells in test code - see below
+- Call out test data assumptions or lack of clean up
+# Output format
+- You are expected to return two sections in your response: describe_code_change and code_review_comments
+- describe_code_change: A brief summary of what the code change is doing. This should be 4-6 sentences in a bullet list.
+- code_review_comments: A bulleted list of code review comments that catch for any of the specific bits below or other
+  red flags you might see in the code. Each comment should be 1-2 sentences.
+Return these as XML tags with markdown inside them
+<describe_code_change>
+- ...
+</describe_code_change>
+<code_review_comments>
+- ...
+</code_review_comments>
+# Specific bits to catch in the code review
+## Code smells to look for
+- Any form of try-catch or exception handling is a code smell in test code. If there's an
+  exception, the test should fail
+- Any conditionals (if, switch, ternary) in test code is a code smell. Tests are expected to be
+  deterministic. If you see conditionals, check if there's a comment explaining why it's needed.
+  Critically review the comment -- if it's not convincing, call it out as a code smell.
+## Ensure Playwright best practices
+- Use locators instead of selectors: waitForSelector, $, $$ are bad - use locators instead (e.g. locator.waitFor)
+- If the test relies on some Playwright APIs that do not auto-wait (e.g. isVisible(), count()), we need to ensure
+  they are used AFTER some action that ensures the page has loaded. If nothing, at least it should have a waitForTimeout
+- Don't use waitForLoadState or networkidle - these are not required since Playwright auto-waits after navigations
+## Call out test data assumptions
+- If new test data is created (e.g. creating a new entity in the app, doing some actions on it) - it should be cleaned up
+  at the end of the test. If not, call it out.
+- If the test data cannot be cleaned up, are we using some random names to ensure no conflicts in future test runs?
+- If the test assumes some data exists (e.g. a user with a specific email) - call it out. It might fail across other
+  environments.
+- No hard coded URLs - use relative URLs instead - that can work across environments.
+- Dependency on static data that can change across environments (e.g. number of rows in a table) should be avoided.
+## Remove debug artifacts
+- If there are console.logs or page.screenshot usage, call it out. They should be removed before merging.
+`;
+    }
+}
+exports.CodeReviewAgent = CodeReviewAgent;

package/dist/agent/code-review/prompt.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export declare const PROMPT = "\nIdentify code smells in tests\n- try-catch\n- conditionals added without a comment\n\nPlaywright gotchas\n- isVisible(), count() do not auto-wait\n\nPlaywright code smells\n- waitForLoadState is not required\n    - Especially with \"networkidle\", because modern webapps keep doing network activity\n\nIdentify test assumptions\n- What data does the test require?\n- Clean up test entities\n";
+export declare const SYSTEM_PROMPT = "\nYou are an expert code reviewer that specializes in reviewing Playwright test code. You are\nprovided with tools to fetch diff for a code review, where a test has been added, test modified,\nor some configuration has changed.\n\n# Your goals\n- Identify code smells in test code - see below\n- Call out test data assumptions or lack of clean up\n\n# Output format\n- You are expected to return two sections in your response: describe_code_change and code_review_comments\n- describe_code_change: A brief summary of what the code change is doing. This should be 4-6 sentences in a bullet list.\n- code_review_comments: A bulleted list of code review comments that catch for any of the specific bits below or other\n  red flags you might see in the code. Each comment should be 1-2 sentences.\n\nReturn these as XML tags with markdown inside them\n\n<describe_code_change>\n- ...\n</describe_code_change>\n\n<code_review_comments>\n- ...\n</code_review_comments>\n\n# Specific bits to catch in the code review\n\n## Code smells to look for\n- Any form of try-catch or exception handling is a code smell in test code. If there's an\n  exception, the test should fail\n- Any conditionals (if, switch, ternary) in test code is a code smell. Tests are expected to be\n  deterministic. If you see conditionals, check if there's a comment explaining why it's needed.\n  Critically review the comment -- if it's not convincing, call it out as a code smell.\n\n## Ensure Playwright best practices\n- Use locators instead of selectors: waitForSelector, $, $$ are bad - use locators instead (e.g. locator.waitFor)\n- If the test relies on some Playwright APIs that do not auto-wait (e.g. isVisible(), count()), we need to ensure \n  they are used AFTER some action that ensures the page has loaded. 
If nothing, at least it should have a waitForTimeout\n- Don't use waitForLoadState or networkidle - these are not required since Playwright auto-waits after navigations\n\n## Call out test data assumptions\n- If new test data is created (e.g. creating a new entity in the app, doing some actions on it) - it should be cleaned up\n  at the end of the test. If not, call it out.\n- If the test data cannot be cleaned up, are we using some random names to ensure no conflicts in future test runs?\n- If the test assumes some data exists (e.g. a user with a specific email) - call it out. It might fail across other\n  environments.\n- No hard coded URLs - use relative URLs instead - that can work across environments.\n- Dependency on static data that can change across environments (e.g. number of rows in a table) should be avoided.\n\n## Remove debug artifacts\n- If there are console.logs or page.screenshot usage, call it out. They should be removed before merging.\n";
 //# sourceMappingURL=prompt.d.ts.map

package/dist/agent/code-review/prompt.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/prompt.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,~~MAAM~~,~~8YAelB~~,CAAC"}
1	+ {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../../src/agent/code-review/prompt.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,aAAa,4sFAmDzB,CAAC"}

package/dist/agent/code-review/prompt.js CHANGED Viewed

@@ -1,19 +1,55 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.PROMPT = void 0;
-exports.PROMPT = `
-Identify code smells in tests
-- try-catch
-- conditionals added without a comment
-Playwright gotchas
-- isVisible(), count() do not auto-wait
-Playwright code smells
-- waitForLoadState is not required
-    - Especially with "networkidle", because modern webapps keep doing network activity
-Identify test assumptions
-- What data does the test require?
-- Clean up test entities
+exports.SYSTEM_PROMPT = void 0;
+exports.SYSTEM_PROMPT = `
+You are an expert code reviewer that specializes in reviewing Playwright test code. You are
+provided with tools to fetch diff for a code review, where a test has been added, test modified,
+or some configuration has changed.
+# Your goals
+- Identify code smells in test code - see below
+- Call out test data assumptions or lack of clean up
+# Output format
+- You are expected to return two sections in your response: describe_code_change and code_review_comments
+- describe_code_change: A brief summary of what the code change is doing. This should be 4-6 sentences in a bullet list.
+- code_review_comments: A bulleted list of code review comments that catch for any of the specific bits below or other
+  red flags you might see in the code. Each comment should be 1-2 sentences.
+Return these as XML tags with markdown inside them
+<describe_code_change>
+- ...
+</describe_code_change>
+<code_review_comments>
+- ...
+</code_review_comments>
+# Specific bits to catch in the code review
+## Code smells to look for
+- Any form of try-catch or exception handling is a code smell in test code. If there's an
+  exception, the test should fail
+- Any conditionals (if, switch, ternary) in test code is a code smell. Tests are expected to be
+  deterministic. If you see conditionals, check if there's a comment explaining why it's needed.
+  Critically review the comment -- if it's not convincing, call it out as a code smell.
+## Ensure Playwright best practices
+- Use locators instead of selectors: waitForSelector, $, $$ are bad - use locators instead (e.g. locator.waitFor)
+- If the test relies on some Playwright APIs that do not auto-wait (e.g. isVisible(), count()), we need to ensure
+  they are used AFTER some action that ensures the page has loaded. If nothing, at least it should have a waitForTimeout
+- Don't use waitForLoadState or networkidle - these are not required since Playwright auto-waits after navigations
+## Call out test data assumptions
+- If new test data is created (e.g. creating a new entity in the app, doing some actions on it) - it should be cleaned up
+  at the end of the test. If not, call it out.
+- If the test data cannot be cleaned up, are we using some random names to ensure no conflicts in future test runs?
+- If the test assumes some data exists (e.g. a user with a specific email) - call it out. It might fail across other
+  environments.
+- No hard coded URLs - use relative URLs instead - that can work across environments.
+- Dependency on static data that can change across environments (e.g. number of rows in a table) should be avoided.
+## Remove debug artifacts
+- If there are console.logs or page.screenshot usage, call it out. They should be removed before merging.
 `;

package/dist/agent/index.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+import type { AgentModeEnum } from "@empiricalrun/shared-types";
+import { type AgentParams, BaseAgent } from "./base";
+import { ChatAgent } from "./chat";
+import { CodeReviewAgent } from "./code-review";
+import { TriageAgent } from "./triage";
+import { VideoAnalysisAgent } from "./video-analysis";
+export declare const MODE_TO_AGENT_MAP: Record<AgentModeEnum, (params: AgentParams) => BaseAgent>;
+export { BaseAgent, ChatAgent, CodeReviewAgent, TriageAgent, VideoAnalysisAgent, };
+export type { AgentParams };
+//# sourceMappingURL=index.d.ts.map

package/dist/agent/index.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAEhE,OAAO,EAAE,KAAK,WAAW,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACrD,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAEtD,eAAO,MAAM,iBAAiB,EAAE,MAAM,CACpC,aAAa,EACb,CAAC,MAAM,EAAE,WAAW,KAAK,SAAS,CAMnC,CAAC;AAEF,OAAO,EACL,SAAS,EACT,SAAS,EACT,eAAe,EACf,WAAW,EACX,kBAAkB,GACnB,CAAC;AACF,YAAY,EAAE,WAAW,EAAE,CAAC"}

package/dist/agent/index.js ADDED Viewed

@@ -0,0 +1,19 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.VideoAnalysisAgent = exports.TriageAgent = exports.CodeReviewAgent = exports.ChatAgent = exports.BaseAgent = exports.MODE_TO_AGENT_MAP = void 0;
+const base_1 = require("./base");
+Object.defineProperty(exports, "BaseAgent", { enumerable: true, get: function () { return base_1.BaseAgent; } });
+const chat_1 = require("./chat");
+Object.defineProperty(exports, "ChatAgent", { enumerable: true, get: function () { return chat_1.ChatAgent; } });
+const code_review_1 = require("./code-review");
+Object.defineProperty(exports, "CodeReviewAgent", { enumerable: true, get: function () { return code_review_1.CodeReviewAgent; } });
+const triage_1 = require("./triage");
+Object.defineProperty(exports, "TriageAgent", { enumerable: true, get: function () { return triage_1.TriageAgent; } });
+const video_analysis_1 = require("./video-analysis");
+Object.defineProperty(exports, "VideoAnalysisAgent", { enumerable: true, get: function () { return video_analysis_1.VideoAnalysisAgent; } });
+exports.MODE_TO_AGENT_MAP = {
+    triage: (params) => new triage_1.TriageAgent(params),
+    chat: (params) => new chat_1.ChatAgent(params),
+    video: (params) => new video_analysis_1.VideoAnalysisAgent(params),
+    "code-review": (params) => new code_review_1.CodeReviewAgent(params),
+};

package/dist/agent/triage/index.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+import type { ToolDefinition } from "@empiricalrun/shared-types";
+import { BaseAgent } from "../base";
+export declare class TriageAgent extends BaseAgent {
+    protected getTools(): ToolDefinition[];
+    protected buildSystemPrompt(repoContext?: string): Promise<string>;
+}
+//# sourceMappingURL=index.d.ts.map

package/dist/agent/triage/index.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/triage/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAsBjE,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC,qBAAa,WAAY,SAAQ,SAAS;IACxC,SAAS,CAAC,QAAQ,IAAI,cAAc,EAAE;cAiCtB,iBAAiB,CAAC,WAAW,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAkEzE"}

package/dist/agent/triage/index.js ADDED Viewed

@@ -0,0 +1,102 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.TriageAgent = void 0;
+const tools_1 = require("../../tools");
+const base_1 = require("../base");
+class TriageAgent extends base_1.BaseAgent {
+    getTools() {
+        const tools = [
+            // Common tools
+            tools_1.runTestTool,
+            tools_1.grepTool,
+            tools_1.fetchDiagnosisReportTool,
+            tools_1.listEnvironmentsTool,
+            tools_1.downloadBuildTool,
+            tools_1.fetchFileTool,
+            tools_1.traceDotZipTool,
+            // Triage specific tools
+            tools_1.listIssuesTool,
+            tools_1.createIssueTool,
+            tools_1.updateIssueTool,
+            tools_1.viewFailedTestRunReportTool,
+            tools_1.fetchVideoAnalysis,
+            tools_1.fetchLastSuccessfulTestRunTool,
+            tools_1.sendTriageSummaryTool,
+            // Model-specific tools
+            ...(0, tools_1.textEditorToolsForModel)(this.selectedModel),
+        ];
+        // Feature flag: video analysis (if not already included)
+        if (this.featureFlags?.includes("useVideoAnalysis") &&
+            !tools.some((tool) => tool.schema.name === "fetchVideoAnalysis")) {
+            tools.push(tools_1.fetchVideoAnalysis);
+        }
+        return tools;
+    }
+    async buildSystemPrompt(repoContext) {
+        if (!repoContext) {
+            throw new Error(`Triage agent needs repo context`);
+        }
+        return `
+You are a helpful assistant that help with analysis of Playwright test reports. Your goal is to help the user analyse a test report and identify the root cause of the test failures, and log the unique failuers as issues so that the user can keep a track and fix them.
+You are working on a test code repository that contains Playwright tests and other related files. Your working directory has been checked out on a git branch.
+# Your capabilities
+When provided with a test report URL, you can use these capabilities to triage the test failures in the report:
+## Fetch and view the test report
+- Use viewFailedTestRunReportTool tool to get more information about all tests that failed in the run
+## Analyze each test case
+You are provided with multiple tools to help you understand each failing test case better. Understanding each test case allows you to identify the root cause and create more accurate issues. These tools can also be called in parallel.
+- Each test case generates artifacts: images, videos, playwright trace zip file. With your tools, you can fetch image, analyze the video frames and trace.zip to find out failing network requests and console logs
+  - Each video represents one browser tab of the test case (so multiple videos implies the test had multiple tabs or browser windows)
+- Read the error stack and test file to understand what the test is doing
+- Fetch the last successful run of the test case to understand the earlier flow. This report will contain image and video URLs that can also be analyzed with your available tools.
+- If you think the issue is explained by a timing or intermittent issue, you can also re-run the test case
+## Listing, updating and creating issues
+- Test failures will become issues that can be assigned to developers to fix the app or update the test. Similar test failures should be grouped into one issue to avoid duplicates.
+- Before you create a new issue, you MUST list existing issues that have been created for this repo, to avoid creating duplicate issues.
+- If you find duplicates, use the update issue tool to update the existing issue with new information from the test report
+- When you are creating a new issue, use the description and title to clearly call out the error reason (share error stack, error message, relevant lines of code, etc.) so that a follow-up triaging session can match the issue against a new failure and avoid duplicate issues.
+- What makes a good issue: accurate classification between app or test issue, accurate grouping, and a good auto-fix prompt - see more about this below
+## Classify tests as app or test issues
+- An app issue is an issue in the application that is being tested. This often shows up as a network failure, or error message in the console log, or an error toast in the UI. Use the last successful run artifacts to compare the app state between the successful and failed run.
+- A test issue is an issue in the test code. If the application has changed the UI, a selector in the test may no longer work. Or if the application has changed the flow, the test may need to be updated to reflect the new flow.
+## Grouping test failures
+- Before you create issues, group the failures together so that we create useful issues
+- What makes a good group: failures that have the same root cause - because of similar error stacks - and can be fixed with the same change to the app or test
+  - Both "type of failure" and "proposed fix" are important to determine if two failures belong to the same group
+  - Example: if two tests fail with strict mode violations, but for 2 different selectors, they are different groups because the proposed fixes are different
+- What does not make a good group: the location of the test or the name of the test. Two tests that are located in the same file or have similar names should ONLY be grouped together if the root cause of failures is same
+## Crafting a good auto-fix prompt for test issues
+- When you create issues with type "test", you are expected to share a test_issue_prompt which is your proposed change to the test to adapt to the new app state.
+- This prompt is handed over to another agent to update the test code, and your prompt is the ONLY context that the agent has to update the test.
+- Therefore, your prompt must contain:
+  - Which test cases to be updated - with test and describe block names, file name
+  - What failed in the test - error message, error stack, relevant lines of code, or bits to locate the failure
+  - Your suggested change to the test
+  - Emphasis to re-run the test after making the change, to ensure that the change works
+## Conclusion
+- After you are done with triaging and creating issues, summarize the work done with a list of created issues for the user to review. Don't be too verbose - a bullet list of issues created or updated, with a small description is enough.
+- It is important to show proof that you have gone through all of the failures in the test run report, so use numbers to call out 1. total failures, and 2. failures associated with each issue.
+# Repo context
+${repoContext}
+# Reference
+Today's date is ${new Date().toDateString()}
+`;
+    }
+}
+exports.TriageAgent = TriageAgent;

package/dist/agent/video-analysis/index.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+import type { ToolDefinition } from "@empiricalrun/shared-types";
+import { BaseAgent } from "../base";
+export declare class VideoAnalysisAgent extends BaseAgent { // Type declaration for the video analysis agent, a BaseAgent subclass.
+    protected getTools(): ToolDefinition[]; // Tool definitions this agent exposes.
+    protected buildSystemPrompt(): Promise<string>; // Resolves to the agent's system prompt text; takes no arguments.
+}
+//# sourceMappingURL=index.d.ts.map

package/dist/agent/video-analysis/index.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/video-analysis/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAGjE,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC,qBAAa,kBAAmB,SAAQ,SAAS;IAC/C,SAAS,CAAC,QAAQ,IAAI,cAAc,EAAE;cAItB,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC;CAwBrD"}

package/dist/agent/video-analysis/index.js ADDED Viewed

@@ -0,0 +1,35 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.VideoAnalysisAgent = void 0;
+const tools_1 = require("../../tools");
+const base_1 = require("../base");
+class VideoAnalysisAgent extends base_1.BaseAgent {
+    getTools() {
+        return [tools_1.extractFramesFromVideo];
+    }
+    async buildSystemPrompt() {
+        return `
+You are a video analysis agent specialized in analyzing screen recordings and user interface interactions.
+Available Tools:
+- extract_frames: Extract frames from videos for detailed visual analysis
+- fetch_video_analysis: Get comprehensive video analysis summary (legacy)
+When analyzing videos:
+1. Use extract_frames to get individual frames for detailed analysis
+2. Analyze each frame for UI elements, user actions, and state changes
+3. Provide specific observations about what's happening in each frame
+4. The Summary should be in a bullet point format
+5. Reference frame IDs when discussing specific moments: "In frame_abc123_001, I can see..."
+Your analysis should be:
+- Detailed and specific about UI elements and interactions
+- Sequential, following the flow of actions in the video
+After the final summary you need to include the key frame IDs that best represent the important moments in the video.
+Example Frame Id Reference: <frame_abc123_001>
+  `;
+    }
+}
+exports.VideoAnalysisAgent = VideoAnalysisAgent;

package/dist/bin/index.js CHANGED Viewed

@@ -7,8 +7,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
 const commander_1 = require("commander");
 const dotenv_1 = __importDefault(require("dotenv"));
 const fs_1 = __importDefault(require("fs"));
-const chat_1 = require("../agent/chat");
 const models_1 = require("../agent/chat/models");
+const cli_1 = require("../agent/cli");
 const auth_1 = require("../auth");
 const client_1 = require("../dashboard/client");
 const recorder_1 = require("../recorder");
@@ -20,13 +20,13 @@ const utils_1 = require("./utils");
 dotenv_1.default.config({
     path: [".env.local", ".env"],
 });
-async function runChatAgent({ modelInput, useDiskForChatState, prompt: initialPromptContent, useTriage, resetChat, useFSCache, }) {
+async function runChatAgent({ modelInput, useDiskForChatState, prompt: initialPromptContent, agentMode = "chat", resetChat, useFSCache, }) {
     const resolvedModel = (0, models_1.resolveChatModelBasedOnInput)(modelInput);
-    return await (0, chat_1.runChatAgentForCLI)({
+    return await (0, cli_1.runChatAgentForCLI)({
         selectedModel: resolvedModel,
         useDiskForChatState: useDiskForChatState || false,
         initialPromptContent,
-        useTriage: useTriage || false,
+        agentMode,
         resetChat: resetChat || false,
         useFSCache: useFSCache || false,
     });
@@ -178,7 +178,7 @@ async function main() {
         .option("--model <model>", "LLM to use (gpt-5, claude-4 or gemini-2.5)")
         .option("--use-disk", "Save and load chat state from disk")
         .option("--prompt <string>", "String to pass as user prompt")
-        .option("--use-triage", "run the model in triage mode, different set of tools")
+        .option("--agent-mode <mode>", "Mode of the agent: 'chat' or 'triage' or 'video' or 'code-review' (Defaults to 'chat')")
         .option("--use-cache", "Use filesystem cache for LLM responses (Claude-only, and will disable streaming)")
         .option("--reset-chat", "Clear any saved chat state (last-chat.json) before starting")
         .action(async (options) => {
@@ -186,7 +186,7 @@ async function main() {
             modelInput: options.model,
             useDiskForChatState: options.useDisk,
             prompt: options.prompt,
-            useTriage: options.useTriage,
+            agentMode: options.agentMode,
             resetChat: options.resetChat,
             useFSCache: options.useCache,
         });

package/dist/file-info/adapters/github/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/file-info/adapters/github/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AAE7E,OAAO,EAEL,KAAK,qBAAqB,EAC3B,MAAM,+CAA+C,CAAC;AAEvD,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;~~AAIjD~~,wBAAsB,mBAAmB,CAAC,EACxC,KAAK,EACL,QAAQ,EACR,SAAS,EACT,UAAU,EACV,UAAU,GACX,EAAE;IACD,KAAK,EAAE,qBAAqB,CAAC;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,mBAAmB,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB,GAAG,OAAO,CAAC,UAAU,CAAC,CA2BtB"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/file-info/adapters/github/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AAE7E,OAAO,EAEL,KAAK,qBAAqB,EAC3B,MAAM,+CAA+C,CAAC;AAEvD,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAEjD,wBAAsB,mBAAmB,CAAC,EACxC,KAAK,EACL,QAAQ,EACR,SAAS,EACT,UAAU,EACV,UAAU,GACX,EAAE;IACD,KAAK,EAAE,qBAAqB,CAAC;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,mBAAmB,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB,GAAG,OAAO,CAAC,UAAU,CAAC,CA2BtB"}

package/dist/file-info/adapters/github/index.js CHANGED Viewed

@@ -6,10 +6,9 @@ const helpers_1 = require("../../../tools/file-operations/shared/helpers");
 const reader_1 = require("./reader");
 var reader_2 = require("./reader");
 Object.defineProperty(exports, "getFileInfoFromGitHub", { enumerable: true, get: function () { return reader_2.getFileInfoFromGitHub; } });
-const REPO_OWNER = "empirical-run";
 async function viewFileUsingGitHub({ input, repoName, apiClient, branchName, baseBranch, }) {
     const filePath = input.path;
-    const githubReader = new reader_1.GitHubFileReader(repoName, apiClient, REPO_OWNER);
+    const githubReader = new reader_1.GitHubFileReader(repoName, apiClient);
     const fileData = await githubReader.readFile(filePath, branchName, baseBranch);
     if (!fileData) {
         return {