npm - bangonit - Versions diffs - 0.4.3 → 0.5.2 - Mend

bangonit 0.4.3 → 0.5.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (76)

package/app/webapp/src/shared/api/chat.ts CHANGED Viewed

@@ -64,8 +64,8 @@ Actions:
   close                         — close current page
   wait --timeout=<seconds>      — pause execution (use sparingly — only when you must wait for an animation or timer, not for page loads which are handled automatically)
-Observation — the "observe" field:
-  Set "observe" to observe the page after all actions execute. Two modes:
+Observation — the "observe" field (REQUIRED):
+  Every browser tool call must include "observe". Two modes:
   - "snapshot" — takes an accessibility snapshot (fast). Returns a text tree of all visible elements with refs (e.g. ref=e5) for precise clicking. This is your default — use it almost always.
   - "snapshot_and_screenshot" — takes a snapshot AND a screenshot analyzed by a vision subagent (slower). Use only when the snapshot alone doesn't have the info you need (e.g. visual content like images, charts, colors, spatial layout, or sites using canvas/non-semantic HTML).
   When using "snapshot_and_screenshot", pass "prompts" (top-level, alongside "observe") — an array of questions about the screenshot. Ask multiple questions per call to minimize screenshot usage.
@@ -143,19 +143,22 @@ export function createChatHandler() {
       const tools = {
         browser: tool({
-          description: "Control the browser. Pass an array of actions to execute sequentially. Set 'observe' to observe the page after actions complete. The system waits for network idle before observation.",
+          description:
+            "Control the browser. Pass an array of actions to execute sequentially. Set 'observe' to observe the page after actions complete. The system waits for network idle before observation.",
           inputSchema: jsonSchema({
             type: "object",
             properties: {
               observe: {
                 type: "string",
                 enum: ["snapshot", "snapshot_and_screenshot"],
-                description: "How to observe the page after actions. 'snapshot' takes a fast accessibility snapshot with element refs. 'snapshot_and_screenshot' also takes a screenshot analyzed by a vision subagent (slower — use only when the snapshot doesn't have the info you need).",
+                description:
+                  "How to observe the page after actions. 'snapshot' takes a fast accessibility snapshot with element refs. 'snapshot_and_screenshot' also takes a screenshot analyzed by a vision subagent (slower — use only when the snapshot doesn't have the info you need).",
               },
               prompts: {
                 type: "array",
                 items: { type: "string" },
-                description: "Questions for screenshot vision analysis (only used with observe: 'snapshot_and_screenshot')",
+                description:
+                  "Questions for screenshot vision analysis (only used with observe: 'snapshot_and_screenshot')",
               },
               actions: {
                 type: "array",
@@ -164,7 +167,18 @@ export function createChatHandler() {
                   properties: {
                     action: {
                       type: "string",
-                      enum: ["navigate", "back", "forward", "mouse", "type", "press", "upload", "tabs", "wait", "close"],
+                      enum: [
+                        "navigate",
+                        "back",
+                        "forward",
+                        "mouse",
+                        "type",
+                        "press",
+                        "upload",
+                        "tabs",
+                        "wait",
+                        "close",
+                      ],
                       description: "The browser action to perform",
                     },
                     url: { type: "string", description: "URL for navigate or new tab" },
@@ -175,7 +189,10 @@ export function createChatHandler() {
                       items: {
                         type: "object",
                         properties: {
-                          action: { type: "string", enum: ["click", "dblclick", "down", "move", "up", "wheel", "wait"] },
+                          action: {
+                            type: "string",
+                            enum: ["click", "dblclick", "down", "move", "up", "wheel", "wait"],
+                          },
                           ref: { type: "string", description: "Element ref from snapshot" },
                           x: { type: "number" },
                           y: { type: "number" },
@@ -198,38 +215,40 @@ export function createChatHandler() {
                 },
               },
             },
-            required: ["actions"],
+            required: ["actions", "observe"],
           }),
-          toModelOutput({output}: {output: string}) {
+          toModelOutput({ output }: { output: string }) {
             const parsedOutput = browserToolOutputSchema.parse(JSON.parse(output));
             if (!parsedOutput.imageOutput) {
               return {
                 type: "content",
-                value: [
-                  {type: "text", text: parsedOutput.textOutput},
-                ]
-              }
+                value: [{ type: "text", text: parsedOutput.textOutput }],
+              };
             }
             if (parsedOutput.imageOutput.type === "not_changed") {
               return {
                 type: "content",
                 value: [
-                  {type: "text", text: parsedOutput.textOutput + "\n\n[screenshot did not change since last screenshot]"},
-                ]
-              }
+                  {
+                    type: "text",
+                    text: parsedOutput.textOutput + "\n\n[screenshot did not change since last screenshot]",
+                  },
+                ],
+              };
             }
             return {
               type: "content",
               value: [
-                {type: "text", text: parsedOutput.textOutput},
-                {type: "image-data", mediaType: "image/jpeg", data: parsedOutput.imageOutput.base64}
-              ]
-            }
-          }
+                { type: "text", text: parsedOutput.textOutput },
+                { type: "image-data", mediaType: "image/jpeg", data: parsedOutput.imageOutput.base64 },
+              ],
+            };
+          },
         }),
         todos: tool({
-          description: "Track progress on multi-step tasks. Call with the FULL list of todos to update, or omit 'todos' to read the current list.",
+          description:
+            "Track progress on multi-step tasks. Call with the FULL list of todos to update, or omit 'todos' to read the current list.",
           inputSchema: jsonSchema({
             type: "object",
             properties: {
@@ -248,12 +267,16 @@ export function createChatHandler() {
           }),
         }),
         report_result: tool({
-          description: "Report the final test result. You MUST call this exactly once at the end of every test run. Pass 'pass' if all test steps succeeded, or 'fail' with a summary of what went wrong.",
+          description:
+            "Report the final test result. You MUST call this exactly once at the end of every test run. Pass 'pass' if all test steps succeeded, or 'fail' with a summary of what went wrong.",
           inputSchema: jsonSchema({
             type: "object",
             properties: {
               result: { type: "string", enum: ["pass", "fail"], description: "Whether the test passed or failed" },
-              summary: { type: "string", description: "Brief summary of test results. For failures, describe what went wrong." },
+              summary: {
+                type: "string",
+                description: "Brief summary of test results. For failures, describe what went wrong.",
+              },
             },
             required: ["result", "summary"],
           }),
@@ -273,7 +296,8 @@ export function createChatHandler() {
                 {
                   type: "compact_20260112",
                   trigger: { type: "input_tokens", value: 120000 },
-                  instructions: "Summarize the conversation concisely. Preserve: the test plan, all URLs visited, current page/tab state, actions completed, current test state (done/in-progress/remaining), errors and resolutions, key decisions made. Include exact URLs, filenames, and values.",
+                  instructions:
+                    "Summarize the conversation concisely. Preserve: the test plan, all URLs visited, current page/tab state, actions completed, current test state (done/in-progress/remaining), errors and resolutions, key decisions made. Include exact URLs, filenames, and values.",
                 },
               ],
             },

package/app/webapp/src/shared/api/screenshot.ts CHANGED Viewed

@@ -30,13 +30,15 @@ async function analyzeWithClaude(image: string, prompt: string): Promise<string>
     model: "claude-haiku-4-5",
     max_tokens: 4096,
     system: SYSTEM_PROMPT,
-    messages: [{
-      role: "user",
-      content: [
-        { type: "image", source: { type: "base64", media_type: "image/jpeg", data: image } },
-        { type: "text", text: prompt },
-      ],
-    }],
+    messages: [
+      {
+        role: "user",
+        content: [
+          { type: "image", source: { type: "base64", media_type: "image/jpeg", data: image } },
+          { type: "text", text: prompt },
+        ],
+      },
+    ],
   });
   return result.content
@@ -57,9 +59,8 @@ export function createScreenshotHandler() {
           headers: { "Content-Type": "application/json", ...CORS_HEADERS },
         });
       }
-      const prompt = prompts.length === 1
-        ? prompts[0]
-        : prompts.map((p: string, i: number) => `${i + 1}. ${p}`).join("\n");
+      const prompt =
+        prompts.length === 1 ? prompts[0] : prompts.map((p: string, i: number) => `${i + 1}. ${p}`).join("\n");
       try {
         const text = await analyzeWithClaude(image, prompt);

package/app/webapp/src/shared/components/AppShell.tsx CHANGED Viewed

@@ -5,19 +5,6 @@ import SessionView from "./SessionView";
 import { AgentConfig, AgentStatus } from "..";
 import { SessionRecorder } from "../lib/recorder/session-recorder";
-function patchIncompleteToolCalls(messages: any[]): any[] {
-  return messages.map((msg) => {
-    if (msg.role !== "assistant" || !msg.parts) return msg;
-    const patched = msg.parts.map((part: any) => {
-      if (part.type === "tool-invocation" && part.state !== "result") {
-        return { ...part, state: "result", output: "[Interrupted — app was restarted]" };
-      }
-      return part;
-    });
-    return { ...msg, parts: patched };
-  });
-}
 interface TestPlanMessage {
   agentIndex: number;
   testPlan: string;
@@ -31,7 +18,19 @@ interface TestPlanMessage {
 export default function AppShell() {
   const [agents, setAgents] = useState<AgentConfig[]>([]);
   const [agentStatuses, setAgentStatuses] = useState<Record<string, AgentStatus>>({});
-  const [agentSessions, setAgentSessions] = useState<Record<string, { initialPrompt: string; initialMessages?: any[]; initialTabs?: { id: number; url: string; title: string }[]; initialActiveTabId?: number; initialTodos?: { content: string; status: "pending" | "in_progress" | "completed" }[]; planDir?: string }>>({});
+  const [agentSessions, setAgentSessions] = useState<
+    Record<
+      string,
+      {
+        initialPrompt: string;
+        initialMessages?: any[];
+        initialTabs?: { id: number; url: string; title: string }[];
+        initialActiveTabId?: number;
+        initialTodos?: { content: string; status: "pending" | "in_progress" | "completed" }[];
+        planDir?: string;
+      }
+    >
+  >({});
   const [loaded, setLoaded] = useState(false);
   const [retryKeys, setRetryKeys] = useState<Record<string, number>>({});
   const retryConfigRef = useRef<Map<string, { maxRetries: number; attempt: number; testPlan: string }>>(new Map());
@@ -41,68 +40,44 @@ export default function AppShell() {
   const sessionRecorderRef = useRef<SessionRecorder | null>(null);
   const browserRecordersRef = useRef<Map<string, import("../lib/browser/recorder").BrowserRecorder>>(new Map());
-  // Load persisted agents on mount
+  // Initialize with a single agent on mount
   useEffect(() => {
-    (async () => {
-      const savedAgents: AgentConfig[] = (await window.bangonit?.getAgents()) || [];
-      if (savedAgents.length === 0) {
-        const first: AgentConfig = {
-          id: crypto.randomUUID(),
-          name: "Test 1",
-          createdAt: Date.now(),
-        };
-        savedAgents.push(first);
-        await window.bangonit?.setAgents(savedAgents);
-      }
-      setAgents(savedAgents);
-      const sessions: typeof agentSessions = {};
-      const statuses: Record<string, AgentStatus> = {};
-      for (const agent of savedAgents) {
-        const session = await window.bangonit?.getAgentSession(agent.id);
-        if (session && session.messages?.length > 0) {
-          sessions[agent.id] = {
-            initialPrompt: session.initialPrompt,
-            initialMessages: patchIncompleteToolCalls(session.messages),
-            initialTabs: session.tabs,
-            initialActiveTabId: session.activeTabId,
-            initialTodos: session.todos,
-          };
-          statuses[agent.id] = "idle";
-        }
-      }
-      setAgentSessions(sessions);
-      setAgentStatuses(statuses);
-      setActiveAgentId(savedAgents[0]?.id || null);
-      setLoaded(true);
-    })();
+    const first: AgentConfig = {
+      id: crypto.randomUUID(),
+      name: "Test 1",
+      createdAt: Date.now(),
+    };
+    setAgents([first]);
+    setActiveAgentId(first.id);
+    setLoaded(true);
   }, []);
-  // Persist agents
+  // Ref for async access to current agents list
   const agentsRef = useRef(agents);
   agentsRef.current = agents;
-  useEffect(() => {
-    if (loaded && agents.length > 0) {
-      window.bangonit?.setAgents(agents);
-    }
-  }, [agents, loaded]);
   // Rerun a test — clears partition, bumps key to remount SessionView
-  const rerunAgent = useCallback(async (agentId: string) => {
-    await window.bangonit?.clearPartition?.(agentId);
-    const session = agentSessions[agentId];
-    if (!session) return;
-    setAgentStatuses((prev) => ({ ...prev, [agentId]: "running" }));
-    setRetryKeys((prev) => ({ ...prev, [agentId]: (prev[agentId] || 0) + 1 }));
-    setAgentSessions((prev) => ({
-      ...prev,
-      [agentId]: { initialPrompt: session.initialPrompt },
-    }));
-  }, [agentSessions]);
+  const rerunAgent = useCallback(
+    async (agentId: string) => {
+      await window.bangonit?.clearPartition?.(agentId);
+      const session = agentSessions[agentId];
+      if (!session) return;
+      setAgentStatuses((prev) => ({ ...prev, [agentId]: "running" }));
+      setRetryKeys((prev) => ({ ...prev, [agentId]: (prev[agentId] || 0) + 1 }));
+      setAgentSessions((prev) => ({
+        ...prev,
+        [agentId]: { initialPrompt: session.initialPrompt },
+      }));
+    },
+    [agentSessions],
+  );
-  const registerBrowserRecorder = useCallback((agentId: string, recorder: import("../lib/browser/recorder").BrowserRecorder) => {
-    browserRecordersRef.current.set(agentId, recorder);
-  }, []);
+  const registerBrowserRecorder = useCallback(
+    (agentId: string, recorder: import("../lib/browser/recorder").BrowserRecorder) => {
+      browserRecordersRef.current.set(agentId, recorder);
+    },
+    [],
+  );
   // Track statuses in a ref for async access
   const agentStatusesRef = useRef(agentStatuses);
@@ -126,7 +101,7 @@ export default function AppShell() {
       return {
         id: a.id,
         name: a.name,
-        result: st === "completed" ? "pass" as const : st === "failed" ? "fail" as const : undefined,
+        result: st === "completed" ? ("pass" as const) : st === "failed" ? ("fail" as const) : undefined,
       };
     });
@@ -134,32 +109,35 @@ export default function AppShell() {
     await window.bangonit?.generateReplayHtml?.({ runDir, data: JSON.stringify(data) });
   }, []);
-  const handleStatusChange = useCallback((agentId: string, status: "running" | "idle", result?: "pass" | "fail") => {
-    let mapped: AgentStatus;
-    if (status === "running") mapped = "running";
-    else if (result === "pass") mapped = "completed";
-    else if (result === "fail") mapped = "failed";
-    else mapped = "idle";
-    setAgentStatuses((prev) => ({ ...prev, [agentId]: mapped }));
-    if (recording && (result === "pass" || result === "fail")) {
-      saveReplayData(agentId, mapped);
-    }
+  const handleStatusChange = useCallback(
+    (agentId: string, status: "running" | "idle", result?: "pass" | "fail") => {
+      let mapped: AgentStatus;
+      if (status === "running") mapped = "running";
+      else if (result === "pass") mapped = "completed";
+      else if (result === "fail") mapped = "failed";
+      else mapped = "idle";
+      setAgentStatuses((prev) => ({ ...prev, [agentId]: mapped }));
+      if (recording && (result === "pass" || result === "fail")) {
+        saveReplayData(agentId, mapped);
+      }
-    if (result === "fail") {
-      const config = retryConfigRef.current.get(agentId);
-      if (config && config.attempt < config.maxRetries) {
-        config.attempt++;
-        window.bangonit?.emitTestRetry?.({ agentId, attempt: config.attempt, maxRetries: config.maxRetries });
-        setTimeout(() => rerunAgent(agentId), 1000);
+      if (result === "fail") {
+        const config = retryConfigRef.current.get(agentId);
+        if (config && config.attempt < config.maxRetries) {
+          config.attempt++;
+          window.bangonit?.emitTestRetry?.({ agentId, attempt: config.attempt, maxRetries: config.maxRetries });
+          setTimeout(() => rerunAgent(agentId), 1000);
+        }
       }
-    }
-  }, [rerunAgent, recording, saveReplayData]);
+    },
+    [rerunAgent, recording, saveReplayData],
+  );
   // Start a test from the UI
   const startTest = useCallback((agentId: string, prompt: string, name?: string) => {
     if (name) {
-      setAgents((prev) => prev.map((a) => a.id === agentId ? { ...a, name } : a));
+      setAgents((prev) => prev.map((a) => (a.id === agentId ? { ...a, name } : a)));
     }
     setAgentSessions((prev) => ({
       ...prev,
@@ -201,7 +179,7 @@ export default function AppShell() {
           });
         }
         if (name) {
-          next = next.map((a, i) => i === agentIndex ? { ...a, name } : a);
+          next = next.map((a, i) => (i === agentIndex ? { ...a, name } : a));
         }
         return next;
       });
@@ -211,9 +189,7 @@ export default function AppShell() {
           const agent = currentAgents[agentIndex];
           if (!agent) return currentAgents;
-          const prompt = extraPrompt
-            ? `${testPlan}\n\n## Additional Instructions\n${extraPrompt}`
-            : testPlan;
+          const prompt = extraPrompt ? `${testPlan}\n\n## Additional Instructions\n${extraPrompt}` : testPlan;
           setAgentSessions((prev) => ({
             ...prev,
@@ -244,7 +220,6 @@ export default function AppShell() {
     return <div className="flex h-screen bg-zinc-950" />;
   }
-  const hasAnySessions = agents.some((a) => agentSessions[a.id]);
   const activeAgent = agents.find((a) => a.id === activeAgentId);
   const activeHasSession = activeAgentId ? !!agentSessions[activeAgentId] : false;
@@ -265,11 +240,14 @@ export default function AppShell() {
         </div>
         {agents.map((agent) => {
           const status = agentStatuses[agent.id];
-          const hasSession = !!agentSessions[agent.id];
-          const dotClass = status === "running" ? "bg-blue-500 animate-pulse" :
-                           status === "completed" ? "bg-green-500" :
-                           status === "failed" ? "bg-red-500" :
-                           "bg-zinc-600";
+          const dotClass =
+            status === "running"
+              ? "bg-blue-500 animate-pulse"
+              : status === "completed"
+                ? "bg-green-500"
+                : status === "failed"
+                  ? "bg-red-500"
+                  : "bg-zinc-600";
           return (
             <button
               key={agent.id}
@@ -316,16 +294,12 @@ export default function AppShell() {
                 planDir={agentSessions[agent.id].planDir}
               />
             </div>
-          ) : null
+          ) : null,
         )}
         {/* Prompt screen — shown for the active agent if it has no session yet (hidden in CLI mode) */}
         {activeAgent && !activeHasSession && !cliMode && (
-          <TestPrompt
-            agentId={activeAgent.id}
-            agentName={activeAgent.name}
-            onSubmit={startTest}
-          />
+          <TestPrompt agentId={activeAgent.id} agentName={activeAgent.name} onSubmit={startTest} />
         )}
       </div>
     </div>
@@ -334,7 +308,6 @@ export default function AppShell() {
 function TestPrompt({
   agentId,
-  agentName,
   onSubmit,
 }: {
   agentId: string;
@@ -389,9 +362,7 @@ function TestPrompt({
             Run
           </button>
         </div>
-        <p className="text-xs text-zinc-600 mt-2">
-          Press Enter to run. Shift+Enter for a new line.
-        </p>
+        <p className="text-xs text-zinc-600 mt-2">Press Enter to run. Shift+Enter for a new line.</p>
         <div className="mt-4">
           <p className="text-xs text-zinc-600 mb-2">Examples:</p>
           <div className="flex flex-col gap-1.5">