npm - @empiricalrun/test-gen - Versions diffs - 0.10.3 → 0.10.5 - Mend

@empiricalrun/test-gen 0.10.3 → 0.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/CHANGELOG.md +13 -0
package/dist/actions/index.d.ts.map +1 -1
package/dist/actions/index.js +0 -1
package/dist/agent/browsing/index.d.ts.map +1 -1
package/dist/agent/browsing/index.js +30 -8
package/dist/agent/browsing/utils.d.ts.map +1 -1
package/dist/agent/browsing/utils.js +1 -0
package/dist/bin/scenarios/index.d.ts.map +1 -1
package/dist/bin/scenarios/index.js +8 -0
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,18 @@
 # @empiricalrun/test-gen
+## 0.10.5
+### Patch Changes
+- 08ecca2: fix: test gen should quit after 3 consecutive errors and update dashboard sink message format
+- 08ecca2: fix: test gen should quit after 3 consecutive errors
+## 0.10.4
+### Patch Changes
+- 5400dfa: fix: prepare files in subdirectory for browsing agent
 ## 0.10.3
 ### Patch Changes

package/dist/actions/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAOhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAWhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;~~IAkBhE~~,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;CAMX"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAOhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAWhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAiBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;CAMX"}

package/dist/actions/index.js CHANGED Viewed

@@ -33,7 +33,6 @@ class PlaywrightActions {
             console.log(`code: ${code}`, "\n\n");
         }
         catch (e) {
-            // TODO: make these specific errors so that its easy to consume
             throw Error(`Error executing ${name} action of playwright: ${e}`);
         }
     }

package/dist/agent/browsing/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,~~mBAwDP~~"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,mBAgFP"}

package/dist/agent/browsing/index.js CHANGED Viewed

@@ -17,12 +17,11 @@ async function browsingAgent(task, page, options = {}) {
     const actions = new actions_1.PlaywrightActions(page);
     const tools = actions.getActionSchemas();
     let isTaskDone = false;
-    const previousActions = [];
+    const executedActions = [];
     await (0, utils_1.injectPwLocatorGenerator)(page);
-    let lastActionErrors = [];
+    trace.update({ input: { task } });
     // await page.waitForTimeout(3_00_000)
     while (!isTaskDone) {
-        trace.update({ input: { task } });
         const pageContentSpan = trace.startSpan("page-content");
         const pageContent = await page.content();
         pageContentSpan.end({ output: { pageContent } });
@@ -30,14 +29,21 @@ async function browsingAgent(task, page, options = {}) {
         const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
         sanitizationSpan.end({ output: { pageSnapshot } });
         const promptSpan = trace.startSpan("page-prompt");
+        // check whether last action was an error
+        const lastErrorAction = executedActions[executedActions.length - 1]?.isError
+            ? executedActions[executedActions.length - 1]?.action
+            : undefined;
+        // extract all successful actions
+        const successfulActions = executedActions
+            .filter((a) => !a.isError)
+            .map((a) => a.action);
         const messages = await (0, provider_1.getPromptForNextAction)({
             pageSnapshot,
-            previousActions,
+            previousActions: successfulActions,
             task,
-            lastActionErrors,
+            lastActionErrors: lastErrorAction ? [lastErrorAction] : [],
         });
         promptSpan.end({ output: { messages } });
-        lastActionErrors = [];
         const completion = await (0, ai_1.getLLMResult)({
             messages,
             tools,
@@ -48,15 +54,31 @@ async function browsingAgent(task, page, options = {}) {
             const toolCall = toolCalls[i];
             try {
                 await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments));
-                previousActions.push(JSON.stringify(toolCall));
+                executedActions.push({
+                    isError: false,
+                    action: JSON.stringify(toolCall),
+                });
             }
             catch (e) {
                 // TODO: implement feedback loop to llm
-                lastActionErrors.push(e.message);
+                executedActions.push({
+                    isError: true,
+                    action: JSON.stringify(toolCall),
+                });
                 logger.error(e);
             }
         }
         isTaskDone = actions.isComplete();
+        // mark task as done if llm is stuck in loop
+        if (executedActions.length > 3) {
+            const lastThreeActions = executedActions.slice(-3);
+            const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
+            if (lastThreeActionsFailed) {
+                // TODO: this should be sent to dashboard
+                logger.error("Agent is not able to figure out next action, marking task as done");
+                isTaskDone = true;
+            }
+        }
     }
     const code = actions.generateCode();
     trace.update({ input: { task }, output: { code } });

package/dist/agent/browsing/utils.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,~~iBAiBzE~~;AAWD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAMxD;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,QA4BnD"}
1	+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,iBAkBzE;AAWD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAMxD;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,QA4BnD"}

package/dist/agent/browsing/utils.js CHANGED Viewed

@@ -13,6 +13,7 @@ async function prepareFileForBrowsingAgent(genConfig) {
     const { name, steps, assert } = scenarios[0];
     const mergedSteps = `${steps.join("\n")}\n${assert}`;
     if (!fs_extra_1.default.existsSync(specPath)) {
+        await fs_extra_1.default.createFile(specPath);
         await fs_extra_1.default.writeFile(specPath, (0, web_1.addNewImport)("", ["test", "expect"], "@playwright/test"));
     }
     // TODO: this assumes that test code repo has `page` as the main entrypoint fixture

package/dist/bin/scenarios/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/scenarios/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAiGvC,iBAAe,aAAa,CAC1B,aAAa,EAAE,MAAM,GACpB,OAAO,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,QAAQ,EAAE,CAAA;CAAE,EAAE,CAAC,~~CA8BxD~~;AAED,OAAO,EAAE,aAAa,EAAE,CAAC"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/scenarios/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAiGvC,iBAAe,aAAa,CAC1B,aAAa,EAAE,MAAM,GACpB,OAAO,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,QAAQ,EAAE,CAAA;CAAE,EAAE,CAAC,CAqCxD;AAED,OAAO,EAAE,aAAa,EAAE,CAAC"}

package/dist/bin/scenarios/index.js CHANGED Viewed

@@ -91,6 +91,14 @@ async function loadScenarios(scenariosPath) {
     else if (scenariosPath.endsWith(".yaml")) {
         return await loadScenariosFromYAML(scenariosPath);
     }
+    else if (scenariosPath.endsWith(".ts")) {
+        return [
+            {
+                specPath: scenariosPath,
+                scenarios: [],
+            },
+        ];
+    }
     else if (isValidJSON(atob(scenariosPath))) {
         const str = atob(scenariosPath);
         const config = JSON.parse(str);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@empiricalrun/test-gen",
-  "version": "0.10.3",
+  "version": "0.10.5",
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"