npm - @wix/evalforge-evaluator - Versions diffs - 0.120.0 → 0.122.0 - Mend

@wix/evalforge-evaluator 0.120.0 → 0.122.0

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/build/index.js +16 -93
package/build/index.js.map +2 -2
package/build/index.mjs +16 -93
package/build/index.mjs.map +2 -2
package/build/types/api-client.d.ts +1 -2
package/build/types/fetch-evaluation-data.d.ts +1 -5
package/package.json +5 -5

package/build/index.js CHANGED Viewed

@@ -186,9 +186,6 @@ function createApiClient(serverUrl, options = "") {
     getPreset(projectId2, id) {
       return fetchJson(`/projects/${projectId2}/presets/${id}`);
     },
-    getAssertion(projectId2, id) {
-      return fetchJson(`/projects/${projectId2}/assertions/${id}`);
-    },
     addResult(projectId2, evalRunId2, result) {
       return postJson(
         `/projects/${projectId2}/eval-runs/${evalRunId2}/results`,
@@ -250,9 +247,6 @@ function resolveValue(value, placeholders) {
   }
   return value;
 }
-function resolvePlaceholdersInString(text, placeholders) {
-  return resolveValue(text, placeholders);
-}
 // src/fetch-evaluation-data.ts
 function parseSkillNamesFromParams(value) {
@@ -265,59 +259,6 @@ function parseSkillNamesFromParams(value) {
   }
   return [];
 }
-function applyParamsToAssertion(assertion, params) {
-  if (!params || Object.keys(params).length === 0) {
-    return assertion;
-  }
-  if (assertion.type === "llm_judge") {
-    const stringParams = {};
-    for (const [key, value] of Object.entries(params)) {
-      stringParams[key] = String(value ?? "");
-    }
-    const prompt = resolvePlaceholdersInString(assertion.prompt, stringParams);
-    return {
-      ...assertion,
-      prompt,
-      ...params.model !== void 0 && { model: params.model },
-      ...params.maxTokens !== void 0 && {
-        maxTokens: params.maxTokens
-      },
-      ...params.temperature !== void 0 && {
-        temperature: params.temperature
-      },
-      ...params.minScore !== void 0 && {
-        minScore: params.minScore
-      }
-    };
-  }
-  if (assertion.type === "time_limit" && params.maxDurationMs !== void 0) {
-    return {
-      ...assertion,
-      maxDurationMs: params.maxDurationMs
-    };
-  }
-  if (assertion.type === "skill_was_called" && params.skillNames !== void 0) {
-    return {
-      ...assertion,
-      skillNames: parseSkillNamesFromParams(params.skillNames)
-    };
-  }
-  if (assertion.type === "tool_called_with_param") {
-    return {
-      ...assertion,
-      ...params.toolName !== void 0 && {
-        toolName: params.toolName
-      },
-      ...params.expectedParams !== void 0 && {
-        expectedParams: params.expectedParams
-      },
-      ...params.requireSuccess !== void 0 && {
-        requireSuccess: params.requireSuccess
-      }
-    };
-  }
-  return { ...assertion, ...params };
-}
 function resolveSystemAssertion(assertionId, params) {
   const systemAssertion = import_evalforge_types.SYSTEM_ASSERTIONS[assertionId];
   let baseAssertion;
@@ -372,18 +313,6 @@ function resolveSystemAssertion(assertionId, params) {
   }
   return baseAssertion;
 }
-function customAssertionToAssertion(ca, params) {
-  const config = ca.config;
-  const baseAssertion = {
-    type: "llm_judge",
-    prompt: config?.prompt ?? "",
-    minScore: config?.minScore,
-    model: config?.model,
-    maxTokens: config?.maxTokens,
-    temperature: config?.temperature
-  };
-  return applyParamsToAssertion(baseAssertion, params);
-}
 async function fetchEvaluationData(api, projectId2, evalRunId2) {
   const evalRun = await api.getEvalRun(projectId2, evalRunId2);
   const scenarios = await Promise.all(
@@ -453,30 +382,14 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
     templateIds.map((id) => api.getTemplate(projectId2, id))
   ) : [];
   const templateMap = new Map(templates.map((t) => [t.id, t]));
-  const assertionIds = [
-    ...new Set(
-      scenarios.flatMap((s) => s.assertionLinks ?? []).map((link) => link.assertionId).filter((id) => !(0, import_evalforge_types.isSystemAssertionId)(id))
-    )
-  ];
-  const assertions = assertionIds.length > 0 ? await Promise.all(
-    assertionIds.map((id) => api.getAssertion(projectId2, id))
-  ) : [];
-  const assertionMap = new Map(assertions.map((a) => [a.id, a]));
   const scenarioItems = scenarios.map((scenario) => {
     const resolvedAssertions = (scenario.assertionLinks ?? []).map((link) => {
       const { assertionId, params } = link;
-      if ((0, import_evalforge_types.isSystemAssertionId)(assertionId)) {
-        return resolveSystemAssertion(
-          assertionId,
-          params
-        );
-      }
-      const customAssertion = assertionMap.get(assertionId);
-      if (!customAssertion) {
+      if (!(0, import_evalforge_types.isSystemAssertionId)(assertionId)) {
         return null;
       }
-      return customAssertionToAssertion(
-        customAssertion,
+      return resolveSystemAssertion(
+        assertionId,
         params
       );
     }).filter((a) => a !== null);
@@ -2693,6 +2606,7 @@ var import_promises9 = require("fs/promises");
 var import_path10 = require("path");
 var KILL_GRACE_PERIOD_MS = 5e3;
 var IDLE_TIMEOUT_MS = 12e4;
+var TOOL_RUNNING_IDLE_TIMEOUT_MS = 36e4;
 var IDLE_CHECK_INTERVAL_MS = 15e3;
 function extractToolAction(toolName, args) {
   if (!toolName) return "Using tool...";
@@ -2848,6 +2762,7 @@ async function executeWithOpenCode(skills, scenario, options) {
   let lastAction = "Starting...";
   let lastToolName;
   let lastFilePath;
+  let isToolRunning = false;
   if (traceContext) {
     emitTraceEvent(
       {
@@ -3048,15 +2963,16 @@ Stderr: ${stderr.slice(0, 1e3)}`
     timers.idleCheck = setInterval(() => {
       if (resolved) return;
       const idleTime = Date.now() - lastOutputTime;
-      if (idleTime >= IDLE_TIMEOUT_MS) {
+      const effectiveTimeout = isToolRunning ? TOOL_RUNNING_IDLE_TIMEOUT_MS : IDLE_TIMEOUT_MS;
+      if (idleTime >= effectiveTimeout) {
         console.warn(
-          `[OpenCode] Process appears stuck - no output for ${Math.round(idleTime / 1e3)}s. Killing process.`
+          `[OpenCode] Process appears stuck - no output for ${Math.round(idleTime / 1e3)}s (tool running: ${isToolRunning}). Killing process.`
         );
         killProcess(child, resolved);
         finalize(
           false,
           new Error(
-            `OpenCode process stuck - no output for ${Math.round(idleTime / 1e3)} seconds (idle timeout). Skills: ${skillNames}, Scenario: ${scenario.name}`
+            `OpenCode process stuck - no output for ${Math.round(idleTime / 1e3)} seconds (idle timeout, tool running: ${isToolRunning}). Skills: ${skillNames}, Scenario: ${scenario.name}`
           )
         );
       }
@@ -3117,6 +3033,13 @@ Stderr: ${stderr.slice(0, 1e3)}`
         const evt = tryParseJson(line);
         if (!evt || !evt.type) continue;
         allEvents.push({ event: evt, receivedAt: Date.now() });
+        if (evt.type === "tool_use") {
+          const tu = evt;
+          const status = tu.part.state.status;
+          isToolRunning = status !== "completed" && status !== "error";
+        } else {
+          isToolRunning = false;
+        }
         if (traceContext) {
           traceStepNumber++;
           const traceEvt = createTraceEventFromNdjson(