@wix/evalforge-evaluator 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/index.ts
4
- import { EvalStatus } from "@wix/evalforge-types";
4
+ import { EvalStatus as EvalStatus2 } from "@wix/evalforge-types";
5
5
 
6
6
  // src/config.ts
7
7
  function loadConfig() {
@@ -6603,6 +6603,67 @@ async function runScenario(config, evalRunId2, scenario, target, template) {
6603
6603
  };
6604
6604
  }
6605
6605
 
6606
+ // src/error-reporter.ts
6607
+ import { EvalStatus } from "@wix/evalforge-types";
6608
/**
 * Normalizes any thrown value into a plain, serializable error-details record.
 *
 * `Error` instances contribute their message, stack and class name. Any other
 * thrown value (string, number, plain object, null, ...) is converted to a
 * readable message and tagged with its `typeof` result.
 *
 * @param error   The value that was thrown or rejected; may be anything.
 * @param phase   Label of the execution phase in which the failure occurred.
 * @param context Optional key/value data describing what was being processed.
 * @returns An object with `message`, `errorType`, `phase`, `context` and an
 *          ISO-8601 `timestamp`; `stack` is present only for `Error` inputs.
 */
function formatError(error, phase, context) {
  const timestamp = new Date().toISOString();

  if (error instanceof Error) {
    return {
      message: error.message,
      stack: error.stack,
      // An anonymous subclass has an empty constructor name; fall back to the
      // conventional `name` property so the type label is never blank.
      errorType: error.constructor?.name || error.name || "Error",
      phase,
      context,
      timestamp
    };
  }

  // String() throws for objects without a usable toString (for example
  // null-prototype objects). This function runs on failure paths, so it must
  // never throw itself.
  let message;
  try {
    message = String(error);
  } catch {
    message = "[object Object]";
  }

  // A thrown plain object would otherwise be reported as "[object Object]",
  // which hides every field it carries. Prefer its JSON form when available.
  if (message === "[object Object]") {
    try {
      const serialized = JSON.stringify(error);
      if (typeof serialized === "string") {
        message = serialized;
      }
    } catch {
      // Circular references or BigInt values: keep the generic description.
    }
  }

  return {
    message,
    errorType: typeof error,
    phase,
    context,
    timestamp
  };
}
6628
/**
 * Renders an error-details record as a single human-readable string.
 *
 * Layout, in order and joined with single spaces:
 *   1. `[Phase: <phase>]` when a phase is set,
 *   2. `<errorType>: <message>` (just the message when the type is missing
 *      or is the generic "Error"),
 *   3. a `Context:` section on a new line when the context has at least one key,
 *   4. a `Stack:` section on a new line holding the first six stack lines.
 *
 * @param details Record with `message` and optional `phase`, `errorType`,
 *                `context` and `stack` fields.
 * @returns The formatted description.
 */
function formatErrorForJobError(details) {
  const parts = [];

  if (details.phase) {
    parts.push(`[Phase: ${details.phase}]`);
  }

  if (details.errorType && details.errorType !== "Error") {
    parts.push(`${details.errorType}: ${details.message}`);
  } else {
    parts.push(details.message);
  }

  if (details.context && Object.keys(details.context).length > 0) {
    // JSON.stringify throws on circular references and BigInt values. This
    // formatter runs while a failure is being reported, so a throw here would
    // hide the original error; degrade to a placeholder instead.
    let serializedContext;
    try {
      serializedContext = JSON.stringify(details.context);
    } catch {
      serializedContext = "[unserializable context]";
    }
    parts.push(`\nContext: ${serializedContext}`);
  }

  if (details.stack) {
    // Keep only the first six lines so the result stays compact.
    const stackLines = details.stack.split("\n").slice(0, 6);
    parts.push(`\nStack:\n${stackLines.join("\n")}`);
  }

  return parts.join(" ");
}
6650
/**
 * Labels for the stages of an evaluation run, used to tag log lines and
 * error messages with the stage in which a failure occurred.
 *
 * Frozen so the shared lookup table cannot be altered at runtime.
 */
var ExecutionPhase = Object.freeze({
  CONFIG: "config-loading",
  API_CLIENT: "api-client-creation",
  FETCH_EVAL_RUN: "fetch-eval-run",
  FETCH_SKILLS: "fetch-skills",
  FETCH_AGENT: "fetch-agent",
  FETCH_SCENARIOS: "fetch-scenarios",
  VALIDATION: "validation",
  PREPARE_WORKSPACE: "prepare-workspace",
  EXECUTE_SKILL: "execute-skill",
  EXECUTE_AGENT: "execute-agent",
  CLAUDE_SDK_IMPORT: "claude-sdk-import",
  CLAUDE_SDK_EXECUTION: "claude-sdk-execution",
  ADD_RESULT: "add-result",
  UPDATE_STATUS: "update-status"
});
6666
+
6606
6667
  // src/index.ts
6607
6668
  console.error(
6608
6669
  "[EVALUATOR-BOOT] Module loading started",
@@ -6610,13 +6671,22 @@ console.error(
6610
6671
  );
6611
6672
  console.error("[EVALUATOR-BOOT] All static imports successful");
6612
6673
  async function runEvaluation(projectId2, evalRunId2) {
6674
+ const state = {
6675
+ config: null,
6676
+ api: null,
6677
+ currentPhase: ExecutionPhase.CONFIG,
6678
+ currentContext: { projectId: projectId2, evalRunId: evalRunId2 }
6679
+ };
6613
6680
  console.error(
6614
6681
  "[DEBUG-H1] runEvaluation entry",
6615
6682
  JSON.stringify({ projectId: projectId2, evalRunId: evalRunId2, timestamp: Date.now() })
6616
6683
  );
6684
+ state.currentPhase = ExecutionPhase.CONFIG;
6685
+ state.currentContext = { projectId: projectId2, evalRunId: evalRunId2 };
6617
6686
  let config;
6618
6687
  try {
6619
6688
  config = loadConfig();
6689
+ state.config = config;
6620
6690
  console.error(
6621
6691
  "[DEBUG-H1] loadConfig SUCCESS",
6622
6692
  JSON.stringify({
@@ -6632,10 +6702,13 @@ async function runEvaluation(projectId2, evalRunId2) {
6632
6702
  "[DEBUG-H1] loadConfig FAILED",
6633
6703
  JSON.stringify({
6634
6704
  error: configErr instanceof Error ? configErr.message : String(configErr),
6705
+ stack: configErr instanceof Error ? configErr.stack : void 0,
6635
6706
  timestamp: Date.now()
6636
6707
  })
6637
6708
  );
6638
- throw configErr;
6709
+ throw new Error(
6710
+ `[${ExecutionPhase.CONFIG}] ${configErr instanceof Error ? configErr.message : String(configErr)}`
6711
+ );
6639
6712
  }
6640
6713
  console.log("[Evaluator] Config loaded", {
6641
6714
  serverUrl: config.serverUrl,
@@ -6644,11 +6717,22 @@ async function runEvaluation(projectId2, evalRunId2) {
6644
6717
  hasAiGatewayHeaders: Object.keys(config.aiGatewayHeaders).length > 0,
6645
6718
  hasRouteHeader: !!config.routeHeader
6646
6719
  });
6647
- const api = createApiClient(config.serverUrl, {
6648
- apiPrefix: config.apiPrefix,
6649
- routeHeader: config.routeHeader,
6650
- authToken: config.authToken
6651
- });
6720
+ state.currentPhase = ExecutionPhase.API_CLIENT;
6721
+ let api;
6722
+ try {
6723
+ api = createApiClient(config.serverUrl, {
6724
+ apiPrefix: config.apiPrefix,
6725
+ routeHeader: config.routeHeader,
6726
+ authToken: config.authToken
6727
+ });
6728
+ state.api = api;
6729
+ } catch (apiErr) {
6730
+ throw new Error(
6731
+ `[${ExecutionPhase.API_CLIENT}] Failed to create API client: ${apiErr instanceof Error ? apiErr.message : String(apiErr)}`
6732
+ );
6733
+ }
6734
+ state.currentPhase = ExecutionPhase.FETCH_EVAL_RUN;
6735
+ state.currentContext = { projectId: projectId2, evalRunId: evalRunId2, serverUrl: config.serverUrl };
6652
6736
  console.error(
6653
6737
  "[DEBUG-H2] fetchEvaluationData START",
6654
6738
  JSON.stringify({ serverUrl: config.serverUrl, timestamp: Date.now() })
@@ -6667,32 +6751,61 @@ async function runEvaluation(projectId2, evalRunId2) {
6667
6751
  })
6668
6752
  );
6669
6753
  } catch (fetchErr) {
6754
+ const errorMsg = fetchErr instanceof Error ? fetchErr.message : String(fetchErr);
6670
6755
  console.error(
6671
6756
  "[DEBUG-H2] fetchEvaluationData FAILED",
6672
6757
  JSON.stringify({
6673
- error: fetchErr instanceof Error ? fetchErr.message : String(fetchErr),
6758
+ error: errorMsg,
6759
+ stack: fetchErr instanceof Error ? fetchErr.stack : void 0,
6674
6760
  timestamp: Date.now()
6675
6761
  })
6676
6762
  );
6677
- throw fetchErr;
6763
+ throw new Error(
6764
+ `[${ExecutionPhase.FETCH_EVAL_RUN}] Failed to fetch evaluation data: ${errorMsg}`
6765
+ );
6678
6766
  }
6679
6767
  const { codeAgent, skills, scenarioItems } = evalData;
6768
+ state.currentPhase = ExecutionPhase.VALIDATION;
6769
+ state.currentContext = {
6770
+ projectId: projectId2,
6771
+ evalRunId: evalRunId2,
6772
+ scenarioCount: scenarioItems.length,
6773
+ skillCount: skills.length,
6774
+ hasAgent: !!codeAgent,
6775
+ agentId: evalData.evalRun.agentId,
6776
+ skillsGroupId: evalData.evalRun.skillsGroupId
6777
+ };
6680
6778
  if (scenarioItems.length > 0 && skills.length === 0) {
6681
6779
  throw new Error(
6682
- "Eval run has no skills: set skillsGroupId and ensure the group has skills"
6780
+ `[${ExecutionPhase.VALIDATION}] Eval run has no skills: set skillsGroupId and ensure the group has skills. (skillsGroupId: ${evalData.evalRun.skillsGroupId || "not set"})`
6683
6781
  );
6684
6782
  }
6685
6783
  if (scenarioItems.length > 0 && skills.length > 0 && !codeAgent) {
6686
6784
  throw new Error(
6687
- "Eval run has no code agent: set agentId for skill-based runs"
6785
+ `[${ExecutionPhase.VALIDATION}] Eval run has no code agent: set agentId for skill-based runs. (agentId: ${evalData.evalRun.agentId || "not set"})`
6688
6786
  );
6689
6787
  }
6788
+ let completedScenarios = 0;
6789
+ const totalScenarios = scenarioItems.length * skills.length;
6690
6790
  for (const { scenario, template } of scenarioItems) {
6691
6791
  for (const skill of skills) {
6792
+ state.currentPhase = ExecutionPhase.EXECUTE_SKILL;
6793
+ state.currentContext = {
6794
+ projectId: projectId2,
6795
+ evalRunId: evalRunId2,
6796
+ scenarioId: scenario.id,
6797
+ scenarioName: scenario.name,
6798
+ skillId: skill.id,
6799
+ skillName: skill.name,
6800
+ agentId: codeAgent?.id,
6801
+ agentName: codeAgent?.name,
6802
+ progress: `${completedScenarios + 1}/${totalScenarios}`
6803
+ };
6692
6804
  console.log(
6693
6805
  "[Evaluator] Running skill:",
6694
6806
  skill.name,
6695
- codeAgent ? `with agent: ${codeAgent.name}` : ""
6807
+ codeAgent ? `with agent: ${codeAgent.name}` : "",
6808
+ `(${completedScenarios + 1}/${totalScenarios})`
6696
6809
  );
6697
6810
  try {
6698
6811
  const result = await runScenario(
@@ -6703,17 +6816,48 @@ async function runEvaluation(projectId2, evalRunId2) {
6703
6816
  template
6704
6817
  );
6705
6818
  console.log("[Evaluator] Skill completed, adding result");
6819
+ state.currentPhase = ExecutionPhase.ADD_RESULT;
6820
+ state.currentContext = {
6821
+ ...state.currentContext,
6822
+ resultId: result.id
6823
+ };
6706
6824
  await api.addResult(projectId2, evalRunId2, result);
6825
+ completedScenarios++;
6707
6826
  } catch (err) {
6708
- console.error("[Evaluator] Failed to run skill:", skill.name, err);
6709
- throw err;
6827
+ const errorMsg = err instanceof Error ? err.message : String(err);
6828
+ const errorStack = err instanceof Error ? err.stack : void 0;
6829
+ console.error(
6830
+ "[Evaluator] Failed to run skill:",
6831
+ skill.name,
6832
+ "Error:",
6833
+ errorMsg
6834
+ );
6835
+ if (errorStack) {
6836
+ console.error("[Evaluator] Stack trace:", errorStack);
6837
+ }
6838
+ throw new Error(
6839
+ `[${state.currentPhase}] Failed to execute skill "${skill.name}" on scenario "${scenario.name}": ${errorMsg}`
6840
+ );
6710
6841
  }
6711
6842
  }
6712
6843
  }
6713
- await api.updateEvalRun(projectId2, evalRunId2, {
6714
- status: EvalStatus.COMPLETED,
6715
- completedAt: (/* @__PURE__ */ new Date()).toISOString()
6716
- });
6844
+ state.currentPhase = ExecutionPhase.UPDATE_STATUS;
6845
+ state.currentContext = {
6846
+ projectId: projectId2,
6847
+ evalRunId: evalRunId2,
6848
+ completedScenarios,
6849
+ totalScenarios
6850
+ };
6851
+ try {
6852
+ await api.updateEvalRun(projectId2, evalRunId2, {
6853
+ status: EvalStatus2.COMPLETED,
6854
+ completedAt: (/* @__PURE__ */ new Date()).toISOString()
6855
+ });
6856
+ } catch (updateErr) {
6857
+ throw new Error(
6858
+ `[${ExecutionPhase.UPDATE_STATUS}] Failed to update eval run status to COMPLETED: ${updateErr instanceof Error ? updateErr.message : String(updateErr)}`
6859
+ );
6860
+ }
6717
6861
  }
6718
6862
  var projectId = process.argv[2];
6719
6863
  var evalRunId = process.argv[3];
@@ -6729,7 +6873,16 @@ runEvaluation(projectId, evalRunId).then(() => {
6729
6873
  console.error("[EVALUATOR-BOOT] runEvaluation completed successfully");
6730
6874
  process.exit(0);
6731
6875
  }).catch(async (err) => {
6732
- console.error("[EVALUATOR-BOOT] runEvaluation FAILED:", err);
6876
+ const errorDetails = formatError(err, "main-execution", {
6877
+ projectId,
6878
+ evalRunId
6879
+ });
6880
+ const jobError = formatErrorForJobError(errorDetails);
6881
+ console.error("[EVALUATOR-BOOT] runEvaluation FAILED");
6882
+ console.error(
6883
+ "[EVALUATOR-BOOT] Error details:",
6884
+ JSON.stringify(errorDetails, null, 2)
6885
+ );
6733
6886
  try {
6734
6887
  const config = loadConfig();
6735
6888
  const api = createApiClient(config.serverUrl, {
@@ -6738,15 +6891,42 @@ runEvaluation(projectId, evalRunId).then(() => {
6738
6891
  authToken: config.authToken
6739
6892
  });
6740
6893
  await api.updateEvalRun(projectId, evalRunId, {
6741
- status: EvalStatus.FAILED,
6742
- completedAt: (/* @__PURE__ */ new Date()).toISOString()
6894
+ status: EvalStatus2.FAILED,
6895
+ completedAt: (/* @__PURE__ */ new Date()).toISOString(),
6896
+ jobError,
6897
+ jobStatus: "FAILED"
6743
6898
  });
6744
- console.error("[EVALUATOR-BOOT] Updated eval run status to FAILED");
6899
+ console.error(
6900
+ "[EVALUATOR-BOOT] Updated eval run status to FAILED with error details"
6901
+ );
6745
6902
  } catch (updateErr) {
6746
6903
  console.error(
6747
6904
  "[EVALUATOR-BOOT] Failed to update eval run status:",
6748
- updateErr
6905
+ updateErr instanceof Error ? updateErr.message : String(updateErr)
6749
6906
  );
6907
+ try {
6908
+ const serverUrl = process.env.EVAL_SERVER_URL;
6909
+ const authToken = process.env.EVAL_AUTH_TOKEN;
6910
+ const routeHeader = process.env.EVAL_ROUTE_HEADER;
6911
+ if (serverUrl) {
6912
+ const api = createApiClient(serverUrl, {
6913
+ routeHeader,
6914
+ authToken
6915
+ });
6916
+ await api.updateEvalRun(projectId, evalRunId, {
6917
+ status: EvalStatus2.FAILED,
6918
+ completedAt: (/* @__PURE__ */ new Date()).toISOString(),
6919
+ jobError: `Config load failed, then: ${jobError}`,
6920
+ jobStatus: "FAILED"
6921
+ });
6922
+ console.error("[EVALUATOR-BOOT] Fallback: Updated status to FAILED");
6923
+ }
6924
+ } catch (fallbackErr) {
6925
+ console.error(
6926
+ "[EVALUATOR-BOOT] Fallback also failed:",
6927
+ fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)
6928
+ );
6929
+ }
6750
6930
  }
6751
6931
  process.exit(1);
6752
6932
  });