@wix/evalforge-evaluator 0.183.0 → 0.184.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,12 @@
1
1
  import type { SkillWithLatestVersion, TestScenario, LLMTrace, ConversationMessage, TriggerPromptImage } from '@wix/evalforge-types';
2
+ import type { CapturedStep } from '../../types.js';
2
3
  import type { ClaudeCodeExecutionOptions, ClaudeCodeExecutionResult } from './types.js';
3
4
  /**
4
5
  * Import SDK types directly from Claude Agent SDK.
5
6
  * Type-only imports are erased at compile time - zero runtime overhead.
6
7
  * The SDK is still dynamically imported at runtime in executeWithClaudeCode().
7
8
  */
8
- import type { SDKUserMessage, SDKMessage } from '@anthropic-ai/claude-agent-sdk' with { 'resolution-mode': 'import' };
9
+ import type { SDKResultMessage, SDKUserMessage, SDKMessage } from '@anthropic-ai/claude-agent-sdk' with { 'resolution-mode': 'import' };
9
10
  /**
10
11
  * Message with timestamp — tracks when each message was received.
11
12
  */
@@ -46,3 +47,24 @@ export declare function executeWithClaudeCode(skills: SkillWithLatestVersion[],
46
47
  llmTrace: LLMTrace;
47
48
  conversation: ConversationMessage[];
48
49
  }>;
50
+ /**
51
+ * Process SDK messages into CapturedSteps for LLM trace building.
52
+ * Uses actual timestamps from when messages were received to calculate durations.
53
+ */
54
+ export declare function processMessages(timestampedMessages: TimestampedMessage[], startTime: Date, endTime: Date): {
55
+ steps: CapturedStep[];
56
+ result?: SDKResultMessage;
57
+ };
58
+ /**
59
+ * Build LLM trace from captured steps.
60
+ * Calculates per-step costs using model pricing and sums tokens from steps as fallback.
61
+ */
62
+ export declare function buildLLMTraceFromSteps(steps: CapturedStep[], totalDurationMs: number, usage: {
63
+ inputTokens: number;
64
+ outputTokens: number;
65
+ totalTokens: number;
66
+ costUsd?: number;
67
+ cacheReadTokens?: number;
68
+ cacheWriteTokens?: number;
69
+ durationApiMs?: number;
70
+ }, model: string): LLMTrace;
@@ -21,6 +21,8 @@ export interface CapturedStep {
21
21
  toolName: string;
22
22
  toolUseId?: string;
23
23
  args: unknown;
24
+ isError?: boolean;
25
+ errorContent?: string;
24
26
  }>;
25
27
  toolResults?: unknown[];
26
28
  /** True if any tool result for this step's tool calls was an error */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-evaluator",
3
- "version": "0.183.0",
3
+ "version": "0.184.0",
4
4
  "description": "EvalForge Evaluator",
5
5
  "bin": "./build/index.js",
6
6
  "files": [
@@ -71,5 +71,5 @@
71
71
  "artifactId": "evalforge-evaluator"
72
72
  }
73
73
  },
74
- "falconPackageHash": "29debc912e78ca8ff33cbeeafe55eca432be7646d7ccc604f9a2f57b"
74
+ "falconPackageHash": "e51797f0d074a5b087399ed74f317c6a01d157d49801b14659740125"
75
75
  }