@wix/evalforge-evaluator 0.102.0 → 0.103.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ import type { ConversationMessage } from '@wix/evalforge-types';
2
+ import type { TimestampedMessage } from './execute.js';
3
+ /**
4
+ * Build a full conversation from raw SDK messages.
5
+ * Extracts text, thinking, tool_use, and tool_result blocks into a
6
+ * serializable ConversationMessage array for storage and later display.
7
+ */
8
+ export declare function buildConversation(timestampedMessages: TimestampedMessage[]): ConversationMessage[];
@@ -1,5 +1,18 @@
1
- import type { SkillWithLatestVersion, TestScenario, LLMTrace } from '@wix/evalforge-types';
1
+ import type { SkillWithLatestVersion, TestScenario, LLMTrace, ConversationMessage } from '@wix/evalforge-types';
2
2
  import type { ClaudeCodeExecutionOptions, ClaudeCodeExecutionResult } from './types.js';
3
+ /**
4
+ * Import SDK types directly from Claude Agent SDK.
5
+ * Type-only imports are erased at compile time - zero runtime overhead.
6
+ * The SDK is still dynamically imported at runtime in executeWithClaudeCode().
7
+ */
8
+ import type { SDKMessage } from '@anthropic-ai/claude-agent-sdk' with { 'resolution-mode': 'import' };
9
+ /**
10
+ * Message with timestamp — tracks when each message was received.
11
+ */
12
+ export interface TimestampedMessage {
13
+ message: SDKMessage;
14
+ receivedAt: Date;
15
+ }
3
16
  /**
4
17
  * Execute skills using the Claude Agent SDK.
5
18
  *
@@ -15,4 +28,5 @@ import type { ClaudeCodeExecutionOptions, ClaudeCodeExecutionResult } from './ty
15
28
  export declare function executeWithClaudeCode(skills: SkillWithLatestVersion[], scenario: TestScenario, options: ClaudeCodeExecutionOptions): Promise<{
16
29
  result: ClaudeCodeExecutionResult;
17
30
  llmTrace: LLMTrace;
31
+ conversation: ConversationMessage[];
18
32
  }>;
@@ -10,5 +10,6 @@
10
10
  */
11
11
  export { ClaudeCodeAdapter, claudeCodeAdapter } from './claude-code-adapter.js';
12
12
  export { executeWithClaudeCode } from './execute.js';
13
+ export { buildConversation } from './build-conversation.js';
13
14
  export type { ClaudeCodeExecutionOptions, ClaudeCodeExecutionResult } from './types.js';
14
15
  export type { TraceContext } from '@wix/evalforge-types';
@@ -8,6 +8,7 @@ export type PartialEvalRunResult = Omit<EvalRunResult, 'assertionResults' | 'pas
8
8
  */
9
9
  export interface CapturedStep {
10
10
  text: string;
11
+ thinking?: string;
11
12
  usage: {
12
13
  inputTokens: number;
13
14
  outputTokens: number;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-evaluator",
3
- "version": "0.102.0",
3
+ "version": "0.103.0",
4
4
  "description": "EvalForge Evaluator",
5
5
  "bin": "./build/index.js",
6
6
  "files": [
@@ -20,8 +20,8 @@
20
20
  "@anthropic-ai/claude-agent-sdk": "^0.2.44",
21
21
  "@anthropic-ai/claude-code": "^2.1.44",
22
22
  "@wix/eval-assertions": "0.22.0",
23
- "@wix/evalforge-github-client": "0.20.0",
24
- "@wix/evalforge-types": "0.45.0",
23
+ "@wix/evalforge-github-client": "0.21.0",
24
+ "@wix/evalforge-types": "0.46.0",
25
25
  "ai": "^6.0.6",
26
26
  "diff": "^7.0.0",
27
27
  "tar": "^7.5.3",
@@ -60,5 +60,5 @@
60
60
  "artifactId": "evalforge-evaluator"
61
61
  }
62
62
  },
63
- "falconPackageHash": "db973c328d16212a1aea0a801b5e3d93245f0611fceadb4a64f7d87e"
63
+ "falconPackageHash": "770ab064ab729180951ced129aaf6e04a6d3bdf65e4cc014a353566e"
64
64
  }