npm - @wix/evalforge-evaluator - Versions diffs - 0.102.0 → 0.103.0 - Mend

@wix/evalforge-evaluator 0.102.0 → 0.103.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (9)

package/build/index.js +109 -13
package/build/index.js.map +4 -4
package/build/index.mjs +109 -13
package/build/index.mjs.map +4 -4
package/build/types/run-scenario/agents/claude-code/build-conversation.d.ts +8 -0
package/build/types/run-scenario/agents/claude-code/execute.d.ts +15 -1
package/build/types/run-scenario/agents/claude-code/index.d.ts +1 -0
package/build/types/run-scenario/types.d.ts +1 -0
package/package.json +4 -4

package/build/types/run-scenario/agents/claude-code/build-conversation.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+import type { ConversationMessage } from '@wix/evalforge-types';
+import type { TimestampedMessage } from './execute.js';
+/**
+ * Build a full conversation from raw SDK messages.
+ * Extracts text, thinking, tool_use, and tool_result blocks into a
+ * serializable ConversationMessage array for storage and later display.
+ */
+export declare function buildConversation(timestampedMessages: TimestampedMessage[]): ConversationMessage[];

package/build/types/run-scenario/agents/claude-code/execute.d.ts CHANGED Viewed

@@ -1,5 +1,18 @@
-import type { SkillWithLatestVersion, TestScenario, LLMTrace } from '@wix/evalforge-types';
+import type { SkillWithLatestVersion, TestScenario, LLMTrace, ConversationMessage } from '@wix/evalforge-types';
 import type { ClaudeCodeExecutionOptions, ClaudeCodeExecutionResult } from './types.js';
+/**
+ * Import SDK types directly from Claude Agent SDK.
+ * Type-only imports are erased at compile time - zero runtime overhead.
+ * The SDK is still dynamically imported at runtime in executeWithClaudeCode().
+ */
+import type { SDKMessage } from '@anthropic-ai/claude-agent-sdk' with { 'resolution-mode': 'import' };
+/**
+ * Message with timestamp — tracks when each message was received.
+ */
+export interface TimestampedMessage {
+    message: SDKMessage;
+    receivedAt: Date;
+}
 /**
  * Execute skills using the Claude Agent SDK.
  *
@@ -15,4 +28,5 @@ import type { ClaudeCodeExecutionOptions, ClaudeCodeExecutionResult } from './ty
 export declare function executeWithClaudeCode(skills: SkillWithLatestVersion[], scenario: TestScenario, options: ClaudeCodeExecutionOptions): Promise<{
     result: ClaudeCodeExecutionResult;
     llmTrace: LLMTrace;
+    conversation: ConversationMessage[];
 }>;

package/build/types/run-scenario/agents/claude-code/index.d.ts CHANGED Viewed

@@ -10,5 +10,6 @@
  */
 export { ClaudeCodeAdapter, claudeCodeAdapter } from './claude-code-adapter.js';
 export { executeWithClaudeCode } from './execute.js';
+export { buildConversation } from './build-conversation.js';
 export type { ClaudeCodeExecutionOptions, ClaudeCodeExecutionResult } from './types.js';
 export type { TraceContext } from '@wix/evalforge-types';

package/build/types/run-scenario/types.d.ts CHANGED Viewed

@@ -8,6 +8,7 @@ export type PartialEvalRunResult = Omit<EvalRunResult, 'assertionResults' | 'pas
  */
 export interface CapturedStep {
     text: string;
+    thinking?: string;
     usage: {
         inputTokens: number;
         outputTokens: number;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@wix/evalforge-evaluator",
-  "version": "0.102.0",
+  "version": "0.103.0",
   "description": "EvalForge Evaluator",
   "bin": "./build/index.js",
   "files": [
@@ -20,8 +20,8 @@
     "@anthropic-ai/claude-agent-sdk": "^0.2.44",
     "@anthropic-ai/claude-code": "^2.1.44",
     "@wix/eval-assertions": "0.22.0",
-    "@wix/evalforge-github-client": "0.20.0",
-    "@wix/evalforge-types": "0.45.0",
+    "@wix/evalforge-github-client": "0.21.0",
+    "@wix/evalforge-types": "0.46.0",
     "ai": "^6.0.6",
     "diff": "^7.0.0",
     "tar": "^7.5.3",
@@ -60,5 +60,5 @@
       "artifactId": "evalforge-evaluator"
     }
   },
-  "falconPackageHash": "db973c328d16212a1aea0a801b5e3d93245f0611fceadb4a64f7d87e"
+  "falconPackageHash": "770ab064ab729180951ced129aaf6e04a6d3bdf65e4cc014a353566e"
 }