@wix/evalforge-evaluator 0.182.0 → 0.184.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,12 @@
1
1
  import type { SkillWithLatestVersion, TestScenario, LLMTrace, ConversationMessage, TriggerPromptImage } from '@wix/evalforge-types';
2
+ import type { CapturedStep } from '../../types.js';
2
3
  import type { ClaudeCodeExecutionOptions, ClaudeCodeExecutionResult } from './types.js';
3
4
  /**
4
5
  * Import SDK types directly from Claude Agent SDK.
5
6
  * Type-only imports are erased at compile time - zero runtime overhead.
6
7
  * The SDK is still dynamically imported at runtime in executeWithClaudeCode().
7
8
  */
8
- import type { SDKUserMessage, SDKMessage } from '@anthropic-ai/claude-agent-sdk' with { 'resolution-mode': 'import' };
9
+ import type { SDKResultMessage, SDKUserMessage, SDKMessage } from '@anthropic-ai/claude-agent-sdk' with { 'resolution-mode': 'import' };
9
10
  /**
10
11
  * Message with timestamp — tracks when each message was received.
11
12
  */
@@ -46,3 +47,24 @@ export declare function executeWithClaudeCode(skills: SkillWithLatestVersion[],
46
47
  llmTrace: LLMTrace;
47
48
  conversation: ConversationMessage[];
48
49
  }>;
50
+ /**
51
+ * Process SDK messages into CapturedSteps for LLM trace building.
52
+ * Uses actual timestamps from when messages were received to calculate durations.
53
+ */
54
+ export declare function processMessages(timestampedMessages: TimestampedMessage[], startTime: Date, endTime: Date): {
55
+ steps: CapturedStep[];
56
+ result?: SDKResultMessage;
57
+ };
58
+ /**
59
+ * Build LLM trace from captured steps.
60
+ * Calculates per-step costs using model pricing and sums tokens from steps as fallback.
61
+ */
62
+ export declare function buildLLMTraceFromSteps(steps: CapturedStep[], totalDurationMs: number, usage: {
63
+ inputTokens: number;
64
+ outputTokens: number;
65
+ totalTokens: number;
66
+ costUsd?: number;
67
+ cacheReadTokens?: number;
68
+ cacheWriteTokens?: number;
69
+ durationApiMs?: number;
70
+ }, model: string): LLMTrace;
@@ -1,4 +1,6 @@
1
1
  import type { MCPEntity } from '@wix/evalforge-types';
2
+ import { type FetchGitHubFileFn } from '../shared/resolve-capability-content.js';
3
+ export type { FetchGitHubFileFn };
2
4
  /**
3
5
  * Write .mcp.json at the project root (cwd) for Claude Code to discover MCPs.
4
6
  *
@@ -9,8 +11,12 @@ import type { MCPEntity } from '@wix/evalforge-types';
9
11
  * Any `{{placeholder}}` values in the config are resolved before writing
10
12
  * (e.g. `{{wix-auth-token}}` → token from ~/.wix/auth/api-key.json).
11
13
  *
14
+ * For MCPs with a GitHub source, the latest config is live-fetched.
15
+ * For inline MCPs, the stored config is used directly.
16
+ *
12
17
  * @see https://code.claude.com/docs/en/mcp#mcp-installation-scopes
13
18
  * @param cwd - Working directory (project root for Claude Code)
14
19
  * @param mcps - MCP entities whose config is merged into mcpServers
20
+ * @param fetchFn - Optional GitHub fetcher (injectable for tests)
15
21
  */
16
- export declare function writeMcpToFilesystem(cwd: string, mcps: MCPEntity[]): Promise<void>;
22
+ export declare function writeMcpToFilesystem(cwd: string, mcps: MCPEntity[], fetchFn?: FetchGitHubFileFn): Promise<void>;
@@ -1,4 +1,6 @@
1
1
  import type { Rule } from '@wix/evalforge-types';
2
+ import { type FetchGitHubFileFn } from '../shared/resolve-capability-content.js';
3
+ export type { FetchGitHubFileFn };
2
4
  /**
3
5
  * Write rule content to the filesystem based on each rule's `ruleType`.
4
6
  *
@@ -8,7 +10,11 @@ import type { Rule } from '@wix/evalforge-types';
8
10
  * - `cursor-rule` -> `{cwd}/.cursor/rules/{name}.md` (one file per rule)
9
11
  * - `generic` -> `{cwd}/{rule.directory}` (defaults to `.opencode/rules`)
10
12
  *
13
+ * For rules with a GitHub source, the latest content is live-fetched.
14
+ * For inline rules, the stored content is used directly.
15
+ *
11
16
  * @param cwd - Working directory (project root for Claude Code)
12
17
  * @param rules - Rule entities to write
18
+ * @param fetchFn - Optional GitHub fetcher (injectable for tests)
13
19
  */
14
- export declare function writeRulesToFilesystem(cwd: string, rules: Rule[]): Promise<void>;
20
+ export declare function writeRulesToFilesystem(cwd: string, rules: Rule[], fetchFn?: FetchGitHubFileFn): Promise<void>;
@@ -1,18 +1,15 @@
1
- import type { GitHubSource, SkillFile, SkillWithLatestVersion } from '@wix/evalforge-types';
1
+ import type { SkillWithLatestVersion } from '@wix/evalforge-types';
2
+ import { type FetchGitHubFolderFn } from '../shared/resolve-capability-content.js';
2
3
  import { writeFilesToDirectory } from '../../utils/write-files.js';
3
- export type FetchGitHubFolderFn = (source: GitHubSource, options?: {
4
- userAgent?: string;
5
- }) => Promise<SkillFile[]>;
4
+ export type { FetchGitHubFolderFn };
6
5
  /**
7
6
  * Write all skills to the filesystem so Claude Agent SDK can discover them.
8
- *
9
- * Content resolution:
10
- * 1. Pinned (version has `files`): writes all files from the stored snapshot
11
- * 2. Live (skill has `source`, no pinned files): fetches from GitHub at runtime
7
+ * Content is resolved by the shared capability fetcher (stored snapshot or
8
+ * live GitHub fetch).
12
9
  *
13
10
  * @param cwd - Working directory where .claude/skills/ will be created
14
11
  * @param skills - All skills to write
15
- * @param fetchFn - Function to fetch files from GitHub (defaults to fetchGitHubFolder)
12
+ * @param fetchFn - Optional GitHub folder fetcher (injectable for tests)
16
13
  */
17
14
  export declare function writeSkillsToFilesystem(cwd: string, skills: SkillWithLatestVersion[], fetchFn?: FetchGitHubFolderFn): Promise<void>;
18
15
  export declare function writeSkillToFilesystem(cwd: string, skill: SkillWithLatestVersion, fetchFn?: FetchGitHubFolderFn): Promise<void>;
@@ -1,7 +1,6 @@
1
- import type { SubAgent, GitHubSource } from '@wix/evalforge-types';
2
- import { type FetchGitHubFolderOptions } from '@wix/evalforge-github-client';
3
- /** Signature for the single-file GitHub fetch function (injectable for tests). */
4
- export type FetchGitHubFileFn = (source: GitHubSource, options?: FetchGitHubFolderOptions) => Promise<string>;
1
+ import type { SubAgent } from '@wix/evalforge-types';
2
+ import { type FetchGitHubFileFn } from '../shared/resolve-capability-content.js';
3
+ export type { FetchGitHubFileFn };
5
4
  /**
6
5
  * Write sub-agent markdown files to .claude/agents/ for Claude Code to discover.
7
6
  *
@@ -11,6 +10,6 @@ export type FetchGitHubFileFn = (source: GitHubSource, options?: FetchGitHubFold
11
10
  * @see https://code.claude.com/docs/en/sub-agents#write-subagent-files
12
11
  * @param cwd - Working directory (project root for Claude Code)
13
12
  * @param subAgents - Sub-agent entities to write
14
- * @param fetchFn - Optional fetch function for testing (defaults to fetchGitHubFile)
13
+ * @param fetchFn - Optional GitHub fetcher (injectable for tests)
15
14
  */
16
15
  export declare function writeSubAgentsToFilesystem(cwd: string, subAgents: SubAgent[], fetchFn?: FetchGitHubFileFn): Promise<void>;
@@ -1,12 +1,9 @@
1
- import type { GitHubSource, SkillFile, SkillWithLatestVersion } from '@wix/evalforge-types';
2
- export type FetchGitHubFolderFn = (source: GitHubSource, options?: {
3
- userAgent?: string;
4
- }) => Promise<SkillFile[]>;
1
+ import type { SkillWithLatestVersion } from '@wix/evalforge-types';
2
+ import { type FetchGitHubFolderFn } from '../shared/resolve-capability-content.js';
3
+ export type { FetchGitHubFolderFn };
5
4
  /**
6
5
  * Write all skills to .opencode/skills/ so OpenCode can discover them.
7
- *
8
- * Content resolution:
9
- * 1. Pinned (version has `files`): writes all files from the stored snapshot
10
- * 2. Live (skill has `source`, no pinned files): fetches from GitHub at runtime
6
+ * Content is resolved by the shared capability fetcher (stored snapshot or
7
+ * live GitHub fetch).
11
8
  */
12
9
  export declare function writeSkillsToFilesystem(cwd: string, skills: SkillWithLatestVersion[], fetchFn?: FetchGitHubFolderFn): Promise<void>;
@@ -1,6 +1,6 @@
1
- import type { SubAgent, GitHubSource } from '@wix/evalforge-types';
2
- import { type FetchGitHubFolderOptions } from '@wix/evalforge-github-client';
3
- export type FetchGitHubFileFn = (source: GitHubSource, options?: FetchGitHubFolderOptions) => Promise<string>;
1
+ import type { SubAgent } from '@wix/evalforge-types';
2
+ import { type FetchGitHubFileFn } from '../shared/resolve-capability-content.js';
3
+ export type { FetchGitHubFileFn };
4
4
  /**
5
5
  * Write sub-agent markdown files to .opencode/agents/ for OpenCode to discover.
6
6
  */
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Capability content fetcher.
3
+ *
4
+ * Single place that resolves the runtime content of a capability for an eval
5
+ * run, choosing between two sources:
6
+ * 1. DB snapshot — the stored version content already loaded onto the entity
7
+ * (skill version files, sub-agent markdown, rule text, MCP config).
8
+ * 2. Live GitHub — when the entity has a `source` and no usable snapshot,
9
+ * the latest content is fetched from GitHub at run time.
10
+ *
11
+ * Shared by every agent runner (claude-code, opencode) and every capability
12
+ * type so the GitHub-vs-DB decision lives in exactly one place.
13
+ */
14
+ import { type FetchGitHubFolderOptions } from '@wix/evalforge-github-client';
15
+ import type { GitHubSource, SkillFile, SkillWithLatestVersion, SubAgent, Rule, MCPEntity } from '@wix/evalforge-types';
16
+ /** Fetches a single file from GitHub and returns its UTF-8 content. */
17
+ export type FetchGitHubFileFn = (source: GitHubSource, options?: FetchGitHubFolderOptions) => Promise<string>;
18
+ /** Fetches a directory snapshot from GitHub and returns its files. */
19
+ export type FetchGitHubFolderFn = (source: GitHubSource, options?: {
20
+ userAgent?: string;
21
+ }) => Promise<SkillFile[]>;
22
+ /**
23
+ * Resolve the files for a skill: the stored version snapshot when present,
24
+ * otherwise a live folder fetch from GitHub. Throws when neither is available.
25
+ */
26
+ export declare function resolveSkillFiles(skill: SkillWithLatestVersion, fetchFn?: FetchGitHubFolderFn): Promise<SkillFile[]>;
27
+ /**
28
+ * Resolve a sub-agent's markdown: live-fetch when a source is set, else the
29
+ * inline `subAgentMd`.
30
+ */
31
+ export declare function resolveSubAgentMd(agent: SubAgent, fetchFn?: FetchGitHubFileFn): Promise<string>;
32
+ /**
33
+ * Resolve a rule's text content: live-fetch when a source is set, else the
34
+ * inline `content`.
35
+ */
36
+ export declare function resolveRuleText(rule: Rule, fetchFn?: FetchGitHubFileFn): Promise<string>;
37
+ /**
38
+ * Resolve an MCP's keyed server config: live-fetch and parse the JSON config
39
+ * file when a source is set (unwrapping a top-level `mcpServers` key when
40
+ * present), else the inline `config`.
41
+ */
42
+ export declare function resolveMcpConfig(mcp: MCPEntity, fetchFn?: FetchGitHubFileFn): Promise<Record<string, unknown>>;
@@ -21,6 +21,8 @@ export interface CapturedStep {
21
21
  toolName: string;
22
22
  toolUseId?: string;
23
23
  args: unknown;
24
+ isError?: boolean;
25
+ errorContent?: string;
24
26
  }>;
25
27
  toolResults?: unknown[];
26
28
  /** True if any tool result for this step's tool calls was an error */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-evaluator",
3
- "version": "0.182.0",
3
+ "version": "0.184.0",
4
4
  "description": "EvalForge Evaluator",
5
5
  "bin": "./build/index.js",
6
6
  "files": [
@@ -71,5 +71,5 @@
71
71
  "artifactId": "evalforge-evaluator"
72
72
  }
73
73
  },
74
- "falconPackageHash": "a60b5c0fdd6d077a38372bc12537d26fcf190979f7c1ace877bfebe9"
74
+ "falconPackageHash": "e51797f0d074a5b087399ed74f317c6a01d157d49801b14659740125"
75
75
  }