@wix/evalforge-evaluator 0.186.0 → 0.188.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/build/index.js +346 -93
- package/build/index.js.map +4 -4
- package/build/index.mjs +345 -93
- package/build/index.mjs.map +4 -4
- package/build/types/ambassador-converters.d.ts +8 -1
- package/build/types/api-client.d.ts +3 -1
- package/build/types/run-scenario/environment.d.ts +6 -2
- package/build/types/run-scenario/index.d.ts +2 -1
- package/build/types/run-scenario/install-dependencies.d.ts +9 -1
- package/package.json +7 -6
|
@@ -30,7 +30,8 @@ import type { TestScenario as WireTestScenario } from '@wix/ambassador-evalforge
|
|
|
30
30
|
import type { Template as WireTemplate } from '@wix/ambassador-evalforge-v1-project/types';
|
|
31
31
|
import type { Preset as WirePreset } from '@wix/ambassador-evalforge-v1-preset/types';
|
|
32
32
|
import type { Capability as WireCapability, CapabilityVersion as WireCapabilityVersion } from '@wix/ambassador-evalforge-v1-capability/types';
|
|
33
|
-
import type {
|
|
33
|
+
import type { ProvisionedSite as WireProvisionedSite, SiteBootstrapResult as WireSiteBootstrapResult } from '@wix/ambassador-evalforge-v1-site-provisioning/types';
|
|
34
|
+
import type { Agent, Capability, CapabilityVersion, EvalRun, EvalRunResult, LiveTraceEvent, Preset, ProvisionedSite, SiteBootstrapResult, Template, TestScenario } from '@wix/evalforge-types';
|
|
34
35
|
export declare function evalRunResultToProto(result: EvalRunResult): WireEvalRunResult;
|
|
35
36
|
export declare function liveTraceEventToProto(event: LiveTraceEvent): WireLiveTraceEvent;
|
|
36
37
|
export declare function agentFromProto(wire: WireAgent): Agent;
|
|
@@ -66,3 +67,9 @@ export declare function capabilityFromProto(wire: WireCapability): Capability;
|
|
|
66
67
|
* by the parent capability's `capabilityType`.
|
|
67
68
|
*/
|
|
68
69
|
export declare function capabilityVersionFromProto(wire: WireCapabilityVersion, projectId: string): CapabilityVersion;
|
|
70
|
+
export declare function provisionedSiteFromProto(proto: WireProvisionedSite): ProvisionedSite;
|
|
71
|
+
/**
|
|
72
|
+
* Map the wire bootstrap result onto the domain shape. Returns `undefined`
|
|
73
|
+
* when no bootstrap ran (no result on the response).
|
|
74
|
+
*/
|
|
75
|
+
export declare function siteBootstrapResultFromProto(proto: WireSiteBootstrapResult | undefined | null): SiteBootstrapResult | undefined;
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
* (status/completedAt/jobError/jobStatus) the evaluator writes.
|
|
12
12
|
*/
|
|
13
13
|
import { type IHttpClient } from '@wix/http-client';
|
|
14
|
-
import type { EvalRun, EvalRunResult, LiveTraceEvent, TestScenario, Template, Agent, Preset, CapabilityWithLatestVersion, CapabilityVersion } from '@wix/evalforge-types';
|
|
14
|
+
import type { EvalRun, EvalRunResult, LiveTraceEvent, TestScenario, Template, Agent, Preset, CapabilityWithLatestVersion, CapabilityVersion, ProvisionedSite } from '@wix/evalforge-types';
|
|
15
15
|
export interface ApiClient {
|
|
16
16
|
getEvalRun(projectId: string, id: string): Promise<EvalRun>;
|
|
17
17
|
getScenario(projectId: string, id: string): Promise<TestScenario>;
|
|
@@ -24,6 +24,8 @@ export interface ApiClient {
|
|
|
24
24
|
clearResults(projectId: string, evalRunId: string): Promise<void>;
|
|
25
25
|
pushTraceEvent(projectId: string, evalRunId: string, event: LiveTraceEvent): Promise<void>;
|
|
26
26
|
updateEvalRun(projectId: string, evalRunId: string, update: Partial<EvalRun>): Promise<void>;
|
|
27
|
+
provisionScenarioSite(projectId: string, evalRunId: string, scenarioId: string): Promise<ProvisionedSite>;
|
|
28
|
+
deleteProvisionedSite(projectId: string, siteId: string): Promise<void>;
|
|
27
29
|
}
|
|
28
30
|
/**
|
|
29
31
|
* Options for creating an API client.
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import type { Template } from '@wix/evalforge-types';
|
|
2
2
|
import type { EvaluatorConfig } from '../config.js';
|
|
3
|
+
export interface PrepareWorkingDirectoryOptions {
|
|
4
|
+
template?: Template;
|
|
5
|
+
}
|
|
3
6
|
/**
|
|
4
7
|
* Prepare the working directory for a scenario.
|
|
5
8
|
* If a template with source is provided, fetch files from GitHub.
|
|
@@ -9,7 +12,8 @@ import type { EvaluatorConfig } from '../config.js';
|
|
|
9
12
|
* @param evalRunId - The evaluation run ID
|
|
10
13
|
* @param targetId - The target ID (prompt agent, skill, or agent)
|
|
11
14
|
* @param scenarioId - The scenario ID (for per-scenario isolation when no template)
|
|
12
|
-
* @param
|
|
15
|
+
* @param onProgress - Required callback invoked with human-readable setup progress messages
|
|
16
|
+
* @param options - Optional settings: template (pre-fetched template entity)
|
|
13
17
|
* @returns The working directory path
|
|
14
18
|
*/
|
|
15
|
-
export declare function prepareWorkingDirectory(config: EvaluatorConfig, evalRunId: string, targetId: string, scenarioId: string,
|
|
19
|
+
export declare function prepareWorkingDirectory(config: EvaluatorConfig, evalRunId: string, targetId: string, scenarioId: string, onProgress: (message: string) => void, options?: PrepareWorkingDirectoryOptions): Promise<string>;
|
|
@@ -2,6 +2,7 @@ import type { EvalRunResult, LiveTraceEvent, TestScenario, Template } from '@wix
|
|
|
2
2
|
import type { EvaluatorConfig } from '../config.js';
|
|
3
3
|
import type { Assertion } from '@wix/evalforge-types';
|
|
4
4
|
import type { EvaluationData } from '../fetch-evaluation-data.js';
|
|
5
|
+
import type { ApiClient } from '../api-client.js';
|
|
5
6
|
export type { PartialEvalRunResult } from './types.js';
|
|
6
7
|
export type { ScenarioItem, EvaluationData } from '../fetch-evaluation-data.js';
|
|
7
8
|
/**
|
|
@@ -17,4 +18,4 @@ export type { ScenarioItem, EvaluationData } from '../fetch-evaluation-data.js';
|
|
|
17
18
|
* (remote job execution, where stdout is not accessible)
|
|
18
19
|
* @returns Complete evaluation result
|
|
19
20
|
*/
|
|
20
|
-
export declare function runScenario(config: EvaluatorConfig, evalRunId: string, scenario: TestScenario, evalData: EvaluationData, template?: Template, resolvedAssertions?: Assertion[], pushEvent?: (event: LiveTraceEvent) => void): Promise<EvalRunResult>;
|
|
21
|
+
export declare function runScenario(config: EvaluatorConfig, evalRunId: string, scenario: TestScenario, evalData: EvaluationData, template?: Template, resolvedAssertions?: Assertion[], pushEvent?: (event: LiveTraceEvent) => void, apiClient?: ApiClient, projectId?: string): Promise<EvalRunResult>;
|
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
import { execFileSync } from 'child_process';
|
|
2
|
+
export interface InstallDependenciesOptions {
|
|
3
|
+
exec?: typeof execFileSync;
|
|
4
|
+
cacheBase?: string;
|
|
5
|
+
}
|
|
2
6
|
/**
|
|
3
7
|
* Install dependencies in workDir if a package.json is present.
|
|
4
8
|
* Detects the package manager from the lock file and runs the appropriate install command.
|
|
@@ -7,5 +11,9 @@ import { execFileSync } from 'child_process';
|
|
|
7
11
|
* When cacheBase is provided, node_modules is cached keyed by the lock file hash.
|
|
8
12
|
* Subsequent calls with the same lock file content skip the install entirely —
|
|
9
13
|
* node_modules is restored via APFS clone on macOS (near-instant) or recursive copy on Linux.
|
|
14
|
+
*
|
|
15
|
+
* @param workDir - The directory in which to install dependencies
|
|
16
|
+
* @param onProgress - Callback invoked with a human-readable message when an install runs
|
|
17
|
+
* @param options - Optional settings: exec override and cacheBase directory
|
|
10
18
|
*/
|
|
11
|
-
export declare function installDependencies(workDir: string,
|
|
19
|
+
export declare function installDependencies(workDir: string, onProgress: (message: string) => void, options?: InstallDependenciesOptions): Promise<void>;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-evaluator",
|
|
3
|
-
"version": "0.186.0",
|
|
3
|
+
"version": "0.188.0",
|
|
4
4
|
"description": "EvalForge Evaluator",
|
|
5
5
|
"bin": "./build/index.js",
|
|
6
6
|
"files": [
|
|
@@ -22,9 +22,9 @@
|
|
|
22
22
|
"@ai-sdk/openai": "^3.0.39",
|
|
23
23
|
"@anthropic-ai/claude-agent-sdk": "^0.2.63",
|
|
24
24
|
"@anthropic-ai/claude-code": "^2.1.63",
|
|
25
|
-
"@wix/eval-assertions": "0.
|
|
26
|
-
"@wix/evalforge-github-client": "0.
|
|
27
|
-
"@wix/evalforge-types": "0.
|
|
25
|
+
"@wix/eval-assertions": "0.71.0",
|
|
26
|
+
"@wix/evalforge-github-client": "0.71.0",
|
|
27
|
+
"@wix/evalforge-types": "0.96.0",
|
|
28
28
|
"ai": "^6.0.107",
|
|
29
29
|
"diff": "^7.0.0",
|
|
30
30
|
"tar": "^7.5.3",
|
|
@@ -40,7 +40,8 @@
|
|
|
40
40
|
"@wix/ambassador-evalforge-v1-eval-run": "^1.0.8",
|
|
41
41
|
"@wix/ambassador-evalforge-v1-preset": "^1.0.5",
|
|
42
42
|
"@wix/ambassador-evalforge-v1-project": "^1.0.6",
|
|
43
|
-
"@wix/ambassador-evalforge-v1-
|
|
43
|
+
"@wix/ambassador-evalforge-v1-site-provisioning": "^1.0.3",
|
|
44
|
+
"@wix/ambassador-evalforge-v1-test-scenario": "^1.0.6",
|
|
44
45
|
"@wix/http-client": "^2.85.0",
|
|
45
46
|
"@wix/http-client-testkit": "^1.764.0",
|
|
46
47
|
"dotenv": "^17.2.3",
|
|
@@ -71,5 +72,5 @@
|
|
|
71
72
|
"artifactId": "evalforge-evaluator"
|
|
72
73
|
}
|
|
73
74
|
},
|
|
74
|
-
"falconPackageHash": "
|
|
75
|
+
"falconPackageHash": "b8b148914d1fa0beef82451a376a65ce77a0c5aa77199a1f55f12f63"
|
|
75
76
|
}
|