@wix/evalforge-evaluator 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +753 -82
- package/build/index.js.map +4 -4
- package/build/index.mjs +749 -78
- package/build/index.mjs.map +4 -4
- package/build/types/diagnostics.d.ts +48 -0
- package/build/types/error-reporter.d.ts +2 -0
- package/package.json +3 -3
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Diagnostics Module
|
|
3
|
+
*
|
|
4
|
+
* Runs comprehensive environment diagnostics before executing evaluations.
|
|
5
|
+
* All results are reported via trace events for visibility in the UI.
|
|
6
|
+
*
|
|
7
|
+
* Each test produces REAL output that can be inspected in the logs.
|
|
8
|
+
* Test failures don't throw - they just return a failed status and continue.
|
|
9
|
+
*
|
|
10
|
+
* IMPORTANT: All diagnostic output is sent to the backend via trace events
|
|
11
|
+
* so you can see them without accessing the dev machine logs.
|
|
12
|
+
*/
|
|
13
|
+
import type { EvaluatorConfig } from './config.js';
|
|
14
|
+
/**
|
|
15
|
+
* Result of a single diagnostic test.
|
|
16
|
+
*/
|
|
17
|
+
export interface DiagnosticResult {
|
|
18
|
+
/** Name of the test */
|
|
19
|
+
name: string;
|
|
20
|
+
/** Whether the test passed */
|
|
21
|
+
passed: boolean;
|
|
22
|
+
/** Detailed result information - contains REAL command outputs */
|
|
23
|
+
details: Record<string, unknown>;
|
|
24
|
+
/** Error message if the test failed */
|
|
25
|
+
error?: string;
|
|
26
|
+
/** Duration of the test in milliseconds */
|
|
27
|
+
durationMs: number;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* All diagnostic results from a run.
|
|
31
|
+
*/
|
|
32
|
+
export interface DiagnosticReport {
|
|
33
|
+
/** Timestamp when diagnostics started */
|
|
34
|
+
startedAt: string;
|
|
35
|
+
/** Timestamp when diagnostics completed */
|
|
36
|
+
completedAt: string;
|
|
37
|
+
/** Total duration in milliseconds */
|
|
38
|
+
totalDurationMs: number;
|
|
39
|
+
/** Individual test results */
|
|
40
|
+
tests: DiagnosticResult[];
|
|
41
|
+
/** Summary counts */
|
|
42
|
+
summary: {
|
|
43
|
+
total: number;
|
|
44
|
+
passed: number;
|
|
45
|
+
failed: number;
|
|
46
|
+
};
|
|
47
|
+
}
|
|
48
|
+
export declare function runDiagnostics(config: EvaluatorConfig, evalRunId: string): Promise<DiagnosticReport>;
|
|
@@ -60,6 +60,8 @@ export declare function reportError(config: Partial<EvaluatorConfig> | null, pro
|
|
|
60
60
|
* Execution phases for error context.
|
|
61
61
|
*/
|
|
62
62
|
export declare const ExecutionPhase: {
|
|
63
|
+
/** Environment diagnostics phase (runs before execution) */
|
|
64
|
+
readonly DIAGNOSTICS: "diagnostics";
|
|
63
65
|
readonly CONFIG: "config-loading";
|
|
64
66
|
readonly API_CLIENT: "api-client-creation";
|
|
65
67
|
readonly FETCH_EVAL_RUN: "fetch-eval-run";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-evaluator",
|
|
3
|
-
"version": "0.14.0",
|
|
3
|
+
"version": "0.16.0",
|
|
4
4
|
"description": "EvalForge Evaluator",
|
|
5
5
|
"bin": "./build/index.js",
|
|
6
6
|
"files": [
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
"@ai-sdk/anthropic": "^3.0.2",
|
|
20
20
|
"@anthropic-ai/claude-agent-sdk": "^0.2.12",
|
|
21
21
|
"@anthropic-ai/claude-code": "^2.0.76",
|
|
22
|
-
"@wix/evalforge-types": "0.
|
|
22
|
+
"@wix/evalforge-types": "0.5.0",
|
|
23
23
|
"ai": "^6.0.6",
|
|
24
24
|
"tar": "^7.5.3",
|
|
25
25
|
"zod": "^4.3.5"
|
|
@@ -56,5 +56,5 @@
|
|
|
56
56
|
"artifactId": "evalforge-evaluator"
|
|
57
57
|
}
|
|
58
58
|
},
|
|
59
|
-
"falconPackageHash": "
|
|
59
|
+
"falconPackageHash": "bb3e47f8ebe607d070c4ed78ba7009040bf74231e10965f48ac67830"
|
|
60
60
|
}
|