@wix/evalforge-evaluator 0.13.0 → 0.15.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Diagnostics Module
3
+ *
4
+ * Runs comprehensive environment diagnostics before executing evaluations.
5
+ * All results are reported via trace events for visibility in the UI.
6
+ *
7
+ * This helps debug issues on remote machines (Dev Machines) where we can't
8
+ * easily access logs.
9
+ */
10
+ import type { EvaluatorConfig } from './config.js';
11
+ /**
12
+ * Result of a single diagnostic test.
13
+ */
14
+ export interface DiagnosticResult {
15
+ /** Name of the test */
16
+ name: string;
17
+ /** Whether the test passed */
18
+ passed: boolean;
19
+ /** Detailed result information */
20
+ details: Record<string, unknown>;
21
+ /** Error message if the test failed */
22
+ error?: string;
23
+ /** Duration of the test in milliseconds */
24
+ durationMs: number;
25
+ }
26
+ /**
27
+ * All diagnostic results from a run.
28
+ */
29
+ export interface DiagnosticReport {
30
+ /** Timestamp when diagnostics started */
31
+ startedAt: string;
32
+ /** Timestamp when diagnostics completed */
33
+ completedAt: string;
34
+ /** Total duration in milliseconds */
35
+ totalDurationMs: number;
36
+ /** Individual test results */
37
+ tests: DiagnosticResult[];
38
+ /** Summary counts */
39
+ summary: {
40
+ total: number;
41
+ passed: number;
42
+ failed: number;
43
+ };
44
+ }
45
+ /**
46
+ * Run all diagnostics and return a comprehensive report.
47
+ *
48
+ * @param config - Evaluator configuration
49
+ * @param evalRunId - The eval run ID for trace events
50
+ * @returns Diagnostic report with all test results
51
+ */
52
+ export declare function runDiagnostics(config: EvaluatorConfig, evalRunId: string): Promise<DiagnosticReport>;
@@ -60,6 +60,8 @@ export declare function reportError(config: Partial<EvaluatorConfig> | null, pro
60
60
  * Execution phases for error context.
61
61
  */
62
62
  export declare const ExecutionPhase: {
63
+ /** Environment diagnostics phase (runs before execution) */
64
+ readonly DIAGNOSTICS: "diagnostics";
63
65
  readonly CONFIG: "config-loading";
64
66
  readonly API_CLIENT: "api-client-creation";
65
67
  readonly FETCH_EVAL_RUN: "fetch-eval-run";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-evaluator",
3
- "version": "0.13.0",
3
+ "version": "0.15.0",
4
4
  "description": "EvalForge Evaluator",
5
5
  "bin": "./build/index.js",
6
6
  "files": [
@@ -19,7 +19,7 @@
19
19
  "@ai-sdk/anthropic": "^3.0.2",
20
20
  "@anthropic-ai/claude-agent-sdk": "^0.2.12",
21
21
  "@anthropic-ai/claude-code": "^2.0.76",
22
- "@wix/evalforge-types": "0.4.0",
22
+ "@wix/evalforge-types": "0.5.0",
23
23
  "ai": "^6.0.6",
24
24
  "tar": "^7.5.3",
25
25
  "zod": "^4.3.5"
@@ -56,5 +56,5 @@
56
56
  "artifactId": "evalforge-evaluator"
57
57
  }
58
58
  },
59
- "falconPackageHash": "b3ce91898c7759199a30ec3fbe1f1eb5d6107ef3c6268acbf377e51c"
59
+ "falconPackageHash": "2fa7adc6b9b22ef16e175101994266c21244a702ece07d027d38f44d"
60
60
  }