claudeye 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of claudeye might be problematic. Click here for more details.
- package/.next/standalone/.next/BUILD_ID +1 -1
- package/.next/standalone/.next/app-path-routes-manifest.json +1 -0
- package/.next/standalone/.next/build-manifest.json +5 -5
- package/.next/standalone/.next/routes-manifest.json +9 -0
- package/.next/standalone/.next/server/app/_global-error/page/build-manifest.json +3 -3
- package/.next/standalone/.next/server/app/_global-error/page.js +1 -1
- package/.next/standalone/.next/server/app/_global-error/page.js.nft.json +1 -1
- package/.next/standalone/.next/server/app/_global-error/page_client-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/app/_global-error.html +2 -2
- package/.next/standalone/.next/server/app/_global-error.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/__PAGE__.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found/page/build-manifest.json +3 -3
- package/.next/standalone/.next/server/app/_not-found/page.js +1 -1
- package/.next/standalone/.next/server/app/_not-found/page.js.nft.json +1 -1
- package/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/app/_not-found.html +2 -2
- package/.next/standalone/.next/server/app/_not-found.rsc +5 -5
- package/.next/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +5 -5
- package/.next/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +5 -5
- package/.next/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
- package/.next/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +2 -2
- package/.next/standalone/.next/server/app/api/download/[project]/[session]/route/app-paths-manifest.json +3 -0
- package/.next/standalone/.next/server/app/api/download/[project]/[session]/route/build-manifest.json +11 -0
- package/.next/standalone/.next/server/app/api/download/[project]/[session]/route/server-reference-manifest.json +4 -0
- package/.next/standalone/.next/server/app/api/download/[project]/[session]/route.js +6 -0
- package/.next/standalone/.next/server/app/api/download/[project]/[session]/route.js.map +5 -0
- package/.next/standalone/.next/server/app/api/download/[project]/[session]/route.js.nft.json +1 -0
- package/.next/standalone/.next/server/app/api/download/[project]/[session]/route_client-reference-manifest.js +2 -0
- package/.next/standalone/.next/server/app/icon.png/route.js +2 -1
- package/.next/standalone/.next/server/app/icon.png/route.js.nft.json +1 -1
- package/.next/standalone/.next/server/app/page/build-manifest.json +3 -3
- package/.next/standalone/.next/server/app/page.js +1 -1
- package/.next/standalone/.next/server/app/page.js.nft.json +1 -1
- package/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/app/project/[name]/page/build-manifest.json +3 -3
- package/.next/standalone/.next/server/app/project/[name]/page.js +1 -1
- package/.next/standalone/.next/server/app/project/[name]/page.js.nft.json +1 -1
- package/.next/standalone/.next/server/app/project/[name]/page_client-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/app/project/[name]/session/[sessionId]/page/build-manifest.json +3 -3
- package/.next/standalone/.next/server/app/project/[name]/session/[sessionId]/page/react-loadable-manifest.json +1 -1
- package/.next/standalone/.next/server/app/project/[name]/session/[sessionId]/page/server-reference-manifest.json +35 -5
- package/.next/standalone/.next/server/app/project/[name]/session/[sessionId]/page.js +4 -3
- package/.next/standalone/.next/server/app/project/[name]/session/[sessionId]/page.js.nft.json +1 -1
- package/.next/standalone/.next/server/app/project/[name]/session/[sessionId]/page_client-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/app-paths-manifest.json +1 -0
- package/.next/standalone/.next/server/chunks/[root-of-the-server]__f408c708._.js +21 -0
- package/.next/standalone/.next/server/chunks/[root-of-the-server]__fde83e67._.js +3 -0
- package/.next/standalone/.next/server/chunks/ce889_server_app_api_download_[project]_[session]_route_actions_bbdd823f.js +3 -0
- package/.next/standalone/.next/server/chunks/node_modules_next_dist_esm_build_templates_app-route_64175717.js +3 -0
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__070e2009._.js +1 -1
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__0a745465._.js +3 -0
- package/.next/standalone/.next/server/chunks/ssr/{[root-of-the-server]__bc37261c._.js → [root-of-the-server]__14f58da3._.js} +2 -2
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__164d9311._.js +3 -0
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__2822fd21._.js +6 -0
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__31b4c2fd._.js +3 -0
- package/.next/standalone/.next/server/chunks/ssr/{[root-of-the-server]__45656df2._.js → [root-of-the-server]__4e339665._.js} +2 -2
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__55018089._.js +3 -0
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__6313e929._.js +3 -0
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__7e21395a._.js +1 -1
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__ee388ee0._.js +3 -0
- package/.next/standalone/.next/server/chunks/ssr/_0b4924bd._.js +3 -0
- package/.next/standalone/.next/server/chunks/ssr/{node_modules_9e089768._.js → _1404b353._.js} +2 -2
- package/.next/standalone/.next/server/chunks/ssr/_3d21dde5._.js +8 -0
- package/.next/standalone/.next/server/chunks/ssr/_fd9b1ff7._.js +3 -0
- package/.next/standalone/.next/server/chunks/ssr/{node_modules_next_dist_7769b563._.js → node_modules_next_dist_esm_eedfc1fd._.js} +2 -2
- package/.next/standalone/.next/server/middleware-build-manifest.js +3 -3
- package/.next/standalone/.next/server/pages/404.html +2 -2
- package/.next/standalone/.next/server/pages/500.html +2 -2
- package/.next/standalone/.next/server/server-reference-manifest.js +1 -1
- package/.next/standalone/.next/server/server-reference-manifest.json +35 -5
- package/.next/standalone/.next/static/chunks/0e266948a26a3cdf.js +1 -0
- package/.next/standalone/.next/static/chunks/2774382cf796393c.js +4 -0
- package/.next/standalone/.next/static/chunks/6189ca16caad4352.js +3 -0
- package/.next/standalone/.next/static/chunks/8111dbe882e31821.js +1 -0
- package/.next/standalone/.next/static/chunks/{5a424275276f2bb9.js → bdeaeb8c9876394b.js} +1 -1
- package/.next/standalone/.next/static/chunks/cdbb6932218650fd.js +1 -0
- package/.next/standalone/.next/static/chunks/ea03555bb726c073.css +1 -0
- package/.next/standalone/.next/static/chunks/f091501564eb2ea3.js +4 -0
- package/.next/standalone/.next/static/chunks/{turbopack-2315171089e56fad.js → turbopack-fc1f23734a087d36.js} +1 -1
- package/.next/standalone/README.md +528 -41
- package/.next/standalone/app/actions/run-enrichments.ts +26 -5
- package/.next/standalone/app/actions/run-evals.ts +26 -5
- package/.next/standalone/app/actions/run-subagent-enrichments.ts +89 -0
- package/.next/standalone/app/actions/run-subagent-evals.ts +88 -0
- package/.next/standalone/app/api/download/[project]/[session]/route.ts +49 -0
- package/.next/standalone/app/components/copy-button.tsx +37 -0
- package/.next/standalone/app/components/enrichment-results-panel.tsx +33 -13
- package/.next/standalone/app/components/eval-results-panel.tsx +33 -13
- package/.next/standalone/app/components/log-viewer/entry-row.tsx +43 -14
- package/.next/standalone/app/components/log-viewer/queue-divider.tsx +50 -7
- package/.next/standalone/app/components/log-viewer/tool-input-output.tsx +13 -3
- package/.next/standalone/app/components/project-list.tsx +11 -11
- package/.next/standalone/app/components/raw-log-viewer.tsx +80 -11
- package/.next/standalone/app/components/refresh-button.tsx +79 -0
- package/.next/standalone/app/components/sessions-list.tsx +23 -14
- package/.next/standalone/app/project/[name]/session/[sessionId]/page.tsx +23 -12
- package/.next/standalone/bin/claudeye.mjs +112 -25
- package/.next/standalone/components/navbar.tsx +2 -0
- package/.next/standalone/dist/app.js +10 -4
- package/.next/standalone/dist/condition-registry.js +20 -0
- package/.next/standalone/dist/enrich-registry.js +26 -3
- package/.next/standalone/dist/enrich-runner.js +68 -13
- package/.next/standalone/dist/registry.js +26 -3
- package/.next/standalone/dist/runner.js +78 -20
- package/.next/standalone/dist/server-spawn.js +58 -34
- package/.next/standalone/lib/cache/hash.ts +67 -0
- package/.next/standalone/lib/cache/index.ts +9 -0
- package/.next/standalone/lib/cache/local-backend.ts +81 -0
- package/.next/standalone/lib/cache/manager.ts +127 -0
- package/.next/standalone/lib/cache/types.ts +19 -0
- package/.next/standalone/lib/evals/app.ts +30 -7
- package/.next/standalone/lib/evals/condition-registry.ts +26 -0
- package/.next/standalone/lib/evals/enrich-registry.ts +29 -3
- package/.next/standalone/lib/evals/enrich-runner.ts +68 -14
- package/.next/standalone/lib/evals/enrich-types.ts +6 -1
- package/.next/standalone/lib/evals/index.ts +3 -1
- package/.next/standalone/lib/evals/registry.ts +29 -4
- package/.next/standalone/lib/evals/runner.ts +77 -20
- package/.next/standalone/lib/evals/server-spawn.ts +67 -41
- package/.next/standalone/lib/evals/types.ts +16 -0
- package/.next/standalone/lib/log-format.ts +22 -1
- package/.next/standalone/package-lock.json +244 -308
- package/.next/standalone/package.json +1 -1
- package/.next/standalone/scripts/dev.ts +3 -1
- package/.next/standalone/scripts/parse-script-args.ts +30 -2
- package/.next/standalone/scripts/start.ts +3 -1
- package/.next/standalone/tsconfig.tsbuildinfo +1 -1
- package/README.md +528 -41
- package/bin/claudeye.mjs +112 -25
- package/dist/app.d.ts +17 -3
- package/dist/app.d.ts.map +1 -1
- package/dist/app.js +10 -4
- package/dist/app.js.map +1 -1
- package/dist/condition-registry.d.ts +9 -0
- package/dist/condition-registry.d.ts.map +1 -0
- package/dist/condition-registry.js +20 -0
- package/dist/condition-registry.js.map +1 -0
- package/dist/enrich-registry.d.ts +5 -1
- package/dist/enrich-registry.d.ts.map +1 -1
- package/dist/enrich-registry.js +26 -3
- package/dist/enrich-registry.js.map +1 -1
- package/dist/enrich-runner.d.ts +3 -3
- package/dist/enrich-runner.d.ts.map +1 -1
- package/dist/enrich-runner.js +68 -13
- package/dist/enrich-runner.js.map +1 -1
- package/dist/enrich-types.d.ts +6 -1
- package/dist/enrich-types.d.ts.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/registry.d.ts +5 -2
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +26 -3
- package/dist/registry.js.map +1 -1
- package/dist/runner.d.ts +2 -2
- package/dist/runner.d.ts.map +1 -1
- package/dist/runner.js +78 -20
- package/dist/runner.js.map +1 -1
- package/dist/server-spawn.d.ts +2 -1
- package/dist/server-spawn.d.ts.map +1 -1
- package/dist/server-spawn.js +58 -34
- package/dist/server-spawn.js.map +1 -1
- package/dist/types.d.ts +14 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/.next/standalone/.next/server/chunks/[root-of-the-server]__24a1e50a._.js +0 -21
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__32f115c9._.js +0 -6
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__476a1712._.js +0 -3
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__4ddcabf2._.js +0 -3
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__ad593585._.js +0 -3
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__afd8e13b._.js +0 -3
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__dd7ee810._.js +0 -3
- package/.next/standalone/.next/server/chunks/ssr/[root-of-the-server]__ff3004de._.js +0 -3
- package/.next/standalone/.next/server/chunks/ssr/_53472598._.js +0 -3
- package/.next/standalone/.next/server/chunks/ssr/_863b6ca8._.js +0 -3
- package/.next/standalone/.next/server/chunks/ssr/_f7347c74._.js +0 -5
- package/.next/standalone/.next/static/chunks/2243ff2814e7a781.js +0 -3
- package/.next/standalone/.next/static/chunks/50531467396cea91.css +0 -1
- package/.next/standalone/.next/static/chunks/8f288c01f8d7ef2d.js +0 -1
- package/.next/standalone/.next/static/chunks/abab1b00b2788443.js +0 -4
- package/.next/standalone/.next/static/chunks/d250d7f6f0a8c325.js +0 -1
- package/.next/standalone/.next/static/chunks/d7a572a8b7eb1ec8.js +0 -1
- package/.next/standalone/.next/static/chunks/fb1b0b9da3f03023.js +0 -4
- /package/.next/standalone/.next/static/{LoGIEEP4cORCqcFv-Ywg0 → 5JsV7rfAEOIwNOQPaX3UP}/_buildManifest.js +0 -0
- /package/.next/standalone/.next/static/{LoGIEEP4cORCqcFv-Ywg0 → 5JsV7rfAEOIwNOQPaX3UP}/_clientMiddlewareManifest.json +0 -0
- /package/.next/standalone/.next/static/{LoGIEEP4cORCqcFv-Ywg0 → 5JsV7rfAEOIwNOQPaX3UP}/_ssgManifest.js +0 -0
|
@@ -3,41 +3,95 @@
|
|
|
3
3
|
* Each enricher is individually try/caught so one failure doesn't block others.
|
|
4
4
|
*/
|
|
5
5
|
import { getRegisteredEnrichers } from "./enrich-registry";
|
|
6
|
-
import
|
|
7
|
-
import type {
|
|
6
|
+
import { getGlobalCondition } from "./condition-registry";
|
|
7
|
+
import type { EvalContext, EvalLogEntry, EvalLogStats } from "./types";
|
|
8
|
+
import type { EnrichRunResult, EnrichRunSummary, RegisteredEnricher } from "./enrich-types";
|
|
8
9
|
|
|
9
10
|
export async function runAllEnrichers(
|
|
10
11
|
entries: EvalLogEntry[],
|
|
11
12
|
stats: EvalLogStats,
|
|
12
13
|
projectName: string,
|
|
13
14
|
sessionId: string,
|
|
15
|
+
enrichersToRun?: RegisteredEnricher[],
|
|
16
|
+
contextOverrides?: Partial<EvalContext>,
|
|
14
17
|
): Promise<EnrichRunSummary> {
|
|
15
|
-
const registeredEnrichers = getRegisteredEnrichers();
|
|
18
|
+
const registeredEnrichers = enrichersToRun ?? getRegisteredEnrichers();
|
|
16
19
|
const results: EnrichRunResult[] = [];
|
|
17
20
|
const overallStart = performance.now();
|
|
21
|
+
const context: EvalContext = { entries, stats, projectName, sessionId, scope: 'session', ...contextOverrides };
|
|
18
22
|
|
|
19
|
-
|
|
20
|
-
|
|
23
|
+
// Check global condition first
|
|
24
|
+
const globalCondition = getGlobalCondition();
|
|
25
|
+
let globalSkip = false;
|
|
26
|
+
if (globalCondition) {
|
|
21
27
|
try {
|
|
22
|
-
const
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
28
|
+
const result = await globalCondition(context);
|
|
29
|
+
if (!result) globalSkip = true;
|
|
30
|
+
} catch {
|
|
31
|
+
globalSkip = true;
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
if (globalSkip) {
|
|
36
|
+
// All enrichers skipped due to global condition
|
|
37
|
+
for (const { name } of registeredEnrichers) {
|
|
27
38
|
results.push({
|
|
28
39
|
name,
|
|
29
40
|
data: {},
|
|
30
|
-
durationMs,
|
|
31
|
-
|
|
41
|
+
durationMs: 0,
|
|
42
|
+
skipped: true,
|
|
32
43
|
});
|
|
33
44
|
}
|
|
45
|
+
} else {
|
|
46
|
+
for (const { name, fn, condition } of registeredEnrichers) {
|
|
47
|
+
// Check per-enrichment condition
|
|
48
|
+
if (condition) {
|
|
49
|
+
try {
|
|
50
|
+
const shouldRun = await condition(context);
|
|
51
|
+
if (!shouldRun) {
|
|
52
|
+
results.push({
|
|
53
|
+
name,
|
|
54
|
+
data: {},
|
|
55
|
+
durationMs: 0,
|
|
56
|
+
skipped: true,
|
|
57
|
+
});
|
|
58
|
+
continue;
|
|
59
|
+
}
|
|
60
|
+
} catch (err) {
|
|
61
|
+
results.push({
|
|
62
|
+
name,
|
|
63
|
+
data: {},
|
|
64
|
+
durationMs: 0,
|
|
65
|
+
error: `Condition error: ${err instanceof Error ? err.message : String(err)}`,
|
|
66
|
+
});
|
|
67
|
+
continue;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
const start = performance.now();
|
|
72
|
+
try {
|
|
73
|
+
const data = await fn(context);
|
|
74
|
+
const durationMs = Math.round(performance.now() - start);
|
|
75
|
+
results.push({ name, data, durationMs });
|
|
76
|
+
} catch (err) {
|
|
77
|
+
const durationMs = Math.round(performance.now() - start);
|
|
78
|
+
results.push({
|
|
79
|
+
name,
|
|
80
|
+
data: {},
|
|
81
|
+
durationMs,
|
|
82
|
+
error: err instanceof Error ? err.message : String(err),
|
|
83
|
+
});
|
|
84
|
+
}
|
|
85
|
+
}
|
|
34
86
|
}
|
|
35
87
|
|
|
36
88
|
const totalDurationMs = Math.round(performance.now() - overallStart);
|
|
37
89
|
let errorCount = 0;
|
|
90
|
+
let skippedCount = 0;
|
|
38
91
|
for (const r of results) {
|
|
39
|
-
if (r.
|
|
92
|
+
if (r.skipped) skippedCount++;
|
|
93
|
+
else if (r.error) errorCount++;
|
|
40
94
|
}
|
|
41
95
|
|
|
42
|
-
return { results, totalDurationMs, errorCount };
|
|
96
|
+
return { results, totalDurationMs, errorCount, skippedCount };
|
|
43
97
|
}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* Enrichers return key-value metadata for sessions (as opposed to evals which grade them).
|
|
4
4
|
* Reuses EvalContext from types.ts — no duplication.
|
|
5
5
|
*/
|
|
6
|
-
import type { EvalContext } from "./types";
|
|
6
|
+
import type { EvalContext, ConditionFunction, EvalScope } from "./types";
|
|
7
7
|
|
|
8
8
|
/** Allowed value types for enrichment results. */
|
|
9
9
|
export type EnrichmentValue = string | number | boolean;
|
|
@@ -20,6 +20,9 @@ export type EnrichFunction = (
|
|
|
20
20
|
export interface RegisteredEnricher {
|
|
21
21
|
name: string;
|
|
22
22
|
fn: EnrichFunction;
|
|
23
|
+
condition?: ConditionFunction;
|
|
24
|
+
scope: EvalScope;
|
|
25
|
+
subagentType?: string;
|
|
23
26
|
}
|
|
24
27
|
|
|
25
28
|
/** Result of running a single enricher. */
|
|
@@ -28,6 +31,7 @@ export interface EnrichRunResult {
|
|
|
28
31
|
data: EnrichmentResult;
|
|
29
32
|
durationMs: number;
|
|
30
33
|
error?: string;
|
|
34
|
+
skipped?: boolean;
|
|
31
35
|
}
|
|
32
36
|
|
|
33
37
|
/** Summary of running all registered enrichers. */
|
|
@@ -35,4 +39,5 @@ export interface EnrichRunSummary {
|
|
|
35
39
|
results: EnrichRunResult[];
|
|
36
40
|
totalDurationMs: number;
|
|
37
41
|
errorCount: number;
|
|
42
|
+
skippedCount: number;
|
|
38
43
|
}
|
|
@@ -3,8 +3,9 @@
|
|
|
3
3
|
* This is the public API surface published via `dist/`.
|
|
4
4
|
*/
|
|
5
5
|
export { createApp } from "./app";
|
|
6
|
-
export type { ClaudeyeApp, ClaudeyeAppOptions } from "./app";
|
|
6
|
+
export type { ClaudeyeApp, ClaudeyeAppOptions, EvalOptions, EnrichOptions } from "./app";
|
|
7
7
|
export type {
|
|
8
|
+
EvalScope,
|
|
8
9
|
EvalContext,
|
|
9
10
|
EvalResult,
|
|
10
11
|
EvalFunction,
|
|
@@ -14,6 +15,7 @@ export type {
|
|
|
14
15
|
RegisteredEval,
|
|
15
16
|
EvalRunResult,
|
|
16
17
|
EvalRunSummary,
|
|
18
|
+
ConditionFunction,
|
|
17
19
|
} from "./types";
|
|
18
20
|
export type {
|
|
19
21
|
EnrichmentValue,
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* Using globalThis ensures the registry survives webpack chunk splitting
|
|
4
4
|
* and remains a true singleton across dynamic imports.
|
|
5
5
|
*/
|
|
6
|
-
import type { EvalFunction, RegisteredEval } from "./types";
|
|
6
|
+
import type { ConditionFunction, EvalFunction, EvalScope, RegisteredEval } from "./types";
|
|
7
7
|
|
|
8
8
|
const REGISTRY_KEY = "__CLAUDEYE_EVAL_REGISTRY__";
|
|
9
9
|
|
|
@@ -19,14 +19,23 @@ function getRegistry(): RegisteredEval[] {
|
|
|
19
19
|
return g[REGISTRY_KEY];
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
-
export function registerEval(
|
|
22
|
+
export function registerEval(
|
|
23
|
+
name: string,
|
|
24
|
+
fn: EvalFunction,
|
|
25
|
+
condition?: ConditionFunction,
|
|
26
|
+
scope: EvalScope = 'session',
|
|
27
|
+
subagentType?: string,
|
|
28
|
+
): void {
|
|
23
29
|
const registry = getRegistry();
|
|
30
|
+
const entry: RegisteredEval = { name, fn, scope };
|
|
31
|
+
if (condition) entry.condition = condition;
|
|
32
|
+
if (subagentType) entry.subagentType = subagentType;
|
|
24
33
|
// Replace if an eval with the same name already exists
|
|
25
34
|
const idx = registry.findIndex((e) => e.name === name);
|
|
26
35
|
if (idx >= 0) {
|
|
27
|
-
registry[idx] =
|
|
36
|
+
registry[idx] = entry;
|
|
28
37
|
} else {
|
|
29
|
-
registry.push(
|
|
38
|
+
registry.push(entry);
|
|
30
39
|
}
|
|
31
40
|
}
|
|
32
41
|
|
|
@@ -34,6 +43,22 @@ export function getRegisteredEvals(): RegisteredEval[] {
|
|
|
34
43
|
return getRegistry();
|
|
35
44
|
}
|
|
36
45
|
|
|
46
|
+
export function getSessionScopedEvals(): RegisteredEval[] {
|
|
47
|
+
return getRegistry().filter((e) => e.scope === 'session' || e.scope === 'both');
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
export function getSubagentScopedEvals(subagentType?: string): RegisteredEval[] {
|
|
51
|
+
return getRegistry().filter((e) => {
|
|
52
|
+
if (e.scope !== 'subagent' && e.scope !== 'both') return false;
|
|
53
|
+
if (e.subagentType && subagentType && e.subagentType !== subagentType) return false;
|
|
54
|
+
return true;
|
|
55
|
+
});
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
export function hasSubagentEvals(): boolean {
|
|
59
|
+
return getRegistry().some((e) => e.scope === 'subagent' || e.scope === 'both');
|
|
60
|
+
}
|
|
61
|
+
|
|
37
62
|
export function hasEvals(): boolean {
|
|
38
63
|
return getRegistry().length > 0;
|
|
39
64
|
}
|
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
* Each eval is individually try/caught so one failure doesn't block others.
|
|
4
4
|
*/
|
|
5
5
|
import { getRegisteredEvals } from "./registry";
|
|
6
|
-
import
|
|
6
|
+
import { getGlobalCondition } from "./condition-registry";
|
|
7
|
+
import type { EvalContext, EvalLogEntry, EvalLogStats, EvalRunResult, EvalRunSummary, RegisteredEval } from "./types";
|
|
7
8
|
|
|
8
9
|
function clampScore(score: number | undefined): number {
|
|
9
10
|
if (score === undefined || score === null) return 1;
|
|
@@ -15,45 +16,101 @@ export async function runAllEvals(
|
|
|
15
16
|
stats: EvalLogStats,
|
|
16
17
|
projectName: string,
|
|
17
18
|
sessionId: string,
|
|
19
|
+
evalsToRun?: RegisteredEval[],
|
|
20
|
+
contextOverrides?: Partial<EvalContext>,
|
|
18
21
|
): Promise<EvalRunSummary> {
|
|
19
|
-
const registeredEvals = getRegisteredEvals();
|
|
22
|
+
const registeredEvals = evalsToRun ?? getRegisteredEvals();
|
|
20
23
|
const results: EvalRunResult[] = [];
|
|
21
24
|
const overallStart = performance.now();
|
|
25
|
+
const context: EvalContext = { entries, stats, projectName, sessionId, scope: 'session', ...contextOverrides };
|
|
22
26
|
|
|
23
|
-
|
|
24
|
-
|
|
27
|
+
// Check global condition first
|
|
28
|
+
const globalCondition = getGlobalCondition();
|
|
29
|
+
let globalSkip = false;
|
|
30
|
+
if (globalCondition) {
|
|
25
31
|
try {
|
|
26
|
-
const result = await
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
} catch (err) {
|
|
37
|
-
const durationMs = Math.round(performance.now() - start);
|
|
32
|
+
const result = await globalCondition(context);
|
|
33
|
+
if (!result) globalSkip = true;
|
|
34
|
+
} catch {
|
|
35
|
+
globalSkip = true;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
if (globalSkip) {
|
|
40
|
+
// All evals skipped due to global condition
|
|
41
|
+
for (const { name } of registeredEvals) {
|
|
38
42
|
results.push({
|
|
39
43
|
name,
|
|
40
44
|
pass: false,
|
|
41
45
|
score: 0,
|
|
42
|
-
durationMs,
|
|
43
|
-
|
|
46
|
+
durationMs: 0,
|
|
47
|
+
skipped: true,
|
|
44
48
|
});
|
|
45
49
|
}
|
|
50
|
+
} else {
|
|
51
|
+
for (const { name, fn, condition } of registeredEvals) {
|
|
52
|
+
// Check per-eval condition
|
|
53
|
+
if (condition) {
|
|
54
|
+
try {
|
|
55
|
+
const shouldRun = await condition(context);
|
|
56
|
+
if (!shouldRun) {
|
|
57
|
+
results.push({
|
|
58
|
+
name,
|
|
59
|
+
pass: false,
|
|
60
|
+
score: 0,
|
|
61
|
+
durationMs: 0,
|
|
62
|
+
skipped: true,
|
|
63
|
+
});
|
|
64
|
+
continue;
|
|
65
|
+
}
|
|
66
|
+
} catch (err) {
|
|
67
|
+
results.push({
|
|
68
|
+
name,
|
|
69
|
+
pass: false,
|
|
70
|
+
score: 0,
|
|
71
|
+
durationMs: 0,
|
|
72
|
+
error: `Condition error: ${err instanceof Error ? err.message : String(err)}`,
|
|
73
|
+
});
|
|
74
|
+
continue;
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const start = performance.now();
|
|
79
|
+
try {
|
|
80
|
+
const result = await fn(context);
|
|
81
|
+
const durationMs = Math.round(performance.now() - start);
|
|
82
|
+
results.push({
|
|
83
|
+
name,
|
|
84
|
+
pass: result.pass,
|
|
85
|
+
score: clampScore(result.score),
|
|
86
|
+
message: result.message,
|
|
87
|
+
metadata: result.metadata,
|
|
88
|
+
durationMs,
|
|
89
|
+
});
|
|
90
|
+
} catch (err) {
|
|
91
|
+
const durationMs = Math.round(performance.now() - start);
|
|
92
|
+
results.push({
|
|
93
|
+
name,
|
|
94
|
+
pass: false,
|
|
95
|
+
score: 0,
|
|
96
|
+
durationMs,
|
|
97
|
+
error: err instanceof Error ? err.message : String(err),
|
|
98
|
+
});
|
|
99
|
+
}
|
|
100
|
+
}
|
|
46
101
|
}
|
|
47
102
|
|
|
48
103
|
const totalDurationMs = Math.round(performance.now() - overallStart);
|
|
49
104
|
let passCount = 0;
|
|
50
105
|
let failCount = 0;
|
|
51
106
|
let errorCount = 0;
|
|
107
|
+
let skippedCount = 0;
|
|
52
108
|
for (const r of results) {
|
|
53
|
-
if (r.
|
|
109
|
+
if (r.skipped) skippedCount++;
|
|
110
|
+
else if (r.error) errorCount++;
|
|
54
111
|
else if (r.pass) passCount++;
|
|
55
112
|
else failCount++;
|
|
56
113
|
}
|
|
57
114
|
|
|
58
|
-
return { results, totalDurationMs, passCount, failCount, errorCount };
|
|
115
|
+
return { results, totalDurationMs, passCount, failCount, errorCount, skippedCount };
|
|
59
116
|
}
|
|
@@ -7,26 +7,43 @@ import { spawn, execSync } from "node:child_process";
|
|
|
7
7
|
import { createServer } from "node:net";
|
|
8
8
|
import { resolve, dirname } from "node:path";
|
|
9
9
|
import { existsSync } from "node:fs";
|
|
10
|
-
import { platform } from "node:os";
|
|
10
|
+
import { platform, networkInterfaces } from "node:os";
|
|
11
11
|
|
|
12
12
|
interface SpawnOptions {
|
|
13
13
|
open?: boolean;
|
|
14
|
+
host?: string;
|
|
14
15
|
}
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
|
|
17
|
+
function getNetworkAddress(): string {
|
|
18
|
+
for (const addrs of Object.values(networkInterfaces())) {
|
|
19
|
+
for (const iface of addrs!) {
|
|
20
|
+
if (iface.family === "IPv4" && !iface.internal) {
|
|
21
|
+
return iface.address;
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
return "0.0.0.0";
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
function resolveBindAddress(host: string): string {
|
|
29
|
+
return host === "localhost" ? "127.0.0.1" : host;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export function findAvailablePort(preferred: number, host = "localhost"): Promise<number> {
|
|
33
|
+
const bindAddr = resolveBindAddress(host);
|
|
34
|
+
return new Promise((resolve, reject) => {
|
|
18
35
|
const srv = createServer();
|
|
19
|
-
srv.listen(preferred,
|
|
20
|
-
srv.close(() =>
|
|
36
|
+
srv.listen(preferred, bindAddr, () => {
|
|
37
|
+
srv.close(() => resolve(preferred));
|
|
21
38
|
});
|
|
22
39
|
srv.on("error", () => {
|
|
23
|
-
const
|
|
24
|
-
|
|
25
|
-
const addr =
|
|
40
|
+
const fallback = createServer();
|
|
41
|
+
fallback.listen(0, bindAddr, () => {
|
|
42
|
+
const addr = fallback.address();
|
|
26
43
|
const port = typeof addr === "object" && addr ? addr.port : 0;
|
|
27
|
-
|
|
44
|
+
fallback.close(() => resolve(port));
|
|
28
45
|
});
|
|
29
|
-
|
|
46
|
+
fallback.on("error", reject);
|
|
30
47
|
});
|
|
31
48
|
});
|
|
32
49
|
}
|
|
@@ -81,66 +98,75 @@ function resolveServerInfo(): { mode: "standalone"; script: string; cwd: string
|
|
|
81
98
|
return { mode: "dev" };
|
|
82
99
|
}
|
|
83
100
|
|
|
101
|
+
function logServerInfo(host: string, port: number, localUrl: string, evalsModule?: string): void {
|
|
102
|
+
console.log(`Starting Claudeye dashboard...`);
|
|
103
|
+
if (evalsModule) {
|
|
104
|
+
console.log(` Evals: ${evalsModule}`);
|
|
105
|
+
}
|
|
106
|
+
if (host === "0.0.0.0") {
|
|
107
|
+
console.log(` Local: ${localUrl}`);
|
|
108
|
+
console.log(` Network: http://${getNetworkAddress()}:${port}`);
|
|
109
|
+
} else {
|
|
110
|
+
console.log(` URL: http://${host}:${port}`);
|
|
111
|
+
}
|
|
112
|
+
console.log();
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
function spawnChildProcess(serverInfo: ReturnType<typeof resolveServerInfo>, env: NodeJS.ProcessEnv, port: number): ReturnType<typeof spawn> {
|
|
116
|
+
if (serverInfo.mode === "standalone") {
|
|
117
|
+
return spawn(process.execPath, [serverInfo.script], {
|
|
118
|
+
cwd: serverInfo.cwd,
|
|
119
|
+
stdio: "inherit",
|
|
120
|
+
env,
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
return spawn("npx", ["next", "dev", "--port", String(port)], {
|
|
124
|
+
stdio: "inherit",
|
|
125
|
+
shell: true,
|
|
126
|
+
env,
|
|
127
|
+
});
|
|
128
|
+
}
|
|
129
|
+
|
|
84
130
|
export async function spawnServer(preferredPort: number, options: SpawnOptions): Promise<void> {
|
|
85
|
-
const
|
|
131
|
+
const host = options.host ?? "localhost";
|
|
132
|
+
const port = await findAvailablePort(preferredPort, host);
|
|
86
133
|
if (port !== preferredPort) {
|
|
87
134
|
console.log(`Port ${preferredPort} is busy, using port ${port} instead.`);
|
|
88
135
|
}
|
|
89
136
|
|
|
90
|
-
const
|
|
137
|
+
const localUrl = `http://localhost:${port}`;
|
|
91
138
|
const evalsModule = process.argv[1] ? resolve(process.argv[1]) : undefined;
|
|
92
|
-
|
|
93
|
-
console.log(`Starting Claudeye dashboard...`);
|
|
94
|
-
if (evalsModule) {
|
|
95
|
-
console.log(` Evals: ${evalsModule}`);
|
|
96
|
-
}
|
|
97
|
-
console.log(` URL: ${url}\n`);
|
|
139
|
+
logServerInfo(host, port, localUrl, evalsModule);
|
|
98
140
|
|
|
99
141
|
const env: NodeJS.ProcessEnv = {
|
|
100
142
|
...process.env,
|
|
101
143
|
PORT: String(port),
|
|
102
|
-
HOSTNAME:
|
|
144
|
+
HOSTNAME: host,
|
|
103
145
|
};
|
|
104
146
|
if (evalsModule) {
|
|
105
147
|
env.CLAUDEYE_EVALS_MODULE = evalsModule;
|
|
106
148
|
}
|
|
107
149
|
|
|
108
|
-
const
|
|
109
|
-
|
|
110
|
-
let child: ReturnType<typeof spawn>;
|
|
111
|
-
if (serverInfo.mode === "standalone") {
|
|
112
|
-
child = spawn(process.execPath, [serverInfo.script], {
|
|
113
|
-
cwd: serverInfo.cwd,
|
|
114
|
-
stdio: "inherit",
|
|
115
|
-
env,
|
|
116
|
-
});
|
|
117
|
-
} else {
|
|
118
|
-
child = spawn("npx", ["next", "dev", "--port", String(port)], {
|
|
119
|
-
stdio: "inherit",
|
|
120
|
-
shell: true,
|
|
121
|
-
env,
|
|
122
|
-
});
|
|
123
|
-
}
|
|
150
|
+
const child = spawnChildProcess(resolveServerInfo(), env, port);
|
|
124
151
|
|
|
125
152
|
if (options.open) {
|
|
126
|
-
waitForServer(
|
|
127
|
-
console.warn(`\nServer did not respond within 15 s.\nOpen manually: ${
|
|
153
|
+
waitForServer(localUrl).then(() => openBrowser(localUrl)).catch(() => {
|
|
154
|
+
console.warn(`\nServer did not respond within 15 s.\nOpen manually: ${localUrl}\n`);
|
|
128
155
|
});
|
|
129
156
|
}
|
|
130
157
|
|
|
131
158
|
// Forward signals for clean shutdown
|
|
132
|
-
const shutdown = (signal: NodeJS.Signals) => {
|
|
159
|
+
const shutdown = (signal: NodeJS.Signals): void => {
|
|
133
160
|
child.kill(signal);
|
|
134
161
|
setTimeout(() => process.exit(0), 2000).unref();
|
|
135
162
|
};
|
|
136
163
|
process.on("SIGINT", () => shutdown("SIGINT"));
|
|
137
164
|
process.on("SIGTERM", () => shutdown("SIGTERM"));
|
|
138
165
|
|
|
139
|
-
|
|
166
|
+
// Block until the child exits — server runs until killed
|
|
167
|
+
await new Promise<never>(() => {
|
|
140
168
|
child.on("exit", (code) => {
|
|
141
169
|
process.exit(code ?? 0);
|
|
142
170
|
});
|
|
143
|
-
// Never resolves — server runs until killed
|
|
144
|
-
void res;
|
|
145
171
|
});
|
|
146
172
|
}
|
|
@@ -38,12 +38,20 @@ export interface EvalLogStats {
|
|
|
38
38
|
models: string[];
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
+
/** Scope for eval/enrichment registration. */
|
|
42
|
+
export type EvalScope = 'session' | 'subagent' | 'both';
|
|
43
|
+
|
|
41
44
|
/** Context passed to each eval function. */
|
|
42
45
|
export interface EvalContext {
|
|
43
46
|
entries: EvalLogEntry[];
|
|
44
47
|
stats: EvalLogStats;
|
|
45
48
|
projectName: string;
|
|
46
49
|
sessionId: string;
|
|
50
|
+
scope: 'session' | 'subagent';
|
|
51
|
+
subagentId?: string;
|
|
52
|
+
subagentType?: string;
|
|
53
|
+
subagentDescription?: string;
|
|
54
|
+
parentSessionId?: string;
|
|
47
55
|
}
|
|
48
56
|
|
|
49
57
|
/** Result returned by an eval function. */
|
|
@@ -57,10 +65,16 @@ export interface EvalResult {
|
|
|
57
65
|
/** An eval function signature. */
|
|
58
66
|
export type EvalFunction = (context: EvalContext) => EvalResult | Promise<EvalResult>;
|
|
59
67
|
|
|
68
|
+
/** A condition function that gates eval/enrichment execution. */
|
|
69
|
+
export type ConditionFunction = (context: EvalContext) => boolean | Promise<boolean>;
|
|
70
|
+
|
|
60
71
|
/** An eval function stored in the registry. */
|
|
61
72
|
export interface RegisteredEval {
|
|
62
73
|
name: string;
|
|
63
74
|
fn: EvalFunction;
|
|
75
|
+
condition?: ConditionFunction;
|
|
76
|
+
scope: EvalScope;
|
|
77
|
+
subagentType?: string;
|
|
64
78
|
}
|
|
65
79
|
|
|
66
80
|
/** Result of running a single eval. */
|
|
@@ -72,6 +86,7 @@ export interface EvalRunResult {
|
|
|
72
86
|
metadata?: Record<string, unknown>;
|
|
73
87
|
durationMs: number;
|
|
74
88
|
error?: string;
|
|
89
|
+
skipped?: boolean;
|
|
75
90
|
}
|
|
76
91
|
|
|
77
92
|
/** Summary of running all registered evals. */
|
|
@@ -81,4 +96,5 @@ export interface EvalRunSummary {
|
|
|
81
96
|
passCount: number;
|
|
82
97
|
failCount: number;
|
|
83
98
|
errorCount: number;
|
|
99
|
+
skippedCount: number;
|
|
84
100
|
}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* Separated from log-entries.ts to avoid pulling in Node.js fs modules
|
|
4
4
|
* when imported from "use client" components.
|
|
5
5
|
*/
|
|
6
|
-
import type { ToolUseBlock, GenericEntry } from "./log-entries";
|
|
6
|
+
import type { LogEntry, ToolUseBlock, GenericEntry } from "./log-entries";
|
|
7
7
|
|
|
8
8
|
export function formatInput(block: ToolUseBlock): string {
|
|
9
9
|
return JSON.stringify(block.input, null, 2);
|
|
@@ -13,6 +13,27 @@ export function formatRaw(entry: GenericEntry): string {
|
|
|
13
13
|
return JSON.stringify(entry.raw, null, 2);
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
+
/**
|
|
17
|
+
* Extracts a copyable plain-text representation from any log entry type.
|
|
18
|
+
*/
|
|
19
|
+
export function getEntryTextContent(entry: LogEntry): string {
|
|
20
|
+
switch (entry.type) {
|
|
21
|
+
case "user":
|
|
22
|
+
return entry.message.content;
|
|
23
|
+
case "assistant":
|
|
24
|
+
return entry.message.content
|
|
25
|
+
.filter((b): b is { type: "text"; text: string } => b.type === "text")
|
|
26
|
+
.map((b) => b.text)
|
|
27
|
+
.join("\n");
|
|
28
|
+
case "file-history-snapshot":
|
|
29
|
+
case "progress":
|
|
30
|
+
case "system":
|
|
31
|
+
return formatRaw(entry);
|
|
32
|
+
case "queue-operation":
|
|
33
|
+
return entry.label;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
16
37
|
const tsFormatter = new Intl.DateTimeFormat("en-US", {
|
|
17
38
|
month: "short",
|
|
18
39
|
day: "numeric",
|