selftune 0.2.29 → 0.2.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/local-dashboard/dist/assets/index-B7v_o1WC.js +15 -0
- package/apps/local-dashboard/dist/assets/index-CrO77SVi.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +1 -0
- package/apps/local-dashboard/dist/index.html +3 -3
- package/cli/selftune/auto-update.ts +40 -8
- package/cli/selftune/command-surface.ts +1 -1
- package/cli/selftune/constants.ts +5 -0
- package/cli/selftune/dashboard-action-events.ts +117 -0
- package/cli/selftune/dashboard-action-instrumentation.ts +103 -0
- package/cli/selftune/dashboard-action-result.ts +90 -0
- package/cli/selftune/dashboard-action-stream.ts +252 -0
- package/cli/selftune/dashboard-contract.ts +81 -1
- package/cli/selftune/dashboard-server.ts +133 -16
- package/cli/selftune/eval/hooks-to-evals.ts +157 -0
- package/cli/selftune/eval/synthetic-evals.ts +33 -2
- package/cli/selftune/eval/unit-test-cli.ts +53 -5
- package/cli/selftune/evolution/validate-host-replay.ts +191 -14
- package/cli/selftune/index.ts +4 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +117 -8
- package/cli/selftune/localdb/schema.ts +34 -0
- package/cli/selftune/registry/github-install.ts +256 -0
- package/cli/selftune/registry/index.ts +1 -1
- package/cli/selftune/registry/install.ts +58 -7
- package/cli/selftune/routes/actions.ts +273 -42
- package/cli/selftune/testing-readiness.ts +203 -10
- package/cli/selftune/utils/llm-call.ts +90 -1
- package/package.json +1 -1
- package/packages/dashboard-core/src/routes/manifest.ts +2 -2
- package/packages/ui/src/components/EvolutionTimeline.tsx +1 -1
- package/packages/ui/src/components/SkillReportPanels.tsx +7 -7
- package/packages/ui/src/primitives/button.tsx +5 -0
- package/skill/SKILL.md +1 -1
- package/skill/workflows/Dashboard.md +50 -23
- package/skill/workflows/Registry.md +19 -13
- package/apps/local-dashboard/dist/assets/index-BcvtYmmL.js +0 -15
- package/apps/local-dashboard/dist/assets/index-BpRIxnpS.css +0 -1
- package/apps/local-dashboard/dist/assets/vendor-ui-DqH_uxum.js +0 -1
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { appendFileSync, existsSync, mkdirSync } from "node:fs";
|
|
2
|
+
import { dirname } from "node:path";
|
|
3
|
+
|
|
4
|
+
import { DASHBOARD_ACTION_STREAM_LOG } from "./constants.js";
|
|
5
|
+
import type {
|
|
6
|
+
DashboardActionEvent,
|
|
7
|
+
DashboardActionMetrics,
|
|
8
|
+
DashboardActionName,
|
|
9
|
+
DashboardActionProgress,
|
|
10
|
+
} from "./dashboard-contract.js";
|
|
11
|
+
|
|
12
|
+
// Names of the environment variables that encode a DashboardActionContext
// (produced by dashboardActionContextEnv, consumed by readContextFromEnv).
const ACTION_EVENT_ID_ENV = "SELFTUNE_DASHBOARD_ACTION_EVENT_ID";
const ACTION_NAME_ENV = "SELFTUNE_DASHBOARD_ACTION_NAME";
const ACTION_SKILL_NAME_ENV = "SELFTUNE_DASHBOARD_ACTION_SKILL_NAME";
const ACTION_SKILL_PATH_ENV = "SELFTUNE_DASHBOARD_ACTION_SKILL_PATH";
|
|
16
|
+
|
|
17
|
+
/**
 * Identifies the dashboard action that emitted events are attributed to.
 */
export interface DashboardActionContext {
  /** Identifier written as `event_id` on every event of this action. */
  eventId: string;
  /** The dashboard action being tracked. */
  action: DashboardActionName;
  /** Skill name associated with the action, or null when there is none. */
  skillName: string | null;
  /** Skill path associated with the action, or null when there is none. */
  skillPath: string | null;
}
|
|
23
|
+
|
|
24
|
+
// Context set explicitly via setCurrentDashboardActionContext; while it is null,
// getCurrentDashboardActionContext falls back to the environment variables.
let currentContext: DashboardActionContext | null = null;
|
|
25
|
+
|
|
26
|
+
/**
 * Append one event to the dashboard action stream log as a single JSON line.
 *
 * The log location is `SELFTUNE_DASHBOARD_ACTION_STREAM_LOG` when that variable
 * is set to a non-empty value, otherwise `DASHBOARD_ACTION_STREAM_LOG`. The
 * containing directory is created when it does not exist yet.
 *
 * Every failure is swallowed on purpose so the caller is never interrupted.
 *
 * @param event - Event to serialize and append.
 */
function appendDashboardActionEvent(event: DashboardActionEvent): void {
  try {
    const logFile = process.env.SELFTUNE_DASHBOARD_ACTION_STREAM_LOG || DASHBOARD_ACTION_STREAM_LOG;
    const logDir = dirname(logFile);
    const dirMissing = !existsSync(logDir);
    if (dirMissing) {
      mkdirSync(logDir, { recursive: true });
    }
    const line = `${JSON.stringify(event)}\n`;
    appendFileSync(logFile, line, "utf-8");
  } catch {
    // fail-open: dashboard instrumentation must never block the real CLI
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Type guard: does `value` spell one of the known dashboard action names?
 *
 * @param value - Candidate string (may be undefined, e.g. an unset env variable).
 * @returns true only for an exact, case-sensitive match.
 */
function isDashboardActionName(value: string | undefined): value is DashboardActionName {
  switch (value) {
    case "generate-evals":
    case "generate-unit-tests":
    case "replay-dry-run":
    case "measure-baseline":
    case "deploy-candidate":
    case "watch":
    case "orchestrate":
    case "rollback":
      return true;
    default:
      return false;
  }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Rebuild a dashboard action context from the process environment.
 *
 * @returns The context, or null when the event id is missing/empty or the
 *   action variable does not hold a known action name. Skill name and path
 *   become null when their variables are unset.
 */
function readContextFromEnv(): DashboardActionContext | null {
  const eventId = process.env[ACTION_EVENT_ID_ENV];
  const action = process.env[ACTION_NAME_ENV];

  if (eventId && action && isDashboardActionName(action)) {
    const skillName = process.env[ACTION_SKILL_NAME_ENV] ?? null;
    const skillPath = process.env[ACTION_SKILL_PATH_ENV] ?? null;
    return { eventId, action, skillName, skillPath };
  }

  return null;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Replace the in-process dashboard action context.
 *
 * @param context - New context, or null to clear it (lookups then fall back to
 *   the environment variables).
 */
export function setCurrentDashboardActionContext(context: DashboardActionContext | null): void {
  currentContext = context;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Resolve the active dashboard action context.
 *
 * @returns The context set in this process when there is one; otherwise the
 *   context decoded from the environment, or null when neither is available.
 */
export function getCurrentDashboardActionContext(): DashboardActionContext | null {
  if (currentContext != null) {
    return currentContext;
  }
  return readContextFromEnv();
}
|
|
72
|
+
|
|
73
|
+
/**
 * Encode a dashboard action context as environment variables.
 *
 * @param context - Context to encode, or null.
 * @returns An empty record for a null context; otherwise the event id and
 *   action name, plus skill name / skill path when they are non-empty.
 */
export function dashboardActionContextEnv(
  context: DashboardActionContext | null,
): Record<string, string> {
  const env: Record<string, string> = {};
  if (!context) return env;

  const { eventId, action, skillName, skillPath } = context;
  env[ACTION_EVENT_ID_ENV] = eventId;
  env[ACTION_NAME_ENV] = action;
  if (skillName) {
    env[ACTION_SKILL_NAME_ENV] = skillName;
  }
  if (skillPath) {
    env[ACTION_SKILL_PATH_ENV] = skillPath;
  }
  return env;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Record a "metrics" event for the active dashboard action.
 *
 * Does nothing when no dashboard action context is active.
 *
 * @param metrics - Metrics payload attached to the event.
 */
export function emitDashboardActionMetrics(metrics: DashboardActionMetrics): void {
  const active = getCurrentDashboardActionContext();
  if (active) {
    const { eventId, action, skillName, skillPath } = active;
    appendDashboardActionEvent({
      event_id: eventId,
      action,
      stage: "metrics",
      skill_name: skillName,
      skill_path: skillPath,
      ts: Date.now(),
      metrics,
    });
  }
}
|
|
101
|
+
|
|
102
|
+
/**
 * Record a "progress" event for the active dashboard action.
 *
 * Does nothing when no dashboard action context is active.
 *
 * @param progress - Progress payload attached to the event.
 */
export function emitDashboardActionProgress(progress: DashboardActionProgress): void {
  const active = getCurrentDashboardActionContext();
  if (active) {
    const { eventId, action, skillName, skillPath } = active;
    appendDashboardActionEvent({
      event_id: eventId,
      action,
      stage: "progress",
      skill_name: skillName,
      skill_path: skillPath,
      ts: Date.now(),
      progress,
    });
  }
}
|
|
116
|
+
|
|
117
|
+
export { appendDashboardActionEvent };
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import {
|
|
2
|
+
emitDashboardActionMetrics,
|
|
3
|
+
emitDashboardActionProgress,
|
|
4
|
+
} from "./dashboard-action-events.js";
|
|
5
|
+
import type {
|
|
6
|
+
DashboardActionMetrics,
|
|
7
|
+
DashboardActionProgress,
|
|
8
|
+
DashboardActionProgressUnit,
|
|
9
|
+
} from "./dashboard-contract.js";
|
|
10
|
+
import type { LlmCallObserver, LlmCallLifecycleEvent } from "./utils/llm-call.js";
|
|
11
|
+
|
|
12
|
+
/**
 * Input accepted by emitDashboardStepProgress. Optional fields may be omitted;
 * the emitter substitutes "step" for a missing `unit` and null for the rest.
 */
export interface DashboardStepProgressOptions {
  /** Position of this step. */
  current: number;
  /** Total number of steps. */
  total: number;
  /** Whether the step is starting or has finished. */
  status: DashboardActionProgress["status"];
  /** Kind of work being counted. */
  unit?: DashboardActionProgressUnit;
  /** Optional phase name. */
  phase?: string | null;
  /** Optional display label. */
  label?: string | null;
  /** Optional query text associated with the step. */
  query?: string | null;
  /** Optional pass/fail result of the step. */
  passed?: boolean | null;
  /** Optional free-form detail text. */
  evidence?: string | null;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Fixed progress coordinates that createDashboardLlmObserver stamps on every
 * progress event it emits.
 */
export interface DashboardLlmObserverOptions {
  /** Position reported as `current`. */
  current: number;
  /** Count reported as `total`. */
  total: number;
  /** Phase name reported with each event. */
  phase: string;
  /** Label reported with each event. */
  label: string;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Convert an LLM call lifecycle event into a dashboard metrics payload.
 *
 * Only platform, model and duration are taken from the event; every other
 * metric is reported as null.
 *
 * @param event - Lifecycle event to convert.
 * @returns The metrics payload.
 */
function buildRuntimeMetrics(event: LlmCallLifecycleEvent): DashboardActionMetrics {
  const { platform, model, durationMs } = event;
  const metrics: DashboardActionMetrics = {
    platform,
    model,
    session_id: null,
    input_tokens: null,
    output_tokens: null,
    cache_creation_input_tokens: null,
    cache_read_input_tokens: null,
    total_cost_usd: null,
    duration_ms: durationMs,
    num_turns: null,
  };
  return metrics;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Short human-readable description of which runtime an LLM call used.
 *
 * @param event - Lifecycle event carrying platform and model.
 * @returns The non-empty values of platform and model joined by " · ", or
 *   "runtime invoked" when both are empty.
 */
function describeInvocation(event: LlmCallLifecycleEvent): string {
  const known: string[] = [];
  for (const part of [event.platform, event.model]) {
    if (part) known.push(part);
  }
  if (known.length === 0) {
    return "runtime invoked";
  }
  return known.join(" · ");
}
|
|
50
|
+
|
|
51
|
+
/**
 * Human-readable summary of a finished LLM call.
 *
 * @param event - Lifecycle event for the finished call.
 * @returns The error text when the call failed with a non-empty error;
 *   otherwise the invocation description followed by the duration in seconds
 *   (one decimal), or by "completed" when no duration is available.
 */
function describeCompletion(event: LlmCallLifecycleEvent): string {
  let durationText = "completed";
  if (event.durationMs != null) {
    const seconds = event.durationMs / 1000;
    durationText = `${seconds.toFixed(1)}s`;
  }

  const failedWithMessage = event.success === false && event.error;
  if (failedWithMessage) {
    return event.error;
  }

  return `${describeInvocation(event)} · ${durationText}`;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Emit a progress event, filling in defaults for omitted optional fields.
 *
 * `unit` defaults to "step"; `phase`, `label`, `query`, `passed` and `evidence`
 * default to null.
 *
 * @param options - Progress description; see DashboardStepProgressOptions.
 */
export function emitDashboardStepProgress(options: DashboardStepProgressOptions): void {
  const { current, total, status } = options;
  const progress: DashboardActionProgress = {
    current,
    total,
    status,
    unit: options.unit ?? "step",
    phase: options.phase ?? null,
    label: options.label ?? null,
    query: options.query ?? null,
    passed: options.passed ?? null,
    evidence: options.evidence ?? null,
  };
  emitDashboardActionProgress(progress);
}
|
|
73
|
+
|
|
74
|
+
/**
 * Build an LLM call observer that mirrors call start/finish to the dashboard.
 *
 * On each callback the observer first emits a metrics event derived from the
 * lifecycle event, then a progress event with unit "llm_call" carrying the
 * supplied current/total/phase/label.
 *
 * @param options - Progress coordinates stamped on every progress event.
 * @returns Observer with `onStart` and `onFinish` handlers.
 */
export function createDashboardLlmObserver(options: DashboardLlmObserverOptions): LlmCallObserver {
  // Shared emitter. `describe` is invoked only after the metrics event has been
  // emitted, matching the order of the two emissions.
  const report = (
    event: LlmCallLifecycleEvent,
    status: DashboardActionProgress["status"],
    passed: DashboardStepProgressOptions["passed"],
    describe: (event: LlmCallLifecycleEvent) => string,
  ): void => {
    emitDashboardActionMetrics(buildRuntimeMetrics(event));
    emitDashboardStepProgress({
      current: options.current,
      total: options.total,
      status,
      unit: "llm_call",
      phase: options.phase,
      label: options.label,
      passed,
      evidence: describe(event),
    });
  };

  return {
    onStart(event) {
      report(event, "started", null, describeInvocation);
    },
    onFinish(event) {
      report(event, "finished", event.success, describeCompletion);
    },
  };
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import type { DashboardActionName, DashboardActionResultSummary } from "./dashboard-contract.js";
|
|
2
|
+
|
|
3
|
+
/** Raw result of running a dashboard action, as fed to resolveDashboardActionOutcome. */
export interface DashboardActionOutcomeInput {
  /** Action that was run. */
  action: DashboardActionName;
  /** Captured standard output. */
  stdout: string;
  /** Captured standard error, or null when there is none. */
  stderr: string | null;
  /** Process exit code, or null when it is unknown. */
  exitCode: number | null;
}
|
|
9
|
+
|
|
10
|
+
/** Interpreted result of a dashboard action. */
export interface DashboardActionOutcome {
  /** Whether the action is considered successful. */
  success: boolean;
  /** Failure description; null on success. */
  error: string | null;
  /** Structured summary parsed from stdout, or null when none could be extracted. */
  summary: DashboardActionResultSummary | null;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Parse `stdout` as a single JSON object.
 *
 * The trimmed text must start with "{" and end with "}"; anything else,
 * including text that fails to parse, yields null.
 *
 * @param stdout - Captured output to parse.
 * @returns The parsed object, or null.
 */
function extractJsonObject(stdout: string): Record<string, unknown> | null {
  const text = stdout.trim();
  const looksLikeObject = text.startsWith("{") && text.endsWith("}");
  if (!looksLikeObject) return null;

  let value: unknown;
  try {
    value = JSON.parse(text);
  } catch {
    return null;
  }

  if (value && typeof value === "object") {
    return value as Record<string, unknown>;
  }
  return null;
}
|
|
27
|
+
|
|
28
|
+
/** Return `value` when it is a boolean, otherwise null. */
function readBoolean(value: unknown): boolean | null {
  if (typeof value === "boolean") {
    return value;
  }
  return null;
}
|
|
31
|
+
|
|
32
|
+
/** Return `value` when it is a finite number, otherwise null (NaN and ±Infinity are rejected). */
function readNumber(value: unknown): number | null {
  if (typeof value !== "number") return null;
  return Number.isFinite(value) ? value : null;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Return `value` unchanged when it is a string with non-whitespace content,
 * otherwise null. The returned string is not trimmed.
 */
function readString(value: unknown): string | null {
  if (typeof value !== "string") return null;
  const isBlank = value.trim().length === 0;
  return isBlank ? null : value;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Extract a structured result summary from an action's stdout.
 *
 * Only the "replay-dry-run" action produces a summary, and only when stdout is
 * a single JSON object. Fields with a missing or wrongly typed value become
 * null. Pass rates fall back to the `before` / `after` keys when the
 * `*_pass_rate` keys are absent or not finite numbers.
 *
 * @param action - Action that produced the output.
 * @param stdout - Captured standard output.
 * @returns The summary, or null when none can be extracted.
 */
export function extractDashboardActionSummary(
  action: DashboardActionName,
  stdout: string,
): DashboardActionResultSummary | null {
  if (action !== "replay-dry-run") return null;

  const payload = extractJsonObject(stdout);
  if (!payload) return null;

  const reason = readString(payload["reason"]);
  const improved = readBoolean(payload["improved"]);
  const deployed = readBoolean(payload["deployed"]);
  const beforePassRate = readNumber(payload["before_pass_rate"]) ?? readNumber(payload["before"]);
  const afterPassRate = readNumber(payload["after_pass_rate"]) ?? readNumber(payload["after"]);
  const netChange = readNumber(payload["net_change"]);
  const validationMode = readString(payload["validation_mode"]);

  return {
    reason,
    improved,
    deployed,
    before_pass_rate: beforePassRate,
    after_pass_rate: afterPassRate,
    net_change: netChange,
    validation_mode: validationMode,
  };
}
|
|
59
|
+
|
|
60
|
+
/**
 * Decide whether a replay dry-run summary describes a validated, undeployed
 * proposal: the exact dry-run reason text, `improved` true and `deployed` false.
 *
 * @param summary - Parsed summary, or null.
 * @returns true only when all three conditions hold.
 */
function isSuccessfulReplayDryRun(summary: DashboardActionResultSummary | null): boolean {
  if (!summary) return false;
  if (summary.reason !== "Dry run - proposal validated but not deployed") return false;
  if (summary.improved !== true) return false;
  return summary.deployed === false;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Turn the raw result of an action run into a success/failure outcome.
 *
 * An exit code of 0 is success. A "replay-dry-run" is also treated as success
 * on a non-zero exit code when its summary reports a validated, undeployed
 * proposal. Otherwise the outcome is a failure whose error is stderr when
 * non-empty, else "Exit code N", else "Unknown action failure" when the exit
 * code is unknown.
 *
 * @param input - Action name, captured output and exit code.
 * @returns The outcome; `summary` is included in both success and failure cases.
 */
export function resolveDashboardActionOutcome(
  input: DashboardActionOutcomeInput,
): DashboardActionOutcome {
  const { action, stdout, stderr, exitCode } = input;
  const summary = extractDashboardActionSummary(action, stdout);

  const succeeded =
    exitCode === 0 || (action === "replay-dry-run" && isSuccessfulReplayDryRun(summary));
  if (succeeded) {
    return { success: true, error: null, summary };
  }

  const fallbackError = exitCode == null ? "Unknown action failure" : `Exit code ${exitCode}`;
  return {
    success: false,
    summary,
    error: stderr || fallbackError,
  };
}
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
2
|
+
import {
|
|
3
|
+
appendDashboardActionEvent,
|
|
4
|
+
setCurrentDashboardActionContext,
|
|
5
|
+
} from "./dashboard-action-events.js";
|
|
6
|
+
import { resolveDashboardActionOutcome } from "./dashboard-action-result.js";
|
|
7
|
+
import type { DashboardActionName } from "./dashboard-contract.js";
|
|
8
|
+
|
|
9
|
+
// Environment variable that turns dashboard action streaming off when set to "1".
const STREAM_DISABLE_ENV = "SELFTUNE_DASHBOARD_STREAM_DISABLE";
|
|
10
|
+
|
|
11
|
+
/**
 * Read the value of a command-line flag.
 *
 * Both spellings are recognised: `--flag value` (value in the next argument)
 * and `--flag=value` (inline). The first occurrence of the flag wins.
 *
 * @param argv - Argument list to scan.
 * @param flag - Flag name including its dashes, e.g. "--skill".
 * @returns The value, or null when the flag is absent, has no value, the inline
 *   value is empty, or the next argument is itself a `--` option.
 */
function readFlagValue(argv: string[], flag: string): string | null {
  const inlinePrefix = `${flag}=`;

  for (const [index, arg] of argv.entries()) {
    if (arg === flag) {
      const value = argv[index + 1];
      // A following "--option" means this flag was given without a value.
      if (!value || value.startsWith("--")) return null;
      return value;
    }
    // The trailing "=" keeps "--skill" from matching "--skill-path=...".
    if (arg.startsWith(inlinePrefix)) {
      const value = arg.slice(inlinePrefix.length);
      return value.length > 0 ? value : null;
    }
  }

  return null;
}
|
|
18
|
+
|
|
19
|
+
/** Report whether `flag` appears verbatim as one of the arguments. */
function hasFlag(argv: string[], flag: string): boolean {
  return argv.some((arg) => arg === flag);
}
|
|
22
|
+
|
|
23
|
+
/**
 * Map a CLI argument list onto the dashboard action it represents.
 *
 * Recognised invocations:
 * - `eval generate` → "generate-evals"
 * - `eval unit-test --generate` → "generate-unit-tests"
 * - `grade baseline` → "measure-baseline"
 * - `watch` → "watch"
 * - `orchestrate` → "orchestrate" (never carries a skill)
 * - `evolve rollback` → "rollback"
 * - `evolve` with no subcommand (or an option in its place) → "replay-dry-run"
 *   when `--dry-run` is present, otherwise "deploy-candidate"
 *
 * Skill name and path come from `--skill` and `--skill-path`.
 *
 * @param argv - CLI arguments, command first.
 * @returns The detected action, or null for help requests (`--help` / `-h`)
 *   and for any other command.
 */
function detectDashboardAction(argv: string[]): {
  action: DashboardActionName;
  skillName: string | null;
  skillPath: string | null;
} | null {
  const wantsHelp = argv.includes("--help") || argv.includes("-h");
  if (wantsHelp) return null;

  const command = argv[0];
  const subcommand = argv[1];

  // Attach the skill flags to an action name.
  const forSkill = (action: DashboardActionName) => ({
    action,
    skillName: readFlagValue(argv, "--skill"),
    skillPath: readFlagValue(argv, "--skill-path"),
  });

  switch (command) {
    case "eval":
      if (subcommand === "generate") return forSkill("generate-evals");
      if (subcommand === "unit-test" && hasFlag(argv, "--generate")) {
        return forSkill("generate-unit-tests");
      }
      return null;

    case "grade":
      return subcommand === "baseline" ? forSkill("measure-baseline") : null;

    case "watch":
      return forSkill("watch");

    case "orchestrate":
      return { action: "orchestrate", skillName: null, skillPath: null };

    case "evolve":
      if (subcommand === "rollback") return forSkill("rollback");
      if (!subcommand || subcommand.startsWith("--")) {
        return forSkill(hasFlag(argv, "--dry-run") ? "replay-dry-run" : "deploy-candidate");
      }
      return null;

    default:
      return null;
  }
}
|
|
92
|
+
|
|
93
|
+
/**
 * Convert a chunk handed to a stream `write()` call into text.
 *
 * Strings pass through unchanged. Binary chunks — `Buffer`, any typed array
 * such as a plain `Uint8Array`, or a `DataView` — are decoded as UTF-8 over
 * exactly the bytes the view covers. Anything else is stringified.
 *
 * @param chunk - Value passed to `write()`.
 * @returns Text form of the chunk.
 */
function normalizeChunk(chunk: unknown): string {
  if (typeof chunk === "string") return chunk;
  if (ArrayBuffer.isView(chunk)) {
    // Respect byteOffset/byteLength: a view may cover only part of its buffer.
    return Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength).toString("utf-8");
  }
  return String(chunk);
}
|
|
98
|
+
|
|
99
|
+
/** Handle returned by startDashboardActionStream for an active stream. */
export interface DashboardActionStreamSession {
  /** Identifier written as `event_id` on every event of this stream. */
  eventId: string;
  /**
   * End the stream and record the "finished" event. A missing or null exit
   * code is recorded as 0. Calls after the first are ignored.
   */
  finish: (exitCode?: number | null) => void;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Start mirroring a CLI invocation to the dashboard action event log.
 *
 * When `argv` maps to a dashboard action (and streaming is not disabled through
 * the `SELFTUNE_DASHBOARD_STREAM_DISABLE=1` environment variable) this:
 * 1. records a "started" event,
 * 2. sets the current dashboard action context,
 * 3. wraps `process.stdout.write`, `process.stderr.write` and
 *    `console.log/info/warn/error` so output is both recorded as
 *    "stdout"/"stderr" events and still forwarded to the real destination,
 * 4. registers a process "exit" listener that finishes the stream.
 *
 * `finish` restores every wrapped function, clears the context and records a
 * "finished" event with the resolved outcome. It runs at most once.
 *
 * @param argv - CLI arguments, command first.
 * @returns The session handle, or null when nothing is being streamed.
 */
export function startDashboardActionStream(argv: string[]): DashboardActionStreamSession | null {
  if (process.env[STREAM_DISABLE_ENV] === "1") return null;

  const detected = detectDashboardAction(argv);
  if (!detected) return null;

  const eventId = randomUUID();
  let finished = false;
  let lastError: string | null = null;
  // True while a wrapped console method delegates to the real one, so the
  // stream-level wrappers do not record that same output a second time.
  let suppressChunkCapture = false;
  let stdoutBuffer = "";
  let stderrBuffer = "";

  appendDashboardActionEvent({
    event_id: eventId,
    action: detected.action,
    stage: "started",
    skill_name: detected.skillName,
    skill_path: detected.skillPath,
    ts: Date.now(),
  });

  // Capture the real implementations before any of them is replaced.
  const originalStdoutWrite = process.stdout.write.bind(process.stdout);
  const originalStderrWrite = process.stderr.write.bind(process.stderr);
  const originalConsoleLog = console.log.bind(console);
  const originalConsoleInfo = console.info.bind(console);
  const originalConsoleWarn = console.warn.bind(console);
  const originalConsoleError = console.error.bind(console);
  setCurrentDashboardActionContext({
    eventId,
    action: detected.action,
    skillName: detected.skillName,
    skillPath: detected.skillPath,
  });

  process.stdout.write = ((chunk: unknown, ...args: unknown[]) => {
    if (!suppressChunkCapture) {
      // Normalize once and reuse the text for both the buffer and the event.
      const normalized = normalizeChunk(chunk);
      stdoutBuffer += normalized;
      appendDashboardActionEvent({
        event_id: eventId,
        action: detected.action,
        stage: "stdout",
        skill_name: detected.skillName,
        skill_path: detected.skillPath,
        ts: Date.now(),
        chunk: normalized,
      });
    }
    return originalStdoutWrite(chunk as never, ...(args as []));
  }) as typeof process.stdout.write;

  process.stderr.write = ((chunk: unknown, ...args: unknown[]) => {
    const normalized = normalizeChunk(chunk);
    if (!suppressChunkCapture) {
      stderrBuffer += normalized;
      // Remember the most recent non-blank stderr text.
      if (normalized.trim()) {
        lastError = normalized.trim();
      }
      appendDashboardActionEvent({
        event_id: eventId,
        action: detected.action,
        stage: "stderr",
        skill_name: detected.skillName,
        skill_path: detected.skillPath,
        ts: Date.now(),
        chunk: normalized,
      });
    }
    return originalStderrWrite(chunk as never, ...(args as []));
  }) as typeof process.stderr.write;

  // Wrap a console method: record the rendered message under `stage`, then
  // call the real method with stream-level capture suppressed.
  function wrapConsole(
    stage: "stdout" | "stderr",
    originalMethod: typeof console.log,
  ): typeof console.log {
    return (...args: unknown[]) => {
      const message = args
        .map((arg) => (typeof arg === "string" ? arg : Bun.inspect(arg)))
        .join(" ");
      // Blank messages are forwarded but not recorded.
      if (message.trim()) {
        if (stage === "stderr") {
          stderrBuffer += `${message}\n`;
          lastError = message.trim();
        } else {
          stdoutBuffer += `${message}\n`;
        }
        appendDashboardActionEvent({
          event_id: eventId,
          action: detected.action,
          stage,
          skill_name: detected.skillName,
          skill_path: detected.skillPath,
          ts: Date.now(),
          chunk: `${message}\n`,
        });
      }
      suppressChunkCapture = true;
      try {
        originalMethod(...args);
      } finally {
        suppressChunkCapture = false;
      }
    };
  }

  console.log = wrapConsole("stdout", originalConsoleLog);
  console.info = wrapConsole("stdout", originalConsoleInfo);
  console.warn = wrapConsole("stderr", originalConsoleWarn);
  console.error = wrapConsole("stderr", originalConsoleError);

  const exitListener = (code: number) => {
    finish(code);
  };

  function finish(exitCode?: number | null): void {
    if (finished) return;
    finished = true;
    process.removeListener("exit", exitListener);
    // Put every wrapped function back before recording the final event.
    process.stdout.write = originalStdoutWrite;
    process.stderr.write = originalStderrWrite;
    console.log = originalConsoleLog;
    console.info = originalConsoleInfo;
    console.warn = originalConsoleWarn;
    console.error = originalConsoleError;
    setCurrentDashboardActionContext(null);
    const outcome = resolveDashboardActionOutcome({
      action: detected.action,
      stdout: stdoutBuffer,
      stderr: stderrBuffer || lastError,
      // A missing or null exit code is treated as 0.
      exitCode: exitCode ?? 0,
    });
    appendDashboardActionEvent({
      event_id: eventId,
      action: detected.action,
      stage: "finished",
      skill_name: detected.skillName,
      skill_path: detected.skillPath,
      ts: Date.now(),
      success: outcome.success,
      exit_code: exitCode ?? 0,
      error: outcome.error,
      summary: outcome.summary,
    });
  }

  process.once("exit", exitListener);

  return { eventId, finish };
}
|
|
@@ -353,6 +353,77 @@ export interface SkillTestingReadiness {
|
|
|
353
353
|
latest_evolution_at: string | null;
|
|
354
354
|
}
|
|
355
355
|
|
|
356
|
+
/** Names of the actions whose progress can be reported to the dashboard. */
export type DashboardActionName =
  | "generate-evals"
  | "generate-unit-tests"
  | "replay-dry-run"
  | "measure-baseline"
  | "deploy-candidate"
  | "watch"
  | "orchestrate"
  | "rollback";
|
|
365
|
+
|
|
366
|
+
/** Kind of a dashboard action event, stored in its `stage` field. */
export type DashboardActionEventStage =
  | "started"
  | "progress"
  | "stdout"
  | "stderr"
  | "metrics"
  | "finished";
|
|
373
|
+
|
|
374
|
+
/**
 * Structured summary of an action result. Every field is null when the
 * corresponding value is unavailable.
 */
export interface DashboardActionResultSummary {
  /** Textual reason reported with the result. */
  reason: string | null;
  /** Whether the result reports an improvement. */
  improved: boolean | null;
  /** Whether the result reports a deployment. */
  deployed: boolean | null;
  /** Pass rate before the change. */
  before_pass_rate: number | null;
  /** Pass rate after the change. */
  after_pass_rate: number | null;
  /** Reported net change. */
  net_change: number | null;
  /** Name of the validation mode that was used. */
  validation_mode: string | null;
}
|
|
383
|
+
|
|
384
|
+
/**
 * Runtime metrics attached to a "metrics" event. Every field is null when the
 * value is not known.
 */
export interface DashboardActionMetrics {
  /** Platform that served the call. */
  platform: string | null;
  /** Model that served the call. */
  model: string | null;
  /** Session identifier. */
  session_id: string | null;
  /** Input token count. */
  input_tokens: number | null;
  /** Output token count. */
  output_tokens: number | null;
  /** Cache-creation input token count. */
  cache_creation_input_tokens: number | null;
  /** Cache-read input token count. */
  cache_read_input_tokens: number | null;
  /** Total cost in US dollars. */
  total_cost_usd: number | null;
  /** Duration in milliseconds. */
  duration_ms: number | null;
  /** Number of turns. */
  num_turns: number | null;
}
|
|
396
|
+
|
|
397
|
+
/** What a progress event's `current` / `total` pair is counting. */
export type DashboardActionProgressUnit = "eval" | "llm_call" | "step";
|
|
398
|
+
|
|
399
|
+
/** Payload of a "progress" event. */
export interface DashboardActionProgress {
  /** Position of the item being reported. */
  current: number;
  /** Total number of items. */
  total: number;
  /** Whether the item is starting or has finished. */
  status: "started" | "finished";
  /** What is being counted; may be omitted. */
  unit?: DashboardActionProgressUnit | null;
  /** Phase name; may be omitted. */
  phase?: string | null;
  /** Display label; may be omitted. */
  label?: string | null;
  /** Query text associated with the item, or null. */
  query: string | null;
  /** Pass/fail result of the item, or null. */
  passed: boolean | null;
  /** Free-form detail text, or null. */
  evidence: string | null;
}
|
|
410
|
+
|
|
411
|
+
/**
 * One record of the dashboard action event stream. The first six fields are
 * always present; the optional fields depend on `stage`.
 */
export interface DashboardActionEvent {
  /** Identifier shared by all events of one action. */
  event_id: string;
  /** Action the event belongs to. */
  action: DashboardActionName;
  /** Kind of event. */
  stage: DashboardActionEventStage;
  /** Skill name associated with the action, or null. */
  skill_name: string | null;
  /** Skill path associated with the action, or null. */
  skill_path: string | null;
  /** Timestamp of the event. */
  ts: number;
  /** Output text, for "stdout" / "stderr" events. */
  chunk?: string;
  /** Whether the action succeeded, for "finished" events. */
  success?: boolean;
  /** Exit code, for "finished" events. */
  exit_code?: number | null;
  /** Failure description, for "finished" events. */
  error?: string | null;
  /** Structured result summary, for "finished" events. */
  summary?: DashboardActionResultSummary | null;
  /** Metrics payload, for "metrics" events. */
  metrics?: DashboardActionMetrics | null;
  /** Progress payload, for "progress" events. */
  progress?: DashboardActionProgress | null;
}
|
|
426
|
+
|
|
356
427
|
export interface CreatorTestingOverview {
|
|
357
428
|
summary: string;
|
|
358
429
|
counts: {
|
|
@@ -452,6 +523,10 @@ export interface HealthResponse {
|
|
|
452
523
|
ok: boolean;
|
|
453
524
|
service: string;
|
|
454
525
|
version: string;
|
|
526
|
+
latest_version: string | null;
|
|
527
|
+
update_available: boolean;
|
|
528
|
+
auto_update_supported: boolean;
|
|
529
|
+
update_hint: string | null;
|
|
455
530
|
pid: number;
|
|
456
531
|
spa: boolean;
|
|
457
532
|
spa_mode?: "dist" | "proxy" | "missing";
|
|
@@ -513,7 +588,12 @@ export interface CommitRecord {
|
|
|
513
588
|
export interface CommitSummary {
|
|
514
589
|
total_commits: number;
|
|
515
590
|
unique_branches: number;
|
|
516
|
-
recent_commits: Array<{
|
|
591
|
+
recent_commits: Array<{
|
|
592
|
+
sha: string;
|
|
593
|
+
title: string;
|
|
594
|
+
branch: string;
|
|
595
|
+
timestamp: string;
|
|
596
|
+
}>;
|
|
517
597
|
}
|
|
518
598
|
|
|
519
599
|
// -- Trust-oriented types for skill report ------------------------------------
|