selftune 0.2.6 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/apps/local-dashboard/dist/assets/index-Bs3Y4ixf.css +1 -0
- package/apps/local-dashboard/dist/assets/index-C4UYGWKr.js +15 -0
- package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +60 -0
- package/apps/local-dashboard/dist/assets/{vendor-table-B7VF2Ipl.js → vendor-table-dK1QMLq9.js} +1 -1
- package/apps/local-dashboard/dist/assets/{vendor-ui-r2k_Ku_V.js → vendor-ui-CO2mrx6e.js} +60 -65
- package/apps/local-dashboard/dist/index.html +5 -5
- package/cli/selftune/activation-rules.ts +57 -18
- package/cli/selftune/agent-guidance.ts +96 -0
- package/cli/selftune/alpha-identity.ts +156 -0
- package/cli/selftune/alpha-upload/build-payloads.ts +151 -0
- package/cli/selftune/alpha-upload/client.ts +113 -0
- package/cli/selftune/alpha-upload/flush.ts +191 -0
- package/cli/selftune/alpha-upload/index.ts +194 -0
- package/cli/selftune/alpha-upload/queue.ts +252 -0
- package/cli/selftune/alpha-upload/stage-canonical.ts +251 -0
- package/cli/selftune/alpha-upload-contract.ts +52 -0
- package/cli/selftune/auth/device-code.ts +110 -0
- package/cli/selftune/auto-update.ts +130 -0
- package/cli/selftune/badge/badge.ts +19 -9
- package/cli/selftune/canonical-export.ts +16 -3
- package/cli/selftune/constants.ts +28 -8
- package/cli/selftune/contribute/bundle.ts +33 -5
- package/cli/selftune/dashboard-contract.ts +32 -1
- package/cli/selftune/dashboard-server.ts +215 -693
- package/cli/selftune/dashboard.ts +1 -1
- package/cli/selftune/eval/baseline.ts +11 -7
- package/cli/selftune/eval/hooks-to-evals.ts +39 -15
- package/cli/selftune/eval/synthetic-evals.ts +54 -1
- package/cli/selftune/evolution/audit.ts +24 -19
- package/cli/selftune/evolution/constitutional.ts +176 -0
- package/cli/selftune/evolution/evidence.ts +18 -13
- package/cli/selftune/evolution/evolve-body.ts +104 -7
- package/cli/selftune/evolution/evolve.ts +195 -22
- package/cli/selftune/evolution/propose-body.ts +18 -1
- package/cli/selftune/evolution/propose-description.ts +27 -2
- package/cli/selftune/evolution/rollback.ts +11 -15
- package/cli/selftune/export.ts +84 -0
- package/cli/selftune/grading/auto-grade.ts +14 -4
- package/cli/selftune/grading/grade-session.ts +17 -6
- package/cli/selftune/hooks/auto-activate.ts +5 -0
- package/cli/selftune/hooks/evolution-guard.ts +25 -11
- package/cli/selftune/hooks/prompt-log.ts +23 -9
- package/cli/selftune/hooks/session-stop.ts +78 -15
- package/cli/selftune/hooks/skill-eval.ts +189 -10
- package/cli/selftune/index.ts +274 -2
- package/cli/selftune/ingestors/claude-replay.ts +48 -21
- package/cli/selftune/init.ts +260 -49
- package/cli/selftune/last.ts +7 -7
- package/cli/selftune/localdb/db.ts +90 -10
- package/cli/selftune/localdb/direct-write.ts +573 -0
- package/cli/selftune/localdb/materialize.ts +296 -42
- package/cli/selftune/localdb/queries.ts +482 -32
- package/cli/selftune/localdb/schema.ts +153 -1
- package/cli/selftune/monitoring/watch.ts +27 -8
- package/cli/selftune/normalization.ts +88 -15
- package/cli/selftune/observability.ts +257 -5
- package/cli/selftune/orchestrate.ts +176 -53
- package/cli/selftune/quickstart.ts +34 -10
- package/cli/selftune/repair/skill-usage.ts +15 -2
- package/cli/selftune/routes/actions.ts +77 -0
- package/cli/selftune/routes/badge.ts +66 -0
- package/cli/selftune/routes/doctor.ts +12 -0
- package/cli/selftune/routes/index.ts +14 -0
- package/cli/selftune/routes/orchestrate-runs.ts +13 -0
- package/cli/selftune/routes/overview.ts +14 -0
- package/cli/selftune/routes/report.ts +293 -0
- package/cli/selftune/routes/skill-report.ts +230 -0
- package/cli/selftune/status.ts +203 -7
- package/cli/selftune/sync.ts +14 -1
- package/cli/selftune/types.ts +52 -2
- package/cli/selftune/utils/jsonl.ts +58 -1
- package/cli/selftune/utils/selftune-meta.ts +38 -0
- package/cli/selftune/utils/skill-log.ts +30 -4
- package/cli/selftune/utils/transcript.ts +15 -0
- package/cli/selftune/workflows/workflows.ts +7 -6
- package/package.json +11 -6
- package/packages/telemetry-contract/fixtures/complete-push.ts +184 -0
- package/packages/telemetry-contract/fixtures/evidence-only-push.ts +58 -0
- package/packages/telemetry-contract/fixtures/golden.json +1 -0
- package/packages/telemetry-contract/fixtures/index.ts +4 -0
- package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +40 -0
- package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +79 -0
- package/packages/telemetry-contract/package.json +6 -1
- package/packages/telemetry-contract/src/schemas.ts +196 -0
- package/packages/telemetry-contract/src/types.ts +3 -1
- package/packages/telemetry-contract/src/validators.ts +3 -1
- package/packages/telemetry-contract/tests/compatibility.test.ts +144 -0
- package/packages/ui/package.json +4 -0
- package/packages/ui/src/components/ActivityTimeline.tsx +61 -29
- package/packages/ui/src/components/section-cards.tsx +31 -14
- package/packages/ui/src/types.ts +1 -0
- package/skill/SKILL.md +214 -174
- package/skill/Workflows/AlphaUpload.md +45 -0
- package/skill/Workflows/Baseline.md +18 -12
- package/skill/Workflows/Composability.md +3 -3
- package/skill/Workflows/Dashboard.md +39 -91
- package/skill/Workflows/Doctor.md +93 -66
- package/skill/Workflows/Evals.md +49 -40
- package/skill/Workflows/Evolve.md +76 -28
- package/skill/Workflows/EvolveBody.md +37 -38
- package/skill/Workflows/Initialize.md +145 -26
- package/skill/Workflows/Orchestrate.md +11 -2
- package/skill/Workflows/Sync.md +23 -0
- package/skill/Workflows/Watch.md +2 -5
- package/skill/agents/diagnosis-analyst.md +163 -0
- package/skill/agents/evolution-reviewer.md +149 -0
- package/skill/agents/integration-guide.md +154 -0
- package/skill/agents/pattern-analyst.md +149 -0
- package/skill/assets/multi-skill-settings.json +1 -1
- package/skill/assets/single-skill-settings.json +1 -1
- package/skill/references/interactive-config.md +39 -0
- package/skill/references/invocation-taxonomy.md +34 -0
- package/skill/references/logs.md +15 -1
- package/skill/references/setup-patterns.md +3 -3
- package/skill/settings_snippet.json +1 -1
- package/apps/local-dashboard/dist/assets/index-C75H1Q3n.css +0 -1
- package/apps/local-dashboard/dist/assets/index-axE4kz3Q.js +0 -15
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +0 -60
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import {
|
|
3
|
+
CANONICAL_CAPTURE_MODES,
|
|
4
|
+
CANONICAL_COMPLETION_STATUSES,
|
|
5
|
+
CANONICAL_INVOCATION_MODES,
|
|
6
|
+
CANONICAL_PLATFORMS,
|
|
7
|
+
CANONICAL_PROMPT_KINDS,
|
|
8
|
+
CANONICAL_RECORD_KINDS,
|
|
9
|
+
CANONICAL_SCHEMA_VERSION,
|
|
10
|
+
CANONICAL_SOURCE_SESSION_KINDS,
|
|
11
|
+
} from "./types.js";
|
|
12
|
+
|
|
13
|
+
export const canonicalPlatformSchema = z.enum(CANONICAL_PLATFORMS);
|
|
14
|
+
export const captureModeSchema = z.enum(CANONICAL_CAPTURE_MODES);
|
|
15
|
+
export const sourceSessionKindSchema = z.enum(CANONICAL_SOURCE_SESSION_KINDS);
|
|
16
|
+
export const promptKindSchema = z.enum(CANONICAL_PROMPT_KINDS);
|
|
17
|
+
export const invocationModeSchema = z.enum(CANONICAL_INVOCATION_MODES);
|
|
18
|
+
export const completionStatusSchema = z.enum(CANONICAL_COMPLETION_STATUSES);
|
|
19
|
+
export const recordKindSchema = z.enum(CANONICAL_RECORD_KINDS);
|
|
20
|
+
|
|
21
|
+
export const rawSourceRefSchema = z.object({
|
|
22
|
+
path: z.string().optional(),
|
|
23
|
+
line: z.number().int().nonnegative().optional(),
|
|
24
|
+
event_type: z.string().optional(),
|
|
25
|
+
raw_id: z.string().optional(),
|
|
26
|
+
metadata: z.record(z.string(), z.unknown()).optional(),
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
export const canonicalRecordBaseSchema = z.object({
|
|
30
|
+
record_kind: recordKindSchema,
|
|
31
|
+
schema_version: z.literal(CANONICAL_SCHEMA_VERSION),
|
|
32
|
+
normalizer_version: z.string().min(1),
|
|
33
|
+
normalized_at: z.string().datetime(),
|
|
34
|
+
platform: canonicalPlatformSchema,
|
|
35
|
+
capture_mode: captureModeSchema,
|
|
36
|
+
raw_source_ref: rawSourceRefSchema,
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
export const canonicalSessionRecordBaseSchema = canonicalRecordBaseSchema.extend({
|
|
40
|
+
source_session_kind: sourceSessionKindSchema,
|
|
41
|
+
session_id: z.string().min(1),
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
export const CanonicalSessionRecordSchema = canonicalSessionRecordBaseSchema.extend({
|
|
45
|
+
record_kind: z.literal("session"),
|
|
46
|
+
external_session_id: z.string().optional(),
|
|
47
|
+
parent_session_id: z.string().optional(),
|
|
48
|
+
agent_id: z.string().optional(),
|
|
49
|
+
agent_type: z.string().optional(),
|
|
50
|
+
agent_cli: z.string().optional(),
|
|
51
|
+
session_key: z.string().optional(),
|
|
52
|
+
channel: z.string().optional(),
|
|
53
|
+
workspace_path: z.string().optional(),
|
|
54
|
+
repo_root: z.string().optional(),
|
|
55
|
+
repo_remote: z.string().optional(),
|
|
56
|
+
branch: z.string().optional(),
|
|
57
|
+
commit_sha: z.string().optional(),
|
|
58
|
+
permission_mode: z.string().optional(),
|
|
59
|
+
approval_policy: z.string().optional(),
|
|
60
|
+
sandbox_policy: z.string().optional(),
|
|
61
|
+
provider: z.string().optional(),
|
|
62
|
+
model: z.string().optional(),
|
|
63
|
+
started_at: z.string().datetime().optional(),
|
|
64
|
+
ended_at: z.string().datetime().optional(),
|
|
65
|
+
completion_status: completionStatusSchema.optional(),
|
|
66
|
+
end_reason: z.string().optional(),
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
export const CanonicalPromptRecordSchema = canonicalSessionRecordBaseSchema.extend({
|
|
70
|
+
record_kind: z.literal("prompt"),
|
|
71
|
+
prompt_id: z.string().min(1),
|
|
72
|
+
occurred_at: z.string().datetime(),
|
|
73
|
+
prompt_text: z.string().min(1),
|
|
74
|
+
prompt_hash: z.string().optional(),
|
|
75
|
+
prompt_kind: promptKindSchema,
|
|
76
|
+
is_actionable: z.boolean(),
|
|
77
|
+
prompt_index: z.number().int().nonnegative().optional(),
|
|
78
|
+
parent_prompt_id: z.string().optional(),
|
|
79
|
+
source_message_id: z.string().optional(),
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
export const CanonicalSkillInvocationRecordSchema = canonicalSessionRecordBaseSchema.extend({
|
|
83
|
+
record_kind: z.literal("skill_invocation"),
|
|
84
|
+
skill_invocation_id: z.string().min(1),
|
|
85
|
+
occurred_at: z.string().datetime(),
|
|
86
|
+
matched_prompt_id: z.string().min(1).optional(),
|
|
87
|
+
skill_name: z.string().min(1),
|
|
88
|
+
skill_path: z.string().optional(),
|
|
89
|
+
skill_version_hash: z.string().optional(),
|
|
90
|
+
invocation_mode: invocationModeSchema,
|
|
91
|
+
triggered: z.boolean(),
|
|
92
|
+
confidence: z.number().min(0).max(1),
|
|
93
|
+
tool_name: z.string().optional(),
|
|
94
|
+
tool_call_id: z.string().optional(),
|
|
95
|
+
agent_type: z.string().optional(),
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
export const CanonicalExecutionFactRecordSchema = canonicalSessionRecordBaseSchema.extend({
|
|
99
|
+
record_kind: z.literal("execution_fact"),
|
|
100
|
+
execution_fact_id: z.string().min(1),
|
|
101
|
+
occurred_at: z.string().datetime(),
|
|
102
|
+
prompt_id: z.string().optional(),
|
|
103
|
+
tool_calls_json: z.record(z.string(), z.number().finite()),
|
|
104
|
+
total_tool_calls: z.number().int().nonnegative(),
|
|
105
|
+
bash_commands_redacted: z.array(z.string()).optional(),
|
|
106
|
+
assistant_turns: z.number().int().nonnegative(),
|
|
107
|
+
errors_encountered: z.number().int().nonnegative(),
|
|
108
|
+
input_tokens: z.number().int().nonnegative().optional(),
|
|
109
|
+
output_tokens: z.number().int().nonnegative().optional(),
|
|
110
|
+
duration_ms: z.number().nonnegative().optional(),
|
|
111
|
+
completion_status: completionStatusSchema.optional(),
|
|
112
|
+
end_reason: z.string().optional(),
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
export const CanonicalNormalizationRunRecordSchema = canonicalRecordBaseSchema.extend({
|
|
116
|
+
record_kind: z.literal("normalization_run"),
|
|
117
|
+
run_id: z.string().min(1),
|
|
118
|
+
run_at: z.string().datetime(),
|
|
119
|
+
raw_records_seen: z.number().int().nonnegative(),
|
|
120
|
+
canonical_records_written: z.number().int().nonnegative(),
|
|
121
|
+
repair_applied: z.boolean(),
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
export const CanonicalEvolutionEvidenceRecordSchema = z.object({
|
|
125
|
+
evidence_id: z.string().min(1),
|
|
126
|
+
skill_name: z.string().min(1),
|
|
127
|
+
proposal_id: z.string().optional(),
|
|
128
|
+
target: z.string().min(1),
|
|
129
|
+
stage: z.string().min(1),
|
|
130
|
+
rationale: z.string().optional(),
|
|
131
|
+
confidence: z.number().min(0).max(1).optional(),
|
|
132
|
+
original_text: z.string().optional(),
|
|
133
|
+
proposed_text: z.string().optional(),
|
|
134
|
+
eval_set_json: z.unknown().optional(),
|
|
135
|
+
validation_json: z.unknown().optional(),
|
|
136
|
+
raw_source_ref: rawSourceRefSchema.optional(),
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
export const OrchestrateRunSkillActionSchema = z.object({
|
|
140
|
+
skill: z.string().min(1),
|
|
141
|
+
action: z.enum(["evolve", "watch", "skip"]),
|
|
142
|
+
reason: z.string(),
|
|
143
|
+
deployed: z.boolean().optional(),
|
|
144
|
+
rolledBack: z.boolean().optional(),
|
|
145
|
+
alert: z.string().nullable().optional(),
|
|
146
|
+
elapsed_ms: z.number().nonnegative().optional(),
|
|
147
|
+
llm_calls: z.number().int().nonnegative().optional(),
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
export const PushOrchestrateRunRecordSchema = z.object({
|
|
151
|
+
run_id: z.string().min(1),
|
|
152
|
+
timestamp: z.string().datetime(),
|
|
153
|
+
elapsed_ms: z.number().int().nonnegative(),
|
|
154
|
+
dry_run: z.boolean(),
|
|
155
|
+
approval_mode: z.enum(["auto", "review"]),
|
|
156
|
+
total_skills: z.number().int().nonnegative(),
|
|
157
|
+
evaluated: z.number().int().nonnegative(),
|
|
158
|
+
evolved: z.number().int().nonnegative(),
|
|
159
|
+
deployed: z.number().int().nonnegative(),
|
|
160
|
+
watched: z.number().int().nonnegative(),
|
|
161
|
+
skipped: z.number().int().nonnegative(),
|
|
162
|
+
skill_actions: z.array(OrchestrateRunSkillActionSchema),
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
export const PushPayloadV2Schema = z.object({
|
|
166
|
+
schema_version: z.literal("2.0"),
|
|
167
|
+
client_version: z.string().min(1),
|
|
168
|
+
// Queue-generated push IDs are typically UUIDs, but the wire contract only
|
|
169
|
+
// requires a stable non-empty idempotency key.
|
|
170
|
+
push_id: z.string().min(1),
|
|
171
|
+
normalizer_version: z.string().min(1),
|
|
172
|
+
canonical: z.object({
|
|
173
|
+
sessions: z.array(CanonicalSessionRecordSchema).min(0),
|
|
174
|
+
prompts: z.array(CanonicalPromptRecordSchema).min(0),
|
|
175
|
+
skill_invocations: z.array(CanonicalSkillInvocationRecordSchema).min(0),
|
|
176
|
+
execution_facts: z.array(CanonicalExecutionFactRecordSchema).min(0),
|
|
177
|
+
normalization_runs: z.array(CanonicalNormalizationRunRecordSchema).min(0),
|
|
178
|
+
evolution_evidence: z.array(CanonicalEvolutionEvidenceRecordSchema).optional(),
|
|
179
|
+
orchestrate_runs: z.array(PushOrchestrateRunRecordSchema).optional(),
|
|
180
|
+
}),
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
export type PushPayloadV2 = z.infer<typeof PushPayloadV2Schema>;
|
|
184
|
+
export type ZodCanonicalSessionRecord = z.infer<typeof CanonicalSessionRecordSchema>;
|
|
185
|
+
export type ZodCanonicalPromptRecord = z.infer<typeof CanonicalPromptRecordSchema>;
|
|
186
|
+
export type ZodCanonicalSkillInvocationRecord = z.infer<
|
|
187
|
+
typeof CanonicalSkillInvocationRecordSchema
|
|
188
|
+
>;
|
|
189
|
+
export type ZodCanonicalExecutionFactRecord = z.infer<typeof CanonicalExecutionFactRecordSchema>;
|
|
190
|
+
export type ZodCanonicalNormalizationRunRecord = z.infer<
|
|
191
|
+
typeof CanonicalNormalizationRunRecordSchema
|
|
192
|
+
>;
|
|
193
|
+
export type ZodCanonicalEvolutionEvidenceRecord = z.infer<
|
|
194
|
+
typeof CanonicalEvolutionEvidenceRecordSchema
|
|
195
|
+
>;
|
|
196
|
+
export type ZodPushOrchestrateRunRecord = z.infer<typeof PushOrchestrateRunRecordSchema>;
|
|
@@ -128,15 +128,17 @@ export interface CanonicalSkillInvocationRecord extends CanonicalSessionRecordBa
|
|
|
128
128
|
confidence: number;
|
|
129
129
|
tool_name?: string;
|
|
130
130
|
tool_call_id?: string;
|
|
131
|
+
agent_type?: string;
|
|
131
132
|
}
|
|
132
133
|
|
|
133
134
|
export interface CanonicalExecutionFactRecord extends CanonicalSessionRecordBase {
|
|
134
135
|
record_kind: "execution_fact";
|
|
136
|
+
execution_fact_id: string;
|
|
135
137
|
occurred_at: string;
|
|
136
138
|
prompt_id?: string;
|
|
137
139
|
tool_calls_json: Record<string, number>;
|
|
138
140
|
total_tool_calls: number;
|
|
139
|
-
bash_commands_redacted
|
|
141
|
+
bash_commands_redacted?: string[];
|
|
140
142
|
assistant_turns: number;
|
|
141
143
|
errors_encountered: number;
|
|
142
144
|
input_tokens?: number;
|
|
@@ -86,10 +86,12 @@ export function isCanonicalRecord(value: unknown): value is CanonicalRecord {
|
|
|
86
86
|
case "execution_fact":
|
|
87
87
|
return (
|
|
88
88
|
hasSessionScope(value) &&
|
|
89
|
+
hasString(value, "execution_fact_id") &&
|
|
89
90
|
hasString(value, "occurred_at") &&
|
|
90
91
|
isNumberRecord(value.tool_calls_json) &&
|
|
91
92
|
isFiniteNumber(value.total_tool_calls) &&
|
|
92
|
-
|
|
93
|
+
(value.bash_commands_redacted === undefined ||
|
|
94
|
+
isStringArray(value.bash_commands_redacted)) &&
|
|
93
95
|
isFiniteNumber(value.assistant_turns) &&
|
|
94
96
|
isFiniteNumber(value.errors_encountered) &&
|
|
95
97
|
(value.completion_status === undefined ||
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import { completePush } from "../fixtures/complete-push.js";
|
|
3
|
+
import { evidenceOnlyPush } from "../fixtures/evidence-only-push.js";
|
|
4
|
+
import { partialPushNoSessions } from "../fixtures/partial-push-no-sessions.js";
|
|
5
|
+
import { partialPushUnresolvedParents } from "../fixtures/partial-push-unresolved-parents.js";
|
|
6
|
+
import { PushPayloadV2Schema } from "../src/schemas.js";
|
|
7
|
+
|
|
8
|
+
describe("PushPayloadV2Schema compatibility", () => {
|
|
9
|
+
// ---- Fixture validation ----
|
|
10
|
+
|
|
11
|
+
test("complete-push fixture passes validation", () => {
|
|
12
|
+
const result = PushPayloadV2Schema.safeParse(completePush);
|
|
13
|
+
if (!result.success) {
|
|
14
|
+
throw new Error(`Validation failed: ${JSON.stringify(result.error.issues, null, 2)}`);
|
|
15
|
+
}
|
|
16
|
+
expect(result.success).toBe(true);
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
test("partial-push-no-sessions fixture passes validation", () => {
|
|
20
|
+
const result = PushPayloadV2Schema.safeParse(partialPushNoSessions);
|
|
21
|
+
if (!result.success) {
|
|
22
|
+
throw new Error(`Validation failed: ${JSON.stringify(result.error.issues, null, 2)}`);
|
|
23
|
+
}
|
|
24
|
+
expect(result.success).toBe(true);
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
test("partial-push-unresolved-parents fixture passes validation", () => {
|
|
28
|
+
const result = PushPayloadV2Schema.safeParse(partialPushUnresolvedParents);
|
|
29
|
+
if (!result.success) {
|
|
30
|
+
throw new Error(`Validation failed: ${JSON.stringify(result.error.issues, null, 2)}`);
|
|
31
|
+
}
|
|
32
|
+
expect(result.success).toBe(true);
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
test("evidence-only-push fixture passes validation", () => {
|
|
36
|
+
const result = PushPayloadV2Schema.safeParse(evidenceOnlyPush);
|
|
37
|
+
if (!result.success) {
|
|
38
|
+
throw new Error(`Validation failed: ${JSON.stringify(result.error.issues, null, 2)}`);
|
|
39
|
+
}
|
|
40
|
+
expect(result.success).toBe(true);
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
// ---- execution_fact_id is required ----
|
|
44
|
+
|
|
45
|
+
test("execution_fact_id is required on execution facts", () => {
|
|
46
|
+
const badPayload = structuredClone(completePush);
|
|
47
|
+
delete (badPayload.canonical.execution_facts[0] as Record<string, unknown>).execution_fact_id;
|
|
48
|
+
const result = PushPayloadV2Schema.safeParse(badPayload);
|
|
49
|
+
expect(result.success).toBe(false);
|
|
50
|
+
if (!result.success) {
|
|
51
|
+
const paths = result.error.issues.map((i) => i.path.join("."));
|
|
52
|
+
expect(paths).toContain("canonical.execution_facts.0.execution_fact_id");
|
|
53
|
+
}
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
test("execution_fact_id rejects empty string", () => {
|
|
57
|
+
const badPayload = structuredClone(completePush);
|
|
58
|
+
(badPayload.canonical.execution_facts[0] as Record<string, unknown>).execution_fact_id = "";
|
|
59
|
+
const result = PushPayloadV2Schema.safeParse(badPayload);
|
|
60
|
+
expect(result.success).toBe(false);
|
|
61
|
+
if (!result.success) {
|
|
62
|
+
const paths = result.error.issues.map((i) => i.path.join("."));
|
|
63
|
+
expect(paths).toContain("canonical.execution_facts.0.execution_fact_id");
|
|
64
|
+
}
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
// ---- bash_commands_redacted is optional ----
|
|
68
|
+
|
|
69
|
+
test("bash_commands_redacted is optional (omitting it passes)", () => {
|
|
70
|
+
// The unresolved-parents fixture already omits bash_commands_redacted
|
|
71
|
+
const ef = partialPushUnresolvedParents.canonical.execution_facts[0];
|
|
72
|
+
expect(ef.bash_commands_redacted).toBeUndefined();
|
|
73
|
+
|
|
74
|
+
const result = PushPayloadV2Schema.safeParse(partialPushUnresolvedParents);
|
|
75
|
+
expect(result.success).toBe(true);
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
test("bash_commands_redacted accepts an array when present", () => {
|
|
79
|
+
const ef = completePush.canonical.execution_facts[0];
|
|
80
|
+
expect(Array.isArray(ef.bash_commands_redacted)).toBe(true);
|
|
81
|
+
|
|
82
|
+
const result = PushPayloadV2Schema.safeParse(completePush);
|
|
83
|
+
expect(result.success).toBe(true);
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
// ---- Zero-session pushes ----
|
|
87
|
+
|
|
88
|
+
test("zero-session pushes pass validation", () => {
|
|
89
|
+
expect(partialPushNoSessions.canonical.sessions).toHaveLength(0);
|
|
90
|
+
const result = PushPayloadV2Schema.safeParse(partialPushNoSessions);
|
|
91
|
+
expect(result.success).toBe(true);
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
test("evidence-only push with all empty arrays passes", () => {
|
|
95
|
+
expect(evidenceOnlyPush.canonical.sessions).toHaveLength(0);
|
|
96
|
+
expect(evidenceOnlyPush.canonical.prompts).toHaveLength(0);
|
|
97
|
+
expect(evidenceOnlyPush.canonical.skill_invocations).toHaveLength(0);
|
|
98
|
+
expect(evidenceOnlyPush.canonical.execution_facts).toHaveLength(0);
|
|
99
|
+
expect(evidenceOnlyPush.canonical.normalization_runs).toHaveLength(0);
|
|
100
|
+
const result = PushPayloadV2Schema.safeParse(evidenceOnlyPush);
|
|
101
|
+
expect(result.success).toBe(true);
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
// ---- Unresolved parent references ----
|
|
105
|
+
|
|
106
|
+
test("unresolved parent references pass (invocation references session_id not in sessions)", () => {
|
|
107
|
+
const sessionIds = new Set(
|
|
108
|
+
partialPushUnresolvedParents.canonical.sessions.map((s) => s.session_id),
|
|
109
|
+
);
|
|
110
|
+
const invSessionIds = partialPushUnresolvedParents.canonical.skill_invocations.map(
|
|
111
|
+
(i) => i.session_id,
|
|
112
|
+
);
|
|
113
|
+
|
|
114
|
+
// Precondition: arrays must be non-empty for the test to be meaningful
|
|
115
|
+
expect(invSessionIds.length).toBeGreaterThan(0);
|
|
116
|
+
|
|
117
|
+
// Confirm the invocation references a session not in the sessions array
|
|
118
|
+
for (const sid of invSessionIds) {
|
|
119
|
+
expect(sessionIds.has(sid)).toBe(false);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
const result = PushPayloadV2Schema.safeParse(partialPushUnresolvedParents);
|
|
123
|
+
expect(result.success).toBe(true);
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
test("prompts with unresolved session_id pass validation", () => {
|
|
127
|
+
const sessionIds = new Set(
|
|
128
|
+
partialPushUnresolvedParents.canonical.sessions.map((s) => s.session_id),
|
|
129
|
+
);
|
|
130
|
+
const promptSessionIds = partialPushUnresolvedParents.canonical.prompts.map(
|
|
131
|
+
(p) => p.session_id,
|
|
132
|
+
);
|
|
133
|
+
|
|
134
|
+
// Precondition: arrays must be non-empty for the test to be meaningful
|
|
135
|
+
expect(promptSessionIds.length).toBeGreaterThan(0);
|
|
136
|
+
|
|
137
|
+
for (const sid of promptSessionIds) {
|
|
138
|
+
expect(sessionIds.has(sid)).toBe(false);
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
const result = PushPayloadV2Schema.safeParse(partialPushUnresolvedParents);
|
|
142
|
+
expect(result.success).toBe(true);
|
|
143
|
+
});
|
|
144
|
+
});
|
package/packages/ui/package.json
CHANGED
|
@@ -7,6 +7,7 @@ import {
|
|
|
7
7
|
CardTitle,
|
|
8
8
|
} from "../primitives/card"
|
|
9
9
|
import { Tabs, TabsContent, TabsList, TabsTrigger } from "../primitives/tabs"
|
|
10
|
+
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "../primitives/tooltip"
|
|
10
11
|
import type { EvolutionEntry, PendingProposal, UnmatchedQuery } from "../types"
|
|
11
12
|
import { timeAgo } from "../lib/format"
|
|
12
13
|
import {
|
|
@@ -29,10 +30,12 @@ export function ActivityPanel({
|
|
|
29
30
|
evolution,
|
|
30
31
|
pendingProposals,
|
|
31
32
|
unmatchedQueries,
|
|
33
|
+
onSelectProposal,
|
|
32
34
|
}: {
|
|
33
35
|
evolution: EvolutionEntry[]
|
|
34
36
|
pendingProposals: PendingProposal[]
|
|
35
37
|
unmatchedQueries: UnmatchedQuery[]
|
|
38
|
+
onSelectProposal?: (skillName: string, proposalId: string) => void
|
|
36
39
|
}) {
|
|
37
40
|
const hasActivity = evolution.length > 0 || pendingProposals.length > 0 || unmatchedQueries.length > 0
|
|
38
41
|
|
|
@@ -73,35 +76,51 @@ export function ActivityPanel({
|
|
|
73
76
|
: "unmatched"
|
|
74
77
|
}
|
|
75
78
|
>
|
|
76
|
-
<
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
<
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
79
|
+
<TooltipProvider>
|
|
80
|
+
<TabsList className="w-full">
|
|
81
|
+
{pendingProposals.length > 0 && (
|
|
82
|
+
<Tooltip>
|
|
83
|
+
<TooltipTrigger render={<TabsTrigger value="pending" className="flex-1 gap-1.5" />}>
|
|
84
|
+
<GitPullRequestArrowIcon className="size-3.5" />
|
|
85
|
+
<Badge variant="secondary" className="h-4 px-1 text-[10px]">
|
|
86
|
+
{pendingProposals.length}
|
|
87
|
+
</Badge>
|
|
88
|
+
</TooltipTrigger>
|
|
89
|
+
<TooltipContent>Pending proposals</TooltipContent>
|
|
90
|
+
</Tooltip>
|
|
91
|
+
)}
|
|
92
|
+
<Tooltip>
|
|
93
|
+
<TooltipTrigger render={<TabsTrigger value="timeline" className="flex-1" />}>
|
|
94
|
+
<ClockIcon className="size-3.5" />
|
|
95
|
+
</TooltipTrigger>
|
|
96
|
+
<TooltipContent>Timeline</TooltipContent>
|
|
97
|
+
</Tooltip>
|
|
98
|
+
{unmatchedQueries.length > 0 && (
|
|
99
|
+
<Tooltip>
|
|
100
|
+
<TooltipTrigger render={<TabsTrigger value="unmatched" className="flex-1 gap-1.5" />}>
|
|
101
|
+
<SearchXIcon className="size-3.5" />
|
|
102
|
+
<Badge variant="destructive" className="h-4 px-1 text-[10px]">
|
|
103
|
+
{unmatchedQueries.length}
|
|
104
|
+
</Badge>
|
|
105
|
+
</TooltipTrigger>
|
|
106
|
+
<TooltipContent>Unmatched queries</TooltipContent>
|
|
107
|
+
</Tooltip>
|
|
108
|
+
)}
|
|
109
|
+
</TabsList>
|
|
110
|
+
</TooltipProvider>
|
|
100
111
|
|
|
101
112
|
{pendingProposals.length > 0 && (
|
|
102
113
|
<TabsContent value="pending" className="mt-4 space-y-3">
|
|
103
114
|
{pendingProposals.slice(0, 10).map((p) => (
|
|
104
|
-
<
|
|
115
|
+
<button
|
|
116
|
+
key={p.proposal_id}
|
|
117
|
+
type="button"
|
|
118
|
+
onClick={() => {
|
|
119
|
+
if (p.skill_name && onSelectProposal) onSelectProposal(p.skill_name, p.proposal_id)
|
|
120
|
+
}}
|
|
121
|
+
disabled={!p.skill_name || !onSelectProposal}
|
|
122
|
+
className="flex w-full gap-3 rounded-md p-1.5 text-left transition-colors enabled:hover:bg-accent/40 disabled:cursor-default"
|
|
123
|
+
>
|
|
105
124
|
<div className="mt-1 size-2 shrink-0 rounded-full bg-amber-400" />
|
|
106
125
|
<div className="flex-1 min-w-0 space-y-1">
|
|
107
126
|
<div className="flex items-center gap-2">
|
|
@@ -113,15 +132,28 @@ export function ActivityPanel({
|
|
|
113
132
|
</span>
|
|
114
133
|
</div>
|
|
115
134
|
<p className="text-xs text-muted-foreground line-clamp-2">{p.details}</p>
|
|
135
|
+
{p.skill_name && (
|
|
136
|
+
<span className="text-[10px] text-muted-foreground/60 font-mono">
|
|
137
|
+
{p.skill_name} · #{p.proposal_id.slice(0, 8)}
|
|
138
|
+
</span>
|
|
139
|
+
)}
|
|
116
140
|
</div>
|
|
117
|
-
</
|
|
141
|
+
</button>
|
|
118
142
|
))}
|
|
119
143
|
</TabsContent>
|
|
120
144
|
)}
|
|
121
145
|
|
|
122
146
|
<TabsContent value="timeline" className="mt-4 space-y-3">
|
|
123
147
|
{evolution.slice(0, 30).map((entry, i) => (
|
|
124
|
-
<
|
|
148
|
+
<button
|
|
149
|
+
key={`${entry.proposal_id}-${i}`}
|
|
150
|
+
type="button"
|
|
151
|
+
onClick={() => {
|
|
152
|
+
if (entry.skill_name && onSelectProposal) onSelectProposal(entry.skill_name, entry.proposal_id)
|
|
153
|
+
}}
|
|
154
|
+
disabled={!entry.skill_name || !onSelectProposal}
|
|
155
|
+
className="flex w-full gap-3 rounded-md p-1.5 text-left transition-colors enabled:hover:bg-accent/40 disabled:cursor-default"
|
|
156
|
+
>
|
|
125
157
|
<div className={`mt-1 size-2 shrink-0 rounded-full ${
|
|
126
158
|
entry.action === "deployed" ? "bg-emerald-500"
|
|
127
159
|
: entry.action === "rejected" || entry.action === "rolled_back" ? "bg-red-500"
|
|
@@ -139,10 +171,10 @@ export function ActivityPanel({
|
|
|
139
171
|
</div>
|
|
140
172
|
<p className="text-xs text-muted-foreground line-clamp-2">{entry.details}</p>
|
|
141
173
|
<span className="text-[10px] text-muted-foreground/60 font-mono">
|
|
142
|
-
#{entry.proposal_id.slice(0, 8)}
|
|
174
|
+
{entry.skill_name ? `${entry.skill_name} · ` : ""}#{entry.proposal_id.slice(0, 8)}
|
|
143
175
|
</span>
|
|
144
176
|
</div>
|
|
145
|
-
</
|
|
177
|
+
</button>
|
|
146
178
|
))}
|
|
147
179
|
{evolution.length === 0 && (
|
|
148
180
|
<p className="text-sm text-muted-foreground text-center py-4">No timeline events</p>
|
|
@@ -25,6 +25,7 @@ interface SectionCardsProps {
|
|
|
25
25
|
sessionsCount: number
|
|
26
26
|
pendingCount: number
|
|
27
27
|
evidenceCount: number
|
|
28
|
+
hasEvolution?: boolean
|
|
28
29
|
}
|
|
29
30
|
|
|
30
31
|
export function SectionCards({
|
|
@@ -34,6 +35,7 @@ export function SectionCards({
|
|
|
34
35
|
sessionsCount,
|
|
35
36
|
pendingCount,
|
|
36
37
|
evidenceCount,
|
|
38
|
+
hasEvolution = true,
|
|
37
39
|
}: SectionCardsProps) {
|
|
38
40
|
const passRateStr = avgPassRate !== null ? `${Math.round(avgPassRate * 100)}%` : "--"
|
|
39
41
|
const passRateGood = avgPassRate !== null && avgPassRate >= 0.7
|
|
@@ -63,14 +65,14 @@ export function SectionCards({
|
|
|
63
65
|
<CardHeader>
|
|
64
66
|
<CardDescription className="flex items-center gap-1.5">
|
|
65
67
|
<FlaskConicalIcon className="size-3.5" />
|
|
66
|
-
Avg
|
|
67
|
-
<InfoTip text="Average percentage of
|
|
68
|
+
Avg Trigger Rate
|
|
69
|
+
<InfoTip text="Average percentage of skill checks that resulted in a trigger across all graded skills (5+ checks). Run selftune evolve to improve this." />
|
|
68
70
|
</CardDescription>
|
|
69
71
|
<CardTitle className={`text-2xl font-semibold tabular-nums @[250px]/card:text-3xl ${!passRateGood && avgPassRate !== null ? "text-red-600" : ""}`}>
|
|
70
72
|
{passRateStr}
|
|
71
73
|
</CardTitle>
|
|
72
|
-
|
|
73
|
-
|
|
74
|
+
<CardAction>
|
|
75
|
+
{avgPassRate !== null ? (
|
|
74
76
|
<Badge variant={passRateGood ? "outline" : "destructive"}>
|
|
75
77
|
{passRateGood ? (
|
|
76
78
|
<TrendingUpIcon className="size-3" />
|
|
@@ -79,8 +81,12 @@ export function SectionCards({
|
|
|
79
81
|
)}
|
|
80
82
|
{passRateStr}
|
|
81
83
|
</Badge>
|
|
82
|
-
|
|
83
|
-
|
|
84
|
+
) : (
|
|
85
|
+
<Badge variant="secondary" className="text-[10px]">
|
|
86
|
+
needs 5+ checks
|
|
87
|
+
</Badge>
|
|
88
|
+
)}
|
|
89
|
+
</CardAction>
|
|
84
90
|
</CardHeader>
|
|
85
91
|
</Card>
|
|
86
92
|
|
|
@@ -123,18 +129,22 @@ export function SectionCards({
|
|
|
123
129
|
<CardDescription className="flex items-center gap-1.5">
|
|
124
130
|
<AlertTriangleIcon className="size-3.5" />
|
|
125
131
|
Pending Proposals
|
|
126
|
-
<InfoTip text="Evolution proposals that have been generated but not yet validated or deployed" />
|
|
132
|
+
<InfoTip text="Evolution proposals that have been generated but not yet validated or deployed. Requires running selftune evolve." />
|
|
127
133
|
</CardDescription>
|
|
128
134
|
<CardTitle className="text-2xl font-semibold tabular-nums @[250px]/card:text-3xl">
|
|
129
|
-
{pendingCount}
|
|
135
|
+
{hasEvolution ? pendingCount : "--"}
|
|
130
136
|
</CardTitle>
|
|
131
|
-
|
|
132
|
-
|
|
137
|
+
<CardAction>
|
|
138
|
+
{!hasEvolution ? (
|
|
139
|
+
<Badge variant="secondary" className="text-[10px]">
|
|
140
|
+
no evolution runs yet
|
|
141
|
+
</Badge>
|
|
142
|
+
) : pendingCount > 0 ? (
|
|
133
143
|
<Badge variant="secondary">
|
|
134
144
|
awaiting review
|
|
135
145
|
</Badge>
|
|
136
|
-
|
|
137
|
-
|
|
146
|
+
) : null}
|
|
147
|
+
</CardAction>
|
|
138
148
|
</CardHeader>
|
|
139
149
|
</Card>
|
|
140
150
|
|
|
@@ -143,11 +153,18 @@ export function SectionCards({
|
|
|
143
153
|
<CardDescription className="flex items-center gap-1.5">
|
|
144
154
|
<EyeIcon className="size-3.5" />
|
|
145
155
|
Total Evidence
|
|
146
|
-
<InfoTip text="Number of evidence entries documenting skill changes with before/after validation results" />
|
|
156
|
+
<InfoTip text="Number of evidence entries documenting skill changes with before/after validation results. Requires running selftune evolve." />
|
|
147
157
|
</CardDescription>
|
|
148
158
|
<CardTitle className="text-2xl font-semibold tabular-nums @[250px]/card:text-3xl">
|
|
149
|
-
{evidenceCount}
|
|
159
|
+
{hasEvolution ? evidenceCount : "--"}
|
|
150
160
|
</CardTitle>
|
|
161
|
+
{!hasEvolution && (
|
|
162
|
+
<CardAction>
|
|
163
|
+
<Badge variant="secondary" className="text-[10px]">
|
|
164
|
+
no evolution runs yet
|
|
165
|
+
</Badge>
|
|
166
|
+
</CardAction>
|
|
167
|
+
)}
|
|
151
168
|
</CardHeader>
|
|
152
169
|
</Card>
|
|
153
170
|
</div>
|