selftune 0.2.6 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. package/README.md +1 -0
  2. package/apps/local-dashboard/dist/assets/index-Bs3Y4ixf.css +1 -0
  3. package/apps/local-dashboard/dist/assets/index-C4UYGWKr.js +15 -0
  4. package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +60 -0
  5. package/apps/local-dashboard/dist/assets/{vendor-table-B7VF2Ipl.js → vendor-table-dK1QMLq9.js} +1 -1
  6. package/apps/local-dashboard/dist/assets/{vendor-ui-r2k_Ku_V.js → vendor-ui-CO2mrx6e.js} +60 -65
  7. package/apps/local-dashboard/dist/index.html +5 -5
  8. package/cli/selftune/activation-rules.ts +57 -18
  9. package/cli/selftune/agent-guidance.ts +96 -0
  10. package/cli/selftune/alpha-identity.ts +156 -0
  11. package/cli/selftune/alpha-upload/build-payloads.ts +151 -0
  12. package/cli/selftune/alpha-upload/client.ts +113 -0
  13. package/cli/selftune/alpha-upload/flush.ts +191 -0
  14. package/cli/selftune/alpha-upload/index.ts +194 -0
  15. package/cli/selftune/alpha-upload/queue.ts +252 -0
  16. package/cli/selftune/alpha-upload/stage-canonical.ts +251 -0
  17. package/cli/selftune/alpha-upload-contract.ts +52 -0
  18. package/cli/selftune/auth/device-code.ts +110 -0
  19. package/cli/selftune/auto-update.ts +130 -0
  20. package/cli/selftune/badge/badge.ts +19 -9
  21. package/cli/selftune/canonical-export.ts +16 -3
  22. package/cli/selftune/constants.ts +28 -8
  23. package/cli/selftune/contribute/bundle.ts +33 -5
  24. package/cli/selftune/dashboard-contract.ts +32 -1
  25. package/cli/selftune/dashboard-server.ts +215 -693
  26. package/cli/selftune/dashboard.ts +1 -1
  27. package/cli/selftune/eval/baseline.ts +11 -7
  28. package/cli/selftune/eval/hooks-to-evals.ts +39 -15
  29. package/cli/selftune/eval/synthetic-evals.ts +54 -1
  30. package/cli/selftune/evolution/audit.ts +24 -19
  31. package/cli/selftune/evolution/constitutional.ts +176 -0
  32. package/cli/selftune/evolution/evidence.ts +18 -13
  33. package/cli/selftune/evolution/evolve-body.ts +104 -7
  34. package/cli/selftune/evolution/evolve.ts +195 -22
  35. package/cli/selftune/evolution/propose-body.ts +18 -1
  36. package/cli/selftune/evolution/propose-description.ts +27 -2
  37. package/cli/selftune/evolution/rollback.ts +11 -15
  38. package/cli/selftune/export.ts +84 -0
  39. package/cli/selftune/grading/auto-grade.ts +14 -4
  40. package/cli/selftune/grading/grade-session.ts +17 -6
  41. package/cli/selftune/hooks/auto-activate.ts +5 -0
  42. package/cli/selftune/hooks/evolution-guard.ts +25 -11
  43. package/cli/selftune/hooks/prompt-log.ts +23 -9
  44. package/cli/selftune/hooks/session-stop.ts +78 -15
  45. package/cli/selftune/hooks/skill-eval.ts +189 -10
  46. package/cli/selftune/index.ts +274 -2
  47. package/cli/selftune/ingestors/claude-replay.ts +48 -21
  48. package/cli/selftune/init.ts +260 -49
  49. package/cli/selftune/last.ts +7 -7
  50. package/cli/selftune/localdb/db.ts +90 -10
  51. package/cli/selftune/localdb/direct-write.ts +573 -0
  52. package/cli/selftune/localdb/materialize.ts +296 -42
  53. package/cli/selftune/localdb/queries.ts +482 -32
  54. package/cli/selftune/localdb/schema.ts +153 -1
  55. package/cli/selftune/monitoring/watch.ts +27 -8
  56. package/cli/selftune/normalization.ts +88 -15
  57. package/cli/selftune/observability.ts +257 -5
  58. package/cli/selftune/orchestrate.ts +176 -53
  59. package/cli/selftune/quickstart.ts +34 -10
  60. package/cli/selftune/repair/skill-usage.ts +15 -2
  61. package/cli/selftune/routes/actions.ts +77 -0
  62. package/cli/selftune/routes/badge.ts +66 -0
  63. package/cli/selftune/routes/doctor.ts +12 -0
  64. package/cli/selftune/routes/index.ts +14 -0
  65. package/cli/selftune/routes/orchestrate-runs.ts +13 -0
  66. package/cli/selftune/routes/overview.ts +14 -0
  67. package/cli/selftune/routes/report.ts +293 -0
  68. package/cli/selftune/routes/skill-report.ts +230 -0
  69. package/cli/selftune/status.ts +203 -7
  70. package/cli/selftune/sync.ts +14 -1
  71. package/cli/selftune/types.ts +52 -2
  72. package/cli/selftune/utils/jsonl.ts +58 -1
  73. package/cli/selftune/utils/selftune-meta.ts +38 -0
  74. package/cli/selftune/utils/skill-log.ts +30 -4
  75. package/cli/selftune/utils/transcript.ts +15 -0
  76. package/cli/selftune/workflows/workflows.ts +7 -6
  77. package/package.json +11 -6
  78. package/packages/telemetry-contract/fixtures/complete-push.ts +184 -0
  79. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +58 -0
  80. package/packages/telemetry-contract/fixtures/golden.json +1 -0
  81. package/packages/telemetry-contract/fixtures/index.ts +4 -0
  82. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +40 -0
  83. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +79 -0
  84. package/packages/telemetry-contract/package.json +6 -1
  85. package/packages/telemetry-contract/src/schemas.ts +196 -0
  86. package/packages/telemetry-contract/src/types.ts +3 -1
  87. package/packages/telemetry-contract/src/validators.ts +3 -1
  88. package/packages/telemetry-contract/tests/compatibility.test.ts +144 -0
  89. package/packages/ui/package.json +4 -0
  90. package/packages/ui/src/components/ActivityTimeline.tsx +61 -29
  91. package/packages/ui/src/components/section-cards.tsx +31 -14
  92. package/packages/ui/src/types.ts +1 -0
  93. package/skill/SKILL.md +214 -174
  94. package/skill/Workflows/AlphaUpload.md +45 -0
  95. package/skill/Workflows/Baseline.md +18 -12
  96. package/skill/Workflows/Composability.md +3 -3
  97. package/skill/Workflows/Dashboard.md +39 -91
  98. package/skill/Workflows/Doctor.md +93 -66
  99. package/skill/Workflows/Evals.md +49 -40
  100. package/skill/Workflows/Evolve.md +76 -28
  101. package/skill/Workflows/EvolveBody.md +37 -38
  102. package/skill/Workflows/Initialize.md +145 -26
  103. package/skill/Workflows/Orchestrate.md +11 -2
  104. package/skill/Workflows/Sync.md +23 -0
  105. package/skill/Workflows/Watch.md +2 -5
  106. package/skill/agents/diagnosis-analyst.md +163 -0
  107. package/skill/agents/evolution-reviewer.md +149 -0
  108. package/skill/agents/integration-guide.md +154 -0
  109. package/skill/agents/pattern-analyst.md +149 -0
  110. package/skill/assets/multi-skill-settings.json +1 -1
  111. package/skill/assets/single-skill-settings.json +1 -1
  112. package/skill/references/interactive-config.md +39 -0
  113. package/skill/references/invocation-taxonomy.md +34 -0
  114. package/skill/references/logs.md +15 -1
  115. package/skill/references/setup-patterns.md +3 -3
  116. package/skill/settings_snippet.json +1 -1
  117. package/apps/local-dashboard/dist/assets/index-C75H1Q3n.css +0 -1
  118. package/apps/local-dashboard/dist/assets/index-axE4kz3Q.js +0 -15
  119. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +0 -60
@@ -0,0 +1,196 @@
1
+ import { z } from "zod";
2
+ import {
3
+ CANONICAL_CAPTURE_MODES,
4
+ CANONICAL_COMPLETION_STATUSES,
5
+ CANONICAL_INVOCATION_MODES,
6
+ CANONICAL_PLATFORMS,
7
+ CANONICAL_PROMPT_KINDS,
8
+ CANONICAL_RECORD_KINDS,
9
+ CANONICAL_SCHEMA_VERSION,
10
+ CANONICAL_SOURCE_SESSION_KINDS,
11
+ } from "./types.js";
12
+
13
+ export const canonicalPlatformSchema = z.enum(CANONICAL_PLATFORMS); // Zod enums below are each built from a canonical constant list imported from ./types.js
14
+ export const captureModeSchema = z.enum(CANONICAL_CAPTURE_MODES);
15
+ export const sourceSessionKindSchema = z.enum(CANONICAL_SOURCE_SESSION_KINDS);
16
+ export const promptKindSchema = z.enum(CANONICAL_PROMPT_KINDS);
17
+ export const invocationModeSchema = z.enum(CANONICAL_INVOCATION_MODES);
18
+ export const completionStatusSchema = z.enum(CANONICAL_COMPLETION_STATUSES);
19
+ export const recordKindSchema = z.enum(CANONICAL_RECORD_KINDS); // used as the base `record_kind` type; concrete record schemas narrow it to a literal
20
+
21
+ export const rawSourceRefSchema = z.object({ // Every field is optional, so an empty object `{}` validates. NOTE(review): presumably locates the raw log entry a record was derived from; confirm against the normalizer.
22
+ path: z.string().optional(),
23
+ line: z.number().int().nonnegative().optional(), // integer >= 0 when present
24
+ event_type: z.string().optional(),
25
+ raw_id: z.string().optional(),
26
+ metadata: z.record(z.string(), z.unknown()).optional(), // string-keyed bag; values are not validated
27
+ });
28
+
29
+ export const canonicalRecordBaseSchema = z.object({ // Fields common to every schema in this file that is built with `.extend()` on this base
30
+ record_kind: recordKindSchema, // any canonical kind here; extending schemas override this with a single literal
31
+ schema_version: z.literal(CANONICAL_SCHEMA_VERSION), // must equal the constant exactly; other versions are rejected
32
+ normalizer_version: z.string().min(1),
33
+ normalized_at: z.string().datetime(), // must pass Zod's ISO 8601 datetime string check
34
+ platform: canonicalPlatformSchema,
35
+ capture_mode: captureModeSchema,
36
+ raw_source_ref: rawSourceRefSchema, // the object itself is required here, even though all of its fields are optional
37
+ });
38
+
39
+ export const canonicalSessionRecordBaseSchema = canonicalRecordBaseSchema.extend({ // Base for session-scoped records: adds the session kind and a required session id to the common fields
40
+ source_session_kind: sourceSessionKindSchema,
41
+ session_id: z.string().min(1), // non-empty; this schema does not check that the id refers to a known session
42
+ });
43
+
44
+ export const CanonicalSessionRecordSchema = canonicalSessionRecordBaseSchema.extend({ // Session record: only the inherited base fields plus the literal record_kind are required
45
+ record_kind: z.literal("session"), // narrows the base enum to this one kind
46
+ external_session_id: z.string().optional(),
47
+ parent_session_id: z.string().optional(),
48
+ agent_id: z.string().optional(),
49
+ agent_type: z.string().optional(),
50
+ agent_cli: z.string().optional(),
51
+ session_key: z.string().optional(),
52
+ channel: z.string().optional(),
53
+ workspace_path: z.string().optional(),
54
+ repo_root: z.string().optional(),
55
+ repo_remote: z.string().optional(),
56
+ branch: z.string().optional(),
57
+ commit_sha: z.string().optional(),
58
+ permission_mode: z.string().optional(),
59
+ approval_policy: z.string().optional(),
60
+ sandbox_policy: z.string().optional(),
61
+ provider: z.string().optional(),
62
+ model: z.string().optional(),
63
+ started_at: z.string().datetime().optional(), // datetime-checked when present; ordering against ended_at is not validated here
64
+ ended_at: z.string().datetime().optional(),
65
+ completion_status: completionStatusSchema.optional(),
66
+ end_reason: z.string().optional(),
67
+ });
68
+
69
+ export const CanonicalPromptRecordSchema = canonicalSessionRecordBaseSchema.extend({ // Prompt record: session-scoped base fields plus the prompt's id, time, text and classification
70
+ record_kind: z.literal("prompt"),
71
+ prompt_id: z.string().min(1),
72
+ occurred_at: z.string().datetime(),
73
+ prompt_text: z.string().min(1), // empty prompt text is rejected
74
+ prompt_hash: z.string().optional(),
75
+ prompt_kind: promptKindSchema,
76
+ is_actionable: z.boolean(),
77
+ prompt_index: z.number().int().nonnegative().optional(), // integer >= 0 when present
78
+ parent_prompt_id: z.string().optional(),
79
+ source_message_id: z.string().optional(),
80
+ });
81
+
82
+ export const CanonicalSkillInvocationRecordSchema = canonicalSessionRecordBaseSchema.extend({ // Skill-invocation record: session-scoped base fields plus which skill was involved and how
83
+ record_kind: z.literal("skill_invocation"),
84
+ skill_invocation_id: z.string().min(1),
85
+ occurred_at: z.string().datetime(),
86
+ matched_prompt_id: z.string().min(1).optional(), // may be omitted, but must be non-empty if supplied
87
+ skill_name: z.string().min(1),
88
+ skill_path: z.string().optional(),
89
+ skill_version_hash: z.string().optional(),
90
+ invocation_mode: invocationModeSchema,
91
+ triggered: z.boolean(),
92
+ confidence: z.number().min(0).max(1), // required, inclusive range [0, 1]
93
+ tool_name: z.string().optional(),
94
+ tool_call_id: z.string().optional(),
95
+ agent_type: z.string().optional(),
96
+ });
97
+
98
+ export const CanonicalExecutionFactRecordSchema = canonicalSessionRecordBaseSchema.extend({ // Execution-fact record: session-scoped base fields plus counters for one execution
99
+ record_kind: z.literal("execution_fact"),
100
+ execution_fact_id: z.string().min(1), // required and non-empty
101
+ occurred_at: z.string().datetime(),
102
+ prompt_id: z.string().optional(),
103
+ tool_calls_json: z.record(z.string(), z.number().finite()), // string keys to finite numbers. NOTE(review): presumably tool name -> call count; values are not required to be integers
104
+ total_tool_calls: z.number().int().nonnegative(),
105
+ bash_commands_redacted: z.array(z.string()).optional(), // may be omitted entirely
106
+ assistant_turns: z.number().int().nonnegative(),
107
+ errors_encountered: z.number().int().nonnegative(),
108
+ input_tokens: z.number().int().nonnegative().optional(),
109
+ output_tokens: z.number().int().nonnegative().optional(),
110
+ duration_ms: z.number().nonnegative().optional(), // no .int() here, so fractional milliseconds are accepted
111
+ completion_status: completionStatusSchema.optional(),
112
+ end_reason: z.string().optional(),
113
+ });
114
+
115
+ export const CanonicalNormalizationRunRecordSchema = canonicalRecordBaseSchema.extend({ // Extends the plain record base (not the session base), so it carries no session_id or source_session_kind
116
+ record_kind: z.literal("normalization_run"),
117
+ run_id: z.string().min(1),
118
+ run_at: z.string().datetime(),
119
+ raw_records_seen: z.number().int().nonnegative(),
120
+ canonical_records_written: z.number().int().nonnegative(),
121
+ repair_applied: z.boolean(),
122
+ });
123
+
124
+ export const CanonicalEvolutionEvidenceRecordSchema = z.object({ // Standalone object: does not extend the canonical record base, so no record_kind/schema_version/platform fields
125
+ evidence_id: z.string().min(1),
126
+ skill_name: z.string().min(1),
127
+ proposal_id: z.string().optional(),
128
+ target: z.string().min(1), // free-form non-empty string; not restricted to an enum here
129
+ stage: z.string().min(1), // free-form non-empty string; not restricted to an enum here
130
+ rationale: z.string().optional(),
131
+ confidence: z.number().min(0).max(1).optional(), // inclusive range [0, 1] when present
132
+ original_text: z.string().optional(),
133
+ proposed_text: z.string().optional(),
134
+ eval_set_json: z.unknown().optional(), // accepted as-is; contents are not validated
135
+ validation_json: z.unknown().optional(), // accepted as-is; contents are not validated
136
+ raw_source_ref: rawSourceRefSchema.optional(), // optional here, unlike in the canonical record base
137
+ });
138
+
139
+ export const OrchestrateRunSkillActionSchema = z.object({ // One per-skill entry inside PushOrchestrateRunRecordSchema.skill_actions
140
+ skill: z.string().min(1),
141
+ action: z.enum(["evolve", "watch", "skip"]),
142
+ reason: z.string(), // required, but an empty string is accepted
143
+ deployed: z.boolean().optional(),
144
+ rolledBack: z.boolean().optional(), // camelCase key, unlike the snake_case keys around it
145
+ alert: z.string().nullable().optional(), // may be a string, null, or omitted
146
+ elapsed_ms: z.number().nonnegative().optional(), // fractional values accepted (no .int())
147
+ llm_calls: z.number().int().nonnegative().optional(),
148
+ });
149
+
150
+ export const PushOrchestrateRunRecordSchema = z.object({ // Summary of one orchestrate run: run-level counters plus the list of per-skill actions
151
+ run_id: z.string().min(1),
152
+ timestamp: z.string().datetime(),
153
+ elapsed_ms: z.number().int().nonnegative(), // must be an integer here, whereas the per-skill elapsed_ms allows fractions
154
+ dry_run: z.boolean(),
155
+ approval_mode: z.enum(["auto", "review"]),
156
+ total_skills: z.number().int().nonnegative(),
157
+ evaluated: z.number().int().nonnegative(),
158
+ evolved: z.number().int().nonnegative(),
159
+ deployed: z.number().int().nonnegative(),
160
+ watched: z.number().int().nonnegative(),
161
+ skipped: z.number().int().nonnegative(),
162
+ skill_actions: z.array(OrchestrateRunSkillActionSchema), // required array; may be empty. Counters above are not cross-checked against it
163
+ });
164
+
165
+ export const PushPayloadV2Schema = z.object({ // Top-level push payload: envelope metadata plus record arrays grouped by kind under `canonical`
166
+ schema_version: z.literal("2.0"), // hard-coded string literal, separate from CANONICAL_SCHEMA_VERSION used on individual records
167
+ client_version: z.string().min(1),
168
+ // Queue-generated push IDs are typically UUIDs, but the wire contract only
169
+ // requires a stable non-empty idempotency key.
170
+ push_id: z.string().min(1),
171
+ normalizer_version: z.string().min(1),
172
+ canonical: z.object({
173
+ sessions: z.array(CanonicalSessionRecordSchema).min(0), // .min(0) never rejects anything: the key is required but the array may be empty
174
+ prompts: z.array(CanonicalPromptRecordSchema).min(0),
175
+ skill_invocations: z.array(CanonicalSkillInvocationRecordSchema).min(0),
176
+ execution_facts: z.array(CanonicalExecutionFactRecordSchema).min(0),
177
+ normalization_runs: z.array(CanonicalNormalizationRunRecordSchema).min(0),
178
+ evolution_evidence: z.array(CanonicalEvolutionEvidenceRecordSchema).optional(), // key may be omitted entirely
179
+ orchestrate_runs: z.array(PushOrchestrateRunRecordSchema).optional(), // key may be omitted entirely
180
+ }),
181
+ });
182
+
183
+ export type PushPayloadV2 = z.infer<typeof PushPayloadV2Schema>; // All types below are derived from the Zod schemas via z.infer rather than hand-written
184
+ export type ZodCanonicalSessionRecord = z.infer<typeof CanonicalSessionRecordSchema>;
185
+ export type ZodCanonicalPromptRecord = z.infer<typeof CanonicalPromptRecordSchema>;
186
+ export type ZodCanonicalSkillInvocationRecord = z.infer<
187
+ typeof CanonicalSkillInvocationRecordSchema
188
+ >;
189
+ export type ZodCanonicalExecutionFactRecord = z.infer<typeof CanonicalExecutionFactRecordSchema>;
190
+ export type ZodCanonicalNormalizationRunRecord = z.infer<
191
+ typeof CanonicalNormalizationRunRecordSchema
192
+ >;
193
+ export type ZodCanonicalEvolutionEvidenceRecord = z.infer<
194
+ typeof CanonicalEvolutionEvidenceRecordSchema
195
+ >;
196
+ export type ZodPushOrchestrateRunRecord = z.infer<typeof PushOrchestrateRunRecordSchema>;
@@ -128,15 +128,17 @@ export interface CanonicalSkillInvocationRecord extends CanonicalSessionRecordBa
128
128
  confidence: number;
129
129
  tool_name?: string;
130
130
  tool_call_id?: string;
131
+ agent_type?: string;
131
132
  }
132
133
 
133
134
  export interface CanonicalExecutionFactRecord extends CanonicalSessionRecordBase {
134
135
  record_kind: "execution_fact";
136
+ execution_fact_id: string;
135
137
  occurred_at: string;
136
138
  prompt_id?: string;
137
139
  tool_calls_json: Record<string, number>;
138
140
  total_tool_calls: number;
139
- bash_commands_redacted: string[];
141
+ bash_commands_redacted?: string[];
140
142
  assistant_turns: number;
141
143
  errors_encountered: number;
142
144
  input_tokens?: number;
@@ -86,10 +86,12 @@ export function isCanonicalRecord(value: unknown): value is CanonicalRecord {
86
86
  case "execution_fact":
87
87
  return (
88
88
  hasSessionScope(value) &&
89
+ hasString(value, "execution_fact_id") &&
89
90
  hasString(value, "occurred_at") &&
90
91
  isNumberRecord(value.tool_calls_json) &&
91
92
  isFiniteNumber(value.total_tool_calls) &&
92
- isStringArray(value.bash_commands_redacted) &&
93
+ (value.bash_commands_redacted === undefined ||
94
+ isStringArray(value.bash_commands_redacted)) &&
93
95
  isFiniteNumber(value.assistant_turns) &&
94
96
  isFiniteNumber(value.errors_encountered) &&
95
97
  (value.completion_status === undefined ||
@@ -0,0 +1,144 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import { completePush } from "../fixtures/complete-push.js";
3
+ import { evidenceOnlyPush } from "../fixtures/evidence-only-push.js";
4
+ import { partialPushNoSessions } from "../fixtures/partial-push-no-sessions.js";
5
+ import { partialPushUnresolvedParents } from "../fixtures/partial-push-unresolved-parents.js";
6
+ import { PushPayloadV2Schema } from "../src/schemas.js";
7
+
8
+ describe("PushPayloadV2Schema compatibility", () => { // Runs each imported fixture payload through PushPayloadV2Schema.safeParse and pins specific contract rules
9
+ // ---- Fixture validation ----
10
+
11
+ test("complete-push fixture passes validation", () => {
12
+ const result = PushPayloadV2Schema.safeParse(completePush);
13
+ if (!result.success) {
14
+ throw new Error(`Validation failed: ${JSON.stringify(result.error.issues, null, 2)}`); // throw with the serialized Zod issues so a failure shows which fields were rejected
15
+ }
16
+ expect(result.success).toBe(true); // always true at this point, since the failure case threw above
17
+ });
18
+
19
+ test("partial-push-no-sessions fixture passes validation", () => {
20
+ const result = PushPayloadV2Schema.safeParse(partialPushNoSessions);
21
+ if (!result.success) {
22
+ throw new Error(`Validation failed: ${JSON.stringify(result.error.issues, null, 2)}`);
23
+ }
24
+ expect(result.success).toBe(true);
25
+ });
26
+
27
+ test("partial-push-unresolved-parents fixture passes validation", () => {
28
+ const result = PushPayloadV2Schema.safeParse(partialPushUnresolvedParents);
29
+ if (!result.success) {
30
+ throw new Error(`Validation failed: ${JSON.stringify(result.error.issues, null, 2)}`);
31
+ }
32
+ expect(result.success).toBe(true);
33
+ });
34
+
35
+ test("evidence-only-push fixture passes validation", () => {
36
+ const result = PushPayloadV2Schema.safeParse(evidenceOnlyPush);
37
+ if (!result.success) {
38
+ throw new Error(`Validation failed: ${JSON.stringify(result.error.issues, null, 2)}`);
39
+ }
40
+ expect(result.success).toBe(true);
41
+ });
42
+
43
+ // ---- execution_fact_id is required ----
44
+
45
+ test("execution_fact_id is required on execution facts", () => {
46
+ const badPayload = structuredClone(completePush); // deep copy so the shared fixture is not mutated for other tests
47
+ delete (badPayload.canonical.execution_facts[0] as Record<string, unknown>).execution_fact_id; // cast to a loose record, presumably so TypeScript permits deleting a required field
48
+ const result = PushPayloadV2Schema.safeParse(badPayload);
49
+ expect(result.success).toBe(false);
50
+ if (!result.success) {
51
+ const paths = result.error.issues.map((i) => i.path.join(".")); // flatten each issue path to a dotted string for matching
52
+ expect(paths).toContain("canonical.execution_facts.0.execution_fact_id");
53
+ }
54
+ });
55
+
56
+ test("execution_fact_id rejects empty string", () => {
57
+ const badPayload = structuredClone(completePush);
58
+ (badPayload.canonical.execution_facts[0] as Record<string, unknown>).execution_fact_id = ""; // exercises the .min(1) rule rather than the missing-key case
59
+ const result = PushPayloadV2Schema.safeParse(badPayload);
60
+ expect(result.success).toBe(false);
61
+ if (!result.success) {
62
+ const paths = result.error.issues.map((i) => i.path.join("."));
63
+ expect(paths).toContain("canonical.execution_facts.0.execution_fact_id");
64
+ }
65
+ });
66
+
67
+ // ---- bash_commands_redacted is optional ----
68
+
69
+ test("bash_commands_redacted is optional (omitting it passes)", () => {
70
+ // The unresolved-parents fixture already omits bash_commands_redacted
71
+ const ef = partialPushUnresolvedParents.canonical.execution_facts[0];
72
+ expect(ef.bash_commands_redacted).toBeUndefined(); // precondition on the fixture: guards against it later gaining the field
73
+
74
+ const result = PushPayloadV2Schema.safeParse(partialPushUnresolvedParents);
75
+ expect(result.success).toBe(true);
76
+ });
77
+
78
+ test("bash_commands_redacted accepts an array when present", () => {
79
+ const ef = completePush.canonical.execution_facts[0];
80
+ expect(Array.isArray(ef.bash_commands_redacted)).toBe(true); // precondition on the fixture: the field must actually be present
81
+
82
+ const result = PushPayloadV2Schema.safeParse(completePush);
83
+ expect(result.success).toBe(true);
84
+ });
85
+
86
+ // ---- Zero-session pushes ----
87
+
88
+ test("zero-session pushes pass validation", () => { // parses the same fixture as the earlier fixture test, but first asserts that sessions is empty
89
+ expect(partialPushNoSessions.canonical.sessions).toHaveLength(0);
90
+ const result = PushPayloadV2Schema.safeParse(partialPushNoSessions);
91
+ expect(result.success).toBe(true);
92
+ });
93
+
94
+ test("evidence-only push with all empty arrays passes", () => {
95
+ expect(evidenceOnlyPush.canonical.sessions).toHaveLength(0); // the five required canonical arrays must all be empty in this fixture
96
+ expect(evidenceOnlyPush.canonical.prompts).toHaveLength(0);
97
+ expect(evidenceOnlyPush.canonical.skill_invocations).toHaveLength(0);
98
+ expect(evidenceOnlyPush.canonical.execution_facts).toHaveLength(0);
99
+ expect(evidenceOnlyPush.canonical.normalization_runs).toHaveLength(0);
100
+ const result = PushPayloadV2Schema.safeParse(evidenceOnlyPush);
101
+ expect(result.success).toBe(true);
102
+ });
103
+
104
+ // ---- Unresolved parent references ----
105
+
106
+ test("unresolved parent references pass (invocation references session_id not in sessions)", () => {
107
+ const sessionIds = new Set(
108
+ partialPushUnresolvedParents.canonical.sessions.map((s) => s.session_id),
109
+ );
110
+ const invSessionIds = partialPushUnresolvedParents.canonical.skill_invocations.map(
111
+ (i) => i.session_id,
112
+ );
113
+
114
+ // Precondition: arrays must be non-empty for the test to be meaningful
115
+ expect(invSessionIds.length).toBeGreaterThan(0);
116
+
117
+ // Confirm the invocation references a session not in the sessions array
118
+ for (const sid of invSessionIds) {
119
+ expect(sessionIds.has(sid)).toBe(false);
120
+ }
121
+
122
+ const result = PushPayloadV2Schema.safeParse(partialPushUnresolvedParents); // the schema is expected to accept the payload even though the references are unresolved
123
+ expect(result.success).toBe(true);
124
+ });
125
+
126
+ test("prompts with unresolved session_id pass validation", () => { // same check as the previous test, applied to prompts instead of skill invocations
127
+ const sessionIds = new Set(
128
+ partialPushUnresolvedParents.canonical.sessions.map((s) => s.session_id),
129
+ );
130
+ const promptSessionIds = partialPushUnresolvedParents.canonical.prompts.map(
131
+ (p) => p.session_id,
132
+ );
133
+
134
+ // Precondition: arrays must be non-empty for the test to be meaningful
135
+ expect(promptSessionIds.length).toBeGreaterThan(0);
136
+
137
+ for (const sid of promptSessionIds) {
138
+ expect(sessionIds.has(sid)).toBe(false);
139
+ }
140
+
141
+ const result = PushPayloadV2Schema.safeParse(partialPushUnresolvedParents);
142
+ expect(result.success).toBe(true);
143
+ });
144
+ });
@@ -25,6 +25,10 @@
25
25
  "lucide-react": "^0.577.0",
26
26
  "tailwind-merge": "^3.5.0"
27
27
  },
28
+ "devDependencies": {
29
+ "@types/react": "^19.0.0",
30
+ "@types/react-dom": "^19.0.0"
31
+ },
28
32
  "peerDependencies": {
29
33
  "react": "^19.0.0",
30
34
  "react-dom": "^19.0.0",
@@ -7,6 +7,7 @@ import {
7
7
  CardTitle,
8
8
  } from "../primitives/card"
9
9
  import { Tabs, TabsContent, TabsList, TabsTrigger } from "../primitives/tabs"
10
+ import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "../primitives/tooltip"
10
11
  import type { EvolutionEntry, PendingProposal, UnmatchedQuery } from "../types"
11
12
  import { timeAgo } from "../lib/format"
12
13
  import {
@@ -29,10 +30,12 @@ export function ActivityPanel({
29
30
  evolution,
30
31
  pendingProposals,
31
32
  unmatchedQueries,
33
+ onSelectProposal,
32
34
  }: {
33
35
  evolution: EvolutionEntry[]
34
36
  pendingProposals: PendingProposal[]
35
37
  unmatchedQueries: UnmatchedQuery[]
38
+ onSelectProposal?: (skillName: string, proposalId: string) => void
36
39
  }) {
37
40
  const hasActivity = evolution.length > 0 || pendingProposals.length > 0 || unmatchedQueries.length > 0
38
41
 
@@ -73,35 +76,51 @@ export function ActivityPanel({
73
76
  : "unmatched"
74
77
  }
75
78
  >
76
- <TabsList className="w-full">
77
- {pendingProposals.length > 0 && (
78
- <TabsTrigger value="pending" className="flex-1 gap-1.5">
79
- <GitPullRequestArrowIcon className="size-3.5" />
80
- Pending
81
- <Badge variant="secondary" className="ml-1 h-4 px-1.5 text-[10px]">
82
- {pendingProposals.length}
83
- </Badge>
84
- </TabsTrigger>
85
- )}
86
- <TabsTrigger value="timeline" className="flex-1 gap-1.5">
87
- <ClockIcon className="size-3.5" />
88
- Timeline
89
- </TabsTrigger>
90
- {unmatchedQueries.length > 0 && (
91
- <TabsTrigger value="unmatched" className="flex-1 gap-1.5">
92
- <SearchXIcon className="size-3.5" />
93
- Unmatched
94
- <Badge variant="destructive" className="ml-1 h-4 px-1.5 text-[10px]">
95
- {unmatchedQueries.length}
96
- </Badge>
97
- </TabsTrigger>
98
- )}
99
- </TabsList>
79
+ <TooltipProvider>
80
+ <TabsList className="w-full">
81
+ {pendingProposals.length > 0 && (
82
+ <Tooltip>
83
+ <TooltipTrigger render={<TabsTrigger value="pending" className="flex-1 gap-1.5" />}>
84
+ <GitPullRequestArrowIcon className="size-3.5" />
85
+ <Badge variant="secondary" className="h-4 px-1 text-[10px]">
86
+ {pendingProposals.length}
87
+ </Badge>
88
+ </TooltipTrigger>
89
+ <TooltipContent>Pending proposals</TooltipContent>
90
+ </Tooltip>
91
+ )}
92
+ <Tooltip>
93
+ <TooltipTrigger render={<TabsTrigger value="timeline" className="flex-1" />}>
94
+ <ClockIcon className="size-3.5" />
95
+ </TooltipTrigger>
96
+ <TooltipContent>Timeline</TooltipContent>
97
+ </Tooltip>
98
+ {unmatchedQueries.length > 0 && (
99
+ <Tooltip>
100
+ <TooltipTrigger render={<TabsTrigger value="unmatched" className="flex-1 gap-1.5" />}>
101
+ <SearchXIcon className="size-3.5" />
102
+ <Badge variant="destructive" className="h-4 px-1 text-[10px]">
103
+ {unmatchedQueries.length}
104
+ </Badge>
105
+ </TooltipTrigger>
106
+ <TooltipContent>Unmatched queries</TooltipContent>
107
+ </Tooltip>
108
+ )}
109
+ </TabsList>
110
+ </TooltipProvider>
100
111
 
101
112
  {pendingProposals.length > 0 && (
102
113
  <TabsContent value="pending" className="mt-4 space-y-3">
103
114
  {pendingProposals.slice(0, 10).map((p) => (
104
- <div key={p.proposal_id} className="flex gap-3">
115
+ <button
116
+ key={p.proposal_id}
117
+ type="button"
118
+ onClick={() => {
119
+ if (p.skill_name && onSelectProposal) onSelectProposal(p.skill_name, p.proposal_id)
120
+ }}
121
+ disabled={!p.skill_name || !onSelectProposal}
122
+ className="flex w-full gap-3 rounded-md p-1.5 text-left transition-colors enabled:hover:bg-accent/40 disabled:cursor-default"
123
+ >
105
124
  <div className="mt-1 size-2 shrink-0 rounded-full bg-amber-400" />
106
125
  <div className="flex-1 min-w-0 space-y-1">
107
126
  <div className="flex items-center gap-2">
@@ -113,15 +132,28 @@ export function ActivityPanel({
113
132
  </span>
114
133
  </div>
115
134
  <p className="text-xs text-muted-foreground line-clamp-2">{p.details}</p>
135
+ {p.skill_name && (
136
+ <span className="text-[10px] text-muted-foreground/60 font-mono">
137
+ {p.skill_name} · #{p.proposal_id.slice(0, 8)}
138
+ </span>
139
+ )}
116
140
  </div>
117
- </div>
141
+ </button>
118
142
  ))}
119
143
  </TabsContent>
120
144
  )}
121
145
 
122
146
  <TabsContent value="timeline" className="mt-4 space-y-3">
123
147
  {evolution.slice(0, 30).map((entry, i) => (
124
- <div key={`${entry.proposal_id}-${i}`} className="flex gap-3">
148
+ <button
149
+ key={`${entry.proposal_id}-${i}`}
150
+ type="button"
151
+ onClick={() => {
152
+ if (entry.skill_name && onSelectProposal) onSelectProposal(entry.skill_name, entry.proposal_id)
153
+ }}
154
+ disabled={!entry.skill_name || !onSelectProposal}
155
+ className="flex w-full gap-3 rounded-md p-1.5 text-left transition-colors enabled:hover:bg-accent/40 disabled:cursor-default"
156
+ >
125
157
  <div className={`mt-1 size-2 shrink-0 rounded-full ${
126
158
  entry.action === "deployed" ? "bg-emerald-500"
127
159
  : entry.action === "rejected" || entry.action === "rolled_back" ? "bg-red-500"
@@ -139,10 +171,10 @@ export function ActivityPanel({
139
171
  </div>
140
172
  <p className="text-xs text-muted-foreground line-clamp-2">{entry.details}</p>
141
173
  <span className="text-[10px] text-muted-foreground/60 font-mono">
142
- #{entry.proposal_id.slice(0, 8)}
174
+ {entry.skill_name ? `${entry.skill_name} · ` : ""}#{entry.proposal_id.slice(0, 8)}
143
175
  </span>
144
176
  </div>
145
- </div>
177
+ </button>
146
178
  ))}
147
179
  {evolution.length === 0 && (
148
180
  <p className="text-sm text-muted-foreground text-center py-4">No timeline events</p>
@@ -25,6 +25,7 @@ interface SectionCardsProps {
25
25
  sessionsCount: number
26
26
  pendingCount: number
27
27
  evidenceCount: number
28
+ hasEvolution?: boolean
28
29
  }
29
30
 
30
31
  export function SectionCards({
@@ -34,6 +35,7 @@ export function SectionCards({
34
35
  sessionsCount,
35
36
  pendingCount,
36
37
  evidenceCount,
38
+ hasEvolution = true,
37
39
  }: SectionCardsProps) {
38
40
  const passRateStr = avgPassRate !== null ? `${Math.round(avgPassRate * 100)}%` : "--"
39
41
  const passRateGood = avgPassRate !== null && avgPassRate >= 0.7
@@ -63,14 +65,14 @@ export function SectionCards({
63
65
  <CardHeader>
64
66
  <CardDescription className="flex items-center gap-1.5">
65
67
  <FlaskConicalIcon className="size-3.5" />
66
- Avg Pass Rate
67
- <InfoTip text="Average percentage of eval test cases that passed across all graded skills (5+ checks)" />
68
+ Avg Trigger Rate
69
+ <InfoTip text="Average percentage of skill checks that resulted in a trigger across all graded skills (5+ checks). Run selftune evolve to improve this." />
68
70
  </CardDescription>
69
71
  <CardTitle className={`text-2xl font-semibold tabular-nums @[250px]/card:text-3xl ${!passRateGood && avgPassRate !== null ? "text-red-600" : ""}`}>
70
72
  {passRateStr}
71
73
  </CardTitle>
72
- {avgPassRate !== null && (
73
- <CardAction>
74
+ <CardAction>
75
+ {avgPassRate !== null ? (
74
76
  <Badge variant={passRateGood ? "outline" : "destructive"}>
75
77
  {passRateGood ? (
76
78
  <TrendingUpIcon className="size-3" />
@@ -79,8 +81,12 @@ export function SectionCards({
79
81
  )}
80
82
  {passRateStr}
81
83
  </Badge>
82
- </CardAction>
83
- )}
84
+ ) : (
85
+ <Badge variant="secondary" className="text-[10px]">
86
+ needs 5+ checks
87
+ </Badge>
88
+ )}
89
+ </CardAction>
84
90
  </CardHeader>
85
91
  </Card>
86
92
 
@@ -123,18 +129,22 @@ export function SectionCards({
123
129
  <CardDescription className="flex items-center gap-1.5">
124
130
  <AlertTriangleIcon className="size-3.5" />
125
131
  Pending Proposals
126
- <InfoTip text="Evolution proposals that have been generated but not yet validated or deployed" />
132
+ <InfoTip text="Evolution proposals that have been generated but not yet validated or deployed. Requires running selftune evolve." />
127
133
  </CardDescription>
128
134
  <CardTitle className="text-2xl font-semibold tabular-nums @[250px]/card:text-3xl">
129
- {pendingCount}
135
+ {hasEvolution ? pendingCount : "--"}
130
136
  </CardTitle>
131
- {pendingCount > 0 && (
132
- <CardAction>
137
+ <CardAction>
138
+ {!hasEvolution ? (
139
+ <Badge variant="secondary" className="text-[10px]">
140
+ no evolution runs yet
141
+ </Badge>
142
+ ) : pendingCount > 0 ? (
133
143
  <Badge variant="secondary">
134
144
  awaiting review
135
145
  </Badge>
136
- </CardAction>
137
- )}
146
+ ) : null}
147
+ </CardAction>
138
148
  </CardHeader>
139
149
  </Card>
140
150
 
@@ -143,11 +153,18 @@ export function SectionCards({
143
153
  <CardDescription className="flex items-center gap-1.5">
144
154
  <EyeIcon className="size-3.5" />
145
155
  Total Evidence
146
- <InfoTip text="Number of evidence entries documenting skill changes with before/after validation results" />
156
+ <InfoTip text="Number of evidence entries documenting skill changes with before/after validation results. Requires running selftune evolve." />
147
157
  </CardDescription>
148
158
  <CardTitle className="text-2xl font-semibold tabular-nums @[250px]/card:text-3xl">
149
- {evidenceCount}
159
+ {hasEvolution ? evidenceCount : "--"}
150
160
  </CardTitle>
161
+ {!hasEvolution && (
162
+ <CardAction>
163
+ <Badge variant="secondary" className="text-[10px]">
164
+ no evolution runs yet
165
+ </Badge>
166
+ </CardAction>
167
+ )}
151
168
  </CardHeader>
152
169
  </Card>
153
170
  </div>
@@ -27,6 +27,7 @@ export interface EvalSnapshot {
27
27
  export interface EvolutionEntry {
28
28
  timestamp: string;
29
29
  proposal_id: string;
30
+ skill_name?: string;
30
31
  action: string;
31
32
  details: string;
32
33
  eval_snapshot?: EvalSnapshot | null;