selftune 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/.claude/agents/diagnosis-analyst.md +156 -0
  2. package/.claude/agents/evolution-reviewer.md +180 -0
  3. package/.claude/agents/integration-guide.md +212 -0
  4. package/.claude/agents/pattern-analyst.md +160 -0
  5. package/CHANGELOG.md +46 -1
  6. package/README.md +105 -257
  7. package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
  8. package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
  9. package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
  10. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
  11. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
  12. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
  13. package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
  14. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
  15. package/apps/local-dashboard/dist/favicon.png +0 -0
  16. package/apps/local-dashboard/dist/index.html +17 -0
  17. package/apps/local-dashboard/dist/logo.png +0 -0
  18. package/apps/local-dashboard/dist/logo.svg +9 -0
  19. package/assets/BeforeAfter.gif +0 -0
  20. package/assets/FeedbackLoop.gif +0 -0
  21. package/assets/logo.svg +9 -0
  22. package/assets/skill-health-badge.svg +20 -0
  23. package/cli/selftune/activation-rules.ts +171 -0
  24. package/cli/selftune/badge/badge-data.ts +108 -0
  25. package/cli/selftune/badge/badge-svg.ts +212 -0
  26. package/cli/selftune/badge/badge.ts +99 -0
  27. package/cli/selftune/canonical-export.ts +183 -0
  28. package/cli/selftune/constants.ts +103 -1
  29. package/cli/selftune/contribute/bundle.ts +314 -0
  30. package/cli/selftune/contribute/contribute.ts +214 -0
  31. package/cli/selftune/contribute/sanitize.ts +162 -0
  32. package/cli/selftune/cron/setup.ts +266 -0
  33. package/cli/selftune/dashboard-contract.ts +202 -0
  34. package/cli/selftune/dashboard-server.ts +1049 -0
  35. package/cli/selftune/dashboard.ts +43 -156
  36. package/cli/selftune/eval/baseline.ts +248 -0
  37. package/cli/selftune/eval/composability-v2.ts +273 -0
  38. package/cli/selftune/eval/composability.ts +117 -0
  39. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  40. package/cli/selftune/eval/hooks-to-evals.ts +101 -16
  41. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  42. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  43. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  44. package/cli/selftune/eval/unit-test.ts +196 -0
  45. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  46. package/cli/selftune/evolution/evidence.ts +26 -0
  47. package/cli/selftune/evolution/evolve-body.ts +586 -0
  48. package/cli/selftune/evolution/evolve.ts +825 -116
  49. package/cli/selftune/evolution/extract-patterns.ts +105 -16
  50. package/cli/selftune/evolution/pareto.ts +314 -0
  51. package/cli/selftune/evolution/propose-body.ts +171 -0
  52. package/cli/selftune/evolution/propose-description.ts +100 -2
  53. package/cli/selftune/evolution/propose-routing.ts +166 -0
  54. package/cli/selftune/evolution/refine-body.ts +141 -0
  55. package/cli/selftune/evolution/rollback.ts +21 -4
  56. package/cli/selftune/evolution/validate-body.ts +254 -0
  57. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  58. package/cli/selftune/evolution/validate-routing.ts +177 -0
  59. package/cli/selftune/grading/auto-grade.ts +200 -0
  60. package/cli/selftune/grading/grade-session.ts +513 -42
  61. package/cli/selftune/grading/pre-gates.ts +104 -0
  62. package/cli/selftune/grading/results.ts +42 -0
  63. package/cli/selftune/hooks/auto-activate.ts +185 -0
  64. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  65. package/cli/selftune/hooks/prompt-log.ts +172 -2
  66. package/cli/selftune/hooks/session-stop.ts +123 -3
  67. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  68. package/cli/selftune/hooks/skill-eval.ts +119 -3
  69. package/cli/selftune/index.ts +415 -48
  70. package/cli/selftune/ingestors/claude-replay.ts +377 -0
  71. package/cli/selftune/ingestors/codex-rollout.ts +345 -46
  72. package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
  73. package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
  74. package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
  75. package/cli/selftune/init.ts +376 -16
  76. package/cli/selftune/last.ts +14 -5
  77. package/cli/selftune/localdb/db.ts +63 -0
  78. package/cli/selftune/localdb/materialize.ts +428 -0
  79. package/cli/selftune/localdb/queries.ts +376 -0
  80. package/cli/selftune/localdb/schema.ts +204 -0
  81. package/cli/selftune/memory/writer.ts +447 -0
  82. package/cli/selftune/monitoring/watch.ts +90 -16
  83. package/cli/selftune/normalization.ts +682 -0
  84. package/cli/selftune/observability.ts +19 -44
  85. package/cli/selftune/orchestrate.ts +1073 -0
  86. package/cli/selftune/quickstart.ts +203 -0
  87. package/cli/selftune/repair/skill-usage.ts +576 -0
  88. package/cli/selftune/schedule.ts +561 -0
  89. package/cli/selftune/status.ts +59 -33
  90. package/cli/selftune/sync.ts +627 -0
  91. package/cli/selftune/types.ts +525 -5
  92. package/cli/selftune/utils/canonical-log.ts +45 -0
  93. package/cli/selftune/utils/frontmatter.ts +217 -0
  94. package/cli/selftune/utils/hooks.ts +41 -0
  95. package/cli/selftune/utils/html.ts +27 -0
  96. package/cli/selftune/utils/llm-call.ts +103 -19
  97. package/cli/selftune/utils/math.ts +10 -0
  98. package/cli/selftune/utils/query-filter.ts +139 -0
  99. package/cli/selftune/utils/skill-discovery.ts +340 -0
  100. package/cli/selftune/utils/skill-log.ts +68 -0
  101. package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
  102. package/cli/selftune/utils/transcript.ts +307 -26
  103. package/cli/selftune/utils/trigger-check.ts +89 -0
  104. package/cli/selftune/utils/tui.ts +156 -0
  105. package/cli/selftune/workflows/discover.ts +254 -0
  106. package/cli/selftune/workflows/skill-md-writer.ts +288 -0
  107. package/cli/selftune/workflows/workflows.ts +188 -0
  108. package/package.json +28 -11
  109. package/packages/telemetry-contract/README.md +11 -0
  110. package/packages/telemetry-contract/fixtures/golden.json +87 -0
  111. package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
  112. package/packages/telemetry-contract/index.ts +1 -0
  113. package/packages/telemetry-contract/package.json +19 -0
  114. package/packages/telemetry-contract/src/index.ts +2 -0
  115. package/packages/telemetry-contract/src/types.ts +163 -0
  116. package/packages/telemetry-contract/src/validators.ts +109 -0
  117. package/skill/SKILL.md +180 -33
  118. package/skill/Workflows/AutoActivation.md +145 -0
  119. package/skill/Workflows/Badge.md +124 -0
  120. package/skill/Workflows/Baseline.md +144 -0
  121. package/skill/Workflows/Composability.md +107 -0
  122. package/skill/Workflows/Contribute.md +94 -0
  123. package/skill/Workflows/Cron.md +132 -0
  124. package/skill/Workflows/Dashboard.md +214 -0
  125. package/skill/Workflows/Doctor.md +63 -14
  126. package/skill/Workflows/Evals.md +110 -18
  127. package/skill/Workflows/EvolutionMemory.md +154 -0
  128. package/skill/Workflows/Evolve.md +181 -21
  129. package/skill/Workflows/EvolveBody.md +159 -0
  130. package/skill/Workflows/Grade.md +36 -31
  131. package/skill/Workflows/ImportSkillsBench.md +117 -0
  132. package/skill/Workflows/Ingest.md +142 -21
  133. package/skill/Workflows/Initialize.md +91 -23
  134. package/skill/Workflows/Orchestrate.md +139 -0
  135. package/skill/Workflows/Replay.md +91 -0
  136. package/skill/Workflows/Rollback.md +23 -4
  137. package/skill/Workflows/Schedule.md +61 -0
  138. package/skill/Workflows/Sync.md +88 -0
  139. package/skill/Workflows/UnitTest.md +150 -0
  140. package/skill/Workflows/Watch.md +33 -1
  141. package/skill/Workflows/Workflows.md +129 -0
  142. package/skill/assets/activation-rules-default.json +26 -0
  143. package/skill/assets/multi-skill-settings.json +63 -0
  144. package/skill/assets/single-skill-settings.json +57 -0
  145. package/skill/references/invocation-taxonomy.md +2 -2
  146. package/skill/references/logs.md +164 -2
  147. package/skill/references/setup-patterns.md +65 -0
  148. package/skill/references/version-history.md +40 -0
  149. package/skill/settings_snippet.json +23 -0
  150. package/templates/activation-rules-default.json +27 -0
  151. package/templates/multi-skill-settings.json +64 -0
  152. package/templates/single-skill-settings.json +58 -0
  153. package/dashboard/index.html +0 -1119
@@ -7,7 +7,7 @@
7
7
  // ---------------------------------------------------------------------------
8
8
 
9
9
  export interface SelftuneConfig {
10
- agent_type: "claude_code" | "codex" | "opencode" | "unknown";
10
+ agent_type: "claude_code" | "codex" | "opencode" | "openclaw" | "unknown";
11
11
  cli_path: string;
12
12
  llm_mode: "agent";
13
13
  agent_cli: string | null;
@@ -31,6 +31,10 @@ export interface SkillUsageRecord {
31
31
  session_id: string;
32
32
  skill_name: string;
33
33
  skill_path: string;
34
+ skill_scope?: "project" | "global" | "admin" | "system" | "unknown";
35
+ skill_project_root?: string;
36
+ skill_registry_dir?: string;
37
+ skill_path_resolution_source?: "raw_log" | "installed_scope" | "launcher_base_dir" | "fallback";
34
38
  query: string;
35
39
  triggered: boolean;
36
40
  source?: string;
@@ -45,6 +49,7 @@ export interface SessionTelemetryRecord {
45
49
  total_tool_calls: number;
46
50
  bash_commands: string[];
47
51
  skills_triggered: string[];
52
+ skills_invoked?: string[];
48
53
  assistant_turns: number;
49
54
  errors_encountered: number;
50
55
  transcript_chars: number;
@@ -56,6 +61,49 @@ export interface SessionTelemetryRecord {
56
61
  rollout_path?: string;
57
62
  }
58
63
 
64
+ export interface ImprovementSignalRecord {
65
+ timestamp: string;
66
+ session_id: string;
67
+ query: string;
68
+ signal_type: "correction" | "explicit_request" | "manual_invocation";
69
+ mentioned_skill?: string;
70
+ consumed: boolean;
71
+ consumed_at?: string;
72
+ consumed_by_run?: string;
73
+ }
74
+
75
+ export type {
76
+ CanonicalCaptureMode,
77
+ CanonicalCompletionStatus,
78
+ CanonicalExecutionFactRecord,
79
+ CanonicalInvocationMode,
80
+ CanonicalNormalizationRunRecord,
81
+ CanonicalPlatform,
82
+ CanonicalPromptKind,
83
+ CanonicalPromptRecord,
84
+ CanonicalRawSourceRef,
85
+ CanonicalRecord,
86
+ CanonicalRecordBase,
87
+ CanonicalRecordKind,
88
+ CanonicalSchemaVersion,
89
+ CanonicalSessionRecord,
90
+ CanonicalSkillInvocationRecord,
91
+ CanonicalSourceSessionKind,
92
+ } from "@selftune/telemetry-contract";
93
+ // ---------------------------------------------------------------------------
94
+ // Canonical normalization types (local + cloud projection layer)
95
+ // ---------------------------------------------------------------------------
96
+ export {
97
+ CANONICAL_CAPTURE_MODES,
98
+ CANONICAL_COMPLETION_STATUSES,
99
+ CANONICAL_INVOCATION_MODES,
100
+ CANONICAL_PLATFORMS,
101
+ CANONICAL_PROMPT_KINDS,
102
+ CANONICAL_RECORD_KINDS,
103
+ CANONICAL_SCHEMA_VERSION,
104
+ CANONICAL_SOURCE_SESSION_KINDS,
105
+ } from "@selftune/telemetry-contract";
106
+
59
107
  // ---------------------------------------------------------------------------
60
108
  // Transcript parsing
61
109
  // ---------------------------------------------------------------------------
@@ -65,25 +113,33 @@ export interface TranscriptMetrics {
65
113
  total_tool_calls: number;
66
114
  bash_commands: string[];
67
115
  skills_triggered: string[];
116
+ skills_invoked: string[];
68
117
  assistant_turns: number;
69
118
  errors_encountered: number;
70
119
  transcript_chars: number;
71
120
  last_user_query: string;
121
+ input_tokens?: number;
122
+ output_tokens?: number;
123
+ duration_ms?: number;
72
124
  }
73
125
 
74
126
  // ---------------------------------------------------------------------------
75
127
  // Hook payloads (received via stdin from Claude Code)
76
128
  // ---------------------------------------------------------------------------
77
129
 
130
+ // Shared base for pre/post tool-use hook payloads
131
+ export interface BaseToolUsePayload {
132
+ tool_name: string;
133
+ tool_input: Record<string, unknown>;
134
+ session_id?: string;
135
+ }
136
+
78
137
  export interface PromptSubmitPayload {
79
138
  user_prompt: string;
80
139
  session_id?: string;
81
140
  }
82
141
 
83
- export interface PostToolUsePayload {
84
- tool_name: string;
85
- tool_input: Record<string, unknown>;
86
- session_id?: string;
142
+ export interface PostToolUsePayload extends BaseToolUsePayload {
87
143
  transcript_path?: string;
88
144
  }
89
145
 
@@ -113,6 +169,8 @@ export interface GradingExpectation {
113
169
  text: string;
114
170
  passed: boolean;
115
171
  evidence: string;
172
+ score?: number; // 0.0-1.0 graduated confidence
173
+ source?: "pre-gate" | "llm"; // which grading path produced this
116
174
  }
117
175
 
118
176
  export interface GradingClaim {
@@ -127,6 +185,15 @@ export interface GradingSummary {
127
185
  failed: number;
128
186
  total: number;
129
187
  pass_rate: number;
188
+ mean_score?: number; // mean of all expectation scores
189
+ score_std_dev?: number; // standard deviation
190
+ }
191
+
192
+ export interface FailureFeedback {
193
+ query: string;
194
+ failure_reason: string;
195
+ improvement_hint: string;
196
+ invocation_type?: InvocationType;
130
197
  }
131
198
 
132
199
  /** Raw output from the LLM grader (before assembly into GradingResult). */
@@ -135,6 +202,7 @@ export interface GraderOutput {
135
202
  summary: GradingSummary;
136
203
  claims: GradingClaim[];
137
204
  eval_feedback: EvalFeedback;
205
+ failure_feedback?: FailureFeedback[];
138
206
  }
139
207
 
140
208
  export interface EvalFeedback {
@@ -152,6 +220,7 @@ export interface GradingResult {
152
220
  execution_metrics: ExecutionMetrics;
153
221
  claims: GradingClaim[];
154
222
  eval_feedback: EvalFeedback;
223
+ failure_feedback?: FailureFeedback[];
155
224
  }
156
225
 
157
226
  export interface ExecutionMetrics {
@@ -197,6 +266,7 @@ export interface FailurePattern {
197
266
  frequency: number;
198
267
  sample_sessions: string[];
199
268
  extracted_at: string;
269
+ feedback?: FailureFeedback[];
200
270
  }
201
271
 
202
272
  export interface EvolutionProposal {
@@ -226,11 +296,41 @@ export interface EvalPassRate {
226
296
  export interface EvolutionAuditEntry {
227
297
  timestamp: string;
228
298
  proposal_id: string;
299
+ skill_name?: string;
229
300
  action: "created" | "validated" | "deployed" | "rolled_back" | "rejected";
230
301
  details: string;
231
302
  eval_snapshot?: EvalPassRate;
232
303
  }
233
304
 
305
+ export interface EvolutionEvidenceValidation {
306
+ improved?: boolean;
307
+ before_pass_rate?: number;
308
+ after_pass_rate?: number;
309
+ net_change?: number;
310
+ regressions?: EvalEntry[] | string[];
311
+ new_passes?: EvalEntry[];
312
+ per_entry_results?: Array<{ entry: EvalEntry; before_pass: boolean; after_pass: boolean }>;
313
+ gates_passed?: number;
314
+ gates_total?: number;
315
+ gate_results?: Array<{ gate: ValidationGate; passed: boolean; reason: string }>;
316
+ }
317
+
318
+ export interface EvolutionEvidenceEntry {
319
+ timestamp: string;
320
+ proposal_id: string;
321
+ skill_name: string;
322
+ skill_path: string;
323
+ target: EvolutionTarget;
324
+ stage: "created" | "validated" | "deployed" | "rejected" | "rolled_back";
325
+ rationale?: string;
326
+ confidence?: number;
327
+ details?: string;
328
+ original_text?: string;
329
+ proposed_text?: string;
330
+ eval_set?: EvalEntry[];
331
+ validation?: EvolutionEvidenceValidation;
332
+ }
333
+
234
334
  export interface EvolutionConfig {
235
335
  min_sessions: number;
236
336
  min_improvement: number; // e.g., 0.10 = 10 percentage points
@@ -239,6 +339,68 @@ export interface EvolutionConfig {
239
339
  dry_run: boolean;
240
340
  }
241
341
 
342
+ // ---------------------------------------------------------------------------
343
+ // Validation result base (self-contained for Pareto types)
344
+ // ---------------------------------------------------------------------------
345
+
346
+ /** Compact summary of an evolve run, used for CLI JSON output. */
347
+ export interface EvolveResultSummary {
348
+ skill: string;
349
+ deployed: boolean;
350
+ reason: string;
351
+ before: number;
352
+ after: number;
353
+ net_change: number;
354
+ improved: boolean;
355
+ regressions: number;
356
+ new_passes: number;
357
+ confidence: number;
358
+ llm_calls: number;
359
+ elapsed_s: number;
360
+ proposal_id: string;
361
+ rationale: string;
362
+ version?: string;
363
+ dashboard_url: string;
364
+ }
365
+
366
+ export interface ValidationResultBase {
367
+ proposal_id: string;
368
+ before_pass_rate: number;
369
+ after_pass_rate: number;
370
+ improved: boolean;
371
+ regressions: EvalEntry[];
372
+ new_passes: EvalEntry[];
373
+ net_change: number;
374
+ by_invocation_type?: InvocationTypeScores;
375
+ per_entry_results?: Array<{ entry: EvalEntry; before_pass: boolean; after_pass: boolean }>;
376
+ }
377
+
378
+ // ---------------------------------------------------------------------------
379
+ // Pareto types (multi-dimensional evolution selection)
380
+ // ---------------------------------------------------------------------------
381
+
382
+ export interface InvocationTypeScores {
383
+ explicit: { passed: number; total: number; pass_rate: number };
384
+ implicit: { passed: number; total: number; pass_rate: number };
385
+ contextual: { passed: number; total: number; pass_rate: number };
386
+ negative: { passed: number; total: number; pass_rate: number };
387
+ }
388
+
389
+ export interface ParetoCandidate {
390
+ proposal: EvolutionProposal;
391
+ validation: ValidationResultBase;
392
+ invocation_scores: InvocationTypeScores;
393
+ dominates_on: InvocationType[];
394
+ token_efficiency_score?: number;
395
+ }
396
+
397
+ export interface ParetoSelectionResult {
398
+ selected_proposal: EvolutionProposal;
399
+ frontier: ParetoCandidate[];
400
+ merge_applied: boolean;
401
+ merge_sources: string[];
402
+ }
403
+
242
404
  // ---------------------------------------------------------------------------
243
405
  // Monitoring types (v0.4)
244
406
  // ---------------------------------------------------------------------------
@@ -247,9 +409,367 @@ export interface MonitoringSnapshot {
247
409
  timestamp: string;
248
410
  skill_name: string;
249
411
  window_sessions: number;
412
+ skill_checks: number;
250
413
  pass_rate: number;
251
414
  false_negative_rate: number;
252
415
  by_invocation_type: Record<InvocationType, { passed: number; total: number }>;
253
416
  regression_detected: boolean;
254
417
  baseline_pass_rate: number;
255
418
  }
419
+
420
+ // ---------------------------------------------------------------------------
421
+ // Activation rule types (v0.5 — auto-activate hooks)
422
+ // ---------------------------------------------------------------------------
423
+
424
+ export interface ActivationRule {
425
+ id: string;
426
+ description: string;
427
+ /** Evaluate whether this rule fires. Returns a suggestion string or null. */
428
+ evaluate: (ctx: ActivationContext) => string | null;
429
+ }
430
+
431
+ export interface ActivationContext {
432
+ session_id: string;
433
+ query_log_path: string;
434
+ telemetry_log_path: string;
435
+ evolution_audit_log_path: string;
436
+ selftune_dir: string;
437
+ settings_path: string;
438
+ }
439
+
440
+ export interface SessionState {
441
+ session_id: string;
442
+ suggestions_shown: string[]; // rule IDs already fired this session
443
+ updated_at: string;
444
+ }
445
+
446
+ // ---------------------------------------------------------------------------
447
+ // PreToolUse hook payloads
448
+ // ---------------------------------------------------------------------------
449
+
450
+ export interface PreToolUsePayload extends BaseToolUsePayload {}
451
+
452
+ // ---------------------------------------------------------------------------
453
+ // Evolution memory types (session context persistence)
454
+ // ---------------------------------------------------------------------------
455
+
456
+ export interface EvolutionMemory {
457
+ context: MemoryContext;
458
+ plan: MemoryPlan;
459
+ decisions: DecisionRecord[];
460
+ }
461
+
462
+ export interface MemoryContext {
463
+ activeEvolutions: Array<{
464
+ skillName: string;
465
+ status: string;
466
+ description: string;
467
+ }>;
468
+ knownIssues: string[];
469
+ lastUpdated: string;
470
+ }
471
+
472
+ export interface MemoryPlan {
473
+ currentPriorities: string[];
474
+ strategy: string;
475
+ lastUpdated: string;
476
+ }
477
+
478
+ export interface DecisionRecord {
479
+ timestamp: string;
480
+ /** Imperative verb for markdown headings (e.g. "evolve", "rollback", "watch"). */
481
+ actionType: string;
482
+ skillName: string;
483
+ /** Past-tense result state used programmatically. */
484
+ action: "evolved" | "rolled-back" | "watched";
485
+ rationale: string;
486
+ result: string;
487
+ }
488
+
489
+ // ---------------------------------------------------------------------------
490
+ // Contribution types (contribute command)
491
+ // ---------------------------------------------------------------------------
492
+
493
+ export interface ContributionQuery {
494
+ query: string;
495
+ invocation_type: InvocationType;
496
+ source: string;
497
+ }
498
+
499
+ export interface ContributionEvalEntry {
500
+ query: string;
501
+ should_trigger: boolean;
502
+ invocation_type?: InvocationType;
503
+ }
504
+
505
+ export interface ContributionGradingSummary {
506
+ total_sessions: number;
507
+ graded_sessions: number;
508
+ average_pass_rate: number;
509
+ expectation_count: number;
510
+ }
511
+
512
+ export interface ContributionEvolutionSummary {
513
+ total_proposals: number;
514
+ deployed_proposals: number;
515
+ rolled_back_proposals: number;
516
+ average_improvement: number;
517
+ }
518
+
519
+ export interface ContributionSessionMetrics {
520
+ total_sessions: number;
521
+ avg_assistant_turns: number;
522
+ avg_tool_calls: number;
523
+ avg_errors: number;
524
+ top_tools: Array<{ tool: string; count: number }>;
525
+ }
526
+
527
+ export interface ContributionBundle {
528
+ schema_version: "1.0" | "1.1" | "1.2";
529
+ skill_name?: string;
530
+ contributor_id: string;
531
+ created_at: string;
532
+ selftune_version: string;
533
+ agent_type: string;
534
+ sanitization_level: "conservative" | "aggressive";
535
+ positive_queries: ContributionQuery[];
536
+ eval_entries: ContributionEvalEntry[];
537
+ grading_summary: ContributionGradingSummary | null;
538
+ evolution_summary: ContributionEvolutionSummary | null;
539
+ session_metrics: ContributionSessionMetrics;
540
+ unmatched_queries?: Array<{ query: string; timestamp: string }>;
541
+ pending_proposals?: Array<{
542
+ proposal_id: string;
543
+ skill_name?: string;
544
+ action: string;
545
+ timestamp: string;
546
+ details: string;
547
+ }>;
548
+ }
549
+
550
+ // ---------------------------------------------------------------------------
551
+ // Evolution target types (v0.6 — body + routing evolution)
552
+ // ---------------------------------------------------------------------------
553
+
554
+ /** Which part of a skill is being evolved. */
555
+ export type EvolutionTarget = "description" | "routing" | "body";
556
+
557
+ /** Parsed sections of a SKILL.md file. */
558
+ export interface SkillSections {
559
+ frontmatter: string;
560
+ title: string;
561
+ description: string;
562
+ sections: Record<string, string>;
563
+ }
564
+
565
+ /** Proposal for evolving the full body of a SKILL.md. */
566
+ export interface BodyEvolutionProposal {
567
+ proposal_id: string;
568
+ skill_name: string;
569
+ skill_path: string;
570
+ original_body: string;
571
+ proposed_body: string;
572
+ rationale: string;
573
+ target: EvolutionTarget;
574
+ failure_patterns: string[];
575
+ confidence: number;
576
+ created_at: string;
577
+ status: "pending" | "validated" | "deployed" | "rolled_back";
578
+ }
579
+
580
+ /** Closed union of gate names used in the validation pipeline. */
581
+ export type ValidationGate = "structural" | "trigger_accuracy" | "quality";
582
+
583
+ /** Result of validating a body evolution proposal. */
584
+ export interface BodyValidationResult {
585
+ proposal_id: string;
586
+ gates_passed: number;
587
+ gates_total: number;
588
+ gate_results: Array<{ gate: ValidationGate; passed: boolean; reason: string }>;
589
+ improved: boolean;
590
+ regressions: string[];
591
+ }
592
+
593
+ /** Configuration for which LLM model a role should use. */
594
+ export interface LlmRoleConfig {
595
+ role: string;
596
+ model: string;
597
+ temperature?: number;
598
+ max_tokens?: number;
599
+ }
600
+
601
+ /** Token usage metrics for a session or eval run. */
602
+ export interface TokenUsageMetrics {
603
+ input_tokens: number;
604
+ output_tokens: number;
605
+ total_tokens: number;
606
+ estimated_cost_usd?: number;
607
+ }
608
+
609
+ // ---------------------------------------------------------------------------
610
+ // Baseline comparison types
611
+ // ---------------------------------------------------------------------------
612
+
613
+ /** Result of a no-skill baseline measurement. */
614
+ export interface BaselineResult {
615
+ skill_name: string;
616
+ query: string;
617
+ with_skill: boolean;
618
+ triggered: boolean;
619
+ pass: boolean;
620
+ latency_ms?: number;
621
+ tokens?: TokenUsageMetrics;
622
+ measured_at: string;
623
+ }
624
+
625
+ // ---------------------------------------------------------------------------
626
+ // Skill unit test types
627
+ // ---------------------------------------------------------------------------
628
+
629
+ /** Type of assertion for a skill unit test. */
630
+ export type AssertionType =
631
+ | "contains"
632
+ | "not_contains"
633
+ | "regex"
634
+ | "json_path"
635
+ | "tool_called"
636
+ | "tool_not_called";
637
+
638
+ /** A single assertion within a skill unit test. */
639
+ export interface SkillAssertion {
640
+ type: AssertionType;
641
+ value: string;
642
+ description?: string;
643
+ }
644
+
645
+ /** A skill unit test case. */
646
+ export interface SkillUnitTest {
647
+ id: string;
648
+ skill_name: string;
649
+ query: string;
650
+ assertions: SkillAssertion[];
651
+ timeout_ms?: number;
652
+ tags?: string[];
653
+ }
654
+
655
+ /** Result of running a single skill unit test. */
656
+ export interface UnitTestResult {
657
+ test_id: string;
658
+ passed: boolean;
659
+ assertion_results: Array<{ assertion: SkillAssertion; passed: boolean; actual?: string }>;
660
+ duration_ms: number;
661
+ error?: string;
662
+ }
663
+
664
+ /** Aggregated result of a skill unit test suite. */
665
+ export interface UnitTestSuiteResult {
666
+ skill_name: string;
667
+ total: number;
668
+ passed: number;
669
+ failed: number;
670
+ pass_rate: number;
671
+ results: UnitTestResult[];
672
+ run_at: string;
673
+ }
674
+
675
+ // ---------------------------------------------------------------------------
676
+ // Composability types
677
+ // ---------------------------------------------------------------------------
678
+
679
+ /** A pair of skills that co-occur in sessions. */
680
+ export interface CoOccurrencePair {
681
+ skill_a: string;
682
+ skill_b: string;
683
+ co_occurrence_count: number;
684
+ conflict_detected: boolean;
685
+ conflict_reason?: string;
686
+ }
687
+
688
+ /** Report on skill composability / conflicts. */
689
+ export interface ComposabilityReport {
690
+ pairs: CoOccurrencePair[];
691
+ total_sessions_analyzed: number;
692
+ conflict_count: number;
693
+ generated_at: string;
694
+ }
695
+
696
+ // ---------------------------------------------------------------------------
697
+ // SkillsBench types
698
+ // ---------------------------------------------------------------------------
699
+
700
+ /** A task from the SkillsBench benchmark suite. */
701
+ export interface SkillsBenchTask {
702
+ task_id: string;
703
+ category: string;
704
+ query: string;
705
+ expected_skill?: string;
706
+ expected_tools?: string[];
707
+ difficulty: "easy" | "medium" | "hard";
708
+ tags?: string[];
709
+ }
710
+
711
+ // ---------------------------------------------------------------------------
712
+ // Composability V2 types (synergy + sequence detection)
713
+ // ---------------------------------------------------------------------------
714
+
715
+ /** Extended pair with synergy detection */
716
+ export interface CoOccurrencePairV2 extends CoOccurrencePair {
717
+ synergy_score: number;
718
+ avg_errors_together: number;
719
+ avg_errors_alone: number;
720
+ workflow_candidate: boolean;
721
+ }
722
+
723
+ /** Ordered skill sequence detected from timestamps */
724
+ export interface SkillSequence {
725
+ skills: string[];
726
+ occurrence_count: number;
727
+ synergy_score: number;
728
+ representative_query: string;
729
+ sequence_consistency: number;
730
+ }
731
+
732
+ /** Extended report with synergy and sequence detection */
733
+ export interface ComposabilityReportV2 extends ComposabilityReport {
734
+ pairs: CoOccurrencePairV2[];
735
+ sequences: SkillSequence[];
736
+ workflow_candidates: CoOccurrencePairV2[];
737
+ synergy_count: number;
738
+ }
739
+
740
+ // ---------------------------------------------------------------------------
741
+ // Workflow Support types
742
+ // ---------------------------------------------------------------------------
743
+
744
+ export interface DiscoveredWorkflow {
745
+ workflow_id: string; // deterministic hash: skills.join("→")
746
+ skills: string[]; // ordered skill sequence
747
+ occurrence_count: number;
748
+ avg_errors: number;
749
+ avg_errors_individual: number;
750
+ synergy_score: number; // clamp((individual - together) / (individual + 1), -1, 1)
751
+ representative_query: string;
752
+ sequence_consistency: number; // [0,1]
753
+ completion_rate: number; // % sessions where all skills fired
754
+ first_seen: string;
755
+ last_seen: string;
756
+ session_ids: string[]; // sessions that contributed to this workflow
757
+ }
758
+
759
+ export interface CodifiedWorkflow {
760
+ name: string;
761
+ skills: string[];
762
+ description?: string;
763
+ source: "discovered" | "authored";
764
+ discovered_from?: {
765
+ workflow_id: string;
766
+ occurrence_count: number;
767
+ synergy_score: number;
768
+ };
769
+ }
770
+
771
+ export interface WorkflowDiscoveryReport {
772
+ workflows: DiscoveredWorkflow[];
773
+ total_sessions_analyzed: number;
774
+ generated_at: string;
775
+ }
@@ -0,0 +1,45 @@
1
+ import { existsSync, writeFileSync } from "node:fs";
2
+ import {
3
+ type CanonicalPlatform,
4
+ type CanonicalRecord,
5
+ type CanonicalRecordKind,
6
+ isCanonicalRecord,
7
+ } from "@selftune/telemetry-contract";
8
+ import { CANONICAL_LOG } from "../constants.js";
9
+ import { readJsonl } from "./jsonl.js";
10
+
11
+ export interface CanonicalRecordFilter {
12
+ platform?: CanonicalPlatform;
13
+ record_kind?: CanonicalRecordKind;
14
+ }
15
+
16
+ export function readCanonicalRecords(logPath: string = CANONICAL_LOG): CanonicalRecord[] {
17
+ if (!existsSync(logPath)) return [];
18
+ return readJsonl<CanonicalRecord>(logPath).filter(isCanonicalRecord);
19
+ }
20
+
21
+ export function filterCanonicalRecords(
22
+ records: CanonicalRecord[],
23
+ filter: CanonicalRecordFilter,
24
+ ): CanonicalRecord[] {
25
+ return records.filter((record) => {
26
+ if (filter.platform && record.platform !== filter.platform) return false;
27
+ if (filter.record_kind && record.record_kind !== filter.record_kind) return false;
28
+ return true;
29
+ });
30
+ }
31
+
32
+ export function serializeCanonicalRecords(records: CanonicalRecord[], pretty = false): string {
33
+ if (pretty) return `${JSON.stringify(records, null, 2)}\n`;
34
+ return (
35
+ records.map((record) => JSON.stringify(record)).join("\n") + (records.length > 0 ? "\n" : "")
36
+ );
37
+ }
38
+
39
+ export function writeCanonicalExport(
40
+ records: CanonicalRecord[],
41
+ outPath: string,
42
+ pretty = false,
43
+ ): void {
44
+ writeFileSync(outPath, serializeCanonicalRecords(records, pretty), "utf-8");
45
+ }