selftune 0.1.4 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +156 -0
- package/.claude/agents/evolution-reviewer.md +180 -0
- package/.claude/agents/integration-guide.md +212 -0
- package/.claude/agents/pattern-analyst.md +160 -0
- package/CHANGELOG.md +46 -1
- package/README.md +105 -257
- package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
- package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
- package/apps/local-dashboard/dist/favicon.png +0 -0
- package/apps/local-dashboard/dist/index.html +17 -0
- package/apps/local-dashboard/dist/logo.png +0 -0
- package/apps/local-dashboard/dist/logo.svg +9 -0
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +99 -0
- package/cli/selftune/canonical-export.ts +183 -0
- package/cli/selftune/constants.ts +103 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-contract.ts +202 -0
- package/cli/selftune/dashboard-server.ts +1049 -0
- package/cli/selftune/dashboard.ts +43 -156
- package/cli/selftune/eval/baseline.ts +248 -0
- package/cli/selftune/eval/composability-v2.ts +273 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +101 -16
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evidence.ts +26 -0
- package/cli/selftune/evolution/evolve-body.ts +586 -0
- package/cli/selftune/evolution/evolve.ts +825 -116
- package/cli/selftune/evolution/extract-patterns.ts +105 -16
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +21 -4
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/auto-grade.ts +200 -0
- package/cli/selftune/grading/grade-session.ts +513 -42
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/grading/results.ts +42 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/prompt-log.ts +172 -2
- package/cli/selftune/hooks/session-stop.ts +123 -3
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/hooks/skill-eval.ts +119 -3
- package/cli/selftune/index.ts +415 -48
- package/cli/selftune/ingestors/claude-replay.ts +377 -0
- package/cli/selftune/ingestors/codex-rollout.ts +345 -46
- package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
- package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
- package/cli/selftune/init.ts +376 -16
- package/cli/selftune/last.ts +14 -5
- package/cli/selftune/localdb/db.ts +63 -0
- package/cli/selftune/localdb/materialize.ts +428 -0
- package/cli/selftune/localdb/queries.ts +376 -0
- package/cli/selftune/localdb/schema.ts +204 -0
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +90 -16
- package/cli/selftune/normalization.ts +682 -0
- package/cli/selftune/observability.ts +19 -44
- package/cli/selftune/orchestrate.ts +1073 -0
- package/cli/selftune/quickstart.ts +203 -0
- package/cli/selftune/repair/skill-usage.ts +576 -0
- package/cli/selftune/schedule.ts +561 -0
- package/cli/selftune/status.ts +59 -33
- package/cli/selftune/sync.ts +627 -0
- package/cli/selftune/types.ts +525 -5
- package/cli/selftune/utils/canonical-log.ts +45 -0
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/hooks.ts +41 -0
- package/cli/selftune/utils/html.ts +27 -0
- package/cli/selftune/utils/llm-call.ts +103 -19
- package/cli/selftune/utils/math.ts +10 -0
- package/cli/selftune/utils/query-filter.ts +139 -0
- package/cli/selftune/utils/skill-discovery.ts +340 -0
- package/cli/selftune/utils/skill-log.ts +68 -0
- package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
- package/cli/selftune/utils/transcript.ts +307 -26
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/cli/selftune/workflows/discover.ts +254 -0
- package/cli/selftune/workflows/skill-md-writer.ts +288 -0
- package/cli/selftune/workflows/workflows.ts +188 -0
- package/package.json +28 -11
- package/packages/telemetry-contract/README.md +11 -0
- package/packages/telemetry-contract/fixtures/golden.json +87 -0
- package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
- package/packages/telemetry-contract/index.ts +1 -0
- package/packages/telemetry-contract/package.json +19 -0
- package/packages/telemetry-contract/src/index.ts +2 -0
- package/packages/telemetry-contract/src/types.ts +163 -0
- package/packages/telemetry-contract/src/validators.ts +109 -0
- package/skill/SKILL.md +180 -33
- package/skill/Workflows/AutoActivation.md +145 -0
- package/skill/Workflows/Badge.md +124 -0
- package/skill/Workflows/Baseline.md +144 -0
- package/skill/Workflows/Composability.md +107 -0
- package/skill/Workflows/Contribute.md +94 -0
- package/skill/Workflows/Cron.md +132 -0
- package/skill/Workflows/Dashboard.md +214 -0
- package/skill/Workflows/Doctor.md +63 -14
- package/skill/Workflows/Evals.md +110 -18
- package/skill/Workflows/EvolutionMemory.md +154 -0
- package/skill/Workflows/Evolve.md +181 -21
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/Grade.md +36 -31
- package/skill/Workflows/ImportSkillsBench.md +117 -0
- package/skill/Workflows/Ingest.md +142 -21
- package/skill/Workflows/Initialize.md +91 -23
- package/skill/Workflows/Orchestrate.md +139 -0
- package/skill/Workflows/Replay.md +91 -0
- package/skill/Workflows/Rollback.md +23 -4
- package/skill/Workflows/Schedule.md +61 -0
- package/skill/Workflows/Sync.md +88 -0
- package/skill/Workflows/UnitTest.md +150 -0
- package/skill/Workflows/Watch.md +33 -1
- package/skill/Workflows/Workflows.md +129 -0
- package/skill/assets/activation-rules-default.json +26 -0
- package/skill/assets/multi-skill-settings.json +63 -0
- package/skill/assets/single-skill-settings.json +57 -0
- package/skill/references/invocation-taxonomy.md +2 -2
- package/skill/references/logs.md +164 -2
- package/skill/references/setup-patterns.md +65 -0
- package/skill/references/version-history.md +40 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
- package/dashboard/index.html +0 -1119
package/cli/selftune/types.ts
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
// ---------------------------------------------------------------------------
|
|
8
8
|
|
|
9
9
|
export interface SelftuneConfig {
|
|
10
|
-
agent_type: "claude_code" | "codex" | "opencode" | "unknown";
|
|
10
|
+
agent_type: "claude_code" | "codex" | "opencode" | "openclaw" | "unknown";
|
|
11
11
|
cli_path: string;
|
|
12
12
|
llm_mode: "agent";
|
|
13
13
|
agent_cli: string | null;
|
|
@@ -31,6 +31,10 @@ export interface SkillUsageRecord {
|
|
|
31
31
|
session_id: string;
|
|
32
32
|
skill_name: string;
|
|
33
33
|
skill_path: string;
|
|
34
|
+
skill_scope?: "project" | "global" | "admin" | "system" | "unknown";
|
|
35
|
+
skill_project_root?: string;
|
|
36
|
+
skill_registry_dir?: string;
|
|
37
|
+
skill_path_resolution_source?: "raw_log" | "installed_scope" | "launcher_base_dir" | "fallback";
|
|
34
38
|
query: string;
|
|
35
39
|
triggered: boolean;
|
|
36
40
|
source?: string;
|
|
@@ -45,6 +49,7 @@ export interface SessionTelemetryRecord {
|
|
|
45
49
|
total_tool_calls: number;
|
|
46
50
|
bash_commands: string[];
|
|
47
51
|
skills_triggered: string[];
|
|
52
|
+
skills_invoked?: string[];
|
|
48
53
|
assistant_turns: number;
|
|
49
54
|
errors_encountered: number;
|
|
50
55
|
transcript_chars: number;
|
|
@@ -56,6 +61,49 @@ export interface SessionTelemetryRecord {
|
|
|
56
61
|
rollout_path?: string;
|
|
57
62
|
}
|
|
58
63
|
|
|
64
|
+
export interface ImprovementSignalRecord {
|
|
65
|
+
timestamp: string;
|
|
66
|
+
session_id: string;
|
|
67
|
+
query: string;
|
|
68
|
+
signal_type: "correction" | "explicit_request" | "manual_invocation";
|
|
69
|
+
mentioned_skill?: string;
|
|
70
|
+
consumed: boolean;
|
|
71
|
+
consumed_at?: string;
|
|
72
|
+
consumed_by_run?: string;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
export type {
|
|
76
|
+
CanonicalCaptureMode,
|
|
77
|
+
CanonicalCompletionStatus,
|
|
78
|
+
CanonicalExecutionFactRecord,
|
|
79
|
+
CanonicalInvocationMode,
|
|
80
|
+
CanonicalNormalizationRunRecord,
|
|
81
|
+
CanonicalPlatform,
|
|
82
|
+
CanonicalPromptKind,
|
|
83
|
+
CanonicalPromptRecord,
|
|
84
|
+
CanonicalRawSourceRef,
|
|
85
|
+
CanonicalRecord,
|
|
86
|
+
CanonicalRecordBase,
|
|
87
|
+
CanonicalRecordKind,
|
|
88
|
+
CanonicalSchemaVersion,
|
|
89
|
+
CanonicalSessionRecord,
|
|
90
|
+
CanonicalSkillInvocationRecord,
|
|
91
|
+
CanonicalSourceSessionKind,
|
|
92
|
+
} from "@selftune/telemetry-contract";
|
|
93
|
+
// ---------------------------------------------------------------------------
|
|
94
|
+
// Canonical normalization types (local + cloud projection layer)
|
|
95
|
+
// ---------------------------------------------------------------------------
|
|
96
|
+
export {
|
|
97
|
+
CANONICAL_CAPTURE_MODES,
|
|
98
|
+
CANONICAL_COMPLETION_STATUSES,
|
|
99
|
+
CANONICAL_INVOCATION_MODES,
|
|
100
|
+
CANONICAL_PLATFORMS,
|
|
101
|
+
CANONICAL_PROMPT_KINDS,
|
|
102
|
+
CANONICAL_RECORD_KINDS,
|
|
103
|
+
CANONICAL_SCHEMA_VERSION,
|
|
104
|
+
CANONICAL_SOURCE_SESSION_KINDS,
|
|
105
|
+
} from "@selftune/telemetry-contract";
|
|
106
|
+
|
|
59
107
|
// ---------------------------------------------------------------------------
|
|
60
108
|
// Transcript parsing
|
|
61
109
|
// ---------------------------------------------------------------------------
|
|
@@ -65,25 +113,33 @@ export interface TranscriptMetrics {
|
|
|
65
113
|
total_tool_calls: number;
|
|
66
114
|
bash_commands: string[];
|
|
67
115
|
skills_triggered: string[];
|
|
116
|
+
skills_invoked: string[];
|
|
68
117
|
assistant_turns: number;
|
|
69
118
|
errors_encountered: number;
|
|
70
119
|
transcript_chars: number;
|
|
71
120
|
last_user_query: string;
|
|
121
|
+
input_tokens?: number;
|
|
122
|
+
output_tokens?: number;
|
|
123
|
+
duration_ms?: number;
|
|
72
124
|
}
|
|
73
125
|
|
|
74
126
|
// ---------------------------------------------------------------------------
|
|
75
127
|
// Hook payloads (received via stdin from Claude Code)
|
|
76
128
|
// ---------------------------------------------------------------------------
|
|
77
129
|
|
|
130
|
+
// Shared base for pre/post tool-use hook payloads
|
|
131
|
+
export interface BaseToolUsePayload {
|
|
132
|
+
tool_name: string;
|
|
133
|
+
tool_input: Record<string, unknown>;
|
|
134
|
+
session_id?: string;
|
|
135
|
+
}
|
|
136
|
+
|
|
78
137
|
export interface PromptSubmitPayload {
|
|
79
138
|
user_prompt: string;
|
|
80
139
|
session_id?: string;
|
|
81
140
|
}
|
|
82
141
|
|
|
83
|
-
export interface PostToolUsePayload {
|
|
84
|
-
tool_name: string;
|
|
85
|
-
tool_input: Record<string, unknown>;
|
|
86
|
-
session_id?: string;
|
|
142
|
+
export interface PostToolUsePayload extends BaseToolUsePayload {
|
|
87
143
|
transcript_path?: string;
|
|
88
144
|
}
|
|
89
145
|
|
|
@@ -113,6 +169,8 @@ export interface GradingExpectation {
|
|
|
113
169
|
text: string;
|
|
114
170
|
passed: boolean;
|
|
115
171
|
evidence: string;
|
|
172
|
+
score?: number; // 0.0-1.0 graduated confidence
|
|
173
|
+
source?: "pre-gate" | "llm"; // which grading path produced this
|
|
116
174
|
}
|
|
117
175
|
|
|
118
176
|
export interface GradingClaim {
|
|
@@ -127,6 +185,15 @@ export interface GradingSummary {
|
|
|
127
185
|
failed: number;
|
|
128
186
|
total: number;
|
|
129
187
|
pass_rate: number;
|
|
188
|
+
mean_score?: number; // mean of all expectation scores
|
|
189
|
+
score_std_dev?: number; // standard deviation
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
export interface FailureFeedback {
|
|
193
|
+
query: string;
|
|
194
|
+
failure_reason: string;
|
|
195
|
+
improvement_hint: string;
|
|
196
|
+
invocation_type?: InvocationType;
|
|
130
197
|
}
|
|
131
198
|
|
|
132
199
|
/** Raw output from the LLM grader (before assembly into GradingResult). */
|
|
@@ -135,6 +202,7 @@ export interface GraderOutput {
|
|
|
135
202
|
summary: GradingSummary;
|
|
136
203
|
claims: GradingClaim[];
|
|
137
204
|
eval_feedback: EvalFeedback;
|
|
205
|
+
failure_feedback?: FailureFeedback[];
|
|
138
206
|
}
|
|
139
207
|
|
|
140
208
|
export interface EvalFeedback {
|
|
@@ -152,6 +220,7 @@ export interface GradingResult {
|
|
|
152
220
|
execution_metrics: ExecutionMetrics;
|
|
153
221
|
claims: GradingClaim[];
|
|
154
222
|
eval_feedback: EvalFeedback;
|
|
223
|
+
failure_feedback?: FailureFeedback[];
|
|
155
224
|
}
|
|
156
225
|
|
|
157
226
|
export interface ExecutionMetrics {
|
|
@@ -197,6 +266,7 @@ export interface FailurePattern {
|
|
|
197
266
|
frequency: number;
|
|
198
267
|
sample_sessions: string[];
|
|
199
268
|
extracted_at: string;
|
|
269
|
+
feedback?: FailureFeedback[];
|
|
200
270
|
}
|
|
201
271
|
|
|
202
272
|
export interface EvolutionProposal {
|
|
@@ -226,11 +296,41 @@ export interface EvalPassRate {
|
|
|
226
296
|
export interface EvolutionAuditEntry {
|
|
227
297
|
timestamp: string;
|
|
228
298
|
proposal_id: string;
|
|
299
|
+
skill_name?: string;
|
|
229
300
|
action: "created" | "validated" | "deployed" | "rolled_back" | "rejected";
|
|
230
301
|
details: string;
|
|
231
302
|
eval_snapshot?: EvalPassRate;
|
|
232
303
|
}
|
|
233
304
|
|
|
305
|
+
export interface EvolutionEvidenceValidation {
|
|
306
|
+
improved?: boolean;
|
|
307
|
+
before_pass_rate?: number;
|
|
308
|
+
after_pass_rate?: number;
|
|
309
|
+
net_change?: number;
|
|
310
|
+
regressions?: EvalEntry[] | string[];
|
|
311
|
+
new_passes?: EvalEntry[];
|
|
312
|
+
per_entry_results?: Array<{ entry: EvalEntry; before_pass: boolean; after_pass: boolean }>;
|
|
313
|
+
gates_passed?: number;
|
|
314
|
+
gates_total?: number;
|
|
315
|
+
gate_results?: Array<{ gate: ValidationGate; passed: boolean; reason: string }>;
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
export interface EvolutionEvidenceEntry {
|
|
319
|
+
timestamp: string;
|
|
320
|
+
proposal_id: string;
|
|
321
|
+
skill_name: string;
|
|
322
|
+
skill_path: string;
|
|
323
|
+
target: EvolutionTarget;
|
|
324
|
+
stage: "created" | "validated" | "deployed" | "rejected" | "rolled_back";
|
|
325
|
+
rationale?: string;
|
|
326
|
+
confidence?: number;
|
|
327
|
+
details?: string;
|
|
328
|
+
original_text?: string;
|
|
329
|
+
proposed_text?: string;
|
|
330
|
+
eval_set?: EvalEntry[];
|
|
331
|
+
validation?: EvolutionEvidenceValidation;
|
|
332
|
+
}
|
|
333
|
+
|
|
234
334
|
export interface EvolutionConfig {
|
|
235
335
|
min_sessions: number;
|
|
236
336
|
min_improvement: number; // e.g., 0.10 = 10 percentage points
|
|
@@ -239,6 +339,68 @@ export interface EvolutionConfig {
|
|
|
239
339
|
dry_run: boolean;
|
|
240
340
|
}
|
|
241
341
|
|
|
342
|
+
// ---------------------------------------------------------------------------
|
|
343
|
+
// Validation result base (self-contained for Pareto types)
|
|
344
|
+
// ---------------------------------------------------------------------------
|
|
345
|
+
|
|
346
|
+
/** Compact summary of an evolve run, used for CLI JSON output. */
|
|
347
|
+
export interface EvolveResultSummary {
|
|
348
|
+
skill: string;
|
|
349
|
+
deployed: boolean;
|
|
350
|
+
reason: string;
|
|
351
|
+
before: number;
|
|
352
|
+
after: number;
|
|
353
|
+
net_change: number;
|
|
354
|
+
improved: boolean;
|
|
355
|
+
regressions: number;
|
|
356
|
+
new_passes: number;
|
|
357
|
+
confidence: number;
|
|
358
|
+
llm_calls: number;
|
|
359
|
+
elapsed_s: number;
|
|
360
|
+
proposal_id: string;
|
|
361
|
+
rationale: string;
|
|
362
|
+
version?: string;
|
|
363
|
+
dashboard_url: string;
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
export interface ValidationResultBase {
|
|
367
|
+
proposal_id: string;
|
|
368
|
+
before_pass_rate: number;
|
|
369
|
+
after_pass_rate: number;
|
|
370
|
+
improved: boolean;
|
|
371
|
+
regressions: EvalEntry[];
|
|
372
|
+
new_passes: EvalEntry[];
|
|
373
|
+
net_change: number;
|
|
374
|
+
by_invocation_type?: InvocationTypeScores;
|
|
375
|
+
per_entry_results?: Array<{ entry: EvalEntry; before_pass: boolean; after_pass: boolean }>;
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
// ---------------------------------------------------------------------------
|
|
379
|
+
// Pareto types (multi-dimensional evolution selection)
|
|
380
|
+
// ---------------------------------------------------------------------------
|
|
381
|
+
|
|
382
|
+
export interface InvocationTypeScores {
|
|
383
|
+
explicit: { passed: number; total: number; pass_rate: number };
|
|
384
|
+
implicit: { passed: number; total: number; pass_rate: number };
|
|
385
|
+
contextual: { passed: number; total: number; pass_rate: number };
|
|
386
|
+
negative: { passed: number; total: number; pass_rate: number };
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
export interface ParetoCandidate {
|
|
390
|
+
proposal: EvolutionProposal;
|
|
391
|
+
validation: ValidationResultBase;
|
|
392
|
+
invocation_scores: InvocationTypeScores;
|
|
393
|
+
dominates_on: InvocationType[];
|
|
394
|
+
token_efficiency_score?: number;
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
export interface ParetoSelectionResult {
|
|
398
|
+
selected_proposal: EvolutionProposal;
|
|
399
|
+
frontier: ParetoCandidate[];
|
|
400
|
+
merge_applied: boolean;
|
|
401
|
+
merge_sources: string[];
|
|
402
|
+
}
|
|
403
|
+
|
|
242
404
|
// ---------------------------------------------------------------------------
|
|
243
405
|
// Monitoring types (v0.4)
|
|
244
406
|
// ---------------------------------------------------------------------------
|
|
@@ -247,9 +409,367 @@ export interface MonitoringSnapshot {
|
|
|
247
409
|
timestamp: string;
|
|
248
410
|
skill_name: string;
|
|
249
411
|
window_sessions: number;
|
|
412
|
+
skill_checks: number;
|
|
250
413
|
pass_rate: number;
|
|
251
414
|
false_negative_rate: number;
|
|
252
415
|
by_invocation_type: Record<InvocationType, { passed: number; total: number }>;
|
|
253
416
|
regression_detected: boolean;
|
|
254
417
|
baseline_pass_rate: number;
|
|
255
418
|
}
|
|
419
|
+
|
|
420
|
+
// ---------------------------------------------------------------------------
|
|
421
|
+
// Activation rule types (v0.5 — auto-activate hooks)
|
|
422
|
+
// ---------------------------------------------------------------------------
|
|
423
|
+
|
|
424
|
+
export interface ActivationRule {
|
|
425
|
+
id: string;
|
|
426
|
+
description: string;
|
|
427
|
+
/** Evaluate whether this rule fires. Returns a suggestion string or null. */
|
|
428
|
+
evaluate: (ctx: ActivationContext) => string | null;
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
export interface ActivationContext {
|
|
432
|
+
session_id: string;
|
|
433
|
+
query_log_path: string;
|
|
434
|
+
telemetry_log_path: string;
|
|
435
|
+
evolution_audit_log_path: string;
|
|
436
|
+
selftune_dir: string;
|
|
437
|
+
settings_path: string;
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
export interface SessionState {
|
|
441
|
+
session_id: string;
|
|
442
|
+
suggestions_shown: string[]; // rule IDs already fired this session
|
|
443
|
+
updated_at: string;
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
// ---------------------------------------------------------------------------
|
|
447
|
+
// PreToolUse hook payloads
|
|
448
|
+
// ---------------------------------------------------------------------------
|
|
449
|
+
|
|
450
|
+
export interface PreToolUsePayload extends BaseToolUsePayload {}
|
|
451
|
+
|
|
452
|
+
// ---------------------------------------------------------------------------
|
|
453
|
+
// Evolution memory types (session context persistence)
|
|
454
|
+
// ---------------------------------------------------------------------------
|
|
455
|
+
|
|
456
|
+
export interface EvolutionMemory {
|
|
457
|
+
context: MemoryContext;
|
|
458
|
+
plan: MemoryPlan;
|
|
459
|
+
decisions: DecisionRecord[];
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
export interface MemoryContext {
|
|
463
|
+
activeEvolutions: Array<{
|
|
464
|
+
skillName: string;
|
|
465
|
+
status: string;
|
|
466
|
+
description: string;
|
|
467
|
+
}>;
|
|
468
|
+
knownIssues: string[];
|
|
469
|
+
lastUpdated: string;
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
export interface MemoryPlan {
|
|
473
|
+
currentPriorities: string[];
|
|
474
|
+
strategy: string;
|
|
475
|
+
lastUpdated: string;
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
export interface DecisionRecord {
|
|
479
|
+
timestamp: string;
|
|
480
|
+
/** Imperative verb for markdown headings (e.g. "evolve", "rollback", "watch"). */
|
|
481
|
+
actionType: string;
|
|
482
|
+
skillName: string;
|
|
483
|
+
/** Past-tense result state used programmatically. */
|
|
484
|
+
action: "evolved" | "rolled-back" | "watched";
|
|
485
|
+
rationale: string;
|
|
486
|
+
result: string;
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
// ---------------------------------------------------------------------------
|
|
490
|
+
// Contribution types (contribute command)
|
|
491
|
+
// ---------------------------------------------------------------------------
|
|
492
|
+
|
|
493
|
+
export interface ContributionQuery {
|
|
494
|
+
query: string;
|
|
495
|
+
invocation_type: InvocationType;
|
|
496
|
+
source: string;
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
export interface ContributionEvalEntry {
|
|
500
|
+
query: string;
|
|
501
|
+
should_trigger: boolean;
|
|
502
|
+
invocation_type?: InvocationType;
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
export interface ContributionGradingSummary {
|
|
506
|
+
total_sessions: number;
|
|
507
|
+
graded_sessions: number;
|
|
508
|
+
average_pass_rate: number;
|
|
509
|
+
expectation_count: number;
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
export interface ContributionEvolutionSummary {
|
|
513
|
+
total_proposals: number;
|
|
514
|
+
deployed_proposals: number;
|
|
515
|
+
rolled_back_proposals: number;
|
|
516
|
+
average_improvement: number;
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
export interface ContributionSessionMetrics {
|
|
520
|
+
total_sessions: number;
|
|
521
|
+
avg_assistant_turns: number;
|
|
522
|
+
avg_tool_calls: number;
|
|
523
|
+
avg_errors: number;
|
|
524
|
+
top_tools: Array<{ tool: string; count: number }>;
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
export interface ContributionBundle {
|
|
528
|
+
schema_version: "1.0" | "1.1" | "1.2";
|
|
529
|
+
skill_name?: string;
|
|
530
|
+
contributor_id: string;
|
|
531
|
+
created_at: string;
|
|
532
|
+
selftune_version: string;
|
|
533
|
+
agent_type: string;
|
|
534
|
+
sanitization_level: "conservative" | "aggressive";
|
|
535
|
+
positive_queries: ContributionQuery[];
|
|
536
|
+
eval_entries: ContributionEvalEntry[];
|
|
537
|
+
grading_summary: ContributionGradingSummary | null;
|
|
538
|
+
evolution_summary: ContributionEvolutionSummary | null;
|
|
539
|
+
session_metrics: ContributionSessionMetrics;
|
|
540
|
+
unmatched_queries?: Array<{ query: string; timestamp: string }>;
|
|
541
|
+
pending_proposals?: Array<{
|
|
542
|
+
proposal_id: string;
|
|
543
|
+
skill_name?: string;
|
|
544
|
+
action: string;
|
|
545
|
+
timestamp: string;
|
|
546
|
+
details: string;
|
|
547
|
+
}>;
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
// ---------------------------------------------------------------------------
|
|
551
|
+
// Evolution target types (v0.6 — body + routing evolution)
|
|
552
|
+
// ---------------------------------------------------------------------------
|
|
553
|
+
|
|
554
|
+
/** Which part of a skill is being evolved. */
|
|
555
|
+
export type EvolutionTarget = "description" | "routing" | "body";
|
|
556
|
+
|
|
557
|
+
/** Parsed sections of a SKILL.md file. */
|
|
558
|
+
export interface SkillSections {
|
|
559
|
+
frontmatter: string;
|
|
560
|
+
title: string;
|
|
561
|
+
description: string;
|
|
562
|
+
sections: Record<string, string>;
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
/** Proposal for evolving the full body of a SKILL.md. */
|
|
566
|
+
export interface BodyEvolutionProposal {
|
|
567
|
+
proposal_id: string;
|
|
568
|
+
skill_name: string;
|
|
569
|
+
skill_path: string;
|
|
570
|
+
original_body: string;
|
|
571
|
+
proposed_body: string;
|
|
572
|
+
rationale: string;
|
|
573
|
+
target: EvolutionTarget;
|
|
574
|
+
failure_patterns: string[];
|
|
575
|
+
confidence: number;
|
|
576
|
+
created_at: string;
|
|
577
|
+
status: "pending" | "validated" | "deployed" | "rolled_back";
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
/** Closed union of gate names used in the validation pipeline. */
|
|
581
|
+
export type ValidationGate = "structural" | "trigger_accuracy" | "quality";
|
|
582
|
+
|
|
583
|
+
/** Result of validating a body evolution proposal. */
|
|
584
|
+
export interface BodyValidationResult {
|
|
585
|
+
proposal_id: string;
|
|
586
|
+
gates_passed: number;
|
|
587
|
+
gates_total: number;
|
|
588
|
+
gate_results: Array<{ gate: ValidationGate; passed: boolean; reason: string }>;
|
|
589
|
+
improved: boolean;
|
|
590
|
+
regressions: string[];
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
/** Configuration for which LLM model a role should use. */
|
|
594
|
+
export interface LlmRoleConfig {
|
|
595
|
+
role: string;
|
|
596
|
+
model: string;
|
|
597
|
+
temperature?: number;
|
|
598
|
+
max_tokens?: number;
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
/** Token usage metrics for a session or eval run. */
|
|
602
|
+
export interface TokenUsageMetrics {
|
|
603
|
+
input_tokens: number;
|
|
604
|
+
output_tokens: number;
|
|
605
|
+
total_tokens: number;
|
|
606
|
+
estimated_cost_usd?: number;
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
// ---------------------------------------------------------------------------
|
|
610
|
+
// Baseline comparison types
|
|
611
|
+
// ---------------------------------------------------------------------------
|
|
612
|
+
|
|
613
|
+
/** Result of a no-skill baseline measurement. */
|
|
614
|
+
export interface BaselineResult {
|
|
615
|
+
skill_name: string;
|
|
616
|
+
query: string;
|
|
617
|
+
with_skill: boolean;
|
|
618
|
+
triggered: boolean;
|
|
619
|
+
pass: boolean;
|
|
620
|
+
latency_ms?: number;
|
|
621
|
+
tokens?: TokenUsageMetrics;
|
|
622
|
+
measured_at: string;
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
// ---------------------------------------------------------------------------
|
|
626
|
+
// Skill unit test types
|
|
627
|
+
// ---------------------------------------------------------------------------
|
|
628
|
+
|
|
629
|
+
/** Type of assertion for a skill unit test. */
|
|
630
|
+
export type AssertionType =
|
|
631
|
+
| "contains"
|
|
632
|
+
| "not_contains"
|
|
633
|
+
| "regex"
|
|
634
|
+
| "json_path"
|
|
635
|
+
| "tool_called"
|
|
636
|
+
| "tool_not_called";
|
|
637
|
+
|
|
638
|
+
/** A single assertion within a skill unit test. */
|
|
639
|
+
export interface SkillAssertion {
|
|
640
|
+
type: AssertionType;
|
|
641
|
+
value: string;
|
|
642
|
+
description?: string;
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
/** A skill unit test case. */
|
|
646
|
+
export interface SkillUnitTest {
|
|
647
|
+
id: string;
|
|
648
|
+
skill_name: string;
|
|
649
|
+
query: string;
|
|
650
|
+
assertions: SkillAssertion[];
|
|
651
|
+
timeout_ms?: number;
|
|
652
|
+
tags?: string[];
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
/** Result of running a single skill unit test. */
|
|
656
|
+
export interface UnitTestResult {
|
|
657
|
+
test_id: string;
|
|
658
|
+
passed: boolean;
|
|
659
|
+
assertion_results: Array<{ assertion: SkillAssertion; passed: boolean; actual?: string }>;
|
|
660
|
+
duration_ms: number;
|
|
661
|
+
error?: string;
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
/** Aggregated result of a skill unit test suite. */
|
|
665
|
+
export interface UnitTestSuiteResult {
|
|
666
|
+
skill_name: string;
|
|
667
|
+
total: number;
|
|
668
|
+
passed: number;
|
|
669
|
+
failed: number;
|
|
670
|
+
pass_rate: number;
|
|
671
|
+
results: UnitTestResult[];
|
|
672
|
+
run_at: string;
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
// ---------------------------------------------------------------------------
|
|
676
|
+
// Composability types
|
|
677
|
+
// ---------------------------------------------------------------------------
|
|
678
|
+
|
|
679
|
+
/** A pair of skills that co-occur in sessions. */
|
|
680
|
+
export interface CoOccurrencePair {
|
|
681
|
+
skill_a: string;
|
|
682
|
+
skill_b: string;
|
|
683
|
+
co_occurrence_count: number;
|
|
684
|
+
conflict_detected: boolean;
|
|
685
|
+
conflict_reason?: string;
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
/** Report on skill composability / conflicts. */
|
|
689
|
+
export interface ComposabilityReport {
|
|
690
|
+
pairs: CoOccurrencePair[];
|
|
691
|
+
total_sessions_analyzed: number;
|
|
692
|
+
conflict_count: number;
|
|
693
|
+
generated_at: string;
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
// ---------------------------------------------------------------------------
|
|
697
|
+
// SkillsBench types
|
|
698
|
+
// ---------------------------------------------------------------------------
|
|
699
|
+
|
|
700
|
+
/** A task from the SkillsBench benchmark suite. */
|
|
701
|
+
export interface SkillsBenchTask {
|
|
702
|
+
task_id: string;
|
|
703
|
+
category: string;
|
|
704
|
+
query: string;
|
|
705
|
+
expected_skill?: string;
|
|
706
|
+
expected_tools?: string[];
|
|
707
|
+
difficulty: "easy" | "medium" | "hard";
|
|
708
|
+
tags?: string[];
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
// ---------------------------------------------------------------------------
|
|
712
|
+
// Composability V2 types (synergy + sequence detection)
|
|
713
|
+
// ---------------------------------------------------------------------------
|
|
714
|
+
|
|
715
|
+
/** Extended pair with synergy detection */
|
|
716
|
+
export interface CoOccurrencePairV2 extends CoOccurrencePair {
|
|
717
|
+
synergy_score: number;
|
|
718
|
+
avg_errors_together: number;
|
|
719
|
+
avg_errors_alone: number;
|
|
720
|
+
workflow_candidate: boolean;
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
/**
 * An ordered run of skills, detected from timestamps.
 */
export interface SkillSequence {
  /** Skill names in sequence order. */
  skills: Array<string>;
  /** How many times the sequence was observed. */
  occurrence_count: number;
  /** Synergy score attributed to the sequence. */
  synergy_score: number;
  /** A query chosen to represent this sequence. */
  representative_query: string;
  /** Consistency measure for the ordering (range not fixed by this type). */
  sequence_consistency: number;
}
|
|
731
|
+
|
|
732
|
+
/**
 * A `ComposabilityReport` extended with synergy and sequence detection results.
 */
export interface ComposabilityReportV2 extends ComposabilityReport {
  /** Narrows the inherited `pairs` field to the V2 pair shape. */
  pairs: Array<CoOccurrencePairV2>;
  /** Ordered skill sequences included in the report. */
  sequences: Array<SkillSequence>;
  /** Pairs surfaced as workflow candidates. */
  workflow_candidates: Array<CoOccurrencePairV2>;
  /** Synergy tally for the report (counting rule is defined by the producer). */
  synergy_count: number;
}
|
|
739
|
+
|
|
740
|
+
// ---------------------------------------------------------------------------
|
|
741
|
+
// Workflow Support types
|
|
742
|
+
// ---------------------------------------------------------------------------
|
|
743
|
+
|
|
744
|
+
/**
 * A multi-skill workflow discovered from session data, with its usage and
 * quality statistics.
 */
export interface DiscoveredWorkflow {
  /** Deterministic hash: `skills.join("→")`. */
  workflow_id: string;
  /** Ordered skill sequence. */
  skills: Array<string>;
  /** How many times the workflow was observed. */
  occurrence_count: number;
  /** Average error figure for the workflow as a whole. */
  avg_errors: number;
  /** Average error figure for the skills individually. */
  avg_errors_individual: number;
  /** `clamp((individual - together) / (individual + 1), -1, 1)`. */
  synergy_score: number;
  /** A query chosen to represent this workflow. */
  representative_query: string;
  /** Ordering consistency, in `[0,1]`. */
  sequence_consistency: number;
  /** % sessions where all skills fired. */
  completion_rate: number;
  /** First time the workflow was seen, as a string. */
  first_seen: string;
  /** Most recent time the workflow was seen, as a string. */
  last_seen: string;
  /** Sessions that contributed to this workflow. */
  session_ids: Array<string>;
}
|
|
758
|
+
|
|
759
|
+
/**
 * A named workflow definition, either authored directly or derived from a
 * discovered workflow.
 */
export interface CodifiedWorkflow {
  /** Name of the workflow. */
  name: string;
  /** Skills that make up the workflow. */
  skills: Array<string>;
  /** Optional free-text description. */
  description?: string;
  /** Origin of the definition: one of the two listed literals. */
  source: "discovered" | "authored";
  /**
   * Optional provenance block carrying the id and statistics of the
   * discovered workflow this definition came from.
   */
  discovered_from?: {
    /** Identifier of the originating discovered workflow. */
    workflow_id: string;
    /** Occurrence count recorded for the originating workflow. */
    occurrence_count: number;
    /** Synergy score recorded for the originating workflow. */
    synergy_score: number;
  };
}
|
|
770
|
+
|
|
771
|
+
/**
 * Output of a workflow discovery run: the workflows found plus run metadata.
 */
export interface WorkflowDiscoveryReport {
  /** Workflows included in the report. */
  workflows: Array<DiscoveredWorkflow>;
  /** Number of sessions that were analysed. */
  total_sessions_analyzed: number;
  /** When the report was generated, as a string (format not fixed by this type). */
  generated_at: string;
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { existsSync, writeFileSync } from "node:fs";
|
|
2
|
+
import {
|
|
3
|
+
type CanonicalPlatform,
|
|
4
|
+
type CanonicalRecord,
|
|
5
|
+
type CanonicalRecordKind,
|
|
6
|
+
isCanonicalRecord,
|
|
7
|
+
} from "@selftune/telemetry-contract";
|
|
8
|
+
import { CANONICAL_LOG } from "../constants.js";
|
|
9
|
+
import { readJsonl } from "./jsonl.js";
|
|
10
|
+
|
|
11
|
+
/**
 * Optional criteria for narrowing a list of canonical records.
 * A criterion that is left unset places no restriction on that field.
 */
export interface CanonicalRecordFilter {
  /** Keep only records whose `platform` equals this value. */
  platform?: CanonicalPlatform;
  /** Keep only records whose `record_kind` equals this value. */
  record_kind?: CanonicalRecordKind;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Load canonical records from a JSONL log file.
 *
 * @param logPath - File to read; defaults to `CANONICAL_LOG`.
 * @returns The parsed entries that pass `isCanonicalRecord`, or an empty
 *   array when `logPath` does not exist.
 */
export function readCanonicalRecords(logPath: string = CANONICAL_LOG): CanonicalRecord[] {
  if (existsSync(logPath)) {
    const parsedEntries = readJsonl<CanonicalRecord>(logPath);
    // Entries that fail the type guard are dropped rather than reported.
    return parsedEntries.filter(isCanonicalRecord);
  }
  return [];
}
|
|
20
|
+
|
|
21
|
+
/**
 * Select the records that satisfy every criterion set on `filter`.
 *
 * @param records - Records to examine; the array itself is not modified.
 * @param filter - Criteria to apply. A falsy `platform` or `record_kind`
 *   is treated as "no restriction" for that field.
 * @returns A new array with the matching records in their original order.
 */
export function filterCanonicalRecords(
  records: CanonicalRecord[],
  filter: CanonicalRecordFilter,
): CanonicalRecord[] {
  const matchesFilter = (entry: CanonicalRecord): boolean =>
    (!filter.platform || entry.platform === filter.platform) &&
    (!filter.record_kind || entry.record_kind === filter.record_kind);

  return records.filter(matchesFilter);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Render canonical records as text.
 *
 * @param records - Records to serialize.
 * @param pretty - When true, emit a single JSON array indented by two spaces;
 *   otherwise emit one compact JSON document per line (JSONL).
 * @returns The serialized text. Pretty output always ends with a newline;
 *   JSONL output ends with a newline unless `records` is empty, in which
 *   case it is the empty string.
 */
export function serializeCanonicalRecords(records: CanonicalRecord[], pretty = false): string {
  if (!pretty) {
    if (records.length === 0) return "";
    const jsonLines = records.map((entry) => JSON.stringify(entry));
    return `${jsonLines.join("\n")}\n`;
  }
  return `${JSON.stringify(records, null, 2)}\n`;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Serialize records and write them to a file as UTF-8 text.
 *
 * @param records - Records to export.
 * @param outPath - Destination file path.
 * @param pretty - Forwarded to `serializeCanonicalRecords` to choose the
 *   output layout.
 */
export function writeCanonicalExport(
  records: CanonicalRecord[],
  outPath: string,
  pretty = false,
): void {
  const payload = serializeCanonicalRecords(records, pretty);
  writeFileSync(outPath, payload, "utf-8");
}
|