selftune 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +146 -0
- package/.claude/agents/evolution-reviewer.md +167 -0
- package/.claude/agents/integration-guide.md +200 -0
- package/.claude/agents/pattern-analyst.md +147 -0
- package/CHANGELOG.md +37 -0
- package/README.md +96 -256
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +103 -0
- package/cli/selftune/constants.ts +75 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-server.ts +582 -0
- package/cli/selftune/dashboard.ts +25 -3
- package/cli/selftune/eval/baseline.ts +247 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +68 -2
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evolve-body.ts +492 -0
- package/cli/selftune/evolution/evolve.ts +466 -103
- package/cli/selftune/evolution/extract-patterns.ts +32 -1
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +19 -2
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/grade-session.ts +138 -18
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/index.ts +88 -0
- package/cli/selftune/ingestors/claude-replay.ts +351 -0
- package/cli/selftune/ingestors/openclaw-ingest.ts +440 -0
- package/cli/selftune/init.ts +150 -3
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +25 -2
- package/cli/selftune/status.ts +17 -13
- package/cli/selftune/types.ts +377 -5
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/llm-call.ts +29 -3
- package/cli/selftune/utils/transcript.ts +35 -0
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/dashboard/index.html +569 -8
- package/package.json +8 -4
- package/skill/SKILL.md +124 -8
- package/skill/Workflows/AutoActivation.md +144 -0
- package/skill/Workflows/Badge.md +118 -0
- package/skill/Workflows/Baseline.md +121 -0
- package/skill/Workflows/Composability.md +100 -0
- package/skill/Workflows/Contribute.md +91 -0
- package/skill/Workflows/Cron.md +155 -0
- package/skill/Workflows/Dashboard.md +203 -0
- package/skill/Workflows/Doctor.md +37 -1
- package/skill/Workflows/Evals.md +69 -1
- package/skill/Workflows/EvolutionMemory.md +152 -0
- package/skill/Workflows/Evolve.md +111 -6
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/ImportSkillsBench.md +111 -0
- package/skill/Workflows/Ingest.md +117 -3
- package/skill/Workflows/Initialize.md +57 -3
- package/skill/Workflows/Replay.md +70 -0
- package/skill/Workflows/Rollback.md +20 -1
- package/skill/Workflows/UnitTest.md +138 -0
- package/skill/Workflows/Watch.md +22 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
package/cli/selftune/types.ts
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
// ---------------------------------------------------------------------------
|
|
8
8
|
|
|
9
9
|
export interface SelftuneConfig {
|
|
10
|
-
agent_type: "claude_code" | "codex" | "opencode" | "unknown";
|
|
10
|
+
agent_type: "claude_code" | "codex" | "opencode" | "openclaw" | "unknown";
|
|
11
11
|
cli_path: string;
|
|
12
12
|
llm_mode: "agent";
|
|
13
13
|
agent_cli: string | null;
|
|
@@ -75,15 +75,19 @@ export interface TranscriptMetrics {
|
|
|
75
75
|
// Hook payloads (received via stdin from Claude Code)
|
|
76
76
|
// ---------------------------------------------------------------------------
|
|
77
77
|
|
|
78
|
+
// Shared base for pre/post tool-use hook payloads
|
|
79
|
+
export interface BaseToolUsePayload {
|
|
80
|
+
tool_name: string;
|
|
81
|
+
tool_input: Record<string, unknown>;
|
|
82
|
+
session_id?: string;
|
|
83
|
+
}
|
|
84
|
+
|
|
78
85
|
export interface PromptSubmitPayload {
|
|
79
86
|
user_prompt: string;
|
|
80
87
|
session_id?: string;
|
|
81
88
|
}
|
|
82
89
|
|
|
83
|
-
export interface PostToolUsePayload {
|
|
84
|
-
tool_name: string;
|
|
85
|
-
tool_input: Record<string, unknown>;
|
|
86
|
-
session_id?: string;
|
|
90
|
+
export interface PostToolUsePayload extends BaseToolUsePayload {
|
|
87
91
|
transcript_path?: string;
|
|
88
92
|
}
|
|
89
93
|
|
|
@@ -113,6 +117,8 @@ export interface GradingExpectation {
|
|
|
113
117
|
text: string;
|
|
114
118
|
passed: boolean;
|
|
115
119
|
evidence: string;
|
|
120
|
+
score?: number; // 0.0-1.0 graduated confidence
|
|
121
|
+
source?: "pre-gate" | "llm"; // which grading path produced this
|
|
116
122
|
}
|
|
117
123
|
|
|
118
124
|
export interface GradingClaim {
|
|
@@ -127,6 +133,15 @@ export interface GradingSummary {
|
|
|
127
133
|
failed: number;
|
|
128
134
|
total: number;
|
|
129
135
|
pass_rate: number;
|
|
136
|
+
mean_score?: number; // mean of all expectation scores
|
|
137
|
+
score_std_dev?: number; // standard deviation
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
export interface FailureFeedback {
|
|
141
|
+
query: string;
|
|
142
|
+
failure_reason: string;
|
|
143
|
+
improvement_hint: string;
|
|
144
|
+
invocation_type?: InvocationType;
|
|
130
145
|
}
|
|
131
146
|
|
|
132
147
|
/** Raw output from the LLM grader (before assembly into GradingResult). */
|
|
@@ -135,6 +150,7 @@ export interface GraderOutput {
|
|
|
135
150
|
summary: GradingSummary;
|
|
136
151
|
claims: GradingClaim[];
|
|
137
152
|
eval_feedback: EvalFeedback;
|
|
153
|
+
failure_feedback?: FailureFeedback[];
|
|
138
154
|
}
|
|
139
155
|
|
|
140
156
|
export interface EvalFeedback {
|
|
@@ -152,6 +168,7 @@ export interface GradingResult {
|
|
|
152
168
|
execution_metrics: ExecutionMetrics;
|
|
153
169
|
claims: GradingClaim[];
|
|
154
170
|
eval_feedback: EvalFeedback;
|
|
171
|
+
failure_feedback?: FailureFeedback[];
|
|
155
172
|
}
|
|
156
173
|
|
|
157
174
|
export interface ExecutionMetrics {
|
|
@@ -197,6 +214,7 @@ export interface FailurePattern {
|
|
|
197
214
|
frequency: number;
|
|
198
215
|
sample_sessions: string[];
|
|
199
216
|
extracted_at: string;
|
|
217
|
+
feedback?: FailureFeedback[];
|
|
200
218
|
}
|
|
201
219
|
|
|
202
220
|
export interface EvolutionProposal {
|
|
@@ -226,6 +244,7 @@ export interface EvalPassRate {
|
|
|
226
244
|
export interface EvolutionAuditEntry {
|
|
227
245
|
timestamp: string;
|
|
228
246
|
proposal_id: string;
|
|
247
|
+
skill_name?: string;
|
|
229
248
|
action: "created" | "validated" | "deployed" | "rolled_back" | "rejected";
|
|
230
249
|
details: string;
|
|
231
250
|
eval_snapshot?: EvalPassRate;
|
|
@@ -239,6 +258,68 @@ export interface EvolutionConfig {
|
|
|
239
258
|
dry_run: boolean;
|
|
240
259
|
}
|
|
241
260
|
|
|
261
|
+
// ---------------------------------------------------------------------------
|
|
262
|
+
// Validation result base (self-contained for Pareto types)
|
|
263
|
+
// ---------------------------------------------------------------------------
|
|
264
|
+
|
|
265
|
+
/** Compact summary of an evolve run, used for CLI JSON output. */
|
|
266
|
+
export interface EvolveResultSummary {
|
|
267
|
+
skill: string;
|
|
268
|
+
deployed: boolean;
|
|
269
|
+
reason: string;
|
|
270
|
+
before: number;
|
|
271
|
+
after: number;
|
|
272
|
+
net_change: number;
|
|
273
|
+
improved: boolean;
|
|
274
|
+
regressions: number;
|
|
275
|
+
new_passes: number;
|
|
276
|
+
confidence: number;
|
|
277
|
+
llm_calls: number;
|
|
278
|
+
elapsed_s: number;
|
|
279
|
+
proposal_id: string;
|
|
280
|
+
rationale: string;
|
|
281
|
+
version?: string;
|
|
282
|
+
dashboard_url: string;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
export interface ValidationResultBase {
|
|
286
|
+
proposal_id: string;
|
|
287
|
+
before_pass_rate: number;
|
|
288
|
+
after_pass_rate: number;
|
|
289
|
+
improved: boolean;
|
|
290
|
+
regressions: EvalEntry[];
|
|
291
|
+
new_passes: EvalEntry[];
|
|
292
|
+
net_change: number;
|
|
293
|
+
by_invocation_type?: InvocationTypeScores;
|
|
294
|
+
per_entry_results?: Array<{ entry: EvalEntry; before_pass: boolean; after_pass: boolean }>;
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
// ---------------------------------------------------------------------------
|
|
298
|
+
// Pareto types (multi-dimensional evolution selection)
|
|
299
|
+
// ---------------------------------------------------------------------------
|
|
300
|
+
|
|
301
|
+
export interface InvocationTypeScores {
|
|
302
|
+
explicit: { passed: number; total: number; pass_rate: number };
|
|
303
|
+
implicit: { passed: number; total: number; pass_rate: number };
|
|
304
|
+
contextual: { passed: number; total: number; pass_rate: number };
|
|
305
|
+
negative: { passed: number; total: number; pass_rate: number };
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
export interface ParetoCandidate {
|
|
309
|
+
proposal: EvolutionProposal;
|
|
310
|
+
validation: ValidationResultBase;
|
|
311
|
+
invocation_scores: InvocationTypeScores;
|
|
312
|
+
dominates_on: InvocationType[];
|
|
313
|
+
token_efficiency_score?: number;
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
export interface ParetoSelectionResult {
|
|
317
|
+
selected_proposal: EvolutionProposal;
|
|
318
|
+
frontier: ParetoCandidate[];
|
|
319
|
+
merge_applied: boolean;
|
|
320
|
+
merge_sources: string[];
|
|
321
|
+
}
|
|
322
|
+
|
|
242
323
|
// ---------------------------------------------------------------------------
|
|
243
324
|
// Monitoring types (v0.4)
|
|
244
325
|
// ---------------------------------------------------------------------------
|
|
@@ -253,3 +334,294 @@ export interface MonitoringSnapshot {
|
|
|
253
334
|
regression_detected: boolean;
|
|
254
335
|
baseline_pass_rate: number;
|
|
255
336
|
}
|
|
337
|
+
|
|
338
|
+
// ---------------------------------------------------------------------------
|
|
339
|
+
// Activation rule types (v0.5 — auto-activate hooks)
|
|
340
|
+
// ---------------------------------------------------------------------------
|
|
341
|
+
|
|
342
|
+
export interface ActivationRule {
|
|
343
|
+
id: string;
|
|
344
|
+
description: string;
|
|
345
|
+
/** Evaluate whether this rule fires. Returns a suggestion string or null. */
|
|
346
|
+
evaluate: (ctx: ActivationContext) => string | null;
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
export interface ActivationContext {
|
|
350
|
+
session_id: string;
|
|
351
|
+
query_log_path: string;
|
|
352
|
+
telemetry_log_path: string;
|
|
353
|
+
evolution_audit_log_path: string;
|
|
354
|
+
selftune_dir: string;
|
|
355
|
+
settings_path: string;
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
export interface SessionState {
|
|
359
|
+
session_id: string;
|
|
360
|
+
suggestions_shown: string[]; // rule IDs already fired this session
|
|
361
|
+
updated_at: string;
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
// ---------------------------------------------------------------------------
|
|
365
|
+
// PreToolUse hook payloads
|
|
366
|
+
// ---------------------------------------------------------------------------
|
|
367
|
+
|
|
368
|
+
export interface PreToolUsePayload extends BaseToolUsePayload {}
|
|
369
|
+
|
|
370
|
+
// ---------------------------------------------------------------------------
|
|
371
|
+
// Evolution memory types (session context persistence)
|
|
372
|
+
// ---------------------------------------------------------------------------
|
|
373
|
+
|
|
374
|
+
export interface EvolutionMemory {
|
|
375
|
+
context: MemoryContext;
|
|
376
|
+
plan: MemoryPlan;
|
|
377
|
+
decisions: DecisionRecord[];
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
export interface MemoryContext {
|
|
381
|
+
activeEvolutions: Array<{
|
|
382
|
+
skillName: string;
|
|
383
|
+
status: string;
|
|
384
|
+
description: string;
|
|
385
|
+
}>;
|
|
386
|
+
knownIssues: string[];
|
|
387
|
+
lastUpdated: string;
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
export interface MemoryPlan {
|
|
391
|
+
currentPriorities: string[];
|
|
392
|
+
strategy: string;
|
|
393
|
+
lastUpdated: string;
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
export interface DecisionRecord {
|
|
397
|
+
timestamp: string;
|
|
398
|
+
/** Imperative verb for markdown headings (e.g. "evolve", "rollback", "watch"). */
|
|
399
|
+
actionType: string;
|
|
400
|
+
skillName: string;
|
|
401
|
+
/** Past-tense result state used programmatically. */
|
|
402
|
+
action: "evolved" | "rolled-back" | "watched";
|
|
403
|
+
rationale: string;
|
|
404
|
+
result: string;
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
// ---------------------------------------------------------------------------
|
|
408
|
+
// Contribution types (contribute command)
|
|
409
|
+
// ---------------------------------------------------------------------------
|
|
410
|
+
|
|
411
|
+
export interface ContributionQuery {
|
|
412
|
+
query: string;
|
|
413
|
+
invocation_type: InvocationType;
|
|
414
|
+
source: string;
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
export interface ContributionEvalEntry {
|
|
418
|
+
query: string;
|
|
419
|
+
should_trigger: boolean;
|
|
420
|
+
invocation_type?: InvocationType;
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
export interface ContributionGradingSummary {
|
|
424
|
+
total_sessions: number;
|
|
425
|
+
graded_sessions: number;
|
|
426
|
+
average_pass_rate: number;
|
|
427
|
+
expectation_count: number;
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
export interface ContributionEvolutionSummary {
|
|
431
|
+
total_proposals: number;
|
|
432
|
+
deployed_proposals: number;
|
|
433
|
+
rolled_back_proposals: number;
|
|
434
|
+
average_improvement: number;
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
export interface ContributionSessionMetrics {
|
|
438
|
+
total_sessions: number;
|
|
439
|
+
avg_assistant_turns: number;
|
|
440
|
+
avg_tool_calls: number;
|
|
441
|
+
avg_errors: number;
|
|
442
|
+
top_tools: Array<{ tool: string; count: number }>;
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
export interface ContributionBundle {
|
|
446
|
+
schema_version: "1.0" | "1.1" | "1.2";
|
|
447
|
+
skill_name?: string;
|
|
448
|
+
contributor_id: string;
|
|
449
|
+
created_at: string;
|
|
450
|
+
selftune_version: string;
|
|
451
|
+
agent_type: string;
|
|
452
|
+
sanitization_level: "conservative" | "aggressive";
|
|
453
|
+
positive_queries: ContributionQuery[];
|
|
454
|
+
eval_entries: ContributionEvalEntry[];
|
|
455
|
+
grading_summary: ContributionGradingSummary | null;
|
|
456
|
+
evolution_summary: ContributionEvolutionSummary | null;
|
|
457
|
+
session_metrics: ContributionSessionMetrics;
|
|
458
|
+
unmatched_queries?: Array<{ query: string; timestamp: string }>;
|
|
459
|
+
pending_proposals?: Array<{
|
|
460
|
+
proposal_id: string;
|
|
461
|
+
skill_name?: string;
|
|
462
|
+
action: string;
|
|
463
|
+
timestamp: string;
|
|
464
|
+
details: string;
|
|
465
|
+
}>;
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
// ---------------------------------------------------------------------------
|
|
469
|
+
// Evolution target types (v0.6 — body + routing evolution)
|
|
470
|
+
// ---------------------------------------------------------------------------
|
|
471
|
+
|
|
472
|
+
/** Which part of a skill is being evolved. */
|
|
473
|
+
export type EvolutionTarget = "description" | "routing" | "body";
|
|
474
|
+
|
|
475
|
+
/** Parsed sections of a SKILL.md file. */
|
|
476
|
+
export interface SkillSections {
|
|
477
|
+
frontmatter: string;
|
|
478
|
+
title: string;
|
|
479
|
+
description: string;
|
|
480
|
+
sections: Record<string, string>;
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
/** Proposal for evolving the full body of a SKILL.md. */
|
|
484
|
+
export interface BodyEvolutionProposal {
|
|
485
|
+
proposal_id: string;
|
|
486
|
+
skill_name: string;
|
|
487
|
+
skill_path: string;
|
|
488
|
+
original_body: string;
|
|
489
|
+
proposed_body: string;
|
|
490
|
+
rationale: string;
|
|
491
|
+
target: EvolutionTarget;
|
|
492
|
+
failure_patterns: string[];
|
|
493
|
+
confidence: number;
|
|
494
|
+
created_at: string;
|
|
495
|
+
status: "pending" | "validated" | "deployed" | "rolled_back";
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
/** Closed union of gate names used in the validation pipeline. */
|
|
499
|
+
export type ValidationGate = "structural" | "trigger_accuracy" | "quality";
|
|
500
|
+
|
|
501
|
+
/** Result of validating a body evolution proposal. */
|
|
502
|
+
export interface BodyValidationResult {
|
|
503
|
+
proposal_id: string;
|
|
504
|
+
gates_passed: number;
|
|
505
|
+
gates_total: number;
|
|
506
|
+
gate_results: Array<{ gate: ValidationGate; passed: boolean; reason: string }>;
|
|
507
|
+
improved: boolean;
|
|
508
|
+
regressions: string[];
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
/** Configuration for which LLM model a role should use. */
|
|
512
|
+
export interface LlmRoleConfig {
|
|
513
|
+
role: string;
|
|
514
|
+
model: string;
|
|
515
|
+
temperature?: number;
|
|
516
|
+
max_tokens?: number;
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
/** Token usage metrics for a session or eval run. */
|
|
520
|
+
export interface TokenUsageMetrics {
|
|
521
|
+
input_tokens: number;
|
|
522
|
+
output_tokens: number;
|
|
523
|
+
total_tokens: number;
|
|
524
|
+
estimated_cost_usd?: number;
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
// ---------------------------------------------------------------------------
|
|
528
|
+
// Baseline comparison types
|
|
529
|
+
// ---------------------------------------------------------------------------
|
|
530
|
+
|
|
531
|
+
/** Result of a no-skill baseline measurement. */
|
|
532
|
+
export interface BaselineResult {
|
|
533
|
+
skill_name: string;
|
|
534
|
+
query: string;
|
|
535
|
+
with_skill: boolean;
|
|
536
|
+
triggered: boolean;
|
|
537
|
+
pass: boolean;
|
|
538
|
+
latency_ms?: number;
|
|
539
|
+
tokens?: TokenUsageMetrics;
|
|
540
|
+
measured_at: string;
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
// ---------------------------------------------------------------------------
|
|
544
|
+
// Skill unit test types
|
|
545
|
+
// ---------------------------------------------------------------------------
|
|
546
|
+
|
|
547
|
+
/** Type of assertion for a skill unit test. */
|
|
548
|
+
export type AssertionType =
|
|
549
|
+
| "contains"
|
|
550
|
+
| "not_contains"
|
|
551
|
+
| "regex"
|
|
552
|
+
| "json_path"
|
|
553
|
+
| "tool_called"
|
|
554
|
+
| "tool_not_called";
|
|
555
|
+
|
|
556
|
+
/** A single assertion within a skill unit test. */
|
|
557
|
+
export interface SkillAssertion {
|
|
558
|
+
type: AssertionType;
|
|
559
|
+
value: string;
|
|
560
|
+
description?: string;
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
/** A skill unit test case. */
|
|
564
|
+
export interface SkillUnitTest {
|
|
565
|
+
id: string;
|
|
566
|
+
skill_name: string;
|
|
567
|
+
query: string;
|
|
568
|
+
assertions: SkillAssertion[];
|
|
569
|
+
timeout_ms?: number;
|
|
570
|
+
tags?: string[];
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
/** Result of running a single skill unit test. */
|
|
574
|
+
export interface UnitTestResult {
|
|
575
|
+
test_id: string;
|
|
576
|
+
passed: boolean;
|
|
577
|
+
assertion_results: Array<{ assertion: SkillAssertion; passed: boolean; actual?: string }>;
|
|
578
|
+
duration_ms: number;
|
|
579
|
+
error?: string;
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
/** Aggregated result of a skill unit test suite. */
|
|
583
|
+
export interface UnitTestSuiteResult {
|
|
584
|
+
skill_name: string;
|
|
585
|
+
total: number;
|
|
586
|
+
passed: number;
|
|
587
|
+
failed: number;
|
|
588
|
+
pass_rate: number;
|
|
589
|
+
results: UnitTestResult[];
|
|
590
|
+
run_at: string;
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
// ---------------------------------------------------------------------------
|
|
594
|
+
// Composability types
|
|
595
|
+
// ---------------------------------------------------------------------------
|
|
596
|
+
|
|
597
|
+
/** A pair of skills that co-occur in sessions. */
|
|
598
|
+
export interface CoOccurrencePair {
|
|
599
|
+
skill_a: string;
|
|
600
|
+
skill_b: string;
|
|
601
|
+
co_occurrence_count: number;
|
|
602
|
+
conflict_detected: boolean;
|
|
603
|
+
conflict_reason?: string;
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
/** Report on skill composability / conflicts. */
|
|
607
|
+
export interface ComposabilityReport {
|
|
608
|
+
pairs: CoOccurrencePair[];
|
|
609
|
+
total_sessions_analyzed: number;
|
|
610
|
+
conflict_count: number;
|
|
611
|
+
generated_at: string;
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
// ---------------------------------------------------------------------------
|
|
615
|
+
// SkillsBench types
|
|
616
|
+
// ---------------------------------------------------------------------------
|
|
617
|
+
|
|
618
|
+
/** A task from the SkillsBench benchmark suite. */
|
|
619
|
+
export interface SkillsBenchTask {
|
|
620
|
+
task_id: string;
|
|
621
|
+
category: string;
|
|
622
|
+
query: string;
|
|
623
|
+
expected_skill?: string;
|
|
624
|
+
expected_tools?: string[];
|
|
625
|
+
difficulty: "easy" | "medium" | "hard";
|
|
626
|
+
tags?: string[];
|
|
627
|
+
}
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* frontmatter.ts
|
|
3
|
+
*
|
|
4
|
+
* Line-based YAML frontmatter parser for SKILL.md files.
|
|
5
|
+
* Extracts name, description, and version without a YAML library.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
// Types
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
|
|
12
|
+
export interface SkillFrontmatter {
  /** Value of the top-level `name:` key; empty string when absent. */
  name: string;
  /** Description text; the entire input when no frontmatter block is found. */
  description: string;
  /** Value of top-level `version:` or of `version:` nested under `metadata:`; empty string when absent. */
  version: string;
  /** Everything after the closing `---`, with leading blank lines removed. */
  body: string;
}

// ---------------------------------------------------------------------------
// Parser
// ---------------------------------------------------------------------------

/**
 * Matches a YAML block scalar header: `>` or `|`, optionally followed by a
 * chomping indicator (`-`/`+`) and/or an indentation digit (e.g. `>-`, `|2+`).
 */
const BLOCK_SCALAR_HEADER = /^[>|][-+0-9]{0,2}$/;

/**
 * Trim a single-line scalar and remove one pair of matching surrounding
 * quotes. Inside double quotes `\"` and `\\` are unescaped; inside single
 * quotes `''` becomes `'`. Unquoted values are returned trimmed.
 */
function unquoteScalar(raw: string): string {
  const value = raw.trim();
  if (value.length < 2) return value;

  const first = value[0];
  const last = value[value.length - 1];
  if (first === '"' && last === '"') {
    return value.slice(1, -1).replace(/\\(["\\])/g, "$1");
  }
  if (first === "'" && last === "'") {
    return value.slice(1, -1).replace(/''/g, "'");
  }
  return value;
}

/**
 * Parse YAML frontmatter from a SKILL.md file.
 *
 * Handles these description formats:
 *   - Single-line: `description: When the user wants to...` (optionally quoted)
 *   - Block scalar: `description: >`, `>-`, `|`, `|-`, ... followed by indented
 *     lines. Lines are joined with single spaces for both folded and literal
 *     styles; blank lines inside the block are skipped rather than ending it.
 *
 * Handles two version locations:
 *   - Top-level: `version: 1.0.0`
 *   - Nested: `metadata:\n  version: 1.0.0`
 *
 * Trailing whitespace (including `\r` from CRLF files) is ignored on
 * frontmatter lines.
 *
 * @param content - Full text of the SKILL.md file.
 * @returns The extracted fields. If the first line is not `---` or no closing
 *   `---` exists, `name` and `version` are empty and both `description` and
 *   `body` are the full content.
 */
export function parseFrontmatter(content: string): SkillFrontmatter {
  const lines = content.split("\n");

  // Locate the opening and closing delimiters; bail out if either is missing.
  const hasOpening = lines[0]?.trim() === "---";
  const endIdx = hasOpening ? lines.findIndex((l, idx) => idx > 0 && l.trim() === "---") : -1;
  if (endIdx < 0) {
    return { name: "", description: content, version: "", body: content };
  }

  // trimEnd drops trailing spaces and any "\r" left over from CRLF input.
  const yamlLines = lines.slice(1, endIdx).map((l) => l.trimEnd());
  const body = lines
    .slice(endIdx + 1)
    .join("\n")
    .replace(/^(?:\r?\n)+/, "");

  let name = "";
  let description = "";
  let version = "";
  let inMetadata = false;

  for (let i = 0; i < yamlLines.length; i++) {
    const line = yamlLines[i] ?? "";

    // Top-level `name:` (startsWith on the un-indented line excludes nested keys)
    if (line.startsWith("name:")) {
      name = unquoteScalar(line.slice("name:".length));
      inMetadata = false;
      continue;
    }

    // Top-level `version:`
    if (line.startsWith("version:")) {
      version = unquoteScalar(line.slice("version:".length));
      inMetadata = false;
      continue;
    }

    // `metadata:` block start
    if (line.startsWith("metadata:")) {
      inMetadata = true;
      continue;
    }

    // Nested `version:` inside metadata
    if (inMetadata) {
      const nested = /^\s+version:\s*(.*)$/.exec(line);
      if (nested) {
        version = unquoteScalar(nested[1] ?? "");
        continue;
      }
    }

    // Top-level `description:` — single-line or block scalar
    if (line.startsWith("description:")) {
      inMetadata = false;
      const afterKey = line.slice("description:".length).trim();

      if (BLOCK_SCALAR_HEADER.test(afterKey)) {
        // Collect indented continuation lines. Blank lines do not terminate
        // the block; the first non-indented, non-blank line does.
        const parts: string[] = [];
        let j = i + 1;
        for (; j < yamlLines.length; j++) {
          const next = yamlLines[j] ?? "";
          if (next.length === 0) continue;
          if (!/^\s/.test(next)) break;
          parts.push(next.trim());
        }
        description = parts.join(" ").trim();
        i = j - 1; // advance past consumed lines
      } else {
        description = unquoteScalar(afterKey);
      }
      continue;
    }

    // Any other top-level key resets inMetadata
    if (/^\S/.test(line) && line.includes(":")) {
      inMetadata = false;
    }
  }

  return { name, description, version, body };
}
|
|
134
|
+
|
|
135
|
+
// ---------------------------------------------------------------------------
|
|
136
|
+
// Frontmatter description replacement
|
|
137
|
+
// ---------------------------------------------------------------------------
|
|
138
|
+
|
|
139
|
+
/**
 * Render the `description:` entry as one or more frontmatter lines.
 *
 * A plain single-line value is used when safe. A folded scalar
 * (`description: >` plus 2-space-indented lines wrapped near 80 columns) is
 * used when the text is longer than 120 characters, contains characters that
 * are special in YAML, contains a line break, or starts with a YAML indicator
 * character.
 */
function formatDescriptionLines(newDescription: string): string[] {
  const needsFolded =
    newDescription.length > 120 ||
    /[:#"'[\]{}|>\r\n]/.test(newDescription) ||
    /^[-?*&!%@`,]/.test(newDescription.trimStart());

  if (!needsFolded) {
    return [`description: ${newDescription}`];
  }

  const indent = "  ";
  const out: string[] = ["description: >"];
  let current = indent;
  for (const word of newDescription.split(/\s+/)) {
    // Leading/trailing whitespace yields empty words; skipping them avoids
    // emitting trailing spaces.
    if (word.length === 0) continue;

    const lineIsEmpty = current.length === indent.length;
    if (!lineIsEmpty && current.length + word.length + 1 > 80) {
      out.push(current);
      current = indent + word;
    } else {
      current = lineIsEmpty ? indent + word : `${current} ${word}`;
    }
  }
  if (current.length > indent.length) out.push(current);
  return out;
}

/**
 * Replace the `description:` field in YAML frontmatter, preserving all other
 * content. If the new description is long, multi-line, or contains special
 * YAML characters, it is written as a folded scalar (`description: >`).
 *
 * An existing block-scalar description (`>`, `|`, `>-`, `|-`, ...) is removed
 * together with all of its indented continuation lines, including blank lines
 * that sit between them. Blank lines after the block are kept. If the
 * frontmatter has no `description:` key, one is appended before the closing
 * delimiter using the same formatting rules.
 *
 * @param content - Full text of the SKILL.md file.
 * @param newDescription - Description text to write.
 * @returns The updated file text, or the original content unchanged if no
 *   frontmatter (opening and closing `---`) is found.
 */
export function replaceFrontmatterDescription(content: string, newDescription: string): string {
  const lines = content.split("\n");

  const opening = lines[0] ?? "";
  if (opening.trim() !== "---") return content;

  const endIdx = lines.findIndex((l, idx) => idx > 0 && l.trim() === "---");
  if (endIdx < 0) return content;

  // Keep the file's line-ending convention: after split("\n"), CRLF files
  // leave a trailing "\r" on every line, so new lines get one too.
  const eol = opening.endsWith("\r") ? "\r" : "";
  const descriptionLines = formatDescriptionLines(newDescription).map((l) => l + eol);

  const blockScalarHeader = /^[>|][-+0-9]{0,2}$/;
  const yamlLines = lines.slice(1, endIdx);
  const newYamlLines: string[] = [];
  let replaced = false;
  let i = 0;

  while (i < yamlLines.length) {
    const current = yamlLines[i] ?? "";
    const trimmed = current.trimEnd();

    if (!trimmed.startsWith("description:")) {
      newYamlLines.push(current);
      i++;
      continue;
    }

    replaced = true;
    const afterKey = trimmed.slice("description:".length).trim();
    i++;

    if (blockScalarHeader.test(afterKey)) {
      // Drop the old block's continuation lines. Blank lines are only dropped
      // when another indented line follows them, so blank lines trailing the
      // block are preserved.
      let scan = i;
      while (scan < yamlLines.length) {
        const next = yamlLines[scan] ?? "";
        if (next.trim().length === 0) {
          scan++;
          continue;
        }
        if (!/^\s/.test(next)) break;
        scan++;
        i = scan;
      }
    }

    newYamlLines.push(...descriptionLines);
  }

  // If description wasn't found in frontmatter, add it
  if (!replaced) {
    newYamlLines.push(...descriptionLines);
  }

  const after = lines.slice(endIdx); // closing "---" + body
  return [opening, ...newYamlLines, ...after].join("\n");
}
|