skill-tree 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/README.md +75 -0
  2. package/dist/bowser-CQI7RKRA.mjs +2821 -0
  3. package/dist/chunk-2NL4MXNX.mjs +3156 -0
  4. package/dist/chunk-2STDJU5Y.mjs +1174 -0
  5. package/dist/chunk-3BCRI4CA.mjs +101 -0
  6. package/dist/chunk-3SRB47JW.mjs +8344 -0
  7. package/dist/chunk-43YOKLZP.mjs +6081 -0
  8. package/dist/chunk-4AGZU52D.mjs +7918 -0
  9. package/dist/chunk-4HXHCEFH.mjs +9157 -0
  10. package/dist/chunk-4OC5QFIF.mjs +11267 -0
  11. package/dist/chunk-4QGSDVGH.mjs +580 -0
  12. package/dist/chunk-4TFMKAVC.mjs +1225 -0
  13. package/dist/chunk-55SMGVTP.mjs +7126 -0
  14. package/dist/chunk-5C4MEQMR.mjs +125 -0
  15. package/dist/chunk-6FX4IK4Z.mjs +5368 -0
  16. package/dist/chunk-6UPDN5QM.mjs +163 -0
  17. package/dist/chunk-7EGDKOHV.mjs +9439 -0
  18. package/dist/chunk-7LMOQW5H.mjs +4893 -0
  19. package/dist/chunk-7QIQJVNP.mjs +14206 -0
  20. package/dist/chunk-7VB4ZRZO.mjs +7127 -0
  21. package/dist/chunk-A3SILZYX.mjs +8360 -0
  22. package/dist/chunk-BPVRW25O.mjs +6089 -0
  23. package/dist/chunk-BZ2JKJ54.mjs +1057 -0
  24. package/dist/chunk-CI4476KM.mjs +6607 -0
  25. package/dist/chunk-DCRKELD5.mjs +46 -0
  26. package/dist/chunk-DDXYQ74I.mjs +13969 -0
  27. package/dist/chunk-DQOFJXBX.mjs +6595 -0
  28. package/dist/chunk-E2CVK23F.mjs +8751 -0
  29. package/dist/chunk-F3YEUQAP.mjs +654 -0
  30. package/dist/chunk-FKJJ4RJG.mjs +13874 -0
  31. package/dist/chunk-II7DECZQ.mjs +9111 -0
  32. package/dist/chunk-INKVOZXK.mjs +15898 -0
  33. package/dist/chunk-J2JM7HAK.mjs +8787 -0
  34. package/dist/chunk-K6NRCSAZ.mjs +4355 -0
  35. package/dist/chunk-LACI6YL4.mjs +1379 -0
  36. package/dist/chunk-MBIGW6KU.mjs +644 -0
  37. package/dist/chunk-OYHYXKXO.mjs +7297 -0
  38. package/dist/chunk-P5GJJ4JB.mjs +9237 -0
  39. package/dist/chunk-PDPN7FW7.mjs +1045 -0
  40. package/dist/chunk-QNK3WYNA.mjs +8971 -0
  41. package/dist/chunk-QZ7TP4HQ.mjs +7 -0
  42. package/dist/chunk-RJYJGJO3.mjs +349 -0
  43. package/dist/chunk-T4PVQW5O.mjs +124 -0
  44. package/dist/chunk-TEUB6DZR.mjs +6453 -0
  45. package/dist/chunk-TWPEHDW4.mjs +1067 -0
  46. package/dist/chunk-VHFTX33A.mjs +6724 -0
  47. package/dist/chunk-Y54UK2J3.mjs +13071 -0
  48. package/dist/chunk-YDVZIFIU.mjs +2102 -0
  49. package/dist/chunk-ZQVS7MQK.mjs +6081 -0
  50. package/dist/chunk-ZYKRDDFO.mjs +163 -0
  51. package/dist/cli/index.js +1167 -323
  52. package/dist/cli/index.mjs +202 -9164
  53. package/dist/dist-es-2JG6ZWFR.mjs +69 -0
  54. package/dist/dist-es-2JGXQKUP.mjs +6077 -0
  55. package/dist/dist-es-644EP2LP.mjs +317 -0
  56. package/dist/dist-es-DSNCHWLJ.mjs +170 -0
  57. package/dist/dist-es-FIVW7BUZ.mjs +317 -0
  58. package/dist/dist-es-GXJAFBE5.mjs +22 -0
  59. package/dist/dist-es-HRBPKDMR.mjs +935 -0
  60. package/dist/dist-es-LHPJ63IO.mjs +4437 -0
  61. package/dist/dist-es-LT2AQAG7.mjs +4437 -0
  62. package/dist/dist-es-ORE4PQTL.mjs +87 -0
  63. package/dist/dist-es-TLCYJJ25.mjs +495 -0
  64. package/dist/dist-es-V4LHTSRG.mjs +69 -0
  65. package/dist/dist-es-XHTU3ZU2.mjs +935 -0
  66. package/dist/dist-es-Y2MPJ6IO.mjs +378 -0
  67. package/dist/dist-es-ZYHLY2E6.mjs +487 -0
  68. package/dist/event-streams-KIAAAC7Z.mjs +42 -0
  69. package/dist/index.d.mts +1074 -12
  70. package/dist/index.d.ts +1074 -12
  71. package/dist/index.js +38729 -600
  72. package/dist/index.mjs +129 -9693
  73. package/dist/loadSso-NPRY7QRT.mjs +579 -0
  74. package/dist/loadSso-OYKG6ZRE.mjs +579 -0
  75. package/dist/signin-LMFNL434.mjs +665 -0
  76. package/dist/signin-LUKXFXSI.mjs +743 -0
  77. package/dist/sqlite-MG45OOTV.mjs +6 -0
  78. package/dist/sqlite-OLU72GHB.mjs +6 -0
  79. package/dist/sqlite-RR2SJ3SR.mjs +7 -0
  80. package/dist/sqlite-XJRPMNAJ.mjs +6 -0
  81. package/dist/sso-oidc-NNH6SQIH.mjs +832 -0
  82. package/dist/sso-oidc-STZH2XK2.mjs +832 -0
  83. package/dist/sts-EF755UBF.mjs +6290 -0
  84. package/dist/sts-ZIS4G6FQ.mjs +6290 -0
  85. package/dist/sync-BSWMMDA6.mjs +14 -0
  86. package/dist/sync-WHIIDHML.mjs +14 -0
  87. package/dist/sync-XRWFQYBY.mjs +15 -0
  88. package/package.json +9 -2
package/dist/index.d.ts CHANGED
@@ -50,6 +50,8 @@ interface Skill {
50
50
  interface SkillServingMetadata {
51
51
  /** Short summary for collapsed view (defaults to description) */
52
52
  summary?: string;
53
+ /** Key insight preview shown in collapsed view to aid routing decisions */
54
+ instructionPreview?: string;
53
55
  /** Estimated token count for context budgeting */
54
56
  tokenEstimate?: number;
55
57
  /** Auto-expand triggers */
@@ -1056,6 +1058,83 @@ declare class SyncManager {
1056
1058
  */
1057
1059
  declare function createSyncManager(options: SyncManagerOptions): SyncManager;
1058
1060
 
1061
+ /**
1062
+ * Types for the learned execution-grounded utility scorer (Tier 2, T2.1).
1063
+ *
1064
+ * The retrieval signals (BM25, dense cosine) answer "does this skill *look*
1065
+ * relevant?". The utility scorer answers the harder question — "does including
1066
+ * this skill actually *help* solve this task?" — by learning from observed
1067
+ * loadout→outcome traces. This separates genuinely-useful skills from
1068
+ * generic, high-surface-area skills that retrieval over-rewards.
1069
+ *
1070
+ * @packageDocumentation
1071
+ */
1072
+ /** Per-(task, skill) signals available at scoring time. */
1073
+ interface UtilityFeatures {
1074
+ skillId: string;
1075
+ /** Absolute lexical (BM25) relevance in [0,1]. */
1076
+ lexAbs: number;
1077
+ /** Absolute dense (cosine) relevance in [0,1]; 0 when no embedder is used. */
1078
+ denseAbs: number;
1079
+ }
1080
+ /** A labeled example: did including this skill help on this task? */
1081
+ interface UtilityExample {
1082
+ /** Optional task text (for provenance/debugging; not used as a feature). */
1083
+ taskText?: string;
1084
+ features: UtilityFeatures;
1085
+ /** Ground-truth label: did the skill contribute to task success? */
1086
+ helped: boolean;
1087
+ /** Optional example weight (e.g. counterfactual gain magnitude). Default 1. */
1088
+ weight?: number;
1089
+ }
1090
+ interface UtilityTrainReport {
1091
+ examples: number;
1092
+ positives: number;
1093
+ iterations: number;
1094
+ finalLoss: number;
1095
+ /** Number of distinct skills that accumulated a learned helpfulness prior. */
1096
+ skillsWithPrior: number;
1097
+ }
1098
+ /**
1099
+ * A learned scorer fθ(task, skill) ∈ [0,1] feeding the confidence gate.
1100
+ * Pluggable so the default logistic model can be swapped for an MLP, etc.
1101
+ */
1102
+ interface UtilityScorer {
1103
+ /** Score a (task, skill) pair in [0,1]. Higher ⇒ more likely to help. */
1104
+ score(features: UtilityFeatures): number;
1105
+ /** Fit the model to labeled examples. */
1106
+ train(examples: UtilityExample[]): UtilityTrainReport;
1107
+ /** True once fitted; callers fall back to the hybrid blend when false. */
1108
+ readonly trained: boolean;
1109
+ }
1110
+
1111
+ /**
1112
+ * Listwise reranking (Tier 1.5) — a second-stage scorer over the *full skill
1113
+ * body* of a shortlist, the SkillRouter/Cohere-rerank pattern.
1114
+ *
1115
+ * First-stage retrieval (BM25 + dense) is recall-oriented but cheap and shallow
1116
+ * (it scores fields/embeddings, not the whole document). A cross-encoder
1117
+ * reranker reads the query against each candidate's full text and reorders them,
1118
+ * recovering true positives that retrieval ranked at 4..N into the top-K.
1119
+ * Measured on SkillsBench: BM25 recall@3 50% → reranked top-20 recall@3 65%.
1120
+ *
1121
+ * @packageDocumentation
1122
+ */
1123
+ interface RerankCandidate {
1124
+ id: string;
1125
+ /** The text the reranker scores against the query (typically the skill body). */
1126
+ text: string;
1127
+ }
1128
+ interface RerankResult {
1129
+ id: string;
1130
+ /** Reranker relevance score (provider-specific scale, higher = better). */
1131
+ score: number;
1132
+ }
1133
+ /** Reorders a shortlist by query-relevance. Returns results sorted desc by score. */
1134
+ interface RerankProvider {
1135
+ rerank(query: string, candidates: RerankCandidate[], topN?: number): Promise<RerankResult[]>;
1136
+ }
1137
+
1059
1138
  /**
1060
1139
  * Types for the skill-tree serving layer
1061
1140
  * @packageDocumentation
@@ -1212,6 +1291,19 @@ interface LoadoutCompilerConfig {
1212
1291
  defaultStatus?: SkillStatus[];
1213
1292
  /** Minimum similarity threshold for semantic matching (default: 0.6) */
1214
1293
  semanticThreshold?: number;
1294
+ /**
1295
+ * Hybrid-retrieval options for `compileWithScoring` (Tier 1). When set,
1296
+ * task scoring uses field-weighted BM25 (incl. the skill body) optionally
1297
+ * fused with dense embeddings, instead of the legacy Jaccard scorer.
1298
+ */
1299
+ retrieval?: HybridRetrievalOptions;
1300
+ /**
1301
+ * Size of the candidate pool that `compileWithScoring` scores over
1302
+ * before confidence-partitioning (default: 200). Larger than
1303
+ * `defaultMaxSkills` so the hybrid ranker re-ranks a real corpus rather
1304
+ * than just the lexically pre-filtered top-N.
1305
+ */
1306
+ scoringPoolSize?: number;
1215
1307
  }
1216
1308
  /**
1217
1309
  * Configuration for CatalogRenderer (browsable skill catalog)
@@ -1224,10 +1316,128 @@ interface CatalogRendererConfig {
1224
1316
  /** Max summary length for leaf-level skill descriptions (default: 80) */
1225
1317
  maxSummaryLength: number;
1226
1318
  }
1319
+ /**
1320
+ * A skill with its computed relevance score from task-based compilation.
1321
+ */
1322
+ interface ScoredSkill {
1323
+ skill: Skill;
1324
+ relevanceScore: number;
1325
+ }
1326
+ /**
1327
+ * Confidence thresholds for hybrid loadout compilation.
1328
+ * Skills above `expandAbove` are auto-expanded; between `expandAbove`
1329
+ * and `includeAbove` are included as summaries; below `includeAbove`
1330
+ * are excluded entirely.
1331
+ */
1332
+ interface ConfidenceThresholds {
1333
+ /** Score at or above which skills are auto-expanded (default: 0.3) */
1334
+ expandAbove: number;
1335
+ /** Score at or above which skills are included as summaries (default: 0.15) */
1336
+ includeAbove: number;
1337
+ /**
1338
+ * Absolute abstain floor (Tier 1, T1.3). If set, and even the
1339
+ * single best-scoring skill falls below this, the loadout injects
1340
+ * **nothing** — a first-class "no relevant skill" outcome (B=0).
1341
+ *
1342
+ * This is distinct from `includeAbove`: it can be set *higher* than
1343
+ * `includeAbove` to demand strong top-of-pool relevance before any
1344
+ * skill is served, which prevents the documented regression where
1345
+ * irrelevant retrieved skills drag task success *below* the no-skill
1346
+ * baseline (SkillsBench: 16/84 tasks went negative with curated skills;
1347
+ * "Skills in the Wild": retrieval drops weak models below baseline).
1348
+ *
1349
+ * Recommended to calibrate per domain. Default: undefined (no extra
1350
+ * floor beyond `includeAbove`).
1351
+ */
1352
+ minConfidence?: number;
1353
+ }
1354
+ /**
1355
+ * Provider of dense embeddings for hybrid retrieval (Tier 1, T1.1).
1356
+ *
1357
+ * Pluggable so callers can supply any embedding backend (e.g. a local
1358
+ * model, an API, or cognitive-core's provider). When no embedder is
1359
+ * configured, hybrid retrieval gracefully degrades to a field-weighted
1360
+ * BM25 lexical ranking (still a strict improvement over the legacy
1361
+ * Jaccard term-overlap scorer, since BM25 indexes the skill **body**).
1362
+ */
1363
+ interface EmbeddingProvider {
1364
+ /**
1365
+ * Embed a batch of texts into comparable numeric vectors. The first
1366
+ * element is conventionally the query; the rest are skill documents.
1367
+ * Implementations should return vectors of equal dimensionality.
1368
+ */
1369
+ embed(texts: string[]): Promise<number[][]>;
1370
+ }
1371
+ /**
1372
+ * Per-field weights for the BM25(F) lexical signal. Mirrors the
1373
+ * "Skills in the Wild" recipe (name:10 / description:5 / body:5);
1374
+ * tags get a small weight. The skill **body** (instructions) is indexed
1375
+ * because it carries the dominant retrieval signal (SkillRouter: ~92%
1376
+ * of reranker attention is on the body).
1377
+ */
1378
+ interface FieldWeights {
1379
+ name: number;
1380
+ description: number;
1381
+ body: number;
1382
+ tags: number;
1383
+ }
1384
+ /** How the lexical and dense signals are fused for ordering. */
1385
+ type FusionStrategy = 'weighted' | 'rrf';
1386
+ /**
1387
+ * Options for hybrid retrieval scoring (Tier 1, T1.1 + T1.2).
1388
+ */
1389
+ interface HybridRetrievalOptions {
1390
+ /** Optional dense-embedding provider. Absent → BM25-only. */
1391
+ embedder?: EmbeddingProvider;
1392
+ /** Field weights for BM25 (default: name:10/description:5/body:5/tags:3). */
1393
+ fieldWeights?: FieldWeights;
1394
+ /** Reciprocal-rank-fusion constant (default: 60). */
1395
+ rrfK?: number;
1396
+ /**
1397
+ * Ordering fusion strategy (default: 'weighted'). Both strategies report
1398
+ * the same absolute, calibrated `relevanceScore` (so confidence thresholds
1399
+ * and the abstain floor stay meaningful); they differ only in sort order.
1400
+ * 'rrf' uses reciprocal rank fusion of the lexical and dense rankings
1401
+ * (only meaningful when an embedder is present).
1402
+ */
1403
+ fusion?: FusionStrategy;
1404
+ /** Blend weights for the absolute score (renormalized over available signals). */
1405
+ signalWeights?: {
1406
+ lexical: number;
1407
+ dense: number;
1408
+ };
1409
+ /** BM25 term-frequency saturation (default: 1.2). */
1410
+ bm25K1?: number;
1411
+ /** BM25 length normalization (default: 0.75). */
1412
+ bm25B?: number;
1413
+ /**
1414
+ * Saturation constant mapping raw BM25 → absolute [0,1] confidence
1415
+ * (default: 8). Larger = stricter (needs more lexical evidence to look
1416
+ * confident). Keeps the score on the same scale the confidence
1417
+ * thresholds expect.
1418
+ */
1419
+ bm25Saturation?: number;
1420
+ /**
1421
+ * Optional learned utility scorer (Tier 2, T2.1). When present *and
1422
+ * trained*, its `score(features)` ∈ [0,1] replaces the lexical/dense
1423
+ * blend as the `relevanceScore`, so the confidence gate ranks by *learned
1424
+ * utility* (does this skill help?) rather than raw similarity (does it look
1425
+ * relevant?). Falls back to the blend when absent or untrained.
1426
+ */
1427
+ utilityScorer?: UtilityScorer;
1428
+ /**
1429
+ * Optional listwise reranker (Tier 1.5). When set, the top `rerankTopN`
1430
+ * candidates from first-stage retrieval are reordered by the reranker over
1431
+ * their full body. Measured lift on SkillsBench: recall@3 50%→65% (BM25→rerank).
1432
+ */
1433
+ reranker?: RerankProvider;
1434
+ /** Shortlist size sent to the reranker (default: 20). */
1435
+ rerankTopN?: number;
1436
+ }
1227
1437
  /**
1228
1438
  * Eviction strategy when maxExpanded is reached
1229
1439
  */
1230
- type EvictionStrategy = 'lru' | 'priority' | 'manual';
1440
+ type EvictionStrategy = 'lru' | 'priority' | 'manual' | 'relevance';
1231
1441
  /**
1232
1442
  * Configuration for SkillGraphServer
1233
1443
  */
@@ -1260,6 +1470,19 @@ interface GraphServerConfig {
1260
1470
  outputFormat?: 'xml' | 'markdown';
1261
1471
  /** Include token estimates in output (default: false) */
1262
1472
  includeTokenEstimates?: boolean;
1473
+ /** Confidence thresholds for task-based auto-expansion */
1474
+ confidenceThresholds?: ConfidenceThresholds;
1475
+ /**
1476
+ * Hybrid-retrieval options for task-based loadouts (Tier 1). Passed
1477
+ * through to the LoadoutCompiler's `compileWithScoring`. When omitted,
1478
+ * scoring uses field-weighted BM25 over name/description/body/tags with
1479
+ * default weights; supply an `embedder` to enable dense+lexical fusion.
1480
+ */
1481
+ retrieval?: HybridRetrievalOptions;
1482
+ /** Candidate pool size scored before confidence-partitioning (default: 200). */
1483
+ scoringPoolSize?: number;
1484
+ /** Defer showing expanded content until the agent explicitly requests it (default: false) */
1485
+ deferExpansion?: boolean;
1263
1486
  /** Enable catalog browsing for large libraries (default: true) */
1264
1487
  enableCatalog?: boolean;
1265
1488
  /** Catalog renderer config overrides */
@@ -1326,6 +1549,7 @@ declare class SkillGraphServer {
1326
1549
  private state;
1327
1550
  private handlers;
1328
1551
  private lruOrder;
1552
+ private relevanceScores;
1329
1553
  constructor(storage: StorageAdapter, config?: GraphServerConfig);
1330
1554
  /**
1331
1555
  * Initialize the server, applying initial loadout if configured
@@ -1340,7 +1564,12 @@ declare class SkillGraphServer {
1340
1564
  */
1341
1565
  setLoadout(criteria: LoadoutCriteria): Promise<LoadoutState>;
1342
1566
  /**
1343
- * Set loadout based on task description (semantic matching)
1567
+ * Set loadout based on task description using hybrid confidence-tiered
1568
+ * compilation. Skills above the high threshold are auto-expanded,
1569
+ * skills between high and low thresholds are included as summaries,
1570
+ * and skills below the low threshold are excluded.
1571
+ *
1572
+ * Stores relevance scores for use by the 'relevance' eviction strategy.
1344
1573
  */
1345
1574
  setLoadoutForTask(taskDescription: string): Promise<LoadoutState>;
1346
1575
  /**
@@ -1444,6 +1673,11 @@ declare class SkillGraphServer {
1444
1673
  /**
1445
1674
  * Render current state as system prompt content.
1446
1675
  * Includes catalog overview when catalog is enabled.
1676
+ *
1677
+ * When `deferExpansion` is enabled, all skills are rendered as
1678
+ * summaries regardless of expansion state — the agent must
1679
+ * explicitly request expansion. This avoids the reactive-signals
1680
+ * problem where upfront skill injection derails model planning.
1447
1681
  */
1448
1682
  renderSystemPrompt(): Promise<string>;
1449
1683
  /**
@@ -1459,9 +1693,25 @@ declare class SkillGraphServer {
1459
1693
  */
1460
1694
  private emit;
1461
1695
  /**
1462
- * Apply a new set of skills as the loadout
1696
+ * Get the relevance score for a skill (0 if not scored).
1697
+ */
1698
+ getRelevanceScore(skillId: string): number;
1699
+ /**
1700
+ * Apply a new set of skills as the loadout.
1701
+ * After populating the available set, evaluates autoExpand triggers
1702
+ * on each skill to determine if any should be pre-expanded.
1463
1703
  */
1464
1704
  private applyLoadout;
1705
+ /**
1706
+ * Evaluate autoExpand trigger conditions for all skills in the loadout.
1707
+ * Checks keyword matches against the task description, file pattern
1708
+ * matches against the project path, and framework matches.
1709
+ */
1710
+ private evaluateAutoExpand;
1711
+ /**
1712
+ * Check if a single autoExpand trigger matches the current context.
1713
+ */
1714
+ private matchesTrigger;
1465
1715
  /**
1466
1716
  * Evict a skill from expanded based on strategy
1467
1717
  */
@@ -1525,8 +1775,7 @@ type SkillBankToServingEvent = {
1525
1775
  *
1526
1776
  * `skill:used`, `skill:feedback`, and `skill:requested` were removed when
1527
1777
  * skill-tree dropped its `SkillMetrics` model — usage tracking is now
1528
- * external (e.g., cognitive-core's `playbook.evolution.*`). See
1529
- * docs/SKILL_TREE_METRICS_DEPRECATION.md.
1778
+ * external (e.g., cognitive-core's `playbook.evolution.*`).
1530
1779
  */
1531
1780
  type ServingToSkillBankEvent = {
1532
1781
  type: 'loadout:changed';
@@ -2489,8 +2738,7 @@ declare class SkillBank {
2489
2738
  * The `loadout:changed` event is currently the only one we react to.
2490
2739
  * Earlier versions also handled `skill:used` / `skill:feedback` to mutate
2491
2740
  * `Skill.metrics`, but skill-tree no longer tracks per-skill usage —
2492
- * cognitive-core owns that signal via `playbook.evolution.*`. See
2493
- * docs/SKILL_TREE_METRICS_DEPRECATION.md.
2741
+ * cognitive-core owns that signal via `playbook.evolution.*`.
2494
2742
  */
2495
2743
  private handleServingEvent;
2496
2744
  /**
@@ -2581,6 +2829,9 @@ declare abstract class BaseStorageAdapter implements StorageAdapter {
2581
2829
  * Simple text search across skill fields
2582
2830
  */
2583
2831
  protected textSearch(skills: Skill[], query: string): Skill[];
2832
+ private static readonly STOP_WORDS;
2833
+ private tokenize;
2834
+ private static stem;
2584
2835
  }
2585
2836
  /**
2586
2837
  * In-memory storage adapter (useful for testing)
@@ -3128,6 +3379,40 @@ declare class LoadoutCompiler {
3128
3379
  * Compile based on a task description (semantic matching)
3129
3380
  */
3130
3381
  compileForTask(taskDescription: string): Promise<Skill[]>;
3382
+ /**
3383
+ * Compile with hybrid-retrieval scoring against a task description
3384
+ * (Tier 1). Returns skills annotated with absolute relevance scores in
3385
+ * [0,1], sorted by descending relevance. Used by the hybrid loadout
3386
+ * strategy to determine which skills should be auto-expanded vs shown
3387
+ * as summaries vs excluded.
3388
+ *
3389
+ * Scoring uses field-weighted BM25 over the skill name/description/body/
3390
+ * tags (the body matters most), optionally fused with dense embeddings
3391
+ * (when an `embedder` is
3392
+ * configured via `retrieval`). The candidate pool is the (filtered) set
3393
+ * up to `scoringPoolSize` — larger than the final loadout — so the ranker
3394
+ * re-ranks a real corpus rather than only a lexically pre-truncated top-N.
3395
+ */
3396
+ compileWithScoring(taskDescription: string, criteria?: LoadoutCriteria): Promise<ScoredSkill[]>;
3397
+ /**
3398
+ * Partition scored skills into confidence tiers.
3399
+ * - High confidence (>= expandAbove): should be auto-expanded
3400
+ * - Medium confidence (>= includeAbove): included as summaries
3401
+ * - Below includeAbove: excluded
3402
+ *
3403
+ * Abstain floor (Tier 1, T1.3): if `thresholds.minConfidence` is set and
3404
+ * even the single best-scoring skill is below it, the whole loadout
3405
+ * abstains — every skill is excluded and **nothing** is injected. This
3406
+ * makes "no sufficiently relevant skill" a first-class outcome (B=0),
3407
+ * which prevents irrelevant skills from dragging task success below the
3408
+ * no-skill baseline. `scored` is expected to be sorted descending, but we
3409
+ * defensively take the max rather than assume order.
3410
+ */
3411
+ partitionByConfidence(scored: ScoredSkill[], thresholds: ConfidenceThresholds): {
3412
+ expand: ScoredSkill[];
3413
+ summarize: ScoredSkill[];
3414
+ excluded: ScoredSkill[];
3415
+ };
3131
3416
  /**
3132
3417
  * Compile from a named profile
3133
3418
  */
@@ -3151,12 +3436,14 @@ declare class LoadoutCompiler {
3151
3436
  */
3152
3437
  applyQualityFilters(skills: Skill[], criteria: LoadoutCriteria): Skill[];
3153
3438
  /**
3154
- * Apply semantic filters (task description, problem context, etc.)
3439
+ * Apply semantic filters (task description matching).
3155
3440
  *
3156
- * Currently returns skills unchanged. Semantic matching was removed;
3157
- * use SQLite FTS via storage.searchSkills() for keyword-based search.
3441
+ * When `taskDescription` is provided, uses storage.searchSkills()
3442
+ * to find matching skills and boosts them to the front. Skills not
3443
+ * matching the search are retained at lower priority so that tag
3444
+ * filters and explicit includes still work.
3158
3445
  */
3159
- applySemanticFilters(skills: Skill[], _criteria: LoadoutCriteria): Promise<Skill[]>;
3446
+ applySemanticFilters(skills: Skill[], criteria: LoadoutCriteria): Promise<Skill[]>;
3160
3447
  /**
3161
3448
  * Apply relationship-based filters (root skills, dependencies)
3162
3449
  */
@@ -3370,6 +3657,486 @@ declare class CatalogRenderer {
3370
3657
  private countNodeSkills;
3371
3658
  }
3372
3659
 
3660
+ /**
3661
+ * Term-overlap similarity scoring for skill-to-task matching.
3662
+ *
3663
+ * Used by the loadout compiler to score skills against a task description,
3664
+ * enabling confidence-tiered expansion (hybrid loadout strategy).
3665
+ *
3666
+ * @packageDocumentation
3667
+ */
3668
+ /**
3669
+ * Tokenize text into a list of normalized terms (duplicates preserved),
3670
+ * filtering stop words and short tokens. Used where term frequency
3671
+ * matters (e.g. BM25 in hybrid retrieval).
3672
+ */
3673
+ declare function tokenizeList(text: string): string[];
3674
+ /**
3675
+ * Tokenize text into a set of normalized terms, filtering stop words.
3676
+ */
3677
+ declare function tokenize(text: string): Set<string>;
3678
+ /**
3679
+ * Compute term-overlap similarity between two texts.
3680
+ *
3681
+ * Returns a score in [0, 1] using Jaccard-like overlap normalized
3682
+ * by the smaller set size. Requires at least 2 overlapping terms
3683
+ * to avoid false positives from single shared words.
3684
+ */
3685
+ declare function termSimilarity(textA: string, textB: string): number;
3686
+ /**
3687
+ * Score a skill's relevance to a task description.
3688
+ * Combines the skill's name, description, tags, and (when provided) the
3689
+ * SKILL.md body into a single text and scores against the task text.
3690
+ *
3691
+ * The body (`skillBody`) is included because it carries the dominant
3692
+ * matching signal (SkillRouter: ~92% of reranker attention is on the
3693
+ * body); omitting it leaves the strongest evidence unused. It is an
3694
+ * optional trailing argument for backward compatibility.
3695
+ */
3696
+ declare function scoreSkillRelevance(taskText: string, skillName: string, skillDescription: string, skillTags: string[], skillBody?: string): number;
3697
+
3698
+ /**
3699
+ * Hybrid retrieval scoring for skill selection (Tier 1).
3700
+ *
3701
+ * Replaces the legacy lexical-only Jaccard scorer with a field-weighted
3702
+ * **BM25(F)** lexical signal — indexing the skill *body*, not just its
3703
+ * name/description — optionally fused with a **dense embedding** signal.
3704
+ *
3705
+ * Grounded in the 2026 literature:
3706
+ * - "Skills in the Wild" (arXiv 2604.04323): BM25 with field weights
3707
+ * name:10/description:5/content:5 + dense + RRF (k=60) lifts retrieval
3708
+ * recall from ~27% (keyword-only) to ~68%@10.
3709
+ * - SkillRouter (arXiv 2603.22455): the skill *body* carries ~92% of the
3710
+ * discriminative signal; scoring descriptions alone leaves it unused.
3711
+ *
3712
+ * ### Score calibration
3713
+ * Each skill gets an **absolute** `relevanceScore` in [0,1] (a renormalized
3714
+ * blend of an absolute lexical sub-score and an absolute dense sub-score).
3715
+ * "Absolute" means an irrelevant skill scores ≈0 regardless of the rest of
3716
+ * the pool — this is what makes the confidence thresholds (`expandAbove`/
3717
+ * `includeAbove`) and the abstain floor (`minConfidence`) meaningful.
3718
+ *
3719
+ * The `fusion` option only changes the *sort order*:
3720
+ * - `'weighted'` (default): order by the absolute blended score.
3721
+ * - `'rrf'`: order by reciprocal rank fusion of the lexical and dense
3722
+ * rankings (only meaningful when an embedder is present). The reported
3723
+ * `relevanceScore` is still the calibrated absolute blend, so gating
3724
+ * stays well-defined.
3725
+ *
3726
+ * @packageDocumentation
3727
+ */
3728
+
3729
+ /**
3730
+ * Default per-field weights (mirrors the "Skills in the Wild" recipe;
3731
+ * tags get a small weight, body is indexed).
3732
+ */
3733
+ declare const DEFAULT_FIELD_WEIGHTS: FieldWeights;
3734
+ /**
3735
+ * Compute raw BM25(F) scores for `query` over `skills`. Field weights are
3736
+ * folded into the term frequencies (a standard BM25F simplification), so a
3737
+ * name match counts more than a body match. Returns id → raw score (≥0).
3738
+ */
3739
+ declare function bm25Scores(query: string, skills: Skill[], fieldWeights?: FieldWeights, k1?: number, b?: number): Map<string, number>;
3740
+ /** Cosine similarity of two equal-length numeric vectors (0 if degenerate). */
3741
+ declare function cosineSimilarity(a: number[], b: number[]): number;
3742
+ /**
3743
+ * Reciprocal Rank Fusion. Each ranking is an ordered list of ids (best
3744
+ * first). Returns id → fused score = Σ_rankings 1/(k + rank), rank 1-based.
3745
+ * Items absent from a ranking simply contribute nothing from it.
3746
+ */
3747
+ declare function reciprocalRankFusion(rankings: string[][], k?: number): Map<string, number>;
3748
+ /**
3749
+ * Score skills against a task/query with hybrid retrieval, returning
3750
+ * `ScoredSkill[]` sorted by descending relevance. `relevanceScore` is an
3751
+ * absolute, calibrated value in [0,1] suitable for confidence thresholds
3752
+ * and the abstain floor.
3753
+ *
3754
+ * Degrades gracefully: with no embedder (or if embedding throws), it
3755
+ * returns a pure field-weighted BM25 ranking — still a strict upgrade over
3756
+ * the legacy Jaccard scorer because it indexes the skill body.
3757
+ */
3758
+ declare function scoreSkillsHybrid(query: string, skills: Skill[], options?: HybridRetrievalOptions): Promise<ScoredSkill[]>;
3759
+
3760
+ /**
3761
+ * AWS Bedrock {@link EmbeddingProvider}.
3762
+ *
3763
+ * Defaults to Amazon Titan Text Embeddings v2 (`amazon.titan-embed-text-v2:0`),
3764
+ * which embeds one text per `InvokeModel` call; this provider fans the batch
3765
+ * out with bounded concurrency. Cohere embed models (`cohere.embed-*`) are
3766
+ * also supported via their native batch API.
3767
+ *
3768
+ * The `@aws-sdk/client-bedrock-runtime` package is an OPTIONAL dependency,
3769
+ * lazily imported on first use. Wrap this in a {@link CachingEmbeddingProvider}
3770
+ * so the library is embedded once rather than per task.
3771
+ *
3772
+ * @packageDocumentation
3773
+ */
3774
+
3775
+ interface BedrockEmbeddingConfig {
3776
+ /** Bedrock embedding model id. Default: `amazon.titan-embed-text-v2:0`. */
3777
+ modelId?: string;
3778
+ /** AWS region. Falls back to AWS_REGION / AWS_DEFAULT_REGION. */
3779
+ region?: string;
3780
+ /** Output dimensions (Titan v2 supports 256/512/1024). Default: 1024. */
3781
+ dimensions?: number;
3782
+ /** L2-normalize output (Titan v2 option). Default: true. */
3783
+ normalize?: boolean;
3784
+ /** Max concurrent InvokeModel calls for per-text models (Titan). Default: 8. */
3785
+ concurrency?: number;
3786
+ /**
3787
+ * Cohere `input_type` (asymmetric retrieval). Use `search_document` to embed
3788
+ * the skill library and `search_query` for the task. Default: `search_query`.
3789
+ * Ignored by Titan. (Symmetric use — same type for both — still works but is
3790
+ * slightly weaker for retrieval.)
3791
+ */
3792
+ inputType?: 'search_query' | 'search_document' | 'classification' | 'clustering';
3793
+ /**
3794
+ * Low-level invoke seam: given (modelId, requestBody), return the parsed
3795
+ * JSON response. Defaults to a lazily-imported Bedrock runtime client.
3796
+ * Override for testing or custom auth.
3797
+ */
3798
+ invoke?: (modelId: string, body: unknown) => Promise<any>;
3799
+ }
3800
+ declare class BedrockEmbeddingProvider implements EmbeddingProvider {
3801
+ private modelId;
3802
+ private region?;
3803
+ private dimensions;
3804
+ private normalize;
3805
+ private concurrency;
3806
+ private inputType;
3807
+ private invoke;
3808
+ private client;
3809
+ constructor(config?: BedrockEmbeddingConfig);
3810
+ embed(texts: string[]): Promise<number[][]>;
3811
+ private isCohere;
3812
+ /** Titan-style: one InvokeModel per text, fanned out with bounded concurrency. */
3813
+ private embedPerText;
3814
+ /** Cohere embed: batched in chunks of ≤96 (Bedrock's per-request cap). */
3815
+ private embedCohereBatch;
3816
+ private extractSingle;
3817
+ private defaultInvoke;
3818
+ }
3819
+
3820
+ /**
3821
+ * AWS SageMaker {@link EmbeddingProvider}.
3822
+ *
3823
+ * Invokes a SageMaker real-time inference endpoint. Endpoint I/O contracts
3824
+ * vary by the deployed model, so request serialization and response parsing
3825
+ * are configurable; the defaults match HuggingFace TEI / feature-extraction
3826
+ * images (`{ inputs: string[] }` → `number[][]`).
3827
+ *
3828
+ * `@aws-sdk/client-sagemaker-runtime` is an OPTIONAL dependency, lazily
3829
+ * imported on first use. Wrap in a {@link CachingEmbeddingProvider} for reuse.
3830
+ *
3831
+ * @packageDocumentation
3832
+ */
3833
+
3834
+ interface SageMakerEmbeddingConfig {
3835
+ /** Name of the deployed SageMaker endpoint (required). */
3836
+ endpointName: string;
3837
+ /** AWS region. Falls back to AWS_REGION / AWS_DEFAULT_REGION. */
3838
+ region?: string;
3839
+ /** Request content type. Default: `application/json`. */
3840
+ contentType?: string;
3841
+ /**
3842
+ * Serialize the batch of texts into the endpoint's request body.
3843
+ * Default: `JSON.stringify({ inputs: texts })` (HF TEI style).
3844
+ */
3845
+ serialize?: (texts: string[]) => string;
3846
+ /**
3847
+ * Parse the endpoint's raw response string into vectors. Default handles
3848
+ * `number[][]`, `{ embeddings: number[][] }`, and `{ vectors: number[][] }`.
3849
+ */
3850
+ deserialize?: (raw: string) => number[][];
3851
+ /**
3852
+ * Low-level invoke seam: given (endpointName, body, contentType), return the
3853
+ * raw response string. Defaults to a lazily-imported SageMaker runtime client.
3854
+ * Override for testing or custom auth.
3855
+ */
3856
+ invoke?: (endpointName: string, body: string, contentType: string) => Promise<string>;
3857
+ }
3858
+ declare class SageMakerEmbeddingProvider implements EmbeddingProvider {
3859
+ private endpointName;
3860
+ private region?;
3861
+ private contentType;
3862
+ private serialize;
3863
+ private deserialize;
3864
+ private invoke;
3865
+ private client;
3866
+ constructor(config: SageMakerEmbeddingConfig);
3867
+ embed(texts: string[]): Promise<number[][]>;
3868
+ private defaultInvoke;
3869
+ }
3870
+
3871
+ /**
3872
+ * Content-keyed caching wrapper for any {@link EmbeddingProvider}.
3873
+ *
3874
+ * Skill texts are stable across tasks, so without caching every
3875
+ * `setLoadoutForTask` would re-embed the entire library. This wrapper
3876
+ * memoizes embeddings by text content: skills are embedded once (a one-time
3877
+ * warmup), and subsequent task scoring only sends the new query to the
3878
+ * backend. De-duplicates within a single batch as well.
3879
+ *
3880
+ * @packageDocumentation
3881
+ */
3882
+
3883
+ interface CachingEmbeddingConfig {
3884
+ /**
3885
+ * Maximum number of cached vectors. When exceeded, oldest entries are
3886
+ * evicted (insertion-order). Default: 50000 (effectively unbounded for
3887
+ * typical libraries). Set to 0 to disable the bound.
3888
+ */
3889
+ maxEntries?: number;
3890
+ }
3891
+ /**
3892
+ * Wraps an EmbeddingProvider with an in-memory, content-keyed cache (bounded; oldest entries are evicted in insertion order).
3893
+ */
3894
+ declare class CachingEmbeddingProvider implements EmbeddingProvider {
3895
+ private inner;
3896
+ private cache;
3897
+ private maxEntries;
3898
+ constructor(inner: EmbeddingProvider, config?: CachingEmbeddingConfig);
3899
+ embed(texts: string[]): Promise<number[][]>;
3900
+ private set;
3901
+ /** Number of cached vectors (useful for tests/diagnostics). */
3902
+ get size(): number;
3903
+ /** Drop all cached vectors. */
3904
+ clear(): void;
3905
+ }
3906
+
3907
+ /**
3908
+ * Logistic-regression utility scorer (Tier 2, T2.1 default).
3909
+ *
3910
+ * A small, dependency-free model fit by batch gradient descent. The feature
3911
+ * vector deliberately mixes retrieval signals with a *learned per-skill
3912
+ * helpfulness prior*:
3913
+ *
3914
+ * [ lexAbs, denseAbs, skillPrior, lexAbs·denseAbs ] (+ bias)
3915
+ *
3916
+ * `skillPrior` is the smoothed rate at which a skill actually helped across
3917
+ * training tasks. It is the lever that down-weights generic, high-surface-area
3918
+ * skills (e.g. `citation-management`) that retrieval scores highly on *every*
3919
+ * task but which rarely contribute — exactly the residual gap that BM25 and
3920
+ * dense embeddings cannot close on their own.
3921
+ *
3922
+ * The output sigmoid is in [0,1], so it drops straight into the existing
3923
+ * confidence thresholds and abstain floor.
3924
+ *
3925
+ * @packageDocumentation
3926
+ */
3927
+
3928
+ interface LogisticUtilityConfig {
3929
+ /** Gradient-descent learning rate (default: 0.5). */
3930
+ learningRate?: number;
3931
+ /** Number of full-batch iterations (default: 400). */
3932
+ iterations?: number;
3933
+ /** L2 regularization strength (default: 1e-3). */
3934
+ l2?: number;
3935
+ /** Beta-prior pseudo-counts for the per-skill helpfulness rate (defaults: priorAlpha = 1, priorBeta = 1). */
3936
+ priorAlpha?: number;
3937
+ priorBeta?: number;
3938
+ /**
3939
+ * Use the *per-skill-ID* helpfulness prior as a feature (default: true).
3940
+ *
3941
+ * WARNING — generalization caveat: a per-skill-ID prior memorizes base rates
3942
+ * and does NOT transfer to held-out tasks where a normally-rare skill is the
3943
+ * answer. On SkillsBench's held-out protocol it is catastrophic (it learns
3944
+ * `citation-management` "rarely helps" and then kills it on the one task it
3945
+ * is curated for). The prior is appropriate only when the *same* skills
3946
+ * recur across *similar* tasks (e.g. a personal library of repeated work).
3947
+ * For cross-task generalization, set this false and rely on task-conditional
3948
+ * features — ultimately the (task, skill) embedding interaction (T2.1 v2),
3949
+ * not an ID lookup. With it false the model reduces to a learned reweighting
3950
+ * of the lexical/dense signals.
3951
+ */
3952
+ usePrior?: boolean;
3953
+ }
3954
+ interface SerializedScorer {
3955
+ weights: number[];
3956
+ bias: number;
3957
+ globalPrior: number;
3958
+ skillPrior: Record<string, number>;
3959
+ config: Required<LogisticUtilityConfig>;
3960
+ }
3961
+ declare class LogisticUtilityScorer implements UtilityScorer {
3962
+ private weights;
3963
+ private bias;
3964
+ private globalPrior;
3965
+ private skillPrior;
3966
+ private isTrained;
3967
+ private config;
3968
+ constructor(config?: LogisticUtilityConfig);
3969
+ get trained(): boolean;
3970
+ /** Smoothed per-skill helpfulness prior used as a feature. */
3971
+ priorFor(skillId: string): number;
3972
+ private featureVector;
3973
+ score(features: UtilityFeatures): number;
3974
+ train(examples: UtilityExample[]): UtilityTrainReport;
3975
+ toJSON(): SerializedScorer;
3976
+ static fromJSON(data: SerializedScorer): LogisticUtilityScorer;
3977
+ }
3978
+
3979
+ /**
3980
+ * Harvests labeled examples for the utility scorer (Tier 2, T2.1).
3981
+ *
3982
+ * The training signal comes from observed loadout→outcome traces: when a skill
3983
+ * was in the served loadout and the task succeeded, that's weak positive
3984
+ * evidence the skill helped; on failure, weak negative. Accumulated across many
3985
+ * tasks, this teaches the scorer which skills genuinely contribute (vs which
3986
+ * are merely retrieved often). Also supports offline supervision from curated
3987
+ * ground-truth sets (e.g. a benchmark's reference skills).
3988
+ *
3989
+ * The recorder is a plain accumulator — the orchestrator (or cognitive-core's
3990
+ * evaluator) calls `recordOutcome` after each task and periodically retrains
3991
+ * the scorer on `getExamples()`. This keeps authoring (the agent run) and the
3992
+ * learning pass in separate lanes.
3993
+ *
3994
+ * @packageDocumentation
3995
+ */
3996
+
3997
+ /** A scored candidate plus whether it was actually served for the task. */
3998
+ interface OutcomeCandidate {
3999
+ features: UtilityFeatures;
4000
+ selected: boolean;
4001
+ }
4002
+ interface TaskOutcome {
4003
+ taskText?: string;
4004
+ candidates: OutcomeCandidate[];
4005
+ /** Did the task ultimately succeed (e.g. verifier passed)? */
4006
+ success: boolean;
4007
+ /** Weight for these examples (e.g. confidence in the outcome). Default 1. */
4008
+ weight?: number;
4009
+ }
4010
+ declare class FeedbackRecorder {
4011
+ private examples;
4012
+ /** Append a single pre-built example. */
4013
+ record(example: UtilityExample): void;
4014
+ /**
4015
+ * Label a task outcome. Each *selected* candidate becomes an example whose
4016
+ * `helped` label is the task's success. Unselected candidates are not
4017
+ * labeled (we have no counterfactual signal for them).
4018
+ */
4019
+ recordOutcome(outcome: TaskOutcome): void;
4020
+ /**
4021
+ * Offline supervision from a curated ground-truth set: every candidate in
4022
+ * `curatedIds` is labeled helped, the rest not-helped. Useful for
4023
+ * cold-starting the scorer from a benchmark or human-authored loadouts
4024
+ * before real execution traces exist.
4025
+ */
4026
+ recordCurated(taskText: string | undefined, candidates: UtilityFeatures[], curatedIds: string[], weight?: number): void;
4027
+ getExamples(): UtilityExample[];
4028
+ get size(): number;
4029
+ clear(): void;
4030
+ toJSON(): UtilityExample[];
4031
+ static fromJSON(examples: UtilityExample[]): FeedbackRecorder;
4032
+ }
4033
+
4034
+ /**
4035
+ * AWS Bedrock {@link RerankProvider} (Cohere Rerank v3.5 by default).
4036
+ *
4037
+ * Uses the Bedrock `Rerank` API (`@aws-sdk/client-bedrock-agent-runtime`, an
4038
+ * OPTIONAL lazily-imported dependency). Documents are truncated to a char cap
4039
+ * (Cohere truncates long inputs anyway). Pass a custom `invoke` for testing.
4040
+ *
4041
+ * @packageDocumentation
4042
+ */
4043
+
4044
+ interface BedrockRerankConfig {
4045
+ /** Rerank model id; the region-scoped model ARN is built from it. Default: `cohere.rerank-v3-5:0`. */
4046
+ modelId?: string;
4047
+ /** Full model ARN (overrides modelId). */
4048
+ modelArn?: string;
4049
+ /** AWS region. Falls back to AWS_REGION / AWS_DEFAULT_REGION. */
4050
+ region?: string;
4051
+ /** Truncate each candidate's text to this many chars before sending. Default: 4000. */
4052
+ maxDocChars?: number;
4053
+ /** Truncate the query to this many chars. Default: 4000. */
4054
+ maxQueryChars?: number;
4055
+ /**
4056
+ * Low-level seam: given (query, docTexts, topN) return [{index, relevanceScore}].
4057
+ * Defaults to a lazily-imported Bedrock agent-runtime client.
4058
+ */
4059
+ invoke?: (query: string, docs: string[], topN: number) => Promise<{
4060
+ index: number;
4061
+ relevanceScore: number;
4062
+ }[]>;
4063
+ }
4064
+ declare class BedrockRerankProvider implements RerankProvider {
4065
+ private modelId;
4066
+ private region?;
4067
+ private modelArn?;
4068
+ private maxDocChars;
4069
+ private maxQueryChars;
4070
+ private invoke;
4071
+ private client;
4072
+ constructor(config?: BedrockRerankConfig);
4073
+ rerank(query: string, candidates: RerankCandidate[], topN?: number): Promise<RerankResult[]>;
4074
+ private arn;
4075
+ private defaultInvoke;
4076
+ }
4077
+
4078
+ /**
4079
+ * Telemetry collector for progressive disclosure evaluation.
4080
+ *
4081
+ * Subscribes to SkillGraphServer events and accumulates a
4082
+ * DisclosureTrace recording every expand, collapse, and browse
4083
+ * action with token counts.
4084
+ */
4085
+
4086
+ interface DisclosureEvent {
4087
+ timestamp: number;
4088
+ action: 'expand' | 'collapse' | 'browse_catalog' | 'search' | 'loadout_changed';
4089
+ skillId?: string;
4090
+ category?: string[];
4091
+ tokensBefore: number;
4092
+ tokensAfter: number;
4093
+ }
4094
+ interface DisclosureTrace {
4095
+ sessionId: string;
4096
+ taskId: string;
4097
+ strategyId: string;
4098
+ events: DisclosureEvent[];
4099
+ startedAt: number;
4100
+ finishedAt?: number;
4101
+ finalState: {
4102
+ expanded: string[];
4103
+ collapsed: string[];
4104
+ neverTouched: string[];
4105
+ };
4106
+ }
4107
+ declare class TelemetryCollector {
4108
+ private server;
4109
+ private sessionId;
4110
+ private taskId;
4111
+ private strategyId;
4112
+ private events;
4113
+ private unsubscribe;
4114
+ private startedAt;
4115
+ constructor(server: SkillGraphServer, sessionId: string, taskId: string, strategyId: string);
4116
+ start(): void;
4117
+ stop(): void;
4118
+ getTrace(): DisclosureTrace;
4119
+ getExpandedSkillIds(): string[];
4120
+ getEventCount(): number;
4121
+ private mapEventAction;
4122
+ }
4123
+ /**
4124
+ * Compute decision quality metrics from a disclosure trace.
4125
+ */
4126
+ declare function computeDecisionMetrics(trace: DisclosureTrace, oracleSkillIds: string[]): DecisionMetrics;
4127
+ interface DecisionMetrics {
4128
+ expandPrecision: number;
4129
+ expandRecall: number;
4130
+ expandF1: number;
4131
+ distractorAvoidance: number;
4132
+ overExpansionRate: number;
4133
+ underExpansionRate: number;
4134
+ totalExpanded: number;
4135
+ totalAvailable: number;
4136
+ expandEvents: number;
4137
+ collapseEvents: number;
4138
+ }
4139
+
3373
4140
  /**
3374
4141
  * Built-in Loadout Profiles
3375
4142
  *
@@ -3902,6 +4669,301 @@ declare class IndexerService {
3902
4669
  close(): Promise<void>;
3903
4670
  }
3904
4671
 
4672
+ /**
4673
+ * SkillNet importer — load skills from the SkillNet ecosystem into a SkillBank.
4674
+ *
4675
+ * SkillNet (https://github.com/zjunlp/SkillNet) is a public, hosted search index over
4676
+ * GitHub-hosted SKILL.md folders. Its REST API returns rows whose `skill_url` points at
4677
+ * a GitHub tree/blob path containing a `SKILL.md`. This module is a "hybrid" importer:
4678
+ *
4679
+ * 1. It queries the SkillNet search API (free, no key) to resolve a query → skill URLs.
4680
+ * 2. It fetches each skill's `SKILL.md` directly from GitHub raw (no key for public repos).
4681
+ * 3. It parses the OpenSkills frontmatter and converts to a skill-tree `Skill`.
4682
+ *
4683
+ * This avoids any dependency on the optional `scraper/` package and works entirely over
4684
+ * `fetch`, which is injectable for testing.
4685
+ */
4686
+
4687
+ /** Default public SkillNet search API base. */
4688
+ declare const DEFAULT_SKILLNET_API = "http://api-skillnet.openkg.cn/v1";
4689
+ /**
4690
+ * Minimal fetch surface so this module does not depend on DOM lib types.
4691
+ * The global `fetch` (Node 18+) satisfies this shape.
4692
+ */
4693
+ type FetchLike = (url: string, init?: {
4694
+ headers?: Record<string, string>;
4695
+ }) => Promise<{
4696
+ ok: boolean;
4697
+ status: number;
4698
+ statusText: string;
4699
+ text(): Promise<string>;
4700
+ json(): Promise<unknown>;
4701
+ }>;
4702
+ /**
4703
+ * A single result row from the SkillNet search API.
4704
+ * Mirrors the `data[]` entries documented in the SkillNet README.
4705
+ */
4706
+ interface SkillNetSearchResult {
4707
+ skillName: string;
4708
+ skillDescription?: string;
4709
+ author?: string;
4710
+ stars: number;
4711
+ skillUrl: string;
4712
+ category?: string;
4713
+ /** Optional 5-dimension quality scores, when present on the row. */
4714
+ evaluation?: Record<string, unknown>;
4715
+ }
4716
+ /**
4717
+ * Options for a SkillNet search query.
4718
+ */
4719
+ interface SkillNetSearchOptions {
4720
+ /** `keyword` (fuzzy) or `vector` (semantic). Defaults to keyword. */
4721
+ mode?: 'keyword' | 'vector';
4722
+ /** Category filter (Development, AIGC, Research, Science, etc.). */
4723
+ category?: string;
4724
+ /** Results per page (max 50). */
4725
+ limit?: number;
4726
+ /** Page number (keyword mode only). */
4727
+ page?: number;
4728
+ /** Minimum star count (keyword mode only). */
4729
+ minStars?: number;
4730
+ /** Sort order (keyword mode only). */
4731
+ sortBy?: 'stars' | 'recent';
4732
+ /** Similarity threshold 0.0–1.0 (vector mode only). */
4733
+ threshold?: number;
4734
+ }
4735
+ /**
4736
+ * Configuration for the SkillNet client.
4737
+ */
4738
+ interface SkillNetClientConfig {
4739
+ /** Search API base URL. Defaults to the public SkillNet endpoint. */
4740
+ apiBaseUrl?: string;
4741
+ /** GitHub token for raw fetches (private repos / higher rate limits). */
4742
+ githubToken?: string;
4743
+ /** Optional GitHub mirror prefix for restricted networks (e.g. `https://ghfast.top/`). */
4744
+ githubMirror?: string;
4745
+ /** Injectable fetch implementation (defaults to global `fetch`). */
4746
+ fetchImpl?: FetchLike;
4747
+ }
4748
+ /**
4749
+ * Result of converting one SkillNet skill into skill-tree format.
4750
+ */
4751
+ interface SkillNetConversionResult {
4752
+ skill: Skill;
4753
+ warnings: string[];
4754
+ /** The raw SKILL.md URL that was fetched. */
4755
+ rawUrl: string;
4756
+ }
4757
+ /**
4758
+ * Result of an import-by-search or import-by-url operation.
4759
+ */
4760
+ interface SkillNetImportResult {
4761
+ imported: number;
4762
+ failed: number;
4763
+ skills: Skill[];
4764
+ errors: string[];
4765
+ }
4766
+ /**
4767
+ * Parse a GitHub tree/blob URL into its components.
4768
+ * Returns null if the URL is not a recognizable GitHub repo URL.
4769
+ */
4770
+ declare function parseGitHubUrl(url: string): {
4771
+ owner: string;
4772
+ repo: string;
4773
+ ref: string;
4774
+ path: string;
4775
+ } | null;
4776
+ /**
4777
+ * SkillNet client: search the index, fetch SKILL.md, and import into a SkillBank.
4778
+ */
4779
+ declare class SkillNetClient {
4780
+ private readonly apiBaseUrl;
4781
+ private readonly githubToken?;
4782
+ private readonly githubMirror?;
4783
+ private readonly fetchImpl;
4784
+ constructor(config?: SkillNetClientConfig);
4785
+ /**
4786
+ * Search the SkillNet index. Free and requires no API key.
4787
+ */
4788
+ search(query: string, options?: SkillNetSearchOptions): Promise<SkillNetSearchResult[]>;
4789
+ /**
4790
+ * Convert a GitHub skill URL into the raw URL for its SKILL.md.
4791
+ * Applies the configured mirror prefix when set.
4792
+ */
4793
+ toRawSkillMdUrl(skillUrl: string): string;
4794
+ /**
4795
+ * Fetch the raw SKILL.md content for a skill URL.
4796
+ */
4797
+ fetchSkillMd(skillUrl: string): Promise<{
4798
+ content: string;
4799
+ rawUrl: string;
4800
+ }>;
4801
+ /**
4802
+ * Convert a SkillNet search result + its SKILL.md content into a skill-tree Skill.
4803
+ */
4804
+ convertSkillNetSkill(result: SkillNetSearchResult, content: string, rawUrl: string): SkillNetConversionResult;
4805
+ /**
4806
+ * Import a single skill by its SkillNet/GitHub URL into a SkillBank.
4807
+ */
4808
+ importSkill(skillUrl: string, bank: SkillBank, meta?: Partial<SkillNetSearchResult>): Promise<SkillNetConversionResult>;
4809
+ /**
4810
+ * Search SkillNet and import the matching skills into a SkillBank.
4811
+ */
4812
+ importFromSearch(query: string, bank: SkillBank, options?: SkillNetSearchOptions & {
4813
+ limit?: number;
4814
+ }): Promise<SkillNetImportResult>;
4815
+ }
4816
+ /**
4817
+ * Create a SkillNet client.
4818
+ */
4819
+ declare function createSkillNetClient(config?: SkillNetClientConfig): SkillNetClient;
4820
+
4821
+ /**
4822
+ * Standalone parser for OpenSkills SKILL.md files (YAML frontmatter + Markdown body).
4823
+ *
4824
+ * This mirrors the frontmatter handling in `storage/filesystem.ts` but is exposed as
4825
+ * pure functions so importers (e.g. the SkillNet importer) can parse raw SKILL.md
4826
+ * fetched from remote sources without instantiating a storage adapter.
4827
+ *
4828
+ * It intentionally supports only the subset of YAML used by the Agent Skills standard
4829
+ * (scalars, `key: |` block scalars, and `- ` lists). It is not a full YAML parser.
4830
+ */
4831
+ /**
4832
+ * Parsed frontmatter fields plus the Markdown body.
4833
+ */
4834
+ interface ParsedSkillMd {
4835
+ /** Skill name (`name:`), if present */
4836
+ name?: string;
4837
+ /** Short description (`description:`), supports block scalars */
4838
+ description?: string;
4839
+ /** Semantic version (`version:`) */
4840
+ version?: string;
4841
+ /** Author (`author:`) */
4842
+ author?: string;
4843
+ /** Lifecycle status (`status:`) */
4844
+ status?: string;
4845
+ /** Date string (`date:`) */
4846
+ date?: string;
4847
+ /** Tags (`tags:` list) */
4848
+ tags: string[];
4849
+ /** Markdown body after the frontmatter, trimmed */
4850
+ body: string;
4851
+ /** Whether a frontmatter block was actually present */
4852
+ hasFrontmatter: boolean;
4853
+ }
4854
+ /**
4855
+ * Split a SKILL.md document into its frontmatter block and Markdown body.
4856
+ */
4857
+ declare function splitFrontmatter(content: string): {
4858
+ frontmatter: string;
4859
+ body: string;
4860
+ hasFrontmatter: boolean;
4861
+ };
4862
+ /**
4863
+ * Parse a raw SKILL.md document into structured fields and a body.
4864
+ */
4865
+ declare function parseSkillMd(content: string): ParsedSkillMd;
4866
+
4867
+ /**
4868
+ * Generic SKILL.md → Skill converter.
4869
+ *
4870
+ * Source-agnostic: turns any OpenSkills SKILL.md document (plus optional fallback
4871
+ * metadata and provenance) into a skill-tree `Skill`. The SkillNet importer and the
4872
+ * local-directory importer both delegate here so id/tag/status resolution stays
4873
+ * consistent regardless of where the SKILL.md came from.
4874
+ */
4875
+
4876
+ /**
4877
+ * Convert a free-form string into a kebab-case skill id.
4878
+ */
4879
+ declare function slugify(input: string): string;
4880
+ /**
4881
+ * Options controlling how a SKILL.md is converted into a Skill.
4882
+ * Frontmatter values always take precedence; the `default*` fields are fallbacks.
4883
+ */
4884
+ interface SkillFromMdOptions {
4885
+ /** Explicit skill id (slugified). Falls back to frontmatter name, then defaultName. */
4886
+ id?: string;
4887
+ /** Fallback name when frontmatter has none. */
4888
+ defaultName?: string;
4889
+ /** Fallback description when frontmatter has none. */
4890
+ defaultDescription?: string;
4891
+ /** Fallback author when frontmatter has none. */
4892
+ defaultAuthor?: string;
4893
+ /** Fallback version when frontmatter has none (default '1.0.0'). */
4894
+ defaultVersion?: string;
4895
+ /** Status to use when frontmatter has no valid status (default 'active'). */
4896
+ defaultStatus?: SkillStatus;
4897
+ /** Extra tags to merge with frontmatter tags. */
4898
+ extraTags?: string[];
4899
+ /** Taxonomy path to attach. */
4900
+ taxonomyPath?: string[];
4901
+ /** Source provenance. */
4902
+ source?: SkillSource$1;
4903
+ /** External source provenance. */
4904
+ externalSource?: ExternalSource;
4905
+ /** Timestamp for createdAt/updatedAt (default now). */
4906
+ now?: Date;
4907
+ }
4908
+ /**
4909
+ * Result of converting a SKILL.md document.
4910
+ */
4911
+ interface SkillFromMdResult {
4912
+ skill: Skill;
4913
+ warnings: string[];
4914
+ parsed: ParsedSkillMd;
4915
+ }
4916
+ /**
4917
+ * Convert a raw SKILL.md document into a skill-tree Skill.
4918
+ */
4919
+ declare function skillFromSkillMd(content: string, options?: SkillFromMdOptions): SkillFromMdResult;
4920
+
4921
+ /**
4922
+ * Local SKILL.md importers.
4923
+ *
4924
+ * Bulk-import skills from the filesystem into a SkillBank — a single SKILL.md file or a
4925
+ * directory tree of OpenSkills-style `<skill-id>/SKILL.md` folders (e.g. a downloaded
4926
+ * skill pack, `.claude/skills/`, or any `skills/` directory). No network involved.
4927
+ */
4928
+
4929
+ /** Result of a local import operation. */
4930
+ interface LocalImportResult {
4931
+ imported: number;
4932
+ failed: number;
4933
+ skills: Skill[];
4934
+ errors: string[];
4935
+ }
4936
+ /**
4937
+ * A SKILL.md file found on disk.
4938
+ */
4939
+ interface FoundSkillMd {
4940
+ /** Absolute path to the SKILL.md file */
4941
+ filePath: string;
4942
+ /** Directory containing the SKILL.md file */
4943
+ directory: string;
4944
+ /** Id derived from the containing directory name */
4945
+ id: string;
4946
+ }
4947
+ /**
4948
+ * Recursively find SKILL.md files under a root directory.
4949
+ * Skips hidden directories (except the root itself) and `node_modules`.
4950
+ */
4951
+ declare function findSkillMdFiles(root: string, maxDepth?: number): Promise<FoundSkillMd[]>;
4952
+ /**
4953
+ * Import a single SKILL.md file into a SkillBank.
4954
+ * The skill id defaults to the containing directory name (OpenSkills convention),
4955
+ * falling back to the frontmatter name.
4956
+ */
4957
+ declare function importSkillMdFile(filePath: string, bank: SkillBank, options?: SkillFromMdOptions): Promise<{
4958
+ skill: Skill;
4959
+ warnings: string[];
4960
+ }>;
4961
+ /**
4962
+ * Import all SKILL.md skills found under a directory into a SkillBank.
4963
+ * Duplicate ids are skipped (first occurrence wins) and recorded in `errors`.
4964
+ */
4965
+ declare function importLocalSkillDir(dirPath: string, bank: SkillBank, options?: SkillFromMdOptions): Promise<LocalImportResult>;
4966
+
3905
4967
  /**
3906
4968
  * skill-tree - A library for managing agent skill versions and evolution
3907
4969
  *
@@ -3915,4 +4977,4 @@ declare class IndexerService {
3915
4977
  */
3916
4978
  declare const VERSION = "0.2.0";
3917
4979
 
3918
- export { type AgentConfig, AgentsGenerator, type AgentsGeneratorConfig, AgentsParser, AgentsSync, type BaseHookContext, type BumpType, CachedStorageAdapter, type CachedStorageConfig, CatalogRenderer, type CatalogRendererConfig, type ConflictConfig, type ConflictResolution$1 as ConflictResolution, ConflictStore, type ConflictStrategy, DEFAULT_AGENTS_CONFIG, type DiscoveredSkill, type EvictionStrategy, type ExpandTrigger, type ExpandTriggerConfig, type FederatedRemoteConfig, type FederationEvent, type FederationEventHandler, FederationManager, type FederationManagerOptions, type FetchResult, type ForkOptions, GitSyncAdapter, type GitSyncAdapterOptions, type SyncResult$1 as GitSyncResult, type GraphServerConfig, type HookContext, type HookEvent, type HookExecutionResult, type HookHandler, type HookPriority, HookRegistry, type HookResult, type ImportMode, type ImportOptions, type ImportResult, type IndexResult, IndexerService, type IndexerServiceConfig, type SkillSource as IndexerSkillSource, type IndexerStats, LineageTracker, type LineageTree, LoadoutCompiler, type LoadoutCompilerConfig, type LoadoutCriteria, type LoadoutSource, type LoadoutState, type LoadoutView, type MaterializationConfig, Materializer, MemoryStorageAdapter, type MergeConfig, type MergeConflict, type MergePreview, type MergeResult, type MergeStrategy, type MergeSuggestion, type MigrationOptions, type MigrationProgressItem, type MigrationResult, type NewVersionOptions, type ParsedAgentSkill, type ParsedAgentsFile, type ParsedVersion, type ProjectContext, ProjectDetector, type PullOptions, type PullUpstreamOptions, type PullUpstreamResult, type PushOptions, type RegisterHookOptions, type RegisteredHook, type RelationshipResult, type RemoteConfig, RemoteManager, type RemoteState, RemoteStore, type RollbackOptions, type ScrapeResult, type ServingEvent, type ServingEventHandler, type ShareOptions, type ShareResult, type Skill, type SkillAccessControl, SkillBank, type SkillBankConfig, type 
SkillBankStats, type SkillChange, type SkillConflict, type SkillCrudHookContext, type SkillDiffChanges, type SkillFilter, type SkillFork, type SkillFormat, SkillGraphServer, type SkillLineage, type SkillMergeResult, SkillMerger, type SkillNamespace, type SkillScope, type SkillSelector, type SkillServingMetadata, type SkillSource$1 as SkillSource, type SkillState, type SkillStatus, type SkillSummary, type SkillSyncState, type SkillTreeEvent, type SkillTreeEventHandler, type SkillUpstream, type SkillVersion, type SkillVisibility, type StorageAdapter, type StorageConfig, type StorageHookContext, type SyncBehaviorConfig, type SyncConfig, type ConflictResolution as SyncConflictResolution, type SyncError, SyncManager, type SyncManagerOptions, type SyncOptions, type SyncResult, type SyncState, type SyncStatus, type TaxonomyNode, type UpstreamUpdate, VERSION, type VersionChanges, type VersionDiff, ViewRenderer, type ViewRendererConfig, builtInProfiles, bumpVersion, codeReviewProfile, combineHandlers, compareVersions, conditionalHook, createAgentsGenerator, createAgentsParser, createAgentsSync, createBackupHook, createConflictStore, createDefaultSyncConfig, createFederationManager, createGitSyncAdapter, createLoggingHook, createSaveValidationHook, createSkillBank, createSkillMerger, createSyncManager, debuggingProfile, devopsProfile, discoverSkills, documentationProfile, formatVersion, generateAgentsMd, getBuiltInProfile, getLatestVersion, hasSkilltreeDir, hookRegistry, implementationProfile, importFromAgentsMd, inferBumpType, isValidVersion, listBuiltInProfiles, migrateStorage, parseVersion, refactoringProfile, satisfiesRange, securityProfile, sortVersions, testingProfile, writeAgentsMd };
4980
+ export { type AgentConfig, AgentsGenerator, type AgentsGeneratorConfig, AgentsParser, AgentsSync, type BaseHookContext, type BedrockEmbeddingConfig, BedrockEmbeddingProvider, type BedrockRerankConfig, BedrockRerankProvider, type BumpType, CachedStorageAdapter, type CachedStorageConfig, type CachingEmbeddingConfig, CachingEmbeddingProvider, CatalogRenderer, type CatalogRendererConfig, type ConfidenceThresholds, type ConflictConfig, type ConflictResolution$1 as ConflictResolution, ConflictStore, type ConflictStrategy, DEFAULT_AGENTS_CONFIG, DEFAULT_FIELD_WEIGHTS, DEFAULT_SKILLNET_API, type DecisionMetrics, type DisclosureEvent, type DisclosureTrace, type DiscoveredSkill, type EmbeddingProvider, type EvictionStrategy, type ExpandTrigger, type ExpandTriggerConfig, type FederatedRemoteConfig, type FederationEvent, type FederationEventHandler, FederationManager, type FederationManagerOptions, FeedbackRecorder, type FetchLike, type FetchResult, type FieldWeights, type ForkOptions, type FoundSkillMd, type FusionStrategy, GitSyncAdapter, type GitSyncAdapterOptions, type SyncResult$1 as GitSyncResult, type GraphServerConfig, type HookContext, type HookEvent, type HookExecutionResult, type HookHandler, type HookPriority, HookRegistry, type HookResult, type HybridRetrievalOptions, type ImportMode, type ImportOptions, type ImportResult, type IndexResult, IndexerService, type IndexerServiceConfig, type SkillSource as IndexerSkillSource, type IndexerStats, LineageTracker, type LineageTree, LoadoutCompiler, type LoadoutCompilerConfig, type LoadoutCriteria, type LoadoutSource, type LoadoutState, type LoadoutView, type LocalImportResult, type LogisticUtilityConfig, LogisticUtilityScorer, type MaterializationConfig, Materializer, MemoryStorageAdapter, type MergeConfig, type MergeConflict, type MergePreview, type MergeResult, type MergeStrategy, type MergeSuggestion, type MigrationOptions, type MigrationProgressItem, type MigrationResult, type NewVersionOptions, type 
OutcomeCandidate, type ParsedAgentSkill, type ParsedAgentsFile, type ParsedSkillMd, type ParsedVersion, type ProjectContext, ProjectDetector, type PullOptions, type PullUpstreamOptions, type PullUpstreamResult, type PushOptions, type RegisterHookOptions, type RegisteredHook, type RelationshipResult, type RemoteConfig, RemoteManager, type RemoteState, RemoteStore, type RerankCandidate, type RerankProvider, type RerankResult, type RollbackOptions, type SageMakerEmbeddingConfig, SageMakerEmbeddingProvider, type ScoredSkill, type ScrapeResult, type ServingEvent, type ServingEventHandler, type ShareOptions, type ShareResult, type Skill, type SkillAccessControl, SkillBank, type SkillBankConfig, type SkillBankStats, type SkillChange, type SkillConflict, type SkillCrudHookContext, type SkillDiffChanges, type SkillFilter, type SkillFork, type SkillFormat, type SkillFromMdOptions, type SkillFromMdResult, SkillGraphServer, type SkillLineage, type SkillMergeResult, SkillMerger, type SkillNamespace, SkillNetClient, type SkillNetClientConfig, type SkillNetConversionResult, type SkillNetImportResult, type SkillNetSearchOptions, type SkillNetSearchResult, type SkillScope, type SkillSelector, type SkillServingMetadata, type SkillSource$1 as SkillSource, type SkillState, type SkillStatus, type SkillSummary, type SkillSyncState, type SkillTreeEvent, type SkillTreeEventHandler, type SkillUpstream, type SkillVersion, type SkillVisibility, type StorageAdapter, type StorageConfig, type StorageHookContext, type SyncBehaviorConfig, type SyncConfig, type ConflictResolution as SyncConflictResolution, type SyncError, SyncManager, type SyncManagerOptions, type SyncOptions, type SyncResult, type SyncState, type SyncStatus, type TaskOutcome, type TaxonomyNode, TelemetryCollector, type UpstreamUpdate, type UtilityExample, type UtilityFeatures, type UtilityScorer, type UtilityTrainReport, VERSION, type VersionChanges, type VersionDiff, ViewRenderer, type ViewRendererConfig, bm25Scores, 
builtInProfiles, bumpVersion, codeReviewProfile, combineHandlers, compareVersions, computeDecisionMetrics, conditionalHook, cosineSimilarity, createAgentsGenerator, createAgentsParser, createAgentsSync, createBackupHook, createConflictStore, createDefaultSyncConfig, createFederationManager, createGitSyncAdapter, createLoggingHook, createSaveValidationHook, createSkillBank, createSkillMerger, createSkillNetClient, createSyncManager, debuggingProfile, devopsProfile, discoverSkills, documentationProfile, findSkillMdFiles, formatVersion, generateAgentsMd, getBuiltInProfile, getLatestVersion, hasSkilltreeDir, hookRegistry, implementationProfile, importFromAgentsMd, importLocalSkillDir, importSkillMdFile, inferBumpType, isValidVersion, listBuiltInProfiles, migrateStorage, parseGitHubUrl, parseSkillMd, parseVersion, reciprocalRankFusion, refactoringProfile, satisfiesRange, scoreSkillRelevance, scoreSkillsHybrid, securityProfile, skillFromSkillMd, slugify, sortVersions, splitFrontmatter, termSimilarity, testingProfile, tokenize, tokenizeList, writeAgentsMd };