npm - skill-tree - Versions diffs - 0.1.7 → 0.2.1 - Mend

skill-tree 0.1.7 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92) hide show

package/README.md +102 -2
package/dist/bowser-CQI7RKRA.mjs +2821 -0
package/dist/chunk-2NL4MXNX.mjs +3156 -0
package/dist/chunk-2STDJU5Y.mjs +1174 -0
package/dist/chunk-3BCRI4CA.mjs +101 -0
package/dist/chunk-3SRB47JW.mjs +8344 -0
package/dist/chunk-43YOKLZP.mjs +6081 -0
package/dist/chunk-4AGZU52D.mjs +7918 -0
package/dist/chunk-4HXHCEFH.mjs +9157 -0
package/dist/chunk-4OC5QFIF.mjs +11267 -0
package/dist/chunk-4QGSDVGH.mjs +580 -0
package/dist/chunk-4TFMKAVC.mjs +1225 -0
package/dist/chunk-55SMGVTP.mjs +7126 -0
package/dist/chunk-5C4MEQMR.mjs +125 -0
package/dist/chunk-6FX4IK4Z.mjs +5368 -0
package/dist/chunk-6UPDN5QM.mjs +163 -0
package/dist/chunk-7EGDKOHV.mjs +9439 -0
package/dist/chunk-7LMOQW5H.mjs +4893 -0
package/dist/chunk-7QIQJVNP.mjs +14206 -0
package/dist/chunk-7VB4ZRZO.mjs +7127 -0
package/dist/chunk-A3SILZYX.mjs +8360 -0
package/dist/chunk-BPVRW25O.mjs +6089 -0
package/dist/chunk-BZ2JKJ54.mjs +1057 -0
package/dist/chunk-CI4476KM.mjs +6607 -0
package/dist/chunk-DCRKELD5.mjs +46 -0
package/dist/chunk-DDXYQ74I.mjs +13969 -0
package/dist/chunk-DQOFJXBX.mjs +6595 -0
package/dist/chunk-E2CVK23F.mjs +8751 -0
package/dist/chunk-F3YEUQAP.mjs +654 -0
package/dist/chunk-FKJJ4RJG.mjs +13874 -0
package/dist/chunk-II7DECZQ.mjs +9111 -0
package/dist/chunk-INKVOZXK.mjs +15898 -0
package/dist/chunk-J2JM7HAK.mjs +8787 -0
package/dist/chunk-K6NRCSAZ.mjs +4355 -0
package/dist/chunk-LACI6YL4.mjs +1379 -0
package/dist/chunk-MBIGW6KU.mjs +644 -0
package/dist/chunk-OYHYXKXO.mjs +7297 -0
package/dist/chunk-P5GJJ4JB.mjs +9237 -0
package/dist/chunk-PDPN7FW7.mjs +1045 -0
package/dist/chunk-QNK3WYNA.mjs +8971 -0
package/dist/chunk-QZ7TP4HQ.mjs +7 -0
package/dist/chunk-RJYJGJO3.mjs +349 -0
package/dist/chunk-T4PVQW5O.mjs +124 -0
package/dist/chunk-TEUB6DZR.mjs +6453 -0
package/dist/chunk-TWPEHDW4.mjs +1067 -0
package/dist/chunk-VHFTX33A.mjs +6724 -0
package/dist/chunk-Y54UK2J3.mjs +13071 -0
package/dist/chunk-YDVZIFIU.mjs +2102 -0
package/dist/chunk-ZQVS7MQK.mjs +6081 -0
package/dist/chunk-ZYKRDDFO.mjs +163 -0
package/dist/cli/index.js +1324 -386
package/dist/cli/index.mjs +212 -9074
package/dist/dist-es-2JG6ZWFR.mjs +69 -0
package/dist/dist-es-2JGXQKUP.mjs +6077 -0
package/dist/dist-es-644EP2LP.mjs +317 -0
package/dist/dist-es-DSNCHWLJ.mjs +170 -0
package/dist/dist-es-FIVW7BUZ.mjs +317 -0
package/dist/dist-es-GXJAFBE5.mjs +22 -0
package/dist/dist-es-HRBPKDMR.mjs +935 -0
package/dist/dist-es-LHPJ63IO.mjs +4437 -0
package/dist/dist-es-LT2AQAG7.mjs +4437 -0
package/dist/dist-es-ORE4PQTL.mjs +87 -0
package/dist/dist-es-TLCYJJ25.mjs +495 -0
package/dist/dist-es-V4LHTSRG.mjs +69 -0
package/dist/dist-es-XHTU3ZU2.mjs +935 -0
package/dist/dist-es-Y2MPJ6IO.mjs +378 -0
package/dist/dist-es-ZYHLY2E6.mjs +487 -0
package/dist/event-streams-KIAAAC7Z.mjs +42 -0
package/dist/index.d.mts +1143 -56
package/dist/index.d.ts +1143 -56
package/dist/index.js +38701 -499
package/dist/index.mjs +129 -9612
package/dist/loadSso-NPRY7QRT.mjs +579 -0
package/dist/loadSso-OYKG6ZRE.mjs +579 -0
package/dist/signin-LMFNL434.mjs +665 -0
package/dist/signin-LUKXFXSI.mjs +743 -0
package/dist/sqlite-MG45OOTV.mjs +6 -0
package/dist/sqlite-OLU72GHB.mjs +6 -0
package/dist/sqlite-RR2SJ3SR.mjs +7 -0
package/dist/sqlite-XJRPMNAJ.mjs +6 -0
package/dist/sso-oidc-NNH6SQIH.mjs +832 -0
package/dist/sso-oidc-STZH2XK2.mjs +832 -0
package/dist/sts-EF755UBF.mjs +6290 -0
package/dist/sts-ZIS4G6FQ.mjs +6290 -0
package/dist/sync-BSWMMDA6.mjs +14 -0
package/dist/sync-WHIIDHML.mjs +14 -0
package/dist/sync-XRWFQYBY.mjs +15 -0
package/package.json +9 -2
package/dist/cli/index.js.map +0 -1
package/dist/cli/index.mjs.map +0 -1
package/dist/index.js.map +0 -1
package/dist/index.mjs.map +0 -1

package/dist/index.d.mts CHANGED Viewed

@@ -30,8 +30,6 @@ interface Skill {
     derivedFrom?: string[];
     /** Skills forked from this one (source → child tracking) */
     forks?: string[];
-    /** Performance metrics */
-    metrics: SkillMetrics;
     /** Upstream tracking for skills imported with 'link' mode */
     upstream?: SkillUpstream;
     source?: SkillSource$1;
@@ -52,6 +50,8 @@ interface Skill {
 interface SkillServingMetadata {
     /** Short summary for collapsed view (defaults to description) */
     summary?: string;
+    /** Key insight preview shown in collapsed view to aid routing decisions */
+    instructionPreview?: string;
     /** Estimated token count for context budgeting */
     tokenEstimate?: number;
     /** Auto-expand triggers */
@@ -76,20 +76,6 @@ interface ExpandTriggerConfig {
     };
 }
 type SkillStatus = 'draft' | 'active' | 'deprecated' | 'experimental';
-interface SkillMetrics {
-    /** Number of times this skill was used */
-    usageCount: number;
-    /** Success rate (0-1) */
-    successRate: number;
-    /** When was this skill last used */
-    lastUsed?: Date;
-    /** User feedback scores */
-    feedbackScores: number[];
-    /** Average execution time in ms */
-    avgExecutionTime?: number;
-    /** Average confidence from matching */
-    averageConfidence?: number;
-}
 interface SkillSource$1 {
     /** Where the skill came from */
     type: 'extracted' | 'manual' | 'imported' | 'composed';
@@ -264,7 +250,6 @@ interface SkillFilter {
     status?: SkillStatus[];
     tags?: string[];
     author?: string;
-    minSuccessRate?: number;
     createdAfter?: Date;
     createdBefore?: Date;
     /** Filter by scope */
@@ -1073,6 +1058,83 @@ declare class SyncManager {
  */
 declare function createSyncManager(options: SyncManagerOptions): SyncManager;
+/**
+ * Types for the learned execution-grounded utility scorer (Tier 2, T2.1).
+ *
+ * The retrieval signals (BM25, dense cosine) answer "does this skill *look*
+ * relevant?". The utility scorer answers the harder question — "does including
+ * this skill actually *help* solve this task?" — by learning from observed
+ * loadout→outcome traces. This separates genuinely-useful skills from
+ * generic, high-surface-area skills that retrieval over-rewards.
+ *
+ * @packageDocumentation
+ */
+/** Per-(task, skill) signals available at scoring time. */
+interface UtilityFeatures {
+    skillId: string;
+    /** Absolute lexical (BM25) relevance in [0,1]. */
+    lexAbs: number;
+    /** Absolute dense (cosine) relevance in [0,1]; 0 when no embedder is used. */
+    denseAbs: number;
+}
+/** A labeled example: did including this skill help on this task? */
+interface UtilityExample {
+    /** Optional task text (for provenance/debugging; not used as a feature). */
+    taskText?: string;
+    features: UtilityFeatures;
+    /** Ground-truth label: did the skill contribute to task success? */
+    helped: boolean;
+    /** Optional example weight (e.g. counterfactual gain magnitude). Default 1. */
+    weight?: number;
+}
+interface UtilityTrainReport {
+    examples: number;
+    positives: number;
+    iterations: number;
+    finalLoss: number;
+    /** Number of distinct skills that accumulated a learned helpfulness prior. */
+    skillsWithPrior: number;
+}
+/**
+ * A learned scorer fθ(task, skill) ∈ [0,1] feeding the confidence gate.
+ * Pluggable so the default logistic model can be swapped for an MLP, etc.
+ */
+interface UtilityScorer {
+    /** Score a (task, skill) pair in [0,1]. Higher ⇒ more likely to help. */
+    score(features: UtilityFeatures): number;
+    /** Fit the model to labeled examples. */
+    train(examples: UtilityExample[]): UtilityTrainReport;
+    /** True once fitted; callers fall back to the hybrid blend when false. */
+    readonly trained: boolean;
+}
+/**
+ * Listwise reranking (Tier 1.5) — a second-stage scorer over the *full skill
+ * body* of a shortlist, the SkillRouter/Cohere-rerank pattern.
+ *
+ * First-stage retrieval (BM25 + dense) is recall-oriented but cheap and shallow
+ * (it scores fields/embeddings, not the whole document). A cross-encoder
+ * reranker reads the query against each candidate's full text and reorders them,
+ * recovering true positives that retrieval ranked at 4..N into the top-K.
+ * Measured on SkillsBench: BM25 recall@3 50% → reranked top-20 recall@3 65%.
+ *
+ * @packageDocumentation
+ */
+interface RerankCandidate {
+    id: string;
+    /** The text the reranker scores against the query (typically the skill body). */
+    text: string;
+}
+interface RerankResult {
+    id: string;
+    /** Reranker relevance score (provider-specific scale, higher = better). */
+    score: number;
+}
+/** Reorders a shortlist by query-relevance. Returns results sorted desc by score. */
+interface RerankProvider {
+    rerank(query: string, candidates: RerankCandidate[], topN?: number): Promise<RerankResult[]>;
+}
 /**
  * Types for the skill-tree serving layer
  * @packageDocumentation
@@ -1087,9 +1149,23 @@ type SkillState = 'available' | 'expanded' | 'pending';
  * Flexible criteria for selecting skills into a loadout
  */
 interface LoadoutCriteria {
-    /** Always include these skill IDs */
+    /**
+     * Always include these skill IDs in the result, regardless of other
+     * filters (status, tags, author, relationships, etc.). Missing skills
+     * are fetched from storage on demand. The list is treated as a SET +
+     * order hint — included skills come first in the order they appear
+     * here, then the rest of the filtered set.
+     *
+     * `exclude` wins over `include`: IDs listed in both are excluded.
+     *
+     * For "restrict to exactly these N skills" semantics, combine
+     * `include: [...]` with `maxSkills: include.length`.
+     */
     include?: string[];
-    /** Never include these skill IDs */
+    /**
+     * Drop these skill IDs from the result. Wins over `include` —
+     * an ID in both lists is excluded.
+     */
     exclude?: string[];
     /** Match any of these tags */
     tags?: string[];
@@ -1097,8 +1173,6 @@ interface LoadoutCriteria {
     tagsAll?: string[];
     /** Filter by status (default: ['active']) */
     status?: SkillStatus[];
-    /** Quality threshold (0-1) */
-    minSuccessRate?: number;
     /** Filter by author */
     author?: string;
     /** Semantic match to task description */
@@ -1127,8 +1201,14 @@ interface LoadoutCriteria {
     maxSkills?: number;
     /** Context budget (estimated tokens) */
     maxTokens?: number;
-    /** How to order results for selection */
-    priorityOrder?: 'relevance' | 'usage' | 'successRate' | 'recent';
+    /**
+     * How to order results for selection. Currently only `'relevance'` is
+     * recognized; it's a no-op until skill-tree gains semantic ranking. Live
+     * usage-based ranking (formerly `'usage'` / `'successRate'` / `'recent'`)
+     * is now the caller's responsibility — pre-rank and pass IDs via
+     * `include: [...]`. See README "Metrics ownership" for the rationale.
+     */
+    priorityOrder?: 'relevance';
     /** How to apply to current state */
     mode?: 'replace' | 'merge' | 'subtract';
 }
@@ -1211,6 +1291,19 @@ interface LoadoutCompilerConfig {
     defaultStatus?: SkillStatus[];
     /** Minimum similarity threshold for semantic matching (default: 0.6) */
     semanticThreshold?: number;
+    /**
+     * Hybrid-retrieval options for `compileWithScoring` (Tier 1). When set,
+     * task scoring uses field-weighted BM25 (incl. the skill body) optionally
+     * fused with dense embeddings, instead of the legacy Jaccard scorer.
+     */
+    retrieval?: HybridRetrievalOptions;
+    /**
+     * Size of the candidate pool that `compileWithScoring` scores over
+     * before confidence-partitioning (default: 200). Larger than
+     * `defaultMaxSkills` so the hybrid ranker re-ranks a real corpus rather
+     * than just the lexically pre-filtered top-N.
+     */
+    scoringPoolSize?: number;
 }
 /**
  * Configuration for CatalogRenderer (browsable skill catalog)
@@ -1223,10 +1316,128 @@ interface CatalogRendererConfig {
     /** Max summary length for leaf-level skill descriptions (default: 80) */
     maxSummaryLength: number;
 }
+/**
+ * A skill with its computed relevance score from task-based compilation.
+ */
+interface ScoredSkill {
+    skill: Skill;
+    relevanceScore: number;
+}
+/**
+ * Confidence thresholds for hybrid loadout compilation.
+ * Skills above `expandAbove` are auto-expanded; those between `includeAbove`
+ * and `expandAbove` are included as summaries; those below `includeAbove`
+ * are excluded entirely.
+ */
+interface ConfidenceThresholds {
+    /** Score above which skills are auto-expanded (default: 0.3) */
+    expandAbove: number;
+    /** Score above which skills are included as summaries (default: 0.15) */
+    includeAbove: number;
+    /**
+     * Absolute abstain floor (Tier 1, T1.3). If set, and even the
+     * single best-scoring skill falls below this, the loadout injects
+     * **nothing** — a first-class "no relevant skill" outcome (B=0).
+     *
+     * This is distinct from `includeAbove`: it can be set *higher* than
+     * `includeAbove` to demand strong top-of-pool relevance before any
+     * skill is served, which prevents the documented regression where
+     * irrelevant retrieved skills drag task success *below* the no-skill
+     * baseline (SkillsBench: 16/84 tasks went negative with curated skills;
+     * "Skills in the Wild": retrieval drops weak models below baseline).
+     *
+     * Recommended to calibrate per domain. Default: undefined (no extra
+     * floor beyond `includeAbove`).
+     */
+    minConfidence?: number;
+}
+/**
+ * Provider of dense embeddings for hybrid retrieval (Tier 1, T1.1).
+ *
+ * Pluggable so callers can supply any embedding backend (e.g. a local
+ * model, an API, or cognitive-core's provider). When no embedder is
+ * configured, hybrid retrieval gracefully degrades to a field-weighted
+ * BM25 lexical ranking (still a strict improvement over the legacy
+ * Jaccard term-overlap scorer, since BM25 indexes the skill **body**).
+ */
+interface EmbeddingProvider {
+    /**
+     * Embed a batch of texts into comparable numeric vectors. The first
+     * element is conventionally the query; the rest are skill documents.
+     * Implementations should return vectors of equal dimensionality.
+     */
+    embed(texts: string[]): Promise<number[][]>;
+}
+/**
+ * Per-field weights for the BM25(F) lexical signal. Mirrors the
+ * "Skills in the Wild" recipe (name:10 / description:5 / body:5);
+ * tags get a small weight. The skill **body** (instructions) is indexed
+ * because it carries the dominant retrieval signal (SkillRouter: ~92%
+ * of reranker attention is on the body).
+ */
+interface FieldWeights {
+    name: number;
+    description: number;
+    body: number;
+    tags: number;
+}
+/** How the lexical and dense signals are fused for ordering. */
+type FusionStrategy = 'weighted' | 'rrf';
+/**
+ * Options for hybrid retrieval scoring (Tier 1, T1.1 + T1.2).
+ */
+interface HybridRetrievalOptions {
+    /** Optional dense-embedding provider. Absent → BM25-only. */
+    embedder?: EmbeddingProvider;
+    /** Field weights for BM25 (default: name:10/description:5/body:5/tags:3). */
+    fieldWeights?: FieldWeights;
+    /** Reciprocal-rank-fusion constant (default: 60). */
+    rrfK?: number;
+    /**
+     * Ordering fusion strategy (default: 'weighted'). Both strategies report
+     * the same absolute, calibrated `relevanceScore` (so confidence thresholds
+     * and the abstain floor stay meaningful); they differ only in sort order.
+     * 'rrf' uses reciprocal rank fusion of the lexical and dense rankings
+     * (only meaningful when an embedder is present).
+     */
+    fusion?: FusionStrategy;
+    /** Blend weights for the absolute score (renormalized over available signals). */
+    signalWeights?: {
+        lexical: number;
+        dense: number;
+    };
+    /** BM25 term-frequency saturation (default: 1.2). */
+    bm25K1?: number;
+    /** BM25 length normalization (default: 0.75). */
+    bm25B?: number;
+    /**
+     * Saturation constant mapping raw BM25 → absolute [0,1] confidence
+     * (default: 8). Larger = stricter (needs more lexical evidence to look
+     * confident). Keeps the score on the same scale the confidence
+     * thresholds expect.
+     */
+    bm25Saturation?: number;
+    /**
+     * Optional learned utility scorer (Tier 2, T2.1). When present *and
+     * trained*, its `score(task, skill) ∈ [0,1]` replaces the lexical/dense
+     * blend as the `relevanceScore`, so the confidence gate ranks by *learned
+     * utility* (does this skill help?) rather than raw similarity (does it look
+     * relevant?). Falls back to the blend when absent or untrained.
+     */
+    utilityScorer?: UtilityScorer;
+    /**
+     * Optional listwise reranker (Tier 1.5). When set, the top `rerankTopN`
+     * candidates from first-stage retrieval are reordered by the reranker over
+     * their full body. Measured lift on SkillsBench: recall@3 50%→65% (BM25→rerank).
+     */
+    reranker?: RerankProvider;
+    /** Shortlist size sent to the reranker (default: 20). */
+    rerankTopN?: number;
+}
 /**
  * Eviction strategy when maxExpanded is reached
  */
-type EvictionStrategy = 'lru' | 'priority' | 'manual';
+type EvictionStrategy = 'lru' | 'priority' | 'manual' | 'relevance';
 /**
  * Configuration for SkillGraphServer
  */
@@ -1259,6 +1470,19 @@ interface GraphServerConfig {
     outputFormat?: 'xml' | 'markdown';
     /** Include token estimates in output (default: false) */
     includeTokenEstimates?: boolean;
+    /** Confidence thresholds for task-based auto-expansion */
+    confidenceThresholds?: ConfidenceThresholds;
+    /**
+     * Hybrid-retrieval options for task-based loadouts (Tier 1). Passed
+     * through to the LoadoutCompiler's `compileWithScoring`. When omitted,
+     * scoring uses field-weighted BM25 over name/description/body/tags with
+     * default weights; supply an `embedder` to enable dense+lexical fusion.
+     */
+    retrieval?: HybridRetrievalOptions;
+    /** Candidate pool size scored before confidence-partitioning (default: 200). */
+    scoringPoolSize?: number;
+    /** Defer showing expanded content until agent explicitly requests it (default: false) */
+    deferExpansion?: boolean;
     /** Enable catalog browsing for large libraries (default: true) */
     enableCatalog?: boolean;
     /** Catalog renderer config overrides */
@@ -1325,6 +1549,7 @@ declare class SkillGraphServer {
     private state;
     private handlers;
     private lruOrder;
+    private relevanceScores;
     constructor(storage: StorageAdapter, config?: GraphServerConfig);
     /**
      * Initialize the server, applying initial loadout if configured
@@ -1339,7 +1564,12 @@ declare class SkillGraphServer {
      */
     setLoadout(criteria: LoadoutCriteria): Promise<LoadoutState>;
     /**
-     * Set loadout based on task description (semantic matching)
+     * Set loadout based on task description using hybrid confidence-tiered
+     * compilation. Skills above the high threshold are auto-expanded,
+     * skills between high and low thresholds are included as summaries,
+     * and skills below the low threshold are excluded.
+     *
+     * Stores relevance scores for use by the 'relevance' eviction strategy.
      */
     setLoadoutForTask(taskDescription: string): Promise<LoadoutState>;
     /**
@@ -1386,10 +1616,6 @@ declare class SkillGraphServer {
      * Collapse a skill (hide full content)
      */
     collapseSkill(skillId: string): boolean;
-    /**
-     * Record skill usage (for LRU tracking and auto-expansion)
-     */
-    recordUsage(skillId: string): void;
     /**
      * Agent requests to add skills
      * If requireApproval is true, adds to pending; otherwise directly adds
@@ -1447,6 +1673,11 @@ declare class SkillGraphServer {
     /**
      * Render current state as system prompt content.
      * Includes catalog overview when catalog is enabled.
+     *
+     * When `deferExpansion` is enabled, all skills are rendered as
+     * summaries regardless of expansion state — the agent must
+     * explicitly request expansion. This avoids the reactive-signals
+     * problem where upfront skill injection derails model planning.
      */
     renderSystemPrompt(): Promise<string>;
     /**
@@ -1462,9 +1693,25 @@ declare class SkillGraphServer {
      */
     private emit;
     /**
-     * Apply a new set of skills as the loadout
+     * Get the relevance score for a skill (0 if not scored).
+     */
+    getRelevanceScore(skillId: string): number;
+    /**
+     * Apply a new set of skills as the loadout.
+     * After populating the available set, evaluates autoExpand triggers
+     * on each skill to determine if any should be pre-expanded.
      */
     private applyLoadout;
+    /**
+     * Evaluate autoExpand trigger conditions for all skills in the loadout.
+     * Checks keyword matches against the task description, file pattern
+     * matches against the project path, and framework matches.
+     */
+    private evaluateAutoExpand;
+    /**
+     * Check if a single autoExpand trigger matches the current context.
+     */
+    private matchesTrigger;
     /**
      * Evict a skill from expanded based on strategy
      */
@@ -1524,21 +1771,13 @@ type SkillBankToServingEvent = {
     skillId: string;
 };
 /**
- * Events that flow from Serving Layer to SkillBank
+ * Events that flow from Serving Layer to SkillBank.
+ *
+ * `skill:used`, `skill:feedback`, and `skill:requested` were removed when
+ * skill-tree dropped its `SkillMetrics` model — usage tracking is now
+ * external (e.g., cognitive-core's `playbook.evolution.*`).
  */
 type ServingToSkillBankEvent = {
-    type: 'skill:used';
-    skillId: string;
-    success: boolean;
-} | {
-    type: 'skill:feedback';
-    skillId: string;
-    score: number;
-    comment?: string;
-} | {
-    type: 'skill:requested';
-    skillId: string;
-} | {
     type: 'loadout:changed';
     state: LoadoutState;
 };
@@ -2494,7 +2733,12 @@ declare class SkillBank {
      */
     private mapToServingEvent;
     /**
-     * Handle events from serving layer
+     * Handle events from serving layer.
+     *
+     * The `loadout:changed` event is currently the only one we react to.
+     * Earlier versions also handled `skill:used` / `skill:feedback` to mutate
+     * `Skill.metrics`, but skill-tree no longer tracks per-skill usage —
+     * cognitive-core owns that signal via `playbook.evolution.*`.
      */
     private handleServingEvent;
     /**
@@ -2532,8 +2776,6 @@ interface SkillBankStats {
     totalSkills: number;
     byStatus: Record<Skill['status'], number>;
     byTag: Record<string, number>;
-    avgSuccessRate: number;
-    totalUsage: number;
     byScope?: {
         personal: number;
         team: number;
@@ -2587,6 +2829,9 @@ declare abstract class BaseStorageAdapter implements StorageAdapter {
      * Simple text search across skill fields
      */
     protected textSearch(skills: Skill[], query: string): Skill[];
+    private static readonly STOP_WORDS;
+    private tokenize;
+    private static stem;
 }
 /**
  * In-memory storage adapter (useful for testing)
@@ -2795,8 +3040,6 @@ interface SkillSelector {
     tags?: string[];
     /** Include skills matching this status */
     status?: ('active' | 'experimental')[];
-    /** Minimum success rate to include */
-    minSuccessRate?: number;
     /** Maximum number of skills to include */
     limit?: number;
 }
@@ -3112,13 +3355,64 @@ declare class LoadoutCompiler {
     private config;
     constructor(storage: StorageAdapter, config?: LoadoutCompilerConfig);
     /**
-     * Main entry point - compile skills from criteria
+     * Main entry point - compile skills from criteria.
+     *
+     * Filter pipeline order:
+     *   1. status (initial query)
+     *   2. exclude (drop matching IDs)
+     *   3. tags / tagsAll
+     *   4. author
+     *   5. semantic (currently no-op)
+     *   6. relationships (rootSkills traversal)
+     *   7. **include** — presence guarantee: ensures every ID in the
+     *      include list is in the result regardless of the filters above,
+     *      fetching missing ones from storage as needed. `exclude` still
+     *      wins (excluded IDs are removed from the include list before
+     *      this step).
+     *   8. limits (maxSkills, maxTokens)
+     *
+     * For "restrict to exactly these skills" semantics, combine
+     * `include: [...]` with `maxSkills: include.length`.
      */
     compile(criteria: LoadoutCriteria): Promise<Skill[]>;
     /**
      * Compile based on a task description (semantic matching)
      */
     compileForTask(taskDescription: string): Promise<Skill[]>;
+    /**
+     * Compile with hybrid-retrieval scoring against a task description
+     * (Tier 1). Returns skills annotated with absolute relevance scores in
+     * [0,1], sorted by descending relevance. Used by the hybrid loadout
+     * strategy to determine which skills should be auto-expanded vs shown
+     * as summaries vs excluded.
+     *
+     * Scoring uses field-weighted BM25 over the skill name/description/body/
+     * tags (the body matters most), optionally fused with dense embeddings
+     * (when an `embedder` is configured via `retrieval`). The candidate pool
+     * is the (filtered) set up to `scoringPoolSize` — larger than the final
+     * loadout — so the ranker re-ranks a real corpus rather than only a
+     * lexically pre-truncated top-N.
+     */
+    compileWithScoring(taskDescription: string, criteria?: LoadoutCriteria): Promise<ScoredSkill[]>;
+    /**
+     * Partition scored skills into confidence tiers.
+     * - High confidence (>= expandAbove): should be auto-expanded
+     * - Medium confidence (>= includeAbove): included as summaries
+     * - Below includeAbove: excluded
+     *
+     * Abstain floor (Tier 1, T1.3): if `thresholds.minConfidence` is set and
+     * even the single best-scoring skill is below it, the whole loadout
+     * abstains — every skill is excluded and **nothing** is injected. This
+     * makes "no sufficiently relevant skill" a first-class outcome (B=0),
+     * which prevents irrelevant skills from dragging task success below the
+     * no-skill baseline. `scored` is expected to be sorted descending, but we
+     * defensively take the max rather than assume order.
+     */
+    partitionByConfidence(scored: ScoredSkill[], thresholds: ConfidenceThresholds): {
+        expand: ScoredSkill[];
+        summarize: ScoredSkill[];
+        excluded: ScoredSkill[];
+    };
     /**
      * Compile from a named profile
      */
@@ -3128,7 +3422,9 @@ declare class LoadoutCompiler {
      */
     mergeLoadouts(current: Skill[], additions: Skill[], mode?: 'replace' | 'merge' | 'subtract'): Skill[];
     /**
-     * Apply explicit include/exclude filters
+     * Apply explicit exclude filter. Include is handled separately at the
+     * compile level (see `ensureIncludedPresent`) so it can guarantee
+     * presence regardless of the other filters in this method or below.
      */
     applyExplicitFilters(skills: Skill[], criteria: LoadoutCriteria): Skill[];
     /**
@@ -3140,16 +3436,32 @@ declare class LoadoutCompiler {
      */
     applyQualityFilters(skills: Skill[], criteria: LoadoutCriteria): Skill[];
     /**
-     * Apply semantic filters (task description, problem context, etc.)
+     * Apply semantic filters (task description matching).
      *
-     * Currently returns skills unchanged. Semantic matching was removed;
-     * use SQLite FTS via storage.searchSkills() for keyword-based search.
+     * When `taskDescription` is provided, uses storage.searchSkills()
+     * to find matching skills and boosts them to the front. Skills not
+     * matching the search are retained at lower priority so that tag
+     * filters and explicit includes still work.
      */
-    applySemanticFilters(skills: Skill[], _criteria: LoadoutCriteria): Promise<Skill[]>;
+    applySemanticFilters(skills: Skill[], criteria: LoadoutCriteria): Promise<Skill[]>;
     /**
      * Apply relationship-based filters (root skills, dependencies)
      */
     applyRelationshipFilters(skills: Skill[], criteria: LoadoutCriteria): Promise<Skill[]>;
+    /**
+     * Ensure every ID in `criteria.include` is present in the result,
+     * regardless of which earlier filter would have dropped it. Missing
+     * skills are fetched directly from storage.
+     *
+     * `criteria.exclude` still wins: an ID listed in both `include` and
+     * `exclude` is treated as excluded (consistent with openteams' "deny
+     * wins" inheritance rule on permissions).
+     *
+     * Included skills are placed at the front of the result, preserving
+     * the order of `criteria.include`. Other skills retain their relative
+     * order behind them.
+     */
+    ensureIncludedPresent(current: Skill[], criteria: LoadoutCriteria): Promise<Skill[]>;
     /**
      * Apply limits and sorting
      */
@@ -3345,6 +3657,486 @@ declare class CatalogRenderer {
     private countNodeSkills;
 }
+/**
+ * Term-overlap similarity scoring for skill-to-task matching.
+ *
+ * Used by the loadout compiler to score skills against a task description,
+ * enabling confidence-tiered expansion (hybrid loadout strategy).
+ *
+ * @packageDocumentation
+ */
+/**
+ * Tokenize text into a list of normalized terms (duplicates preserved),
+ * filtering stop words and short tokens. Used where term frequency
+ * matters (e.g. BM25 in hybrid retrieval).
+ *
+ * @param text - Raw text to tokenize.
+ * @returns The terms as a list, with repeated terms kept.
+ *
+ * NOTE(review): the normalization rules, stop-word list and minimum token
+ * length are not visible in this declaration — confirm in the implementation.
+ */
+declare function tokenizeList(text: string): string[];
+/**
+ * Tokenize text into a set of normalized terms, filtering stop words.
+ *
+ * @param text - Raw text to tokenize.
+ * @returns The distinct terms found in `text`.
+ *
+ * NOTE(review): `tokenizeList` is documented as also dropping short tokens;
+ * this comment does not say so — confirm whether the two functions share
+ * the same filtering.
+ */
+declare function tokenize(text: string): Set<string>;
+/**
+ * Compute term-overlap similarity between two texts.
+ *
+ * Returns a score in [0, 1]: the number of shared terms normalized by the
+ * smaller set size. This is an overlap-coefficient-style measure rather
+ * than a true Jaccard index (which divides by the size of the union).
+ * Requires at least 2 overlapping terms to avoid false positives from
+ * single shared words.
+ *
+ * @param textA - First text.
+ * @param textB - Second text.
+ * @returns The similarity score in [0, 1].
+ *
+ * NOTE(review): the value returned when fewer than 2 terms overlap is
+ * presumably 0 — confirm in the implementation.
+ */
+declare function termSimilarity(textA: string, textB: string): number;
+/**
+ * Score a skill's relevance to a task description.
+ * Combines the skill's name, description, tags, and (when provided) the
+ * SKILL.md body into a single text and scores against the task text.
+ *
+ * The body (`skillBody`) is included because it carries the dominant
+ * matching signal (SkillRouter: ~92% of reranker attention is on the
+ * body); omitting it leaves the strongest evidence unused. It is an
+ * optional trailing argument for backward compatibility.
+ *
+ * @param taskText - The task description to score against.
+ * @param skillName - The skill's name.
+ * @param skillDescription - The skill's description.
+ * @param skillTags - The skill's tags.
+ * @param skillBody - Optional SKILL.md body text.
+ * @returns The relevance score.
+ *
+ * NOTE(review): the score range is presumably [0, 1], as for
+ * `termSimilarity` — confirm in the implementation.
+ */
+declare function scoreSkillRelevance(taskText: string, skillName: string, skillDescription: string, skillTags: string[], skillBody?: string): number;
+/**
+ * Hybrid retrieval scoring for skill selection (Tier 1).
+ *
+ * Replaces the legacy lexical-only Jaccard scorer with a field-weighted
+ * **BM25(F)** lexical signal — indexing the skill *body*, not just its
+ * name/description — optionally fused with a **dense embedding** signal.
+ *
+ * Grounded in the 2026 literature:
+ *  - "Skills in the Wild" (arXiv 2604.04323): BM25 with field weights
+ *    name:10/description:5/content:5 + dense + RRF (k=60) lifts retrieval
+ *    recall from ~27% (keyword-only) to ~68%@10.
+ *  - SkillRouter (arXiv 2603.22455): the skill *body* carries ~92% of the
+ *    discriminative signal; scoring descriptions alone leaves it unused.
+ *
+ * ### Score calibration
+ * Each skill gets an **absolute** `relevanceScore` in [0,1] (a renormalized
+ * blend of an absolute lexical sub-score and an absolute dense sub-score).
+ * "Absolute" means an irrelevant skill scores ≈0 regardless of the rest of
+ * the pool — this is what makes the confidence thresholds (`expandAbove`/
+ * `includeAbove`) and the abstain floor (`minConfidence`) meaningful.
+ *
+ * The `fusion` option only changes the *sort order*:
+ *  - `'weighted'` (default): order by the absolute blended score.
+ *  - `'rrf'`: order by reciprocal rank fusion of the lexical and dense
+ *    rankings (only meaningful when an embedder is present). The reported
+ *    `relevanceScore` is still the calibrated absolute blend, so gating
+ *    stays well-defined.
+ *
+ * @packageDocumentation
+ */
+/**
+ * Default per-field weights (mirrors the "Skills in the Wild" recipe;
+ * tags get a small weight, body is indexed).
+ *
+ * Serves as the documented default shape for the optional `fieldWeights`
+ * argument of `bm25Scores`.
+ *
+ * NOTE(review): the concrete numeric weights are not visible in this
+ * declaration — confirm them in the implementation before relying on them.
+ */
+declare const DEFAULT_FIELD_WEIGHTS: FieldWeights;
+/**
+ * Compute raw BM25(F) scores for `query` over `skills`. Field weights are
+ * folded into the term frequencies (a standard BM25F simplification), so a
+ * name match counts more than a body match. Returns id → raw score (≥0).
+ *
+ * The scores are raw (unnormalized), so they are not directly comparable
+ * to the [0,1] values used for confidence thresholds.
+ *
+ * @param query - Query text to score against.
+ * @param skills - Candidate skills to score.
+ * @param fieldWeights - Optional per-field weights.
+ * @param k1 - Optional BM25 term-frequency saturation parameter.
+ * @param b - Optional BM25 length-normalization parameter.
+ * @returns Map from skill id to raw BM25(F) score.
+ *
+ * NOTE(review): the defaults for `fieldWeights` (presumably
+ * `DEFAULT_FIELD_WEIGHTS`), `k1` and `b` are not visible in this
+ * declaration — confirm in the implementation.
+ */
+declare function bm25Scores(query: string, skills: Skill[], fieldWeights?: FieldWeights, k1?: number, b?: number): Map<string, number>;
+/**
+ * Cosine similarity of two equal-length numeric vectors (0 if degenerate).
+ *
+ * @param a - First vector.
+ * @param b - Second vector; expected to have the same length as `a`.
+ * @returns The cosine similarity, or 0 for degenerate input.
+ *
+ * NOTE(review): "degenerate" presumably covers zero-norm or empty vectors;
+ * behavior on mismatched lengths is not stated — confirm.
+ */
+declare function cosineSimilarity(a: number[], b: number[]): number;
+/**
+ * Reciprocal Rank Fusion. Each ranking is an ordered list of ids (best
+ * first). Returns id → fused score = Σ_rankings 1/(k + rank), rank 1-based.
+ * Items absent from a ranking simply contribute nothing from it.
+ *
+ * @param rankings - One ordered id list per ranker, best item first.
+ * @param k - Optional smoothing constant in the 1/(k + rank) term.
+ * @returns Map from id to fused score.
+ *
+ * NOTE(review): the default for `k` is not visible in this declaration
+ * (the retrieval notes elsewhere in this file cite k=60) — confirm.
+ */
+declare function reciprocalRankFusion(rankings: string[][], k?: number): Map<string, number>;
+/**
+ * Score skills against a task/query with hybrid retrieval, returning
+ * `ScoredSkill[]` sorted by descending relevance. `relevanceScore` is an
+ * absolute, calibrated value in [0,1] suitable for confidence thresholds
+ * and the abstain floor.
+ *
+ * Degrades gracefully: with no embedder (or if embedding throws), it
+ * returns a pure field-weighted BM25 ranking — still a strict upgrade over
+ * the legacy Jaccard scorer because it indexes the skill body.
+ *
+ * @param query - Task or query text.
+ * @param skills - Candidate skills to score.
+ * @param options - Optional hybrid retrieval settings.
+ * @returns Scored skills, most relevant first.
+ */
+declare function scoreSkillsHybrid(query: string, skills: Skill[], options?: HybridRetrievalOptions): Promise<ScoredSkill[]>;
+/**
+ * AWS Bedrock {@link EmbeddingProvider}.
+ *
+ * Defaults to Amazon Titan Text Embeddings v2 (`amazon.titan-embed-text-v2:0`),
+ * which embeds one text per `InvokeModel` call; this provider fans the batch
+ * out with bounded concurrency. Cohere embed models (`cohere.embed-*`) are
+ * also supported via their native batch API.
+ *
+ * The `@aws-sdk/client-bedrock-runtime` package is an OPTIONAL dependency,
+ * lazily imported on first use. Wrap this in a {@link CachingEmbeddingProvider}
+ * so the library is embedded once rather than per task.
+ *
+ * @packageDocumentation
+ */
+interface BedrockEmbeddingConfig { // options accepted by the BedrockEmbeddingProvider constructor; every field is optional
+    /** Bedrock embedding model id. Default: `amazon.titan-embed-text-v2:0`. */
+    modelId?: string;
+    /** AWS region. Falls back to AWS_REGION / AWS_DEFAULT_REGION. */
+    region?: string;
+    /** Output dimensions (Titan v2 supports 256/512/1024). Default: 1024. */
+    dimensions?: number;
+    /** L2-normalize output (Titan v2 option). Default: true. */
+    normalize?: boolean;
+    /** Max concurrent InvokeModel calls for per-text models (Titan). Default: 8. */
+    concurrency?: number;
+    /**
+     * Cohere `input_type` (asymmetric retrieval). Use `search_document` to embed
+     * the skill library and `search_query` for the task. Default: `search_query`.
+     * Ignored by Titan. (Symmetric use — same type for both — still works but is
+     * slightly weaker for retrieval.)
+     */
+    inputType?: 'search_query' | 'search_document' | 'classification' | 'clustering';
+    /**
+     * Low-level invoke seam: given (modelId, requestBody), return the parsed
+     * JSON response. Defaults to a lazily-imported Bedrock runtime client.
+     * Override for testing or custom auth.
+     *
+     * NOTE(review): the response is typed `Promise<any>`, so the provider's
+     * handling of the parsed JSON is unchecked by the compiler; `unknown`
+     * would force explicit validation. Left as published.
+     */
+    invoke?: (modelId: string, body: unknown) => Promise<any>;
+}
+declare class BedrockEmbeddingProvider implements EmbeddingProvider { // EmbeddingProvider configured through BedrockEmbeddingConfig
+    private modelId;
+    private region?;
+    private dimensions;
+    private normalize;
+    private concurrency;
+    private inputType;
+    private invoke;
+    private client;
+    constructor(config?: BedrockEmbeddingConfig); // config may be omitted entirely
+    embed(texts: string[]): Promise<number[][]>; // NOTE(review): presumably one vector per input text, in input order — confirm
+    private isCohere;
+    /**
+     * Titan-style: one InvokeModel per text, fanned out with bounded concurrency.
+     * NOTE(review): the bound is presumably the `concurrency` setting — confirm.
+     */
+    private embedPerText;
+    /** Cohere embed: batched in chunks of ≤96 (Bedrock's per-request cap). */
+    private embedCohereBatch;
+    private extractSingle;
+    private defaultInvoke;
+}
+/**
+ * AWS SageMaker {@link EmbeddingProvider}.
+ *
+ * Invokes a SageMaker real-time inference endpoint. Endpoint I/O contracts
+ * vary by the deployed model, so request serialization and response parsing
+ * are configurable; the defaults match HuggingFace TEI / feature-extraction
+ * images (`{ inputs: string[] }` → `number[][]`).
+ *
+ * `@aws-sdk/client-sagemaker-runtime` is an OPTIONAL dependency, lazily
+ * imported on first use. Wrap in a {@link CachingEmbeddingProvider} for reuse.
+ *
+ * @packageDocumentation
+ */
+interface SageMakerEmbeddingConfig { // options accepted by the SageMakerEmbeddingProvider constructor; only endpointName is required
+    /** Name of the deployed SageMaker endpoint (required). */
+    endpointName: string;
+    /** AWS region. Falls back to AWS_REGION / AWS_DEFAULT_REGION. */
+    region?: string;
+    /** Request content type. Default: `application/json`. */
+    contentType?: string;
+    /**
+     * Serialize the batch of texts into the endpoint's request body.
+     * Default: `JSON.stringify({ inputs: texts })` (HF TEI style).
+     * Override together with `deserialize` when the endpoint uses a
+     * different I/O contract.
+     */
+    serialize?: (texts: string[]) => string;
+    /**
+     * Parse the endpoint's raw response string into vectors. Default handles
+     * `number[][]`, `{ embeddings: number[][] }`, and `{ vectors: number[][] }`.
+     */
+    deserialize?: (raw: string) => number[][];
+    /**
+     * Low-level invoke seam: given (endpointName, body, contentType), return the
+     * raw response string. Defaults to a lazily-imported SageMaker runtime client.
+     * Override for testing or custom auth.
+     */
+    invoke?: (endpointName: string, body: string, contentType: string) => Promise<string>;
+}
+declare class SageMakerEmbeddingProvider implements EmbeddingProvider { // EmbeddingProvider configured through SageMakerEmbeddingConfig
+    private endpointName;
+    private region?;
+    private contentType;
+    private serialize;
+    private deserialize;
+    private invoke;
+    private client;
+    constructor(config: SageMakerEmbeddingConfig); // config is mandatory: endpointName has no default
+    embed(texts: string[]): Promise<number[][]>; // NOTE(review): presumably serialize → invoke → deserialize, one vector per text — confirm
+    private defaultInvoke;
+}
+/**
+ * Content-keyed caching wrapper for any {@link EmbeddingProvider}.
+ *
+ * Skill texts are stable across tasks, so without caching every
+ * `setLoadoutForTask` would re-embed the entire library. This wrapper
+ * memoizes embeddings by text content: skills are embedded once (a one-time
+ * warmup), and subsequent task scoring only sends the new query to the
+ * backend. De-duplicates within a single batch as well.
+ *
+ * @packageDocumentation
+ */
+interface CachingEmbeddingConfig { // options accepted by the CachingEmbeddingProvider constructor
+    /**
+     * Maximum number of cached vectors. When exceeded, oldest entries are
+     * evicted (insertion-order). Default: 50000 (effectively unbounded for
+     * typical libraries). Set to 0 to disable the bound.
+     *
+     * Because eviction follows insertion order, an entry's age is counted
+     * from when it was stored, not from when it was last read.
+     * NOTE(review): confirm whether a cache hit re-inserts the entry.
+     */
+    maxEntries?: number;
+}
+/**
+ * Wraps an EmbeddingProvider with an in-memory, content-keyed LRU-ish cache.
+ *
+ * NOTE(review): `CachingEmbeddingConfig.maxEntries` documents eviction as
+ * insertion-order (oldest first), which is FIFO rather than LRU. "LRU-ish"
+ * only holds if a cache hit re-inserts the entry — confirm against the
+ * implementation and align the two descriptions.
+ */
+declare class CachingEmbeddingProvider implements EmbeddingProvider {
+    private inner;
+    private cache;
+    private maxEntries;
+    constructor(inner: EmbeddingProvider, config?: CachingEmbeddingConfig); // inner is the provider being wrapped
+    embed(texts: string[]): Promise<number[][]>; // NOTE(review): presumably only uncached texts reach `inner` — confirm
+    private set;
+    /** Number of cached vectors (useful for tests/diagnostics). */
+    get size(): number;
+    /** Drop all cached vectors. */
+    clear(): void;
+}
+/**
+ * Logistic-regression utility scorer (Tier 2, T2.1 default).
+ *
+ * A small, dependency-free model fit by batch gradient descent. The feature
+ * vector deliberately mixes retrieval signals with a *learned per-skill
+ * helpfulness prior*:
+ *
+ *   [ lexAbs, denseAbs, skillPrior, lexAbs·denseAbs ]   (+ bias)
+ *
+ * `skillPrior` is the smoothed rate at which a skill actually helped across
+ * training tasks. It is the lever that down-weights generic, high-surface-area
+ * skills (e.g. `citation-management`) that retrieval scores highly on *every*
+ * task but which rarely contribute — exactly the residual gap that BM25 and
+ * dense embeddings cannot close on their own.
+ *
+ * The output sigmoid is in [0,1], so it drops straight into the existing
+ * confidence thresholds and abstain floor.
+ *
+ * @packageDocumentation
+ */
+interface LogisticUtilityConfig { // training options for LogisticUtilityScorer; every field is optional
+    /** Gradient-descent learning rate (default: 0.5). */
+    learningRate?: number;
+    /** Number of full-batch iterations (default: 400). */
+    iterations?: number;
+    /** L2 regularization strength (default: 1e-3). */
+    l2?: number;
+    /**
+     * Beta-prior pseudo-counts for the per-skill helpfulness rate
+     * (default: 1/1). This comment covers both `priorAlpha` and the
+     * `priorBeta` field that follows it.
+     * NOTE(review): "1/1" presumably means alpha = 1 and beta = 1 — confirm.
+     */
+    priorAlpha?: number;
+    priorBeta?: number;
+    /**
+     * Use the *per-skill-ID* helpfulness prior as a feature (default: true).
+     *
+     * WARNING — generalization caveat: a per-skill-ID prior memorizes base rates
+     * and does NOT transfer to held-out tasks where a normally-rare skill is the
+     * answer. On SkillsBench's held-out protocol it is catastrophic (it learns
+     * `citation-management` "rarely helps" and then kills it on the one task it
+     * is curated for). The prior is appropriate only when the *same* skills
+     * recur across *similar* tasks (e.g. a personal library of repeated work).
+     * For cross-task generalization, set this false and rely on task-conditional
+     * features — ultimately the (task, skill) embedding interaction (T2.1 v2),
+     * not an ID lookup. With it false the model reduces to a learned reweighting
+     * of the lexical/dense signals.
+     */
+    usePrior?: boolean;
+}
+interface SerializedScorer { // plain-data form exchanged by LogisticUtilityScorer.toJSON() / fromJSON()
+    weights: number[]; // NOTE(review): presumably one weight per feature-vector entry — confirm ordering
+    bias: number;
+    globalPrior: number;
+    skillPrior: Record<string, number>; // NOTE(review): presumably keyed by skill id — confirm
+    config: Required<LogisticUtilityConfig>; // every config field is populated here (no optionals)
+}
+declare class LogisticUtilityScorer implements UtilityScorer { // UtilityScorer that can be trained and round-tripped through SerializedScorer
+    private weights;
+    private bias;
+    private globalPrior;
+    private skillPrior;
+    private isTrained;
+    private config;
+    constructor(config?: LogisticUtilityConfig); // config may be omitted entirely
+    get trained(): boolean; // NOTE(review): presumably true once train() or fromJSON() has supplied weights — confirm
+    /**
+     * Smoothed per-skill helpfulness prior used as a feature.
+     * NOTE(review): the value for a skill id never seen in training is
+     * presumably the global prior — confirm.
+     */
+    priorFor(skillId: string): number;
+    private featureVector;
+    score(features: UtilityFeatures): number; // scores one candidate's features
+    train(examples: UtilityExample[]): UtilityTrainReport; // fits on labeled examples and returns a training report
+    toJSON(): SerializedScorer; // counterpart of fromJSON
+    static fromJSON(data: SerializedScorer): LogisticUtilityScorer; // rebuilds a scorer from toJSON() output
+}
+/**
+ * Harvests labeled examples for the utility scorer (Tier 2, T2.1).
+ *
+ * The training signal comes from observed loadout→outcome traces: when a skill
+ * was in the served loadout and the task succeeded, that's weak positive
+ * evidence the skill helped; on failure, weak negative. Accumulated across many
+ * tasks, this teaches the scorer which skills genuinely contribute (vs which
+ * are merely retrieved often). Also supports offline supervision from curated
+ * ground-truth sets (e.g. a benchmark's reference skills).
+ *
+ * The recorder is a plain accumulator — the orchestrator (or cognitive-core's
+ * evaluator) calls `recordOutcome` after each task and periodically retrains
+ * the scorer on `getExamples()`. This keeps authoring (the agent run) and the
+ * learning pass in separate lanes.
+ *
+ * @packageDocumentation
+ */
+/**
+ * A scored candidate plus whether it was actually served for the task.
+ *
+ * `features` holds the candidate's scoring features; `selected` is true
+ * when the candidate was part of the served loadout.
+ */
+interface OutcomeCandidate {
+    features: UtilityFeatures;
+    selected: boolean;
+}
+interface TaskOutcome { // one observed task result, as passed to FeedbackRecorder.recordOutcome()
+    taskText?: string; // optional text of the task
+    candidates: OutcomeCandidate[]; // candidates considered for the task, each flagged as selected or not
+    /** Did the task ultimately succeed (e.g. verifier passed)? */
+    success: boolean;
+    /** Weight for these examples (e.g. confidence in the outcome). Default 1. */
+    weight?: number;
+}
+declare class FeedbackRecorder { // accumulates UtilityExample records for later scorer training
+    private examples;
+    /** Append a single pre-built example. */
+    record(example: UtilityExample): void;
+    /**
+     * Label a task outcome. Each *selected* candidate becomes an example whose
+     * `helped` label is the task's success. Unselected candidates are not
+     * labeled (we have no counterfactual signal for them).
+     */
+    recordOutcome(outcome: TaskOutcome): void;
+    /**
+     * Offline supervision from a curated ground-truth set: every candidate in
+     * `curatedIds` is labeled helped, the rest not-helped. Useful for
+     * cold-starting the scorer from a benchmark or human-authored loadouts
+     * before real execution traces exist.
+     *
+     * Unlike `recordOutcome`, this labels every candidate, including the
+     * ones outside `curatedIds`.
+     */
+    recordCurated(taskText: string | undefined, candidates: UtilityFeatures[], curatedIds: string[], weight?: number): void;
+    getExamples(): UtilityExample[]; // NOTE(review): may return the internal array rather than a copy — confirm before mutating
+    get size(): number; // NOTE(review): presumably the number of accumulated examples — confirm
+    clear(): void;
+    toJSON(): UtilityExample[]; // counterpart of fromJSON
+    static fromJSON(examples: UtilityExample[]): FeedbackRecorder; // builds a recorder from previously saved examples
+}
+/**
+ * AWS Bedrock {@link RerankProvider} (Cohere Rerank v3.5 by default).
+ *
+ * Uses the Bedrock `Rerank` API (`@aws-sdk/client-bedrock-agent-runtime`, an
+ * OPTIONAL lazily-imported dependency). Documents are truncated to a char cap
+ * (Cohere truncates long inputs anyway). Pass a custom `invoke` for testing.
+ *
+ * @packageDocumentation
+ */
+interface BedrockRerankConfig { // options accepted by the BedrockRerankProvider constructor; every field is optional
+    /** Rerank model id (region-scoped ARN built from this). Default: cohere.rerank-v3-5:0. */
+    modelId?: string;
+    /** Full model ARN (overrides modelId). When set, no ARN is derived from `modelId`. */
+    modelArn?: string;
+    /** AWS region. Falls back to AWS_REGION / AWS_DEFAULT_REGION. */
+    region?: string;
+    /** Truncate each candidate's text to this many chars before sending. Default: 4000. */
+    maxDocChars?: number;
+    /** Truncate the query to this many chars. Default: 4000. */
+    maxQueryChars?: number;
+    /**
+     * Low-level seam: given (query, docTexts, topN) return [{index, relevanceScore}].
+     * Defaults to a lazily-imported Bedrock agent-runtime client.
+     *
+     * NOTE(review): `index` presumably refers to the position in `docs`
+     * — confirm.
+     */
+    invoke?: (query: string, docs: string[], topN: number) => Promise<{
+        index: number;
+        relevanceScore: number;
+    }[]>;
+}
+declare class BedrockRerankProvider implements RerankProvider { // RerankProvider configured through BedrockRerankConfig
+    private modelId;
+    private region?;
+    private modelArn?;
+    private maxDocChars;
+    private maxQueryChars;
+    private invoke;
+    private client;
+    constructor(config?: BedrockRerankConfig); // config may be omitted entirely
+    rerank(query: string, candidates: RerankCandidate[], topN?: number): Promise<RerankResult[]>; // NOTE(review): topN default and result ordering are not visible here — confirm
+    private arn;
+    private defaultInvoke;
+}
+/**
+ * Telemetry collector for progressive disclosure evaluation.
+ *
+ * Subscribes to SkillGraphServer events and accumulates a
+ * DisclosureTrace recording every expand, collapse, and browse
+ * action with token counts.
+ *
+ * `DisclosureEvent` is a single recorded action: what happened
+ * (`action`), the optional skill or category it applied to, and the
+ * token counts before and after it.
+ *
+ * NOTE(review): the unit of `timestamp` (presumably epoch milliseconds)
+ * and what exactly the token counts measure are not visible here — confirm.
+ */
+interface DisclosureEvent {
+    timestamp: number;
+    action: 'expand' | 'collapse' | 'browse_catalog' | 'search' | 'loadout_changed';
+    skillId?: string;
+    category?: string[];
+    tokensBefore: number;
+    tokensAfter: number;
+}
+interface DisclosureTrace { // full record for one session/task/strategy, as returned by TelemetryCollector.getTrace()
+    sessionId: string;
+    taskId: string;
+    strategyId: string;
+    events: DisclosureEvent[]; // the recorded actions
+    startedAt: number;
+    finishedAt?: number; // optional — NOTE(review): presumably unset until the collector is stopped; confirm
+    finalState: { // skill ids grouped by their state at the end of the trace
+        expanded: string[];
+        collapsed: string[];
+        neverTouched: string[];
+    };
+}
+declare class TelemetryCollector { // gathers DisclosureEvents from a SkillGraphServer into a DisclosureTrace
+    private server;
+    private sessionId;
+    private taskId;
+    private strategyId;
+    private events;
+    private unsubscribe;
+    private startedAt;
+    constructor(server: SkillGraphServer, sessionId: string, taskId: string, strategyId: string); // the three ids identify the resulting trace
+    start(): void; // NOTE(review): presumably subscribes to server events — confirm
+    stop(): void; // NOTE(review): presumably releases the subscription — confirm
+    getTrace(): DisclosureTrace; // the accumulated trace
+    getExpandedSkillIds(): string[];
+    getEventCount(): number;
+    private mapEventAction;
+}
+/**
+ * Compute decision quality metrics from a disclosure trace.
+ *
+ * @param trace - The recorded disclosure trace to evaluate.
+ * @param oracleSkillIds - Reference skill ids to compare the trace against.
+ * @returns The computed metrics.
+ *
+ * NOTE(review): `oracleSkillIds` is presumably the set of skills that
+ * should have been expanded for the task — confirm.
+ */
+declare function computeDecisionMetrics(trace: DisclosureTrace, oracleSkillIds: string[]): DecisionMetrics;
+interface DecisionMetrics { // result shape of computeDecisionMetrics(); NOTE(review): exact formulas are not visible here — confirm
+    expandPrecision: number;
+    expandRecall: number;
+    expandF1: number;
+    distractorAvoidance: number;
+    overExpansionRate: number;
+    underExpansionRate: number;
+    totalExpanded: number;
+    totalAvailable: number;
+    expandEvents: number;
+    collapseEvents: number;
+}
 /**
  * Built-in Loadout Profiles
  *
@@ -3877,6 +4669,301 @@ declare class IndexerService {
     close(): Promise<void>;
 }
+/**
+ * SkillNet importer — load skills from the SkillNet ecosystem into a SkillBank.
+ *
+ * SkillNet (https://github.com/zjunlp/SkillNet) is a public, hosted search index over
+ * GitHub-hosted SKILL.md folders. Its REST API returns rows whose `skill_url` points at
+ * a GitHub tree/blob path containing a `SKILL.md`. This module is a "hybrid" importer:
+ *
+ *   1. It queries the SkillNet search API (free, no key) to resolve a query → skill URLs.
+ *   2. It fetches each skill's `SKILL.md` directly from GitHub raw (no key for public repos).
+ *   3. It parses the OpenSkills frontmatter and converts to a skill-tree `Skill`.
+ *
+ * This avoids any dependency on the optional `scraper/` package and works entirely over
+ * `fetch`, which is injectable for testing.
+ */
+/**
+ * Default public SkillNet search API base.
+ *
+ * NOTE(review): this is a plain `http://` URL, so search requests and
+ * responses travel unencrypted and can be tampered with in transit.
+ * Check whether the service offers HTTPS; callers can supply their own
+ * base URL through `SkillNetClientConfig.apiBaseUrl`.
+ */
+declare const DEFAULT_SKILLNET_API = "http://api-skillnet.openkg.cn/v1";
+/**
+ * Minimal fetch surface so this module does not depend on DOM lib types.
+ * The global `fetch` (Node 18+) satisfies this shape.
+ *
+ * Only the parts declared here are part of the contract: a URL string, an
+ * optional `headers` map on the request, and `ok` / `status` /
+ * `statusText` / `text()` / `json()` on the response. A test double needs
+ * to provide nothing more.
+ */
+type FetchLike = (url: string, init?: {
+    headers?: Record<string, string>;
+}) => Promise<{
+    ok: boolean;
+    status: number;
+    statusText: string;
+    text(): Promise<string>;
+    json(): Promise<unknown>;
+}>;
+/**
+ * A single result row from the SkillNet search API.
+ * Mirrors the `data[]` entries documented in the SkillNet README.
+ *
+ * NOTE(review): the fields here are camelCase (`skillUrl`), while the API
+ * row field is referred to elsewhere in this file as `skill_url`; this is
+ * presumably a normalized form of the row rather than the raw payload —
+ * confirm where the mapping happens.
+ */
+interface SkillNetSearchResult {
+    skillName: string;
+    skillDescription?: string;
+    author?: string;
+    stars: number;
+    skillUrl: string;
+    category?: string;
+    /** Optional 5-dimension quality scores, when present on the row. */
+    evaluation?: Record<string, unknown>;
+}
+/**
+ * Options for a SkillNet search query.
+ *
+ * Several options apply to one mode only, as marked on each field:
+ * `page`, `minStars` and `sortBy` are keyword-mode options, `threshold`
+ * is a vector-mode option.
+ * NOTE(review): whether an option given for the wrong mode is ignored or
+ * rejected is not visible here — confirm.
+ */
+interface SkillNetSearchOptions {
+    /** `keyword` (fuzzy) or `vector` (semantic). Defaults to keyword. */
+    mode?: 'keyword' | 'vector';
+    /** Category filter (Development, AIGC, Research, Science, etc.). */
+    category?: string;
+    /** Results per page (max 50). */
+    limit?: number;
+    /** Page number (keyword mode only). */
+    page?: number;
+    /** Minimum star count (keyword mode only). */
+    minStars?: number;
+    /** Sort order (keyword mode only). */
+    sortBy?: 'stars' | 'recent';
+    /** Similarity threshold 0.0–1.0 (vector mode only). */
+    threshold?: number;
+}
+/**
+ * Configuration for the SkillNet client.
+ *
+ * Every field is optional; an empty or omitted config is accepted.
+ */
+interface SkillNetClientConfig {
+    /** Search API base URL. Defaults to the public SkillNet endpoint. */
+    apiBaseUrl?: string;
+    /** GitHub token for raw fetches (private repos / higher rate limits). */
+    githubToken?: string;
+    /** Optional GitHub mirror prefix for restricted networks (e.g. `https://ghfast.top/`). */
+    githubMirror?: string;
+    /** Injectable fetch implementation (defaults to global `fetch`). */
+    fetchImpl?: FetchLike;
+}
+/**
+ * Result of converting one SkillNet skill into skill-tree format.
+ *
+ * `skill` is the converted skill and `warnings` lists the warnings
+ * collected during the conversion.
+ */
+interface SkillNetConversionResult {
+    skill: Skill;
+    warnings: string[];
+    /** The raw SKILL.md URL that was fetched. */
+    rawUrl: string;
+}
+/**
+ * Result of an import-by-search or import-by-url operation.
+ *
+ * NOTE(review): `imported` and `failed` are presumably counts, with
+ * `skills` holding the imported skills and `errors` one message per
+ * failure — confirm in the implementation.
+ */
+interface SkillNetImportResult {
+    imported: number;
+    failed: number;
+    skills: Skill[];
+    errors: string[];
+}
+/**
+ * Parse a GitHub tree/blob URL into its components.
+ * Returns null if the URL is not a recognizable GitHub repo URL.
+ *
+ * @param url - The GitHub URL to parse.
+ * @returns The `owner`, `repo`, `ref` and `path` parts, or `null`.
+ *
+ * NOTE(review): how a URL without a tree/blob segment (a bare repo URL) is
+ * mapped to `ref` and `path` is not visible here — confirm.
+ */
+declare function parseGitHubUrl(url: string): {
+    owner: string;
+    repo: string;
+    ref: string;
+    path: string;
+} | null;
+/**
+ * SkillNet client: search the index, fetch SKILL.md, and import into a SkillBank.
+ *
+ * All settings come from the optional `SkillNetClientConfig` passed to the
+ * constructor and are read-only afterwards.
+ */
+declare class SkillNetClient {
+    private readonly apiBaseUrl;
+    private readonly githubToken?;
+    private readonly githubMirror?;
+    private readonly fetchImpl;
+    constructor(config?: SkillNetClientConfig);
+    /**
+     * Search the SkillNet index. Free and requires no API key.
+     */
+    search(query: string, options?: SkillNetSearchOptions): Promise<SkillNetSearchResult[]>;
+    /**
+     * Convert a GitHub skill URL into the raw URL for its SKILL.md.
+     * Applies the configured mirror prefix when set.
+     *
+     * NOTE(review): behavior for a URL that is not a recognizable GitHub
+     * URL (presumably a thrown error) is not visible here — confirm.
+     */
+    toRawSkillMdUrl(skillUrl: string): string;
+    /**
+     * Fetch the raw SKILL.md content for a skill URL.
+     * Resolves to the file content together with the raw URL it came from.
+     */
+    fetchSkillMd(skillUrl: string): Promise<{
+        content: string;
+        rawUrl: string;
+    }>;
+    /**
+     * Convert a SkillNet search result + its SKILL.md content into a skill-tree Skill.
+     */
+    convertSkillNetSkill(result: SkillNetSearchResult, content: string, rawUrl: string): SkillNetConversionResult;
+    /**
+     * Import a single skill by its SkillNet/GitHub URL into a SkillBank.
+     *
+     * NOTE(review): `meta` presumably supplies search-result metadata to
+     * use alongside the fetched SKILL.md — confirm.
+     */
+    importSkill(skillUrl: string, bank: SkillBank, meta?: Partial<SkillNetSearchResult>): Promise<SkillNetConversionResult>;
+    /**
+     * Search SkillNet and import the matching skills into a SkillBank.
+     *
+     * NOTE(review): the `& { limit?: number }` intersection repeats
+     * `SkillNetSearchOptions.limit` and adds nothing to the type.
+     */
+    importFromSearch(query: string, bank: SkillBank, options?: SkillNetSearchOptions & {
+        limit?: number;
+    }): Promise<SkillNetImportResult>;
+}
+/**
+ * Create a SkillNet client.
+ *
+ * @param config - Optional client configuration.
+ * @returns A new `SkillNetClient`.
+ */
+declare function createSkillNetClient(config?: SkillNetClientConfig): SkillNetClient;
+/**
+ * Standalone parser for OpenSkills SKILL.md files (YAML frontmatter + Markdown body).
+ *
+ * This mirrors the frontmatter handling in `storage/filesystem.ts` but is exposed as
+ * pure functions so importers (e.g. the SkillNet importer) can parse raw SKILL.md
+ * fetched from remote sources without instantiating a storage adapter.
+ *
+ * It intentionally supports only the subset of YAML used by the Agent Skills standard
+ * (scalars, `key: |` block scalars, and `- ` lists). It is not a full YAML parser.
+ */
+/**
+ * Parsed frontmatter fields plus the Markdown body.
+ *
+ * The scalar frontmatter fields are optional because a document may omit
+ * them; `tags`, `body` and `hasFrontmatter` are always present.
+ */
+interface ParsedSkillMd {
+    /** Skill name (`name:`), if present */
+    name?: string;
+    /** Short description (`description:`), supports block scalars */
+    description?: string;
+    /** Semantic version (`version:`) */
+    version?: string;
+    /** Author (`author:`) */
+    author?: string;
+    /** Lifecycle status (`status:`) */
+    status?: string;
+    /** Date string (`date:`) */
+    date?: string;
+    /** Tags (`tags:` list) */
+    tags: string[];
+    /** Markdown body after the frontmatter, trimmed */
+    body: string;
+    /** Whether a frontmatter block was actually present */
+    hasFrontmatter: boolean;
+}
+/**
+ * Split a SKILL.md document into its frontmatter block and Markdown body.
+ *
+ * @param content - Raw SKILL.md text.
+ * @returns The `frontmatter` text, the `body` text, and `hasFrontmatter`.
+ *
+ * NOTE(review): when no frontmatter is present, `frontmatter` is
+ * presumably empty and `body` the whole document — confirm.
+ */
+declare function splitFrontmatter(content: string): {
+    frontmatter: string;
+    body: string;
+    hasFrontmatter: boolean;
+};
+/**
+ * Parse a raw SKILL.md document into structured fields and a body.
+ *
+ * @param content - Raw SKILL.md text.
+ * @returns The parsed frontmatter fields and Markdown body.
+ */
+declare function parseSkillMd(content: string): ParsedSkillMd;
+/**
+ * Generic SKILL.md → Skill converter.
+ *
+ * Source-agnostic: turns any OpenSkills SKILL.md document (plus optional fallback
+ * metadata and provenance) into a skill-tree `Skill`. The SkillNet importer and the
+ * local-directory importer both delegate here so id/tag/status resolution stays
+ * consistent regardless of where the SKILL.md came from.
+ */
+/**
+ * Convert a free-form string into a kebab-case skill id.
+ *
+ * @param input - Free-form text, such as a skill name.
+ * @returns The kebab-case id.
+ *
+ * NOTE(review): the result for input with no usable characters
+ * (presumably an empty string) is not visible here — confirm.
+ */
+declare function slugify(input: string): string;
+/**
+ * Options controlling how a SKILL.md is converted into a Skill.
+ * Frontmatter values always take precedence; the `default*` fields are fallbacks.
+ *
+ * That precedence rule is about the `default*` fields. `id` works the
+ * other way round: when given, it is used ahead of the frontmatter name.
+ */
+interface SkillFromMdOptions {
+    /** Explicit skill id (slugified). Falls back to frontmatter name, then defaultName. */
+    id?: string;
+    /** Fallback name when frontmatter has none. */
+    defaultName?: string;
+    /** Fallback description when frontmatter has none. */
+    defaultDescription?: string;
+    /** Fallback author when frontmatter has none. */
+    defaultAuthor?: string;
+    /** Fallback version when frontmatter has none (default '1.0.0'). */
+    defaultVersion?: string;
+    /** Status to use when frontmatter has no valid status (default 'active'). */
+    defaultStatus?: SkillStatus;
+    /** Extra tags to merge with frontmatter tags. */
+    extraTags?: string[];
+    /** Taxonomy path to attach. */
+    taxonomyPath?: string[];
+    /** Source provenance. */
+    source?: SkillSource$1;
+    /** External source provenance. */
+    externalSource?: ExternalSource;
+    /** Timestamp for createdAt/updatedAt (default now). */
+    now?: Date;
+}
+/**
+ * Result of converting a SKILL.md document.
+ *
+ * `skill` is the converted skill, `warnings` the warnings collected during
+ * conversion, and `parsed` the intermediate parse of the document.
+ */
+interface SkillFromMdResult {
+    skill: Skill;
+    warnings: string[];
+    parsed: ParsedSkillMd;
+}
+/**
+ * Convert a raw SKILL.md document into a skill-tree Skill.
+ *
+ * @param content - Raw SKILL.md text.
+ * @param options - Optional id override, fallbacks and provenance.
+ * @returns The converted skill, its warnings, and the parsed document.
+ */
+declare function skillFromSkillMd(content: string, options?: SkillFromMdOptions): SkillFromMdResult;
+/**
+ * Local SKILL.md importers.
+ *
+ * Bulk-import skills from the filesystem into a SkillBank — a single SKILL.md file or a
+ * directory tree of OpenSkills-style `<skill-id>/SKILL.md` folders (e.g. a downloaded
+ * skill pack, `.claude/skills/`, or any `skills/` directory). No network involved.
+ */
+/**
+ * Result of a local import operation.
+ *
+ * NOTE(review): `imported` and `failed` are presumably counts, with
+ * `skills` holding the imported skills and `errors` the recorded
+ * messages — confirm in the implementation.
+ */
+interface LocalImportResult {
+    imported: number;
+    failed: number;
+    skills: Skill[];
+    errors: string[];
+}
+/**
+ * A SKILL.md file found on disk.
+ *
+ * This is the element type of the list returned by `findSkillMdFiles`.
+ */
+interface FoundSkillMd {
+    /** Absolute path to the SKILL.md file */
+    filePath: string;
+    /** Directory containing the SKILL.md file */
+    directory: string;
+    /** Id derived from the containing directory name */
+    id: string;
+}
+/**
+ * Recursively find SKILL.md files under a root directory.
+ * Skips hidden directories (except the root itself) and `node_modules`.
+ *
+ * @param root - Directory to search from.
+ * @param maxDepth - Optional limit on how deep the search descends.
+ * @returns The SKILL.md files found.
+ *
+ * NOTE(review): the default for `maxDepth` and the order of the results
+ * are not visible in this declaration — confirm.
+ */
+declare function findSkillMdFiles(root: string, maxDepth?: number): Promise<FoundSkillMd[]>;
+/**
+ * Import a single SKILL.md file into a SkillBank.
+ * The skill id defaults to the containing directory name (OpenSkills convention),
+ * falling back to the frontmatter name.
+ *
+ * @param filePath - Path to the SKILL.md file.
+ * @param bank - The SkillBank to import into.
+ * @param options - Optional conversion options.
+ * @returns The imported skill and any conversion warnings.
+ *
+ * NOTE(review): an explicit `options.id` presumably overrides the
+ * directory-derived id — confirm.
+ */
+declare function importSkillMdFile(filePath: string, bank: SkillBank, options?: SkillFromMdOptions): Promise<{
+    skill: Skill;
+    warnings: string[];
+}>;
+/**
+ * Import all SKILL.md skills found under a directory into a SkillBank.
+ * Duplicate ids are skipped (first occurrence wins) and recorded in `errors`.
+ *
+ * @param dirPath - Directory to scan for SKILL.md files.
+ * @param bank - The SkillBank to import into.
+ * @param options - Optional conversion options.
+ * @returns Counts, imported skills and recorded errors.
+ *
+ * NOTE(review): whether a skipped duplicate also counts towards `failed`
+ * is not visible here — confirm.
+ */
+declare function importLocalSkillDir(dirPath: string, bank: SkillBank, options?: SkillFromMdOptions): Promise<LocalImportResult>;
 /**
  * skill-tree - A library for managing agent skill versions and evolution
  *
@@ -3888,6 +4975,6 @@ declare class IndexerService {
  *
  * @packageDocumentation
  */
-declare const VERSION = "0.1.0";
+declare const VERSION = "0.2.0"; // NOTE(review): this diff is for package release 0.2.1 (and the previous value "0.1.0" shipped in 0.1.7) — confirm VERSION is intentionally not tracking patch releases
-export { type AgentConfig, AgentsGenerator, type AgentsGeneratorConfig, AgentsParser, AgentsSync, type BaseHookContext, type BumpType, CachedStorageAdapter, type CachedStorageConfig, CatalogRenderer, type CatalogRendererConfig, type ConflictConfig, type ConflictResolution$1 as ConflictResolution, ConflictStore, type ConflictStrategy, DEFAULT_AGENTS_CONFIG, type DiscoveredSkill, type EvictionStrategy, type ExpandTrigger, type ExpandTriggerConfig, type FederatedRemoteConfig, type FederationEvent, type FederationEventHandler, FederationManager, type FederationManagerOptions, type FetchResult, type ForkOptions, GitSyncAdapter, type GitSyncAdapterOptions, type SyncResult$1 as GitSyncResult, type GraphServerConfig, type HookContext, type HookEvent, type HookExecutionResult, type HookHandler, type HookPriority, HookRegistry, type HookResult, type ImportMode, type ImportOptions, type ImportResult, type IndexResult, IndexerService, type IndexerServiceConfig, type SkillSource as IndexerSkillSource, type IndexerStats, LineageTracker, type LineageTree, LoadoutCompiler, type LoadoutCompilerConfig, type LoadoutCriteria, type LoadoutSource, type LoadoutState, type LoadoutView, type MaterializationConfig, Materializer, MemoryStorageAdapter, type MergeConfig, type MergeConflict, type MergePreview, type MergeResult, type MergeStrategy, type MergeSuggestion, type MigrationOptions, type MigrationProgressItem, type MigrationResult, type NewVersionOptions, type ParsedAgentSkill, type ParsedAgentsFile, type ParsedVersion, type ProjectContext, ProjectDetector, type PullOptions, type PullUpstreamOptions, type PullUpstreamResult, type PushOptions, type RegisterHookOptions, type RegisteredHook, type RelationshipResult, type RemoteConfig, RemoteManager, type RemoteState, RemoteStore, type RollbackOptions, type ScrapeResult, type ServingEvent, type ServingEventHandler, type ShareOptions, type ShareResult, type Skill, type SkillAccessControl, SkillBank, type SkillBankConfig, type 
SkillBankStats, type SkillChange, type SkillConflict, type SkillCrudHookContext, type SkillDiffChanges, type SkillFilter, type SkillFork, type SkillFormat, SkillGraphServer, type SkillLineage, type SkillMergeResult, SkillMerger, type SkillMetrics, type SkillNamespace, type SkillScope, type SkillSelector, type SkillServingMetadata, type SkillSource$1 as SkillSource, type SkillState, type SkillStatus, type SkillSummary, type SkillSyncState, type SkillTreeEvent, type SkillTreeEventHandler, type SkillUpstream, type SkillVersion, type SkillVisibility, type StorageAdapter, type StorageConfig, type StorageHookContext, type SyncBehaviorConfig, type SyncConfig, type ConflictResolution as SyncConflictResolution, type SyncError, SyncManager, type SyncManagerOptions, type SyncOptions, type SyncResult, type SyncState, type SyncStatus, type TaxonomyNode, type UpstreamUpdate, VERSION, type VersionChanges, type VersionDiff, ViewRenderer, type ViewRendererConfig, builtInProfiles, bumpVersion, codeReviewProfile, combineHandlers, compareVersions, conditionalHook, createAgentsGenerator, createAgentsParser, createAgentsSync, createBackupHook, createConflictStore, createDefaultSyncConfig, createFederationManager, createGitSyncAdapter, createLoggingHook, createSaveValidationHook, createSkillBank, createSkillMerger, createSyncManager, debuggingProfile, devopsProfile, discoverSkills, documentationProfile, formatVersion, generateAgentsMd, getBuiltInProfile, getLatestVersion, hasSkilltreeDir, hookRegistry, implementationProfile, importFromAgentsMd, inferBumpType, isValidVersion, listBuiltInProfiles, migrateStorage, parseVersion, refactoringProfile, satisfiesRange, securityProfile, sortVersions, testingProfile, writeAgentsMd };
+export { type AgentConfig, AgentsGenerator, type AgentsGeneratorConfig, AgentsParser, AgentsSync, type BaseHookContext, type BedrockEmbeddingConfig, BedrockEmbeddingProvider, type BedrockRerankConfig, BedrockRerankProvider, type BumpType, CachedStorageAdapter, type CachedStorageConfig, type CachingEmbeddingConfig, CachingEmbeddingProvider, CatalogRenderer, type CatalogRendererConfig, type ConfidenceThresholds, type ConflictConfig, type ConflictResolution$1 as ConflictResolution, ConflictStore, type ConflictStrategy, DEFAULT_AGENTS_CONFIG, DEFAULT_FIELD_WEIGHTS, DEFAULT_SKILLNET_API, type DecisionMetrics, type DisclosureEvent, type DisclosureTrace, type DiscoveredSkill, type EmbeddingProvider, type EvictionStrategy, type ExpandTrigger, type ExpandTriggerConfig, type FederatedRemoteConfig, type FederationEvent, type FederationEventHandler, FederationManager, type FederationManagerOptions, FeedbackRecorder, type FetchLike, type FetchResult, type FieldWeights, type ForkOptions, type FoundSkillMd, type FusionStrategy, GitSyncAdapter, type GitSyncAdapterOptions, type SyncResult$1 as GitSyncResult, type GraphServerConfig, type HookContext, type HookEvent, type HookExecutionResult, type HookHandler, type HookPriority, HookRegistry, type HookResult, type HybridRetrievalOptions, type ImportMode, type ImportOptions, type ImportResult, type IndexResult, IndexerService, type IndexerServiceConfig, type SkillSource as IndexerSkillSource, type IndexerStats, LineageTracker, type LineageTree, LoadoutCompiler, type LoadoutCompilerConfig, type LoadoutCriteria, type LoadoutSource, type LoadoutState, type LoadoutView, type LocalImportResult, type LogisticUtilityConfig, LogisticUtilityScorer, type MaterializationConfig, Materializer, MemoryStorageAdapter, type MergeConfig, type MergeConflict, type MergePreview, type MergeResult, type MergeStrategy, type MergeSuggestion, type MigrationOptions, type MigrationProgressItem, type MigrationResult, type NewVersionOptions, type 
OutcomeCandidate, type ParsedAgentSkill, type ParsedAgentsFile, type ParsedSkillMd, type ParsedVersion, type ProjectContext, ProjectDetector, type PullOptions, type PullUpstreamOptions, type PullUpstreamResult, type PushOptions, type RegisterHookOptions, type RegisteredHook, type RelationshipResult, type RemoteConfig, RemoteManager, type RemoteState, RemoteStore, type RerankCandidate, type RerankProvider, type RerankResult, type RollbackOptions, type SageMakerEmbeddingConfig, SageMakerEmbeddingProvider, type ScoredSkill, type ScrapeResult, type ServingEvent, type ServingEventHandler, type ShareOptions, type ShareResult, type Skill, type SkillAccessControl, SkillBank, type SkillBankConfig, type SkillBankStats, type SkillChange, type SkillConflict, type SkillCrudHookContext, type SkillDiffChanges, type SkillFilter, type SkillFork, type SkillFormat, type SkillFromMdOptions, type SkillFromMdResult, SkillGraphServer, type SkillLineage, type SkillMergeResult, SkillMerger, type SkillNamespace, SkillNetClient, type SkillNetClientConfig, type SkillNetConversionResult, type SkillNetImportResult, type SkillNetSearchOptions, type SkillNetSearchResult, type SkillScope, type SkillSelector, type SkillServingMetadata, type SkillSource$1 as SkillSource, type SkillState, type SkillStatus, type SkillSummary, type SkillSyncState, type SkillTreeEvent, type SkillTreeEventHandler, type SkillUpstream, type SkillVersion, type SkillVisibility, type StorageAdapter, type StorageConfig, type StorageHookContext, type SyncBehaviorConfig, type SyncConfig, type ConflictResolution as SyncConflictResolution, type SyncError, SyncManager, type SyncManagerOptions, type SyncOptions, type SyncResult, type SyncState, type SyncStatus, type TaskOutcome, type TaxonomyNode, TelemetryCollector, type UpstreamUpdate, type UtilityExample, type UtilityFeatures, type UtilityScorer, type UtilityTrainReport, VERSION, type VersionChanges, type VersionDiff, ViewRenderer, type ViewRendererConfig, bm25Scores, 
builtInProfiles, bumpVersion, codeReviewProfile, combineHandlers, compareVersions, computeDecisionMetrics, conditionalHook, cosineSimilarity, createAgentsGenerator, createAgentsParser, createAgentsSync, createBackupHook, createConflictStore, createDefaultSyncConfig, createFederationManager, createGitSyncAdapter, createLoggingHook, createSaveValidationHook, createSkillBank, createSkillMerger, createSkillNetClient, createSyncManager, debuggingProfile, devopsProfile, discoverSkills, documentationProfile, findSkillMdFiles, formatVersion, generateAgentsMd, getBuiltInProfile, getLatestVersion, hasSkilltreeDir, hookRegistry, implementationProfile, importFromAgentsMd, importLocalSkillDir, importSkillMdFile, inferBumpType, isValidVersion, listBuiltInProfiles, migrateStorage, parseGitHubUrl, parseSkillMd, parseVersion, reciprocalRankFusion, refactoringProfile, satisfiesRange, scoreSkillRelevance, scoreSkillsHybrid, securityProfile, skillFromSkillMd, slugify, sortVersions, splitFrontmatter, termSimilarity, testingProfile, tokenize, tokenizeList, writeAgentsMd };