npm - @skill-tools/router - Versions diffs - 0.2.0 - Mend

@skill-tools/router 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/index.d.ts ADDED Viewed

@@ -0,0 +1,426 @@
+/**
+ * Okapi BM25 — Zero-dependency, optimized text search index.
+ *
+ * Builds an inverted index from document text and scores queries
+ * using the BM25 ranking function. Designed for fast skill routing
+ * with catalogs up to ~10,000 entries.
+ *
+ * Performance:
+ * - Index build: O(n * avg_doc_len)
+ * - Query: O(q * avg_posting_len) — only visits docs containing query terms
+ * - Memory: O(vocabulary_size * avg_posting_len)
+ *
+ * @packageDocumentation
+ */
+/**
+ * A posting list entry: document index + term frequency.
+ *
+ * - `docIdx` — numeric index identifying the document (presumably a position
+ *   in the index's internal document list — confirm against the implementation).
+ * - `tf` — term frequency recorded for that document.
+ *
+ * Both fields are read-only.
+ */
+interface Posting {
+    readonly docIdx: number;
+    readonly tf: number;
+}
+/**
+ * Serialized snapshot of BM25Index state.
+ *
+ * Shape, as declared below:
+ * - `version` — the literal `2`; no other version value is admitted by this type.
+ * - `documents` — one record per document: its `id`, a numeric `length`
+ *   (presumably the document length used for BM25 length normalization —
+ *   confirm), and its `metadata`.
+ * - `invertedIndex` — `[string, Posting[]]` pairs (presumably term → posting
+ *   list — confirm).
+ * - `idf` — `[string, number]` pairs (presumably term → IDF value — confirm).
+ * - `avgdl` — numeric (presumably the average document length — confirm).
+ * - `k1`, `b` — numeric; they share names with the BM25Options tuning
+ *   parameters.
+ */
+interface BM25Snapshot {
+    readonly version: 2;
+    readonly documents: ReadonlyArray<{
+        readonly id: string;
+        readonly length: number;
+        readonly metadata: Record<string, unknown>;
+    }>;
+    readonly invertedIndex: ReadonlyArray<[string, Posting[]]>;
+    readonly idf: ReadonlyArray<[string, number]>;
+    readonly avgdl: number;
+    readonly k1: number;
+    readonly b: number;
+}
+/**
+ * Options for creating a BM25Index.
+ *
+ * Both fields are optional; the default noted on each field is the documented
+ * value used when that field is omitted.
+ */
+interface BM25Options {
+    /** Term frequency saturation parameter (default: 1.2) */
+    readonly k1?: number;
+    /** Document length normalization parameter (default: 0.75) */
+    readonly b?: number;
+}
+/**
+ * BM25Index — Fast, zero-dependency full-text search using Okapi BM25.
+ *
+ * Typical use: construct, `add` a batch of documents, then `search`.
+ * State can be exported with `serialize` and restored with `deserialize`.
+ *
+ * @example
+ * ```ts
+ * const idx = new BM25Index();
+ * idx.add([
+ *   { id: 'deploy', text: 'Deploy apps to Vercel production', metadata: {} },
+ *   { id: 'test', text: 'Run unit tests with coverage', metadata: {} },
+ * ]);
+ * const results = idx.search('deploy production', 5);
+ * ```
+ */
+declare class BM25Index {
+    private readonly k1;
+    private readonly b;
+    private documents;
+    private invertedIndex;
+    private idfCache;
+    private avgdl;
+    private totalDocLength;
+    constructor(options?: BM25Options);
+    /**
+     * Add documents to the index.
+     * Batch operation — IDF is recomputed once after all documents are added.
+     *
+     * @param entries - Documents to index; each supplies an `id`, the `text`
+     *   to index, and a `metadata` record (required, may be `{}` as in the
+     *   class example).
+     */
+    add(entries: ReadonlyArray<{
+        readonly id: string;
+        readonly text: string;
+        readonly metadata: Record<string, unknown>;
+    }>): void;
+    /**
+     * Remove documents by ID.
+     * Rebuilds internal index mappings after removal.
+     *
+     * @param ids - IDs of the documents to remove. NOTE(review): handling of
+     *   IDs that are not in the index is not visible from this declaration.
+     */
+    remove(ids: readonly string[]): void;
+    /**
+     * Search the index with a query string.
+     *
+     * Returns results sorted by BM25 score (highest first).
+     * Scores are normalized to [0, 1] — the top result gets 1.0.
+     *
+     * Only documents containing at least one query term are scored,
+     * making queries fast even on large indexes.
+     *
+     * @param query - Free-text query string.
+     * @param topK - Result count limit (presumably the maximum number of
+     *   results returned — confirm against the implementation).
+     * @param threshold - Optional numeric cutoff. NOTE(review): presumably a
+     *   minimum normalized score; neither its default nor its comparison
+     *   semantics are visible here — confirm.
+     * @returns Matches as `{ id, score, metadata }` objects, best score first.
+     */
+    search(query: string, topK: number, threshold?: number): Array<{
+        readonly id: string;
+        readonly score: number;
+        readonly metadata: Record<string, unknown>;
+    }>;
+    /** Number of indexed documents */
+    size(): number;
+    /**
+     * Serialize to a JSON-compatible snapshot.
+     *
+     * @returns A BM25Snapshot describing the current index state.
+     */
+    serialize(): BM25Snapshot;
+    /**
+     * Restore from a serialized snapshot.
+     *
+     * @param data - Typed `unknown`; presumably a value previously produced
+     *   by `serialize()`. NOTE(review): whether and how the input is
+     *   validated is not visible from this declaration.
+     */
+    deserialize(data: unknown): void;
+    /** Recompute IDF values for all terms in the inverted index */
+    private recomputeIDF;
+}
+/**
+ * Context extractor for contextual retrieval.
+ *
+ * Extracts supplementary terms from a skill's body and sections
+ * to enrich the description before BM25 indexing. This is a
+ * deterministic, zero-dependency alternative to LLM-generated
+ * chunk context (see: Anthropic's contextual retrieval paper).
+ *
+ * @packageDocumentation
+ */
+/**
+ * Minimal skill shape required for context extraction.
+ *
+ * - `name`, `description` — required strings.
+ * - `body` — optional string.
+ * - `sections` — optional list of `{ heading, depth, content }` records
+ *   (`depth` is numeric; presumably the heading level — confirm).
+ *
+ * The optional fields have the same declared shape as `body` / `sections`
+ * on SkillEntry.
+ */
+interface ContextInput {
+    readonly name: string;
+    readonly description: string;
+    readonly body?: string;
+    readonly sections?: ReadonlyArray<{
+        readonly heading: string;
+        readonly depth: number;
+        readonly content: string;
+    }>;
+}
+/**
+ * Extract supplementary context from a skill's body and structure.
+ *
+ * Returns a space-separated string of unique terms derived from:
+ * 1. Skill name parts (split on `-` and `_`)
+ * 2. Section headings
+ * 3. Inline code references (backtick-wrapped)
+ * 4. Key terms from body text
+ *
+ * Terms already present in the description are omitted.
+ * Result is truncated to ~80 tokens.
+ *
+ * @param skill - The skill to extract context from; only `name` and
+ *   `description` are required.
+ * @returns The extracted terms joined by spaces, or an empty string if no
+ *   useful context can be extracted.
+ */
+declare function extractContext(skill: ContextInput): string;
+/**
+ * Interface for embedding providers.
+ * Implementations convert text into dense vector representations
+ * for semantic similarity comparison.
+ *
+ * LocalEmbeddingProvider in this file implements this interface.
+ */
+interface EmbeddingProvider {
+    /** Human-readable name of the provider */
+    readonly name: string;
+    /** Dimensionality of the output vectors */
+    readonly dimensions: number;
+    /**
+     * Generate embeddings for a batch of texts.
+     * Asynchronous: the vectors are delivered through the returned Promise.
+     * @param texts - Array of text strings to embed
+     * @returns Array of embedding vectors (same order as input)
+     */
+    embed(texts: string[]): Promise<number[][]>;
+}
+/**
+ * Configuration for embedding providers.
+ *
+ * A union of four alternatives:
+ * - `'local'` — the bare string literal, no further settings.
+ * - `{ provider: 'openai' }` — with optional `model` and `apiKey`.
+ * - `{ provider: 'ollama' }` — with optional `model` and `baseUrl`.
+ * - `{ provider: 'custom' }` — requires an `embed` function with the same
+ *   signature as EmbeddingProvider.embed, plus the numeric `dimensions`.
+ *
+ * The object variants are discriminated by their `provider` field.
+ * NOTE(review): the values used when the optional fields are omitted are
+ * not visible from this declaration.
+ */
+type EmbeddingConfig = 'local' | {
+    provider: 'openai';
+    model?: string;
+    apiKey?: string;
+} | {
+    provider: 'ollama';
+    model?: string;
+    baseUrl?: string;
+} | {
+    provider: 'custom';
+    embed: (texts: string[]) => Promise<number[][]>;
+    dimensions: number;
+};
+/**
+ * Local TF-IDF based embedding provider.
+ *
+ * Uses a deterministic hash-based approach to create sparse-then-dense
+ * embeddings from text. No external API calls or model downloads needed.
+ *
+ * Quality is lower than neural embedding models, but sufficient for
+ * keyword-heavy skill descriptions where exact word matching matters.
+ * Ideal for catalogs under 500 skills.
+ *
+ * The provider reports the fixed name `"local-tfidf"`. The constructor takes
+ * an optional `dimensions` argument (NOTE(review): its default is not visible
+ * from this declaration). `embed` has the signature required by
+ * EmbeddingProvider.
+ */
+declare class LocalEmbeddingProvider implements EmbeddingProvider {
+    readonly name = "local-tfidf";
+    readonly dimensions: number;
+    private vocabulary;
+    private idfValues;
+    private isBuilt;
+    constructor(dimensions?: number);
+    /**
+     * Build the vocabulary and IDF values from a corpus.
+     * Call this once after indexing all skill descriptions.
+     *
+     * @param texts - The corpus texts to build the vocabulary from.
+     */
+    buildVocabulary(texts: string[]): void;
+    embed(texts: string[]): Promise<number[][]>;
+    private embedSingle;
+}
+/**
+ * A skill entry prepared for indexing.
+ *
+ * Only `name` and `description` are required; every other field is optional.
+ */
+interface SkillEntry {
+    /** Unique identifier (typically the skill name) */
+    readonly name: string;
+    /** The description text to embed */
+    readonly description: string;
+    /** Path to the SKILL.md file */
+    readonly path?: string;
+    /** Additional metadata to store alongside the embedding */
+    readonly metadata?: Record<string, unknown>;
+    /** Raw markdown body (used for contextual retrieval) */
+    readonly body?: string;
+    /** Parsed sections from the SKILL.md (used for contextual retrieval) */
+    readonly sections?: ReadonlyArray<{
+        readonly heading: string;
+        readonly depth: number;
+        readonly content: string;
+    }>;
+}
+/**
+ * Result of selecting a skill for a query.
+ * SkillRouter.select resolves to an array of these.
+ */
+interface SelectionResult {
+    /** Skill name/ID */
+    readonly skill: string;
+    /** Similarity score (0-1) */
+    readonly score: number;
+    /** Metadata from the indexed skill */
+    readonly metadata: Record<string, unknown>;
+}
+/**
+ * Options for skill selection queries.
+ * Every field is optional.
+ */
+interface SelectOptions {
+    /** Number of results to return (default: 5) */
+    readonly topK?: number;
+    /** Minimum similarity threshold (default: 0.0) */
+    readonly threshold?: number;
+    /**
+     * Skill names to boost in ranking.
+     * NOTE(review): the size and form of the boost are not visible here.
+     */
+    readonly boost?: string[];
+    /**
+     * Skill name patterns to exclude.
+     * NOTE(review): the pattern syntax (exact name, glob, regex, ...) is not
+     * visible from this declaration — confirm against the implementation.
+     */
+    readonly exclude?: string[];
+}
+/**
+ * Options for the SkillRouter constructor.
+ * Every field is optional.
+ */
+interface SkillRouterOptions {
+    /**
+     * Embedding provider configuration. Defaults to BM25 ('local').
+     * NOTE(review): this file also declares LocalEmbeddingProvider (named
+     * "local-tfidf"); confirm which engine the `'local'` value selects.
+     */
+    readonly embedding?: EmbeddingConfig;
+    /** BM25 tuning parameters (only used with the default BM25 engine) */
+    readonly bm25?: BM25Options;
+    /**
+     * Enable contextual retrieval. When true (default), skills with
+     * body or sections will have supplementary context extracted and
+     * prepended to their description before indexing.
+     * Only affects indexing — result descriptions stay unchanged.
+     */
+    readonly context?: boolean;
+}
+/**
+ * SkillRouter — Skill selection middleware using BM25 full-text search.
+ *
+ * Indexes skill descriptions and enables fast, ranked search to find
+ * the most relevant skills for a given query. Uses Okapi BM25 by default
+ * with zero external dependencies.
+ *
+ * For neural/semantic embeddings, pass a custom embedding provider
+ * via the `embedding` option.
+ *
+ * @example
+ * ```ts
+ * const router = new SkillRouter();
+ * await router.indexSkills([
+ *   { name: 'deploy-vercel', description: 'Deploy apps to Vercel...' },
+ *   { name: 'run-tests', description: 'Execute test suites...' },
+ * ]);
+ *
+ * const results = await router.select('deploy my app');
+ * // => [{ skill: 'deploy-vercel', score: 0.89, ... }]
+ * ```
+ *
+ * NOTE(review): the `0.89` in the example appears illustrative. The
+ * BM25Index.search documentation states that the top BM25 result is
+ * normalized to 1.0 — confirm which applies to `select()` on the default
+ * engine.
+ */
+declare class SkillRouter {
+    /** BM25 index — used when no external embedding provider is configured */
+    private readonly bm25;
+    /** Embedding provider — used with custom/openai/ollama providers */
+    private readonly embedding;
+    /** Vector store — used alongside embedding provider */
+    private readonly store;
+    /** Whether the router uses the BM25 engine (true) or embedding+store (false) */
+    private readonly usesBM25;
+    /** Whether contextual retrieval is enabled */
+    private readonly contextEnabled;
+    private skillNames;
+    constructor(options?: SkillRouterOptions);
+    /**
+     * Index a list of skill entries.
+     * With BM25 (default): indexes description text directly.
+     * With embeddings: embeds descriptions and stores vectors.
+     *
+     * @param skills - The skill entries to index.
+     * @returns A Promise that resolves with no value; presumably settles
+     *   once indexing has finished — confirm.
+     */
+    indexSkills(skills: SkillEntry[]): Promise<void>;
+    /**
+     * Index all SKILL.md files in a directory.
+     * Parses each file and indexes its description.
+     *
+     * @param dirPath - Path of the directory to scan.
+     * @returns A Promise of a number (presumably the count of skills
+     *   indexed — confirm against the implementation).
+     */
+    indexDirectory(dirPath: string): Promise<number>;
+    /**
+     * Select the most relevant skills for a query.
+     *
+     * @param query - The query text to match skills against.
+     * @param options - Optional result count, threshold, boost and exclude
+     *   settings; see SelectOptions.
+     * @returns A Promise of the matching SelectionResult entries.
+     */
+    select(query: string, options?: SelectOptions): Promise<SelectionResult[]>;
+    /**
+     * Detect skills with overlapping descriptions.
+     *
+     * @param threshold - Optional numeric cutoff. NOTE(review): presumably
+     *   the minimum similarity at which skills are reported as conflicting;
+     *   its default is not visible here — confirm.
+     * @returns A Promise of the detected ConflictGroup entries.
+     */
+    detectConflicts(threshold?: number): Promise<ConflictGroup[]>;
+    /**
+     * Build the text to index for a skill entry.
+     * When contextual retrieval is enabled and the skill has body/sections,
+     * prepends extracted context to the description.
+     */
+    private enrichText;
+    /**
+     * Get the number of indexed skills.
+     * Exposed as a read-only accessor (`router.count`), not a method call.
+     */
+    get count(): number;
+    /**
+     * Save the index to a JSON-serializable object.
+     *
+     * @returns A SkillRouterSnapshot describing the current router state.
+     */
+    save(): SkillRouterSnapshot;
+    /**
+     * Load a previously saved index.
+     * Validates that the snapshot format matches the current engine.
+     *
+     * @param snapshot - A snapshot previously produced by `save()`.
+     *   NOTE(review): how a validation failure is reported (e.g. a thrown
+     *   error) is not visible from this declaration.
+     */
+    load(snapshot: SkillRouterSnapshot): void;
+    /**
+     * Create a SkillRouter from a saved snapshot.
+     *
+     * @param snapshot - The saved state to restore.
+     * @param options - Optional constructor options for the new router.
+     * @returns A new SkillRouter instance.
+     */
+    static fromSnapshot(snapshot: SkillRouterSnapshot, options?: SkillRouterOptions): SkillRouter;
+}
+/**
+ * A group of conflicting (highly similar) skills.
+ * SkillRouter.detectConflicts resolves to an array of these.
+ *
+ * - `skills` — string identifiers of the skills in the group (presumably
+ *   skill names — confirm).
+ * - `similarity` — numeric similarity reported for the group
+ *   (NOTE(review): range and aggregation across the group are not visible
+ *   here).
+ * - `suggestion` — a string (presumably human-readable advice for resolving
+ *   the overlap — confirm).
+ */
+interface ConflictGroup {
+    readonly skills: string[];
+    readonly similarity: number;
+    readonly suggestion: string;
+}
+/**
+ * Serialized snapshot of a SkillRouter state.
+ * Produced by SkillRouter.save and accepted by SkillRouter.load /
+ * SkillRouter.fromSnapshot.
+ *
+ * - `version` — numeric snapshot format version.
+ * - `embeddingProvider` — string naming the provider (presumably the
+ *   provider's `name` — confirm).
+ * - `dimensions` — numeric (presumably the embedding vector size — confirm).
+ * - `store` — typed `unknown`; an opaque payload (presumably the serialized
+ *   index or vector store — confirm).
+ * - `skillNames` — names of the skills recorded in the snapshot.
+ */
+interface SkillRouterSnapshot {
+    readonly version: number;
+    readonly embeddingProvider: string;
+    readonly dimensions: number;
+    readonly store: unknown;
+    readonly skillNames: string[];
+}
+/**
+ * A single entry in the vector store.
+ * This is the element type accepted by VectorStore.add.
+ */
+interface VectorEntry {
+    /** Unique identifier (skill name or path) */
+    readonly id: string;
+    /** The embedding vector */
+    readonly vector: number[];
+    /** Metadata associated with this entry */
+    readonly metadata: Record<string, unknown>;
+}
+/**
+ * Result of a similarity search.
+ * This is the element type that VectorStore.search resolves to.
+ */
+interface SearchResult {
+    /** Identifier of the matched entry */
+    readonly id: string;
+    /** Cosine similarity score (0-1, higher is more similar) */
+    readonly score: number;
+    /** Metadata from the matched entry */
+    readonly metadata: Record<string, unknown>;
+}
+/**
+ * Interface for vector storage backends.
+ *
+ * `add`, `search` and `remove` are asynchronous (they return Promises);
+ * `size`, `serialize` and `deserialize` are synchronous.
+ * MemoryVectorStore in this file implements this interface.
+ */
+interface VectorStore {
+    /**
+     * Add entries to the store.
+     *
+     * @param entries - The entries to add.
+     */
+    add(entries: VectorEntry[]): Promise<void>;
+    /**
+     * Search for the top-K most similar entries to a query vector.
+     *
+     * @param queryVector - The vector to compare stored entries against.
+     * @param topK - The K in "top-K": how many of the most similar entries
+     *   to return.
+     * @param threshold - Optional numeric cutoff. NOTE(review): presumably a
+     *   minimum similarity score; its default is not visible here — confirm.
+     * @returns A Promise of the matching SearchResult entries.
+     */
+    search(queryVector: number[], topK: number, threshold?: number): Promise<SearchResult[]>;
+    /**
+     * Remove entries by ID.
+     *
+     * @param ids - IDs of the entries to remove.
+     */
+    remove(ids: string[]): Promise<void>;
+    /**
+     * Get the number of entries in the store.
+     */
+    size(): number;
+    /**
+     * Serialize the store to a JSON-compatible object.
+     * The result is typed `unknown`; its shape is backend-specific.
+     */
+    serialize(): unknown;
+    /**
+     * Load from a serialized object.
+     *
+     * @param data - Typed `unknown`; presumably a value previously produced
+     *   by `serialize()` on the same backend — confirm.
+     */
+    deserialize(data: unknown): void;
+}
+/**
+ * In-memory vector store using brute-force cosine similarity search.
+ *
+ * Suitable for catalogs of up to ~1,000 skills. For larger catalogs,
+ * use the SQLite backend.
+ *
+ * At 1,000 entries with 256-dimensional vectors, search takes <5ms.
+ *
+ * Implements VectorStore; each public method has the same signature as the
+ * corresponding member of that interface.
+ *
+ * NOTE(review): no SQLite backend is declared in this file — confirm where
+ * (or whether) it is provided before relying on the advice above.
+ */
+declare class MemoryVectorStore implements VectorStore {
+    private entries;
+    add(entries: VectorEntry[]): Promise<void>;
+    search(queryVector: number[], topK: number, threshold?: number): Promise<SearchResult[]>;
+    remove(ids: string[]): Promise<void>;
+    size(): number;
+    serialize(): unknown;
+    deserialize(data: unknown): void;
+}
+export { BM25Index, type BM25Options, type BM25Snapshot, type ConflictGroup, type ContextInput, type EmbeddingConfig, type EmbeddingProvider, LocalEmbeddingProvider, MemoryVectorStore, type SearchResult, type SelectOptions, type SelectionResult, type SkillEntry, SkillRouter, type SkillRouterOptions, type SkillRouterSnapshot, type VectorEntry, type VectorStore, extractContext };