@tobilu/qmd 1.0.7 → 1.1.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/dist/store.d.ts CHANGED
@@ -280,15 +280,6 @@ export type DocumentResult = {
280
280
  * Extract short docid from a full hash (first 6 characters).
281
281
  */
282
282
  export declare function getDocid(hash: string): string;
283
- /**
284
- * Handelize a filename to be more token-friendly.
285
- * - Convert triple underscore `___` to `/` (folder separator)
286
- * - Convert to lowercase
287
- * - Replace sequences of non-word chars (except /) with single dash
288
- * - Remove leading/trailing dashes from path segments
289
- * - Preserve folder structure (a/b/c/d.md stays structured)
290
- * - Preserve file extension
291
- */
292
283
  export declare function handelize(path: string): string;
293
284
  /**
294
285
  * Search result extends DocumentResult with score and source info
@@ -308,6 +299,38 @@ export type RankedResult = {
308
299
  body: string;
309
300
  score: number;
310
301
  };
302
+ export type RRFContributionTrace = {
303
+ listIndex: number;
304
+ source: "fts" | "vec";
305
+ queryType: "original" | "lex" | "vec" | "hyde";
306
+ query: string;
307
+ rank: number;
308
+ weight: number;
309
+ backendScore: number;
310
+ rrfContribution: number;
311
+ };
312
+ export type RRFScoreTrace = {
313
+ contributions: RRFContributionTrace[];
314
+ baseScore: number;
315
+ topRank: number;
316
+ topRankBonus: number;
317
+ totalScore: number;
318
+ };
319
+ export type HybridQueryExplain = {
320
+ ftsScores: number[];
321
+ vectorScores: number[];
322
+ rrf: {
323
+ rank: number;
324
+ positionScore: number;
325
+ weight: number;
326
+ baseScore: number;
327
+ topRankBonus: number;
328
+ totalScore: number;
329
+ contributions: RRFContributionTrace[];
330
+ };
331
+ rerankScore: number;
332
+ blendedScore: number;
333
+ };
311
334
  /**
312
335
  * Error result when document is not found
313
336
  */
@@ -548,6 +571,12 @@ export declare function getCollectionsWithoutContext(db: Database): {
548
571
  * Useful for suggesting where context might be needed.
549
572
  */
550
573
  export declare function getTopLevelPathsWithoutContext(db: Database, collectionName: string): string[];
574
+ /**
575
+ * Validate that a vec/hyde query doesn't use lex-only syntax.
576
+ * Returns error message if invalid, null if valid.
577
+ */
578
+ export declare function validateSemanticQuery(query: string): string | null;
579
+ export declare function validateLexQuery(query: string): string | null;
551
580
  export declare function searchFTS(db: Database, query: string, limit?: number, collectionName?: string): SearchResult[];
552
581
  export declare function searchVec(db: Database, query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]): Promise<SearchResult[]>;
553
582
  /**
@@ -578,6 +607,10 @@ export declare function rerank(query: string, documents: {
578
607
  score: number;
579
608
  }[]>;
580
609
  export declare function reciprocalRankFusion(resultLists: RankedResult[][], weights?: number[], k?: number): RankedResult[];
610
+ /**
611
+ * Build per-document RRF contribution traces for explain/debug output.
612
+ */
613
+ export declare function buildRrfTrace(resultLists: RankedResult[][], weights?: number[], listMeta?: RankedListMeta[], k?: number): Map<string, RRFScoreTrace>;
581
614
  /**
582
615
  * Find a document by filename/path, docid (#hash), or with fuzzy matching.
583
616
  * Returns document metadata without body by default.
@@ -630,18 +663,25 @@ export declare function addLineNumbers(text: string, startLine?: number): string
630
663
  export interface SearchHooks {
631
664
  /** BM25 probe found strong signal — expansion will be skipped */
632
665
  onStrongSignal?: (topScore: number) => void;
633
- /** Query expansion complete. Empty array = strong signal skip (no expansion). */
634
- onExpand?: (original: string, expanded: ExpandedQuery[]) => void;
666
+ /** Query expansion starting */
667
+ onExpandStart?: () => void;
668
+ /** Query expansion complete. Empty array = strong signal skip. elapsedMs = time taken. */
669
+ onExpand?: (original: string, expanded: ExpandedQuery[], elapsedMs: number) => void;
670
+ /** Embedding starting (vec/hyde queries) */
671
+ onEmbedStart?: (count: number) => void;
672
+ /** Embedding complete */
673
+ onEmbedDone?: (elapsedMs: number) => void;
635
674
  /** Reranking is about to start */
636
675
  onRerankStart?: (chunkCount: number) => void;
637
676
  /** Reranking finished */
638
- onRerankDone?: () => void;
677
+ onRerankDone?: (elapsedMs: number) => void;
639
678
  }
640
679
  export interface HybridQueryOptions {
641
680
  collection?: string;
642
681
  limit?: number;
643
682
  minScore?: number;
644
683
  candidateLimit?: number;
684
+ explain?: boolean;
645
685
  hooks?: SearchHooks;
646
686
  }
647
687
  export interface HybridQueryResult {
@@ -654,7 +694,13 @@ export interface HybridQueryResult {
654
694
  score: number;
655
695
  context: string | null;
656
696
  docid: string;
697
+ explain?: HybridQueryExplain;
657
698
  }
699
+ export type RankedListMeta = {
700
+ source: "fts" | "vec";
701
+ queryType: "original" | "lex" | "vec" | "hyde";
702
+ query: string;
703
+ };
658
704
  /**
659
705
  * Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
660
706
  *
@@ -694,3 +740,44 @@ export interface VectorSearchResult {
694
740
  * 4. Sort by score descending, filter by minScore, slice to limit
695
741
  */
696
742
  export declare function vectorSearchQuery(store: Store, query: string, options?: VectorSearchOptions): Promise<VectorSearchResult[]>;
743
+ /**
744
+ * A single sub-search in a structured search request.
745
+ * Matches the format used in QMD training data.
746
+ */
747
+ export interface StructuredSubSearch {
748
+ /** Search type: 'lex' for BM25, 'vec' for semantic, 'hyde' for hypothetical */
749
+ type: 'lex' | 'vec' | 'hyde';
750
+ /** The search query text */
751
+ query: string;
752
+ /** Optional line number for error reporting (CLI parser) */
753
+ line?: number;
754
+ }
755
+ export interface StructuredSearchOptions {
756
+ collections?: string[];
757
+ limit?: number;
758
+ minScore?: number;
759
+ candidateLimit?: number;
760
+ explain?: boolean;
761
+ /** Future: domain intent hint for routing/boosting */
762
+ intent?: string;
763
+ hooks?: SearchHooks;
764
+ }
765
+ /**
766
+ * Structured search: execute pre-expanded queries without LLM query expansion.
767
+ *
768
+ * Designed for LLM callers (MCP/HTTP) that generate their own query expansions.
769
+ * Skips the internal expandQuery() step — goes directly to:
770
+ *
771
+ * Pipeline:
772
+ * 1. Route searches: lex→FTS, vec/hyde→vector (batch embed)
773
+ * 2. RRF fusion across all result lists
774
+ * 3. Chunk documents + keyword-best-chunk selection
775
+ * 4. Rerank on chunks
776
+ * 5. Position-aware score blending
777
+ * 6. Dedup, filter, slice
778
+ *
779
+ * This is the recommended endpoint for capable LLMs — they can generate
780
+ * better query variations than our small local model, especially for
781
+ * domain-specific or nuanced queries.
782
+ */
783
+ export declare function structuredSearch(store: Store, searches: StructuredSubSearch[], options?: StructuredSearchOptions): Promise<HybridQueryResult[]>;