npm - raggrep - Versions diffs - 0.17.0 → 0.18.0 - Mend

raggrep 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/README.md +25 -40
package/dist/app/search/index.d.ts +2 -1
package/dist/cli/main.js +471 -97
package/dist/cli/main.js.map +24 -21
package/dist/domain/entities/index.d.ts +3 -1
package/dist/domain/entities/rankingWeights.d.ts +84 -0
package/dist/domain/entities/searchResult.d.ts +28 -1
package/dist/domain/services/bm25.d.ts +5 -0
package/dist/domain/services/discriminativeTerms.d.ts +28 -0
package/dist/domain/services/index.d.ts +2 -0
package/dist/domain/services/literalScorer.d.ts +9 -23
package/dist/domain/services/matchScales.d.ts +19 -0
package/dist/index.d.ts +2 -1
package/dist/index.js +454 -92
package/dist/index.js.map +24 -21
package/dist/infrastructure/embeddings/embeddingProviderFactory.d.ts +6 -1
package/dist/infrastructure/embeddings/index.d.ts +6 -3
package/dist/infrastructure/index.d.ts +1 -1
package/dist/types.d.ts +1 -1
package/package.json +12 -5

package/dist/domain/entities/index.d.ts CHANGED Viewed

@@ -8,8 +8,10 @@ export type { Chunk, ChunkType } from "./chunk";
 export { createChunkId } from "./chunk";
 export type { FileIndex, FileManifestEntry, ModuleManifest, GlobalManifest, } from "./fileIndex";
 export type { FileSummary, SymbolicIndexMeta, Tier1Manifest, } from "./fileSummary";
-export type { SearchResult, SearchOptions, SearchContributions, CoreContribution, LanguageContribution, IntrospectionContribution, ExactMatchOccurrence, ExactMatchFile, ExactMatchResults, HybridSearchResults, } from "./searchResult";
+export type { SearchResult, SearchOptions, SearchContributions, CoreContribution, LanguageContribution, IntrospectionContribution, RankBy, ExactMatchOccurrence, ExactMatchFile, ExactMatchResults, HybridSearchResults, } from "./searchResult";
 export { DEFAULT_SEARCH_OPTIONS } from "./searchResult";
+export type { DiscriminativeWeights, TypeScriptRankingWeights, LanguageRankingWeights, MarkdownRankingWeights, JsonRankingWeights, LiteralBoostWeights, LiteralConfidenceMultipliers, RankingWeightsConfig, RankingWeightsPartial, } from "./rankingWeights";
+export { DEFAULT_RANKING_WEIGHTS, DEFAULT_DISCRIMINATIVE_WEIGHTS, DEFAULT_LITERAL_BOOST_WEIGHTS, mergeRankingWeights, mergeLiteralWeights, } from "./rankingWeights";
 export type { Config, ModuleConfig } from "./config";
 export { DEFAULT_IGNORE_PATHS, DEFAULT_EXTENSIONS, createDefaultConfig, } from "./config";
 export type { FileIntrospection, ProjectStructure, Project, ProjectType, Scope, IntrospectionConfig, } from "./introspection";

package/dist/domain/entities/rankingWeights.d.ts ADDED Viewed

@@ -0,0 +1,84 @@
+/**
+ * Hybrid retrieval ranking weights — numeric knobs only, suitable for config
+ * and benchmark-driven tuning. Defaults follow golden-query sweeps on next-convex-starter-app
+ * (wave 2 winner: combined TS + markdown doc-intent; bge-small-en-v1.5, core + TS + markdown).
+ */
+import type { LiteralMatchType, LiteralConfidence } from "./literal";
+/** BM25-IDF–guided salient-term adjustment (all embedding-backed language + markdown modules). */
+export interface DiscriminativeWeights {
+    boostCap: number;
+    penaltyMax: number;
+    penaltyFloor: number;
+}
+/** TypeScript/JS module: semantic + BM25 + vocabulary blend. */
+export interface TypeScriptRankingWeights {
+    semantic: number;
+    bm25: number;
+    vocab: number;
+    /** Include chunk when vocab overlap exceeds this (bypass path). */
+    vocabBypassThreshold: number;
+}
+/** Rust, Go, Python modules: two-way blend. */
+export interface LanguageRankingWeights {
+    semantic: number;
+    bm25: number;
+}
+/** Markdown docs module. */
+export interface MarkdownRankingWeights {
+    semantic: number;
+    bm25: number;
+    /** Boost when query looks documentation-intent (legacy keyword list). */
+    docIntentBoost: number;
+    /** `headingBoost *= min + span * phraseMatch.coverage` */
+    headingPhraseCoverageMin: number;
+    headingPhraseCoverageSpan: number;
+}
+/** Per-confidence multipliers for one literal match kind (definition / reference / import). */
+export type LiteralConfidenceMultipliers = Record<LiteralConfidence, number>;
+/**
+ * Literal-match boosting: multipliers when identifier/path literals align with the query,
+ * plus vocabulary overlap tuning (TypeScript vocabulary track).
+ */
+export interface LiteralBoostWeights {
+    /** Base score when a chunk is reached only via the literal index (no BM25/semantic). */
+    baseScore: number;
+    multipliers: Record<LiteralMatchType, LiteralConfidenceMultipliers>;
+    vocabulary: {
+        baseMultiplier: number;
+        perWordBonus: number;
+        maxVocabularyBonus: number;
+        minWordsForMatch: number;
+    };
+}
+/** JSON data module (BM25 + literal paths). */
+export interface JsonRankingWeights {
+    bm25: number;
+    /** Scales literal-only base when BM25 is zero. */
+    literalBaseWeight: number;
+}
+/** Full resolved set used by search (every field required). */
+export interface RankingWeightsConfig {
+    discriminative: DiscriminativeWeights;
+    typescript: TypeScriptRankingWeights;
+    language: LanguageRankingWeights;
+    markdown: MarkdownRankingWeights;
+    json: JsonRankingWeights;
+    /** Literal / backtick / identifier match boosting (language + JSON modules). */
+    literal: LiteralBoostWeights;
+}
+/** Partial overrides for {@link SearchOptions} or persisted module options. */
+export type RankingWeightsPartial = {
+    [K in keyof RankingWeightsConfig]?: Partial<RankingWeightsConfig[K]>;
+};
+export declare const DEFAULT_DISCRIMINATIVE_WEIGHTS: DiscriminativeWeights;
+/** Default literal boosting (same coefficients as the original literal scorer). */
+export declare const DEFAULT_LITERAL_BOOST_WEIGHTS: LiteralBoostWeights;
+export declare const DEFAULT_RANKING_WEIGHTS: RankingWeightsConfig;
+/**
+ * Merge partial literal-boost overrides (including nested multipliers / vocabulary).
+ */
+export declare function mergeLiteralWeights(def: LiteralBoostWeights, partial?: Partial<LiteralBoostWeights>): LiteralBoostWeights;
+/**
+ * Deep-merge partial ranking overrides onto defaults. Pure function.
+ */
+export declare function mergeRankingWeights(partial?: RankingWeightsPartial): RankingWeightsConfig;

package/dist/domain/entities/searchResult.d.ts CHANGED Viewed

@@ -4,6 +4,11 @@
  * Represents a single result from a search query.
  */
 import type { Chunk } from "./chunk";
+import type { RankingWeightsPartial } from "./rankingWeights";
+/**
+ * How to order hybrid results after modules are merged.
+ */
+export type RankBy = "structured" | "semantic" | "combined";
 /**
  * Contribution from the core index.
  */
@@ -55,8 +60,18 @@ export interface SearchResult {
     filepath: string;
     /** The matching chunk */
     chunk: Chunk;
-    /** Final relevance score (0-1, higher is better) */
+    /** Fused hybrid relevance (module-specific scale). */
     score: number;
+    /**
+     * Embedding similarity as a [0,1] match strength (cosine mapped to a percentage scale).
+     * Omitted only before enrichment in hybrid search.
+     */
+    semanticMatch?: number;
+    /**
+     * Structured relevance: BM25, symbols, path/literal/phrase/heading signals (non-embedding).
+     * [0,1] — primary default sort key when {@link SearchOptions.rankBy} is `'structured'`.
+     */
+    structuredMatch?: number;
     /** ID of the module that produced this result */
     moduleId: string;
     /** Contribution tracking for learning */
@@ -99,6 +114,18 @@ export interface SearchOptions {
      * Set to false if you've already ensured freshness or want explicit control.
      */
     ensureFresh?: boolean;
+    /**
+     * Optional overrides for hybrid retrieval weights (merged with
+     * {@link DEFAULT_RANKING_WEIGHTS}). Intended for benchmarks and tuning.
+     */
+    rankingWeights?: RankingWeightsPartial;
+    /** Suppress noisy stdout during search (e.g. benchmark loops). */
+    quiet?: boolean;
+    /**
+     * Final sort order for merged hybrid results.
+     * @default 'structured' (BM25 / symbols / path / docs structure before embedding).
+     */
+    rankBy?: RankBy;
 }
 /**
  * Default search options.

package/dist/domain/services/bm25.d.ts CHANGED Viewed

@@ -51,6 +51,11 @@ export declare class BM25Index {
      * Calculate IDF (Inverse Document Frequency) for a term.
      */
     private idf;
+    /**
+     * IDF for a term in this index (same formula as BM25 scoring).
+     * Returns 0 if the term does not appear in any indexed document.
+     */
+    getInverseDocumentFrequency(term: string): number;
     /**
      * Calculate BM25 score for a document given query terms.
      */

package/dist/domain/services/discriminativeTerms.d.ts ADDED Viewed

@@ -0,0 +1,28 @@
+/**
+ * Discriminative query-term scoring using the session BM25 corpus.
+ *
+ * Rare query terms (high IDF among indexed query tokens) are treated as
+ * salient: chunks that match more salient term mass rank higher; chunks that
+ * miss them are slightly down-ranked. Pure logic — no I/O.
+ */
+import { BM25Index } from "./bm25";
+import type { DiscriminativeWeights } from "../entities/rankingWeights";
+/** @deprecated Use {@link DEFAULT_DISCRIMINATIVE_WEIGHTS} from entities. */
+export declare const DISCRIMINATIVE_CONSTANTS: DiscriminativeWeights;
+export interface DiscriminativeTermResult {
+    /** Additive boost ∈ [0, boostCap] */
+    boost: number;
+    /** Multiply hybrid score by this after additive boosts (≤ 1 when penalty applies) */
+    penaltyFactor: number;
+    /** Query tokens (indexed in BM25) with IDF ≥ median among indexed tokens */
+    salientTerms: string[];
+    matchedSalient: string[];
+    missingSalient: string[];
+    /** Σ IDF(matched salient) / Σ IDF(salient), or 1 if no salient set */
+    salientCoverage: number;
+}
+/**
+ * Score how well chunk text hits corpus-rare query terms using an already-built
+ * {@link BM25Index} over the same chunk set used for retrieval.
+ */
+export declare function scoreDiscriminativeTerms(bm25Index: BM25Index, query: string, chunkText: string, chunkName?: string, weights?: DiscriminativeWeights): DiscriminativeTermResult;

package/dist/domain/services/index.d.ts CHANGED Viewed

@@ -5,6 +5,8 @@
  * These services operate only on domain entities and primitive data.
  */
 export { BM25Index, tokenize, normalizeScore, type BM25Document, type BM25Result, type BM25SerializedData, } from "./bm25";
+export { scoreDiscriminativeTerms, DISCRIMINATIVE_CONSTANTS, type DiscriminativeTermResult, } from "./discriminativeTerms";
+export { attachMatchScales, compareSearchResultsByRankBy, clamp01, semanticPctFromCosine, } from "./matchScales";
 export { extractKeywords, extractPathKeywords, parsePathContext, formatPathContextForEmbedding, COMMON_KEYWORDS, type PathContext, } from "./keywords";
 export { cosineSimilarity, euclideanDistance } from "./similarity";
 export { detectQueryIntent, extractQueryTerms, calculateFileTypeBoost, isSourceCodeFile, isDocFile, isDataFile, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS, DATA_EXTENSIONS, type QueryIntent, } from "./queryIntent";

package/dist/domain/services/literalScorer.d.ts CHANGED Viewed

@@ -7,26 +7,12 @@
  * This is a pure domain service with no external dependencies.
  */
 import type { LiteralMatch, LiteralMatchType, LiteralConfidence } from "../entities/literal";
+import type { LiteralBoostWeights } from "../entities/rankingWeights";
 /**
  * Scoring constants for literal boosting.
+ * @deprecated Prefer {@link DEFAULT_LITERAL_BOOST_WEIGHTS} / `rankingWeights.literal` from search options.
  */
-export declare const LITERAL_SCORING_CONSTANTS: {
-    /** Base score for chunks found only via literal index */
-    BASE_SCORE: number;
-    /** Multipliers by match type and confidence */
-    MULTIPLIERS: Record<LiteralMatchType, Record<LiteralConfidence, number>>;
-    /** Vocabulary match scoring */
-    VOCABULARY: {
-        /** Base multiplier for vocabulary-only matches (no exact literal match) */
-        BASE_MULTIPLIER: number;
-        /** Bonus per additional vocabulary word matched (up to a limit) */
-        PER_WORD_BONUS: number;
-        /** Maximum vocabulary bonus */
-        MAX_VOCABULARY_BONUS: number;
-        /** Minimum vocabulary words required for a match to count */
-        MIN_WORDS_FOR_MATCH: number;
-    };
-};
+export declare const LITERAL_SCORING_CONSTANTS: LiteralBoostWeights;
 /**
  * Calculate the literal multiplier for a given match type and confidence.
  *
@@ -34,7 +20,7 @@ export declare const LITERAL_SCORING_CONSTANTS: {
  * @param confidence - Detection confidence of the query literal
  * @returns Multiplier to apply to the base score
  */
-export declare function calculateLiteralMultiplier(matchType: LiteralMatchType, confidence: LiteralConfidence): number;
+export declare function calculateLiteralMultiplier(matchType: LiteralMatchType, confidence: LiteralConfidence, weights?: LiteralBoostWeights): number;
 /**
  * Calculate the maximum multiplier for a set of literal matches.
  *
@@ -43,7 +29,7 @@ export declare function calculateLiteralMultiplier(matchType: LiteralMatchType,
  * @param matches - Array of literal matches for a chunk
  * @returns The maximum multiplier, or 1.0 if no matches
  */
-export declare function calculateMaxMultiplier(matches: LiteralMatch[]): number;
+export declare function calculateMaxMultiplier(matches: LiteralMatch[], weights?: LiteralBoostWeights): number;
 /**
  * Result of vocabulary-based matching.
  */
@@ -67,7 +53,7 @@ export interface VocabularyMatchResult {
  * @param chunkVocabulary - Vocabulary words extracted from chunk literals
  * @returns Vocabulary match result with multiplier
  */
-export declare function calculateVocabularyMatch(queryVocabulary: string[], chunkVocabulary: string[]): VocabularyMatchResult;
+export declare function calculateVocabularyMatch(queryVocabulary: string[], chunkVocabulary: string[], weights?: LiteralBoostWeights): VocabularyMatchResult;
 /**
  * Score contribution from literal matches.
  * Used for debugging and explainability.
@@ -91,7 +77,7 @@ export interface LiteralScoreContribution {
  * @param hasSemanticOrBm25 - Whether the chunk was found by semantic or BM25 search
  * @returns Score contribution details
  */
-export declare function calculateLiteralContribution(matches: LiteralMatch[], hasSemanticOrBm25: boolean): LiteralScoreContribution;
+export declare function calculateLiteralContribution(matches: LiteralMatch[], hasSemanticOrBm25: boolean, weights?: LiteralBoostWeights): LiteralScoreContribution;
 /**
  * Apply literal boosting to a base score.
  *
@@ -105,7 +91,7 @@ export declare function calculateLiteralContribution(matches: LiteralMatch[], ha
  * @param hasSemanticOrBm25 - Whether the chunk was found by semantic or BM25
  * @returns Final score after literal boosting
  */
-export declare function applyLiteralBoost(baseScore: number, matches: LiteralMatch[], hasSemanticOrBm25: boolean): number;
+export declare function applyLiteralBoost(baseScore: number, matches: LiteralMatch[], hasSemanticOrBm25: boolean, weights?: LiteralBoostWeights): number;
 /**
  * Merge results from three search sources with literal boosting.
  *
@@ -125,4 +111,4 @@ export interface MergeOutput extends MergeInput {
     /** Literal contribution details */
     literalContribution: LiteralScoreContribution;
 }
-export declare function mergeWithLiteralBoost(semanticBm25Results: MergeInput[], literalMatchMap: Map<string, LiteralMatch[]>): MergeOutput[];
+export declare function mergeWithLiteralBoost(semanticBm25Results: MergeInput[], literalMatchMap: Map<string, LiteralMatch[]>, weights?: LiteralBoostWeights): MergeOutput[];

package/dist/domain/services/matchScales.d.ts ADDED Viewed

@@ -0,0 +1,19 @@
+/**
+ * Separates hybrid retrieval into two comparable [0,1] scales:
+ * - **semantic**: embedding cosine mapped to a match percentage
+ * - **structured**: BM25 / symbols / path / phrase / docs headings (non-embedding signals)
+ *
+ * Used for display and for default hybrid ranking (structured primary).
+ */
+import type { RankBy, SearchResult } from "../entities/searchResult";
+import type { RankingWeightsConfig } from "../entities/rankingWeights";
+/** Map cosine similarity [-1, 1] to [0, 1] for display / sorting. */
+export declare function semanticPctFromCosine(cosine: number): number;
+export declare function clamp01(x: number): number;
+/**
+ * Populate {@link SearchResult.semanticMatch} and {@link SearchResult.structuredMatch}
+ * from existing module `context` and merged ranking weights.
+ */
+export declare function attachMatchScales(result: SearchResult, rw: RankingWeightsConfig): SearchResult;
+/** Compare results for final hybrid ordering. Default: structured → semantic → fused score. */
+export declare function compareSearchResultsByRankBy(a: SearchResult, b: SearchResult, rankBy: RankBy): number;

package/dist/index.d.ts CHANGED Viewed

@@ -35,8 +35,9 @@ import { formatSearchResults, formatHybridSearchResults } from "./app/search";
 import type { SearchOptions, SearchResult, HybridSearchResults } from "./types";
 import { ConsoleLogger, InlineProgressLogger, SilentLogger, createLogger, createInlineLogger, createSilentLogger } from "./infrastructure/logger";
 export type { IndexResult, IndexOptions, CleanupResult, CleanupOptions, ResetResult, } from "./app/indexer";
-export type { SearchOptions, SearchResult, HybridSearchResults, Chunk, FileIndex, } from "./types";
+export type { SearchOptions, SearchResult, HybridSearchResults, RankBy, Chunk, FileIndex, RankingWeightsPartial, RankingWeightsConfig, LiteralBoostWeights, } from "./types";
 export type { Logger, LoggerFactory } from "./domain/ports";
+export { mergeRankingWeights, mergeLiteralWeights, DEFAULT_RANKING_WEIGHTS, DEFAULT_LITERAL_BOOST_WEIGHTS, } from "./domain/entities";
 export { ConsoleLogger, InlineProgressLogger, SilentLogger, createLogger, createInlineLogger, createSilentLogger, };
 /**
  * Index a directory for semantic search.