raggrep 0.17.0 → 0.18.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -8,8 +8,10 @@ export type { Chunk, ChunkType } from "./chunk";
8
8
  export { createChunkId } from "./chunk";
9
9
  export type { FileIndex, FileManifestEntry, ModuleManifest, GlobalManifest, } from "./fileIndex";
10
10
  export type { FileSummary, SymbolicIndexMeta, Tier1Manifest, } from "./fileSummary";
11
- export type { SearchResult, SearchOptions, SearchContributions, CoreContribution, LanguageContribution, IntrospectionContribution, ExactMatchOccurrence, ExactMatchFile, ExactMatchResults, HybridSearchResults, } from "./searchResult";
11
+ export type { SearchResult, SearchOptions, SearchContributions, CoreContribution, LanguageContribution, IntrospectionContribution, RankBy, ExactMatchOccurrence, ExactMatchFile, ExactMatchResults, HybridSearchResults, } from "./searchResult";
12
12
  export { DEFAULT_SEARCH_OPTIONS } from "./searchResult";
13
+ export type { DiscriminativeWeights, TypeScriptRankingWeights, LanguageRankingWeights, MarkdownRankingWeights, JsonRankingWeights, LiteralBoostWeights, LiteralConfidenceMultipliers, RankingWeightsConfig, RankingWeightsPartial, } from "./rankingWeights";
14
+ export { DEFAULT_RANKING_WEIGHTS, DEFAULT_DISCRIMINATIVE_WEIGHTS, DEFAULT_LITERAL_BOOST_WEIGHTS, mergeRankingWeights, mergeLiteralWeights, } from "./rankingWeights";
13
15
  export type { Config, ModuleConfig } from "./config";
14
16
  export { DEFAULT_IGNORE_PATHS, DEFAULT_EXTENSIONS, createDefaultConfig, } from "./config";
15
17
  export type { FileIntrospection, ProjectStructure, Project, ProjectType, Scope, IntrospectionConfig, } from "./introspection";
@@ -0,0 +1,84 @@
1
+ /**
2
+ * Hybrid retrieval ranking weights — numeric knobs only, suitable for config
3
+ * and benchmark-driven tuning. Defaults follow golden-query sweeps on next-convex-starter-app
4
+ * (wave 2 winner: combined TS + markdown doc-intent; bge-small-en-v1.5, core + TS + markdown).
5
+ */
6
+ import type { LiteralMatchType, LiteralConfidence } from "./literal";
7
+ /** BM25-IDF–guided salient-term adjustment (all embedding-backed language + markdown modules). */
8
+ export interface DiscriminativeWeights {
9
+ boostCap: number;
10
+ penaltyMax: number;
11
+ penaltyFloor: number;
12
+ }
13
+ /** TypeScript/JS module: semantic + BM25 + vocabulary blend. */
14
+ export interface TypeScriptRankingWeights {
15
+ semantic: number;
16
+ bm25: number;
17
+ vocab: number;
18
+ /** Include chunk when vocab overlap exceeds this (bypass path). */
19
+ vocabBypassThreshold: number;
20
+ }
21
+ /** Rust, Go, Python modules: two-way blend. */
22
+ export interface LanguageRankingWeights {
23
+ semantic: number;
24
+ bm25: number;
25
+ }
26
+ /** Markdown docs module. */
27
+ export interface MarkdownRankingWeights {
28
+ semantic: number;
29
+ bm25: number;
30
+ /** Boost when query looks documentation-intent (legacy keyword list). */
31
+ docIntentBoost: number;
32
+ /** `headingBoost *= min + span * phraseMatch.coverage` */
33
+ headingPhraseCoverageMin: number;
34
+ headingPhraseCoverageSpan: number;
35
+ }
36
+ /** Per-confidence multipliers for one literal match kind (definition / reference / import). */
37
+ export type LiteralConfidenceMultipliers = Record<LiteralConfidence, number>;
38
+ /**
39
+ * Literal-match boosting: multipliers when identifier/path literals align with the query,
40
+ * plus vocabulary overlap tuning (TypeScript vocabulary track).
41
+ */
42
+ export interface LiteralBoostWeights {
43
+ /** Base score when a chunk is reached only via the literal index (no BM25/semantic). */
44
+ baseScore: number;
45
+ multipliers: Record<LiteralMatchType, LiteralConfidenceMultipliers>;
46
+ vocabulary: {
47
+ baseMultiplier: number;
48
+ perWordBonus: number;
49
+ maxVocabularyBonus: number;
50
+ minWordsForMatch: number;
51
+ };
52
+ }
53
+ /** JSON data module (BM25 + literal paths). */
54
+ export interface JsonRankingWeights {
55
+ bm25: number;
56
+ /** Scales literal-only base when BM25 is zero. */
57
+ literalBaseWeight: number;
58
+ }
59
+ /** Full resolved set used by search (every field required). */
60
+ export interface RankingWeightsConfig {
61
+ discriminative: DiscriminativeWeights;
62
+ typescript: TypeScriptRankingWeights;
63
+ language: LanguageRankingWeights;
64
+ markdown: MarkdownRankingWeights;
65
+ json: JsonRankingWeights;
66
+ /** Literal / backtick / identifier match boosting (language + JSON modules). */
67
+ literal: LiteralBoostWeights;
68
+ }
69
+ /** Partial overrides for {@link SearchOptions} or persisted module options. */
70
+ export type RankingWeightsPartial = {
71
+ [K in keyof RankingWeightsConfig]?: Partial<RankingWeightsConfig[K]>;
72
+ };
73
+ export declare const DEFAULT_DISCRIMINATIVE_WEIGHTS: DiscriminativeWeights;
74
+ /** Default literal boosting (same coefficients as the original literal scorer). */
75
+ export declare const DEFAULT_LITERAL_BOOST_WEIGHTS: LiteralBoostWeights;
76
+ export declare const DEFAULT_RANKING_WEIGHTS: RankingWeightsConfig;
77
+ /**
78
+ * Merge partial literal-boost overrides (including nested multipliers / vocabulary).
79
+ */
80
+ export declare function mergeLiteralWeights(def: LiteralBoostWeights, partial?: Partial<LiteralBoostWeights>): LiteralBoostWeights;
81
+ /**
82
+ * Deep-merge partial ranking overrides onto defaults. Pure function.
83
+ */
84
+ export declare function mergeRankingWeights(partial?: RankingWeightsPartial): RankingWeightsConfig;
@@ -4,6 +4,11 @@
4
4
  * Represents a single result from a search query.
5
5
  */
6
6
  import type { Chunk } from "./chunk";
7
+ import type { RankingWeightsPartial } from "./rankingWeights";
8
+ /**
9
+ * How to order hybrid results after modules are merged.
10
+ */
11
+ export type RankBy = "structured" | "semantic" | "combined";
7
12
  /**
8
13
  * Contribution from the core index.
9
14
  */
@@ -55,8 +60,18 @@ export interface SearchResult {
55
60
  filepath: string;
56
61
  /** The matching chunk */
57
62
  chunk: Chunk;
58
- /** Final relevance score (0-1, higher is better) */
63
+ /** Fused hybrid relevance (module-specific scale). */
59
64
  score: number;
65
+ /**
66
+ * Embedding similarity as a [0,1] match strength (cosine mapped to a percentage scale).
67
+ * Omitted only before enrichment in hybrid search.
68
+ */
69
+ semanticMatch?: number;
70
+ /**
71
+ * Structured relevance: BM25, symbols, path/literal/phrase/heading signals (non-embedding).
72
+ * [0,1] — primary default sort key when {@link SearchOptions.rankBy} is `'structured'`.
73
+ */
74
+ structuredMatch?: number;
60
75
  /** ID of the module that produced this result */
61
76
  moduleId: string;
62
77
  /** Contribution tracking for learning */
@@ -99,6 +114,18 @@ export interface SearchOptions {
99
114
  * Set to false if you've already ensured freshness or want explicit control.
100
115
  */
101
116
  ensureFresh?: boolean;
117
+ /**
118
+ * Optional overrides for hybrid retrieval weights (merged with
119
+ * {@link DEFAULT_RANKING_WEIGHTS}). Intended for benchmarks and tuning.
120
+ */
121
+ rankingWeights?: RankingWeightsPartial;
122
+ /** Suppress noisy stdout during search (e.g. benchmark loops). */
123
+ quiet?: boolean;
124
+ /**
125
+ * Final sort order for merged hybrid results.
126
+ * @default 'structured' (BM25 / symbols / path / docs structure before embedding).
127
+ */
128
+ rankBy?: RankBy;
102
129
  }
103
130
  /**
104
131
  * Default search options.
@@ -51,6 +51,11 @@ export declare class BM25Index {
51
51
  * Calculate IDF (Inverse Document Frequency) for a term.
52
52
  */
53
53
  private idf;
54
+ /**
55
+ * IDF for a term in this index (same formula as BM25 scoring).
56
+ * Returns 0 if the term does not appear in any indexed document.
57
+ */
58
+ getInverseDocumentFrequency(term: string): number;
54
59
  /**
55
60
  * Calculate BM25 score for a document given query terms.
56
61
  */
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Discriminative query-term scoring using the session BM25 corpus.
3
+ *
4
+ * Rare query terms (high IDF among indexed query tokens) are treated as
5
+ * salient: chunks that match more salient term mass rank higher; chunks that
6
+ * miss them are slightly down-ranked. Pure logic — no I/O.
7
+ */
8
+ import { BM25Index } from "./bm25";
9
+ import type { DiscriminativeWeights } from "../entities/rankingWeights";
10
+ /** @deprecated Use {@link DEFAULT_DISCRIMINATIVE_WEIGHTS} from entities. */
11
+ export declare const DISCRIMINATIVE_CONSTANTS: DiscriminativeWeights;
12
+ export interface DiscriminativeTermResult {
13
+ /** Additive boost ∈ [0, boostCap] */
14
+ boost: number;
15
+ /** Multiply hybrid score by this after additive boosts (≤ 1 when penalty applies) */
16
+ penaltyFactor: number;
17
+ /** Query tokens (indexed in BM25) with IDF ≥ median among indexed tokens */
18
+ salientTerms: string[];
19
+ matchedSalient: string[];
20
+ missingSalient: string[];
21
+ /** Σ IDF(matched salient) / Σ IDF(salient), or 1 if no salient set */
22
+ salientCoverage: number;
23
+ }
24
+ /**
25
+ * Score how well chunk text hits corpus-rare query terms using an already-built
26
+ * {@link BM25Index} over the same chunk set used for retrieval.
27
+ */
28
+ export declare function scoreDiscriminativeTerms(bm25Index: BM25Index, query: string, chunkText: string, chunkName?: string, weights?: DiscriminativeWeights): DiscriminativeTermResult;
@@ -5,6 +5,8 @@
5
5
  * These services operate only on domain entities and primitive data.
6
6
  */
7
7
  export { BM25Index, tokenize, normalizeScore, type BM25Document, type BM25Result, type BM25SerializedData, } from "./bm25";
8
+ export { scoreDiscriminativeTerms, DISCRIMINATIVE_CONSTANTS, type DiscriminativeTermResult, } from "./discriminativeTerms";
9
+ export { attachMatchScales, compareSearchResultsByRankBy, clamp01, semanticPctFromCosine, } from "./matchScales";
8
10
  export { extractKeywords, extractPathKeywords, parsePathContext, formatPathContextForEmbedding, COMMON_KEYWORDS, type PathContext, } from "./keywords";
9
11
  export { cosineSimilarity, euclideanDistance } from "./similarity";
10
12
  export { detectQueryIntent, extractQueryTerms, calculateFileTypeBoost, isSourceCodeFile, isDocFile, isDataFile, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS, DATA_EXTENSIONS, type QueryIntent, } from "./queryIntent";
@@ -7,26 +7,12 @@
7
7
  * This is a pure domain service with no external dependencies.
8
8
  */
9
9
  import type { LiteralMatch, LiteralMatchType, LiteralConfidence } from "../entities/literal";
10
+ import type { LiteralBoostWeights } from "../entities/rankingWeights";
10
11
  /**
11
12
  * Scoring constants for literal boosting.
13
+ * @deprecated Prefer {@link DEFAULT_LITERAL_BOOST_WEIGHTS} / `rankingWeights.literal` from search options.
12
14
  */
13
- export declare const LITERAL_SCORING_CONSTANTS: {
14
- /** Base score for chunks found only via literal index */
15
- BASE_SCORE: number;
16
- /** Multipliers by match type and confidence */
17
- MULTIPLIERS: Record<LiteralMatchType, Record<LiteralConfidence, number>>;
18
- /** Vocabulary match scoring */
19
- VOCABULARY: {
20
- /** Base multiplier for vocabulary-only matches (no exact literal match) */
21
- BASE_MULTIPLIER: number;
22
- /** Bonus per additional vocabulary word matched (up to a limit) */
23
- PER_WORD_BONUS: number;
24
- /** Maximum vocabulary bonus */
25
- MAX_VOCABULARY_BONUS: number;
26
- /** Minimum vocabulary words required for a match to count */
27
- MIN_WORDS_FOR_MATCH: number;
28
- };
29
- };
15
+ export declare const LITERAL_SCORING_CONSTANTS: LiteralBoostWeights;
30
16
  /**
31
17
  * Calculate the literal multiplier for a given match type and confidence.
32
18
  *
@@ -34,7 +20,7 @@ export declare const LITERAL_SCORING_CONSTANTS: {
34
20
  * @param confidence - Detection confidence of the query literal
35
21
  * @returns Multiplier to apply to the base score
36
22
  */
37
- export declare function calculateLiteralMultiplier(matchType: LiteralMatchType, confidence: LiteralConfidence): number;
23
+ export declare function calculateLiteralMultiplier(matchType: LiteralMatchType, confidence: LiteralConfidence, weights?: LiteralBoostWeights): number;
38
24
  /**
39
25
  * Calculate the maximum multiplier for a set of literal matches.
40
26
  *
@@ -43,7 +29,7 @@ export declare function calculateLiteralMultiplier(matchType: LiteralMatchType,
43
29
  * @param matches - Array of literal matches for a chunk
44
30
  * @returns The maximum multiplier, or 1.0 if no matches
45
31
  */
46
- export declare function calculateMaxMultiplier(matches: LiteralMatch[]): number;
32
+ export declare function calculateMaxMultiplier(matches: LiteralMatch[], weights?: LiteralBoostWeights): number;
47
33
  /**
48
34
  * Result of vocabulary-based matching.
49
35
  */
@@ -67,7 +53,7 @@ export interface VocabularyMatchResult {
67
53
  * @param chunkVocabulary - Vocabulary words extracted from chunk literals
68
54
  * @returns Vocabulary match result with multiplier
69
55
  */
70
- export declare function calculateVocabularyMatch(queryVocabulary: string[], chunkVocabulary: string[]): VocabularyMatchResult;
56
+ export declare function calculateVocabularyMatch(queryVocabulary: string[], chunkVocabulary: string[], weights?: LiteralBoostWeights): VocabularyMatchResult;
71
57
  /**
72
58
  * Score contribution from literal matches.
73
59
  * Used for debugging and explainability.
@@ -91,7 +77,7 @@ export interface LiteralScoreContribution {
91
77
  * @param hasSemanticOrBm25 - Whether the chunk was found by semantic or BM25 search
92
78
  * @returns Score contribution details
93
79
  */
94
- export declare function calculateLiteralContribution(matches: LiteralMatch[], hasSemanticOrBm25: boolean): LiteralScoreContribution;
80
+ export declare function calculateLiteralContribution(matches: LiteralMatch[], hasSemanticOrBm25: boolean, weights?: LiteralBoostWeights): LiteralScoreContribution;
95
81
  /**
96
82
  * Apply literal boosting to a base score.
97
83
  *
@@ -105,7 +91,7 @@ export declare function calculateLiteralContribution(matches: LiteralMatch[], ha
105
91
  * @param hasSemanticOrBm25 - Whether the chunk was found by semantic or BM25
106
92
  * @returns Final score after literal boosting
107
93
  */
108
- export declare function applyLiteralBoost(baseScore: number, matches: LiteralMatch[], hasSemanticOrBm25: boolean): number;
94
+ export declare function applyLiteralBoost(baseScore: number, matches: LiteralMatch[], hasSemanticOrBm25: boolean, weights?: LiteralBoostWeights): number;
109
95
  /**
110
96
  * Merge results from three search sources with literal boosting.
111
97
  *
@@ -125,4 +111,4 @@ export interface MergeOutput extends MergeInput {
125
111
  /** Literal contribution details */
126
112
  literalContribution: LiteralScoreContribution;
127
113
  }
128
- export declare function mergeWithLiteralBoost(semanticBm25Results: MergeInput[], literalMatchMap: Map<string, LiteralMatch[]>): MergeOutput[];
114
+ export declare function mergeWithLiteralBoost(semanticBm25Results: MergeInput[], literalMatchMap: Map<string, LiteralMatch[]>, weights?: LiteralBoostWeights): MergeOutput[];
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Separates hybrid retrieval into two comparable [0,1] scales:
3
+ * - **semantic**: embedding cosine mapped to a match percentage
4
+ * - **structured**: BM25 / symbols / path / phrase / docs headings (non-embedding signals)
5
+ *
6
+ * Used for display and for default hybrid ranking (structured primary).
7
+ */
8
+ import type { RankBy, SearchResult } from "../entities/searchResult";
9
+ import type { RankingWeightsConfig } from "../entities/rankingWeights";
10
+ /** Map cosine similarity [-1, 1] to [0, 1] for display / sorting. */
11
+ export declare function semanticPctFromCosine(cosine: number): number;
12
+ export declare function clamp01(x: number): number;
13
+ /**
14
+ * Populate {@link SearchResult.semanticMatch} and {@link SearchResult.structuredMatch}
15
+ * from existing module `context` and merged ranking weights.
16
+ */
17
+ export declare function attachMatchScales(result: SearchResult, rw: RankingWeightsConfig): SearchResult;
18
+ /** Compare results for final hybrid ordering. Default: structured → semantic → fused score. */
19
+ export declare function compareSearchResultsByRankBy(a: SearchResult, b: SearchResult, rankBy: RankBy): number;
package/dist/index.d.ts CHANGED
@@ -35,8 +35,9 @@ import { formatSearchResults, formatHybridSearchResults } from "./app/search";
35
35
  import type { SearchOptions, SearchResult, HybridSearchResults } from "./types";
36
36
  import { ConsoleLogger, InlineProgressLogger, SilentLogger, createLogger, createInlineLogger, createSilentLogger } from "./infrastructure/logger";
37
37
  export type { IndexResult, IndexOptions, CleanupResult, CleanupOptions, ResetResult, } from "./app/indexer";
38
- export type { SearchOptions, SearchResult, HybridSearchResults, Chunk, FileIndex, } from "./types";
38
+ export type { SearchOptions, SearchResult, HybridSearchResults, RankBy, Chunk, FileIndex, RankingWeightsPartial, RankingWeightsConfig, LiteralBoostWeights, } from "./types";
39
39
  export type { Logger, LoggerFactory } from "./domain/ports";
40
+ export { mergeRankingWeights, mergeLiteralWeights, DEFAULT_RANKING_WEIGHTS, DEFAULT_LITERAL_BOOST_WEIGHTS, } from "./domain/entities";
40
41
  export { ConsoleLogger, InlineProgressLogger, SilentLogger, createLogger, createInlineLogger, createSilentLogger, };
41
42
  /**
42
43
  * Index a directory for semantic search.