raggrep 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,3 +14,5 @@ export type { Config, ModuleConfig } from "./config";
14
14
  export { DEFAULT_IGNORE_PATHS, DEFAULT_EXTENSIONS, createDefaultConfig, } from "./config";
15
15
  export type { FileIntrospection, ProjectStructure, Project, ProjectType, Scope, IntrospectionConfig, } from "./introspection";
16
16
  export type { FileConvention, ConventionCategory, FrameworkConventions, ConventionMatch, } from "./conventions";
17
+ export type { LiteralType, LiteralMatchType, LiteralConfidence, LiteralDetectionMethod, ExtractedLiteral, DetectedLiteral, QueryLiteralParseResult, LiteralMatch, LiteralIndexEntry, LiteralIndexData, } from "./literal";
18
+ export { LITERAL_SCORING } from "./literal";
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Literal Entity
3
+ *
4
+ * Types for literal boosting - exact-match term detection and scoring.
5
+ * Supports both query literal detection and code literal extraction.
6
+ */
7
+ /**
8
+ * Types of literals that can be extracted from code.
9
+ */
10
+ export type LiteralType = "className" | "functionName" | "variableName" | "interfaceName" | "typeName" | "enumName" | "packageName" | "identifier";
11
+ /**
12
+ * How the chunk relates to the literal.
13
+ * Used for scoring - definitions rank higher than references.
14
+ */
15
+ export type LiteralMatchType = "definition" | "reference" | "import";
16
+ /**
17
+ * Confidence level for detected literals.
18
+ */
19
+ export type LiteralConfidence = "high" | "medium" | "low";
20
+ /**
21
+ * How a literal was detected in a query.
22
+ */
23
+ export type LiteralDetectionMethod = "explicit-backtick" | "explicit-quote" | "implicit-casing";
24
+ /**
25
+ * A literal extracted from indexed code.
26
+ */
27
+ export interface ExtractedLiteral {
28
+ /** The exact term as it appears in code */
29
+ value: string;
30
+ /** Type classification */
31
+ type: LiteralType;
32
+ /** How this chunk relates to the literal */
33
+ matchType: LiteralMatchType;
34
+ }
35
+ /**
36
+ * A literal detected in a search query.
37
+ */
38
+ export interface DetectedLiteral {
39
+ /** The literal value (without backticks/quotes) */
40
+ value: string;
41
+ /** Original as it appeared in query (with backticks/quotes if explicit) */
42
+ rawValue: string;
43
+ /** Detection confidence */
44
+ confidence: LiteralConfidence;
45
+ /** How the literal was detected */
46
+ detectionMethod: LiteralDetectionMethod;
47
+ /** Inferred type based on pattern */
48
+ inferredType?: LiteralType;
49
+ }
50
+ /**
51
+ * Result of parsing a query for literals.
52
+ */
53
+ export interface QueryLiteralParseResult {
54
+ /** Detected literals */
55
+ literals: DetectedLiteral[];
56
+ /** Query with literals removed (for semantic search) */
57
+ remainingQuery: string;
58
+ }
59
+ /**
60
+ * A match between a query literal and an indexed literal.
61
+ */
62
+ export interface LiteralMatch {
63
+ /** The query literal that was matched */
64
+ queryLiteral: DetectedLiteral;
65
+ /** The indexed literal it matched */
66
+ indexedLiteral: ExtractedLiteral;
67
+ /** ID of the chunk containing this literal */
68
+ chunkId: string;
69
+ /** Filepath of the file containing the chunk */
70
+ filepath: string;
71
+ /** Whether the match is exact (case-sensitive) */
72
+ exactMatch: boolean;
73
+ }
74
+ /**
75
+ * Serialized format for literal index storage.
76
+ */
77
+ export interface LiteralIndexEntry {
78
+ chunkId: string;
79
+ filepath: string;
80
+ originalCasing: string;
81
+ type: LiteralType;
82
+ matchType: LiteralMatchType;
83
+ }
84
+ /**
85
+ * Serialized literal index data for persistence.
86
+ */
87
+ export interface LiteralIndexData {
88
+ /** Schema version */
89
+ version: string;
90
+ /** Map from literal value (lowercase) → entries */
91
+ entries: Record<string, LiteralIndexEntry[]>;
92
+ }
93
+ /**
94
+ * Scoring constants for literal boosting.
95
+ */
96
+ export declare const LITERAL_SCORING: {
97
+ /** Base score for chunks found only via literal index */
98
+ BASE_SCORE: number;
99
+ /** Multipliers by match type and confidence */
100
+ MULTIPLIERS: Record<LiteralMatchType, Record<LiteralConfidence, number>>;
101
+ };
@@ -9,3 +9,6 @@ export { extractKeywords, extractPathKeywords, parsePathContext, formatPathConte
9
9
  export { cosineSimilarity, euclideanDistance } from "./similarity";
10
10
  export { detectQueryIntent, extractQueryTerms, calculateFileTypeBoost, isSourceCodeFile, isDocFile, isDataFile, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS, DATA_EXTENSIONS, type QueryIntent, } from "./queryIntent";
11
11
  export { createLineBasedChunks, createSingleChunk, generateChunkId, DEFAULT_CHUNK_SIZE, DEFAULT_OVERLAP, type TextChunk, type ChunkingOptions, } from "./chunking";
12
+ export { parseQueryLiterals } from "./queryLiteralParser";
13
+ export { extractLiterals, extractLiteralsWithReferences, } from "./literalExtractor";
14
+ export { calculateLiteralMultiplier, calculateMaxMultiplier, calculateLiteralContribution, applyLiteralBoost, mergeWithLiteralBoost, LITERAL_SCORING_CONSTANTS, type LiteralScoreContribution, type MergeInput, type MergeOutput, } from "./literalScorer";
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Literal Extractor
3
+ *
4
+ * Extracts literals from code chunks for indexing.
5
+ * For TypeScript/JavaScript, uses the chunk name from AST parsing.
6
+ *
7
+ * This is a pure domain service with no external dependencies.
8
+ */
9
+ import type { Chunk } from "../entities/chunk";
10
+ import type { ExtractedLiteral } from "../entities/literal";
11
+ /**
12
+ * Extract literals from a code chunk.
13
+ *
14
+ * For TypeScript/JavaScript chunks, this extracts the chunk's name
15
+ * as a "definition" literal. The name comes from proper AST parsing,
16
+ * so it's accurate and reliable.
17
+ *
18
+ * @param chunk - The code chunk to extract literals from
19
+ * @returns Array of extracted literals (typically just the definition)
20
+ */
21
+ export declare function extractLiterals(chunk: Chunk): ExtractedLiteral[];
22
+ /**
23
+ * Extract literals from a code chunk with additional reference extraction.
24
+ *
25
+ * This version also extracts references from the chunk content using
26
+ * pattern matching. Use this for modules that want deeper literal indexing.
27
+ *
28
+ * @param chunk - The code chunk to extract literals from
29
+ * @param options - Extraction options
30
+ * @returns Array of extracted literals
31
+ */
32
+ export declare function extractLiteralsWithReferences(chunk: Chunk, options?: {
33
+ includeImports?: boolean;
34
+ includeTypeRefs?: boolean;
35
+ }): ExtractedLiteral[];
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Literal Scorer
3
+ *
4
+ * Calculates multiplicative score boosts for literal matches.
5
+ * Implements the three-source merge strategy for literal boosting.
6
+ *
7
+ * This is a pure domain service with no external dependencies.
8
+ */
9
+ import type { LiteralMatch, LiteralMatchType, LiteralConfidence } from "../entities/literal";
10
+ /**
11
+ * Scoring constants for literal boosting.
12
+ */
13
+ export declare const LITERAL_SCORING_CONSTANTS: {
14
+ /** Base score for chunks found only via literal index */
15
+ BASE_SCORE: number;
16
+ /** Multipliers by match type and confidence */
17
+ MULTIPLIERS: Record<LiteralMatchType, Record<LiteralConfidence, number>>;
18
+ };
19
+ /**
20
+ * Calculate the literal multiplier for a given match type and confidence.
21
+ *
22
+ * @param matchType - How the chunk relates to the literal (definition/reference/import)
23
+ * @param confidence - Detection confidence of the query literal
24
+ * @returns Multiplier to apply to the base score
25
+ */
26
+ export declare function calculateLiteralMultiplier(matchType: LiteralMatchType, confidence: LiteralConfidence): number;
27
+ /**
28
+ * Calculate the maximum multiplier for a set of literal matches.
29
+ *
30
+ * When a chunk has multiple literal matches, use the highest multiplier.
31
+ *
32
+ * @param matches - Array of literal matches for a chunk
33
+ * @returns The maximum multiplier, or 1.0 if no matches
34
+ */
35
+ export declare function calculateMaxMultiplier(matches: LiteralMatch[]): number;
36
+ /**
37
+ * Score contribution from literal matches.
38
+ * Used for debugging and explainability.
39
+ */
40
+ export interface LiteralScoreContribution {
41
+ /** The multiplier applied */
42
+ multiplier: number;
43
+ /** Whether this is a literal-only match (not found by semantic/BM25) */
44
+ literalOnly: boolean;
45
+ /** Match type of the best match */
46
+ bestMatchType?: LiteralMatchType;
47
+ /** Confidence of the best match */
48
+ bestConfidence?: LiteralConfidence;
49
+ /** Number of literal matches */
50
+ matchCount: number;
51
+ }
52
+ /**
53
+ * Calculate the literal score contribution for a chunk.
54
+ *
55
+ * @param matches - Literal matches for the chunk (may be empty)
56
+ * @param hasSemanticOrBm25 - Whether the chunk was found by semantic or BM25 search
57
+ * @returns Score contribution details
58
+ */
59
+ export declare function calculateLiteralContribution(matches: LiteralMatch[], hasSemanticOrBm25: boolean): LiteralScoreContribution;
60
+ /**
61
+ * Apply literal boosting to a base score.
62
+ *
63
+ * Scoring rules:
64
+ * - If chunk has both semantic/BM25 and literal match: multiply base by multiplier
65
+ * - If chunk has only literal match: use BASE_SCORE
66
+ * - If chunk has no literal match: use base score as-is
67
+ *
68
+ * @param baseScore - Score from semantic/BM25 search (0 if not found)
69
+ * @param matches - Literal matches for the chunk
70
+ * @param hasSemanticOrBm25 - Whether the chunk was found by semantic or BM25
71
+ * @returns Final score after literal boosting
72
+ */
73
+ export declare function applyLiteralBoost(baseScore: number, matches: LiteralMatch[], hasSemanticOrBm25: boolean): number;
74
+ /**
75
+ * Merge results from the three search sources (semantic, BM25, and the literal index) with literal boosting; describes `mergeWithLiteralBoost`, declared below.
76
+ *
77
+ * @param semanticBm25Results - Results from semantic and BM25 search
78
+ * @param literalMatchMap - Map from chunk ID to literal matches
79
+ * @returns Results with literal boosting applied
80
+ */
81
+ export interface MergeInput {
82
+ /** Chunk ID */
83
+ chunkId: string;
84
+ /** Score from semantic/BM25 search */
85
+ baseScore: number;
86
+ }
87
+ export interface MergeOutput extends MergeInput {
88
+ /** Final score after literal boosting */
89
+ finalScore: number;
90
+ /** Literal contribution details */
91
+ literalContribution: LiteralScoreContribution;
92
+ }
93
+ export declare function mergeWithLiteralBoost(semanticBm25Results: MergeInput[], literalMatchMap: Map<string, LiteralMatch[]>): MergeOutput[];
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Query Literal Parser
3
+ *
4
+ * Parses search queries to extract literals for exact-match boosting.
5
+ * Supports explicit detection (backticks, quotes) and implicit detection (casing patterns).
6
+ *
7
+ * This is a pure domain service with no external dependencies.
8
+ */
9
+ import type { QueryLiteralParseResult } from "../entities/literal";
10
+ /**
11
+ * Parse a search query to extract literals.
12
+ *
13
+ * Detects:
14
+ * - Explicit literals: `backticks` or "quotes"
15
+ * - Implicit literals: PascalCase, camelCase, SCREAMING_SNAKE, snake_case, kebab-case
16
+ *
17
+ * @param query - The search query to parse
18
+ * @returns Detected literals and remaining query for semantic search
19
+ */
20
+ export declare function parseQueryLiterals(query: string): QueryLiteralParseResult;
@@ -0,0 +1,7 @@
1
+ /**
2
+ * QueryLiteralParser Tests
3
+ *
4
+ * Comprehensive tests for query literal detection.
5
+ * Tests explicit (backticks, quotes) and implicit (casing patterns) detection.
6
+ */
7
+ export {};