raggrep 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/cli/main.js +671 -124
- package/dist/cli/main.js.map +13 -9
- package/dist/domain/entities/index.d.ts +2 -0
- package/dist/domain/entities/literal.d.ts +101 -0
- package/dist/domain/services/index.d.ts +3 -0
- package/dist/domain/services/literalExtractor.d.ts +35 -0
- package/dist/domain/services/literalScorer.d.ts +93 -0
- package/dist/domain/services/queryLiteralParser.d.ts +20 -0
- package/dist/domain/services/queryLiteralParser.test.d.ts +7 -0
- package/dist/index.js +665 -118
- package/dist/index.js.map +12 -8
- package/dist/infrastructure/storage/index.d.ts +1 -0
- package/dist/infrastructure/storage/literalIndex.d.ts +103 -0
- package/dist/modules/language/typescript/index.d.ts +8 -4
- package/package.json +1 -1
|
@@ -14,3 +14,5 @@ export type { Config, ModuleConfig } from "./config";
|
|
|
14
14
|
export { DEFAULT_IGNORE_PATHS, DEFAULT_EXTENSIONS, createDefaultConfig, } from "./config";
|
|
15
15
|
export type { FileIntrospection, ProjectStructure, Project, ProjectType, Scope, IntrospectionConfig, } from "./introspection";
|
|
16
16
|
export type { FileConvention, ConventionCategory, FrameworkConventions, ConventionMatch, } from "./conventions";
|
|
17
|
+
export type { LiteralType, LiteralMatchType, LiteralConfidence, LiteralDetectionMethod, ExtractedLiteral, DetectedLiteral, QueryLiteralParseResult, LiteralMatch, LiteralIndexEntry, LiteralIndexData, } from "./literal";
|
|
18
|
+
export { LITERAL_SCORING } from "./literal";
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Literal Entity
|
|
3
|
+
*
|
|
4
|
+
* Types for literal boosting - exact-match term detection and scoring.
|
|
5
|
+
* Supports both query literal detection and code literal extraction.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Types of literals that can be extracted from code.
|
|
9
|
+
*/
|
|
10
|
+
export type LiteralType = "className" | "functionName" | "variableName" | "interfaceName" | "typeName" | "enumName" | "packageName" | "identifier";
|
|
11
|
+
/**
|
|
12
|
+
* How the chunk relates to the literal.
|
|
13
|
+
* Used for scoring - definitions rank higher than references.
|
|
14
|
+
*/
|
|
15
|
+
export type LiteralMatchType = "definition" | "reference" | "import";
|
|
16
|
+
/**
|
|
17
|
+
* Confidence level for detected literals.
|
|
18
|
+
*/
|
|
19
|
+
export type LiteralConfidence = "high" | "medium" | "low";
|
|
20
|
+
/**
|
|
21
|
+
* How a literal was detected in a query.
|
|
22
|
+
*/
|
|
23
|
+
export type LiteralDetectionMethod = "explicit-backtick" | "explicit-quote" | "implicit-casing";
|
|
24
|
+
/**
|
|
25
|
+
* A literal extracted from indexed code.
|
|
26
|
+
*/
|
|
27
|
+
export interface ExtractedLiteral {
|
|
28
|
+
/** The exact term as it appears in code */
|
|
29
|
+
value: string;
|
|
30
|
+
/** Type classification */
|
|
31
|
+
type: LiteralType;
|
|
32
|
+
/** How this chunk relates to the literal */
|
|
33
|
+
matchType: LiteralMatchType;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* A literal detected in a search query.
|
|
37
|
+
*/
|
|
38
|
+
export interface DetectedLiteral {
|
|
39
|
+
/** The literal value (without backticks/quotes) */
|
|
40
|
+
value: string;
|
|
41
|
+
/** Original as it appeared in query (with backticks/quotes if explicit) */
|
|
42
|
+
rawValue: string;
|
|
43
|
+
/** Detection confidence */
|
|
44
|
+
confidence: LiteralConfidence;
|
|
45
|
+
/** How the literal was detected */
|
|
46
|
+
detectionMethod: LiteralDetectionMethod;
|
|
47
|
+
/** Inferred type based on pattern */
|
|
48
|
+
inferredType?: LiteralType;
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Result of parsing a query for literals.
|
|
52
|
+
*/
|
|
53
|
+
export interface QueryLiteralParseResult {
|
|
54
|
+
/** Detected literals */
|
|
55
|
+
literals: DetectedLiteral[];
|
|
56
|
+
/** Query with literals removed (for semantic search) */
|
|
57
|
+
remainingQuery: string;
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* A match between a query literal and an indexed literal.
|
|
61
|
+
*/
|
|
62
|
+
export interface LiteralMatch {
|
|
63
|
+
/** The query literal that was matched */
|
|
64
|
+
queryLiteral: DetectedLiteral;
|
|
65
|
+
/** The indexed literal it matched */
|
|
66
|
+
indexedLiteral: ExtractedLiteral;
|
|
67
|
+
/** ID of the chunk containing this literal */
|
|
68
|
+
chunkId: string;
|
|
69
|
+
/** Filepath of the file containing the chunk */
|
|
70
|
+
filepath: string;
|
|
71
|
+
/** Whether the match is exact (case-sensitive) */
|
|
72
|
+
exactMatch: boolean;
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Serialized format for literal index storage.
|
|
76
|
+
*/
|
|
77
|
+
export interface LiteralIndexEntry {
|
|
78
|
+
chunkId: string;
|
|
79
|
+
filepath: string;
|
|
80
|
+
originalCasing: string;
|
|
81
|
+
type: LiteralType;
|
|
82
|
+
matchType: LiteralMatchType;
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Serialized literal index data for persistence.
|
|
86
|
+
*/
|
|
87
|
+
export interface LiteralIndexData {
|
|
88
|
+
/** Schema version */
|
|
89
|
+
version: string;
|
|
90
|
+
/** Map from literal value (lowercase) → entries */
|
|
91
|
+
entries: Record<string, LiteralIndexEntry[]>;
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Scoring constants for literal boosting.
|
|
95
|
+
*/
|
|
96
|
+
export declare const LITERAL_SCORING: {
|
|
97
|
+
/** Base score for chunks found only via literal index */
|
|
98
|
+
BASE_SCORE: number;
|
|
99
|
+
/** Multipliers by match type and confidence */
|
|
100
|
+
MULTIPLIERS: Record<LiteralMatchType, Record<LiteralConfidence, number>>;
|
|
101
|
+
};
|
|
@@ -9,3 +9,6 @@ export { extractKeywords, extractPathKeywords, parsePathContext, formatPathConte
|
|
|
9
9
|
export { cosineSimilarity, euclideanDistance } from "./similarity";
|
|
10
10
|
export { detectQueryIntent, extractQueryTerms, calculateFileTypeBoost, isSourceCodeFile, isDocFile, isDataFile, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS, DATA_EXTENSIONS, type QueryIntent, } from "./queryIntent";
|
|
11
11
|
export { createLineBasedChunks, createSingleChunk, generateChunkId, DEFAULT_CHUNK_SIZE, DEFAULT_OVERLAP, type TextChunk, type ChunkingOptions, } from "./chunking";
|
|
12
|
+
export { parseQueryLiterals } from "./queryLiteralParser";
|
|
13
|
+
export { extractLiterals, extractLiteralsWithReferences, } from "./literalExtractor";
|
|
14
|
+
export { calculateLiteralMultiplier, calculateMaxMultiplier, calculateLiteralContribution, applyLiteralBoost, mergeWithLiteralBoost, LITERAL_SCORING_CONSTANTS, type LiteralScoreContribution, type MergeInput, type MergeOutput, } from "./literalScorer";
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Literal Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts literals from code chunks for indexing.
|
|
5
|
+
* For TypeScript/JavaScript, uses the chunk name from AST parsing.
|
|
6
|
+
*
|
|
7
|
+
* This is a pure domain service with no external dependencies.
|
|
8
|
+
*/
|
|
9
|
+
import type { Chunk } from "../entities/chunk";
|
|
10
|
+
import type { ExtractedLiteral } from "../entities/literal";
|
|
11
|
+
/**
|
|
12
|
+
* Extract literals from a code chunk.
|
|
13
|
+
*
|
|
14
|
+
* For TypeScript/JavaScript chunks, this extracts the chunk's name
|
|
15
|
+
* as a "definition" literal. The name comes from proper AST parsing,
|
|
16
|
+
* so it's accurate and reliable.
|
|
17
|
+
*
|
|
18
|
+
* @param chunk - The code chunk to extract literals from
|
|
19
|
+
* @returns Array of extracted literals (typically just the definition)
|
|
20
|
+
*/
|
|
21
|
+
export declare function extractLiterals(chunk: Chunk): ExtractedLiteral[];
|
|
22
|
+
/**
|
|
23
|
+
* Extract literals from a code chunk with additional reference extraction.
|
|
24
|
+
*
|
|
25
|
+
* This version also extracts references from the chunk content using
|
|
26
|
+
* pattern matching. Use this for modules that want deeper literal indexing.
|
|
27
|
+
*
|
|
28
|
+
* @param chunk - The code chunk to extract literals from
|
|
29
|
+
* @param options - Extraction options
|
|
30
|
+
* @returns Array of extracted literals
|
|
31
|
+
*/
|
|
32
|
+
export declare function extractLiteralsWithReferences(chunk: Chunk, options?: {
|
|
33
|
+
includeImports?: boolean;
|
|
34
|
+
includeTypeRefs?: boolean;
|
|
35
|
+
}): ExtractedLiteral[];
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Literal Scorer
|
|
3
|
+
*
|
|
4
|
+
* Calculates multiplicative score boosts for literal matches.
|
|
5
|
+
* Implements the three-source merge strategy for literal boosting.
|
|
6
|
+
*
|
|
7
|
+
* This is a pure domain service with no external dependencies.
|
|
8
|
+
*/
|
|
9
|
+
import type { LiteralMatch, LiteralMatchType, LiteralConfidence } from "../entities/literal";
|
|
10
|
+
/**
|
|
11
|
+
* Scoring constants for literal boosting.
|
|
12
|
+
*/
|
|
13
|
+
export declare const LITERAL_SCORING_CONSTANTS: {
|
|
14
|
+
/** Base score for chunks found only via literal index */
|
|
15
|
+
BASE_SCORE: number;
|
|
16
|
+
/** Multipliers by match type and confidence */
|
|
17
|
+
MULTIPLIERS: Record<LiteralMatchType, Record<LiteralConfidence, number>>;
|
|
18
|
+
};
|
|
19
|
+
/**
|
|
20
|
+
* Calculate the literal multiplier for a given match type and confidence.
|
|
21
|
+
*
|
|
22
|
+
* @param matchType - How the chunk relates to the literal (definition/reference/import)
|
|
23
|
+
* @param confidence - Detection confidence of the query literal
|
|
24
|
+
* @returns Multiplier to apply to the base score
|
|
25
|
+
*/
|
|
26
|
+
export declare function calculateLiteralMultiplier(matchType: LiteralMatchType, confidence: LiteralConfidence): number;
|
|
27
|
+
/**
|
|
28
|
+
* Calculate the maximum multiplier for a set of literal matches.
|
|
29
|
+
*
|
|
30
|
+
* When a chunk has multiple literal matches, use the highest multiplier.
|
|
31
|
+
*
|
|
32
|
+
* @param matches - Array of literal matches for a chunk
|
|
33
|
+
* @returns The maximum multiplier, or 1.0 if no matches
|
|
34
|
+
*/
|
|
35
|
+
export declare function calculateMaxMultiplier(matches: LiteralMatch[]): number;
|
|
36
|
+
/**
|
|
37
|
+
* Score contribution from literal matches.
|
|
38
|
+
* Used for debugging and explainability.
|
|
39
|
+
*/
|
|
40
|
+
export interface LiteralScoreContribution {
|
|
41
|
+
/** The multiplier applied */
|
|
42
|
+
multiplier: number;
|
|
43
|
+
/** Whether this is a literal-only match (not found by semantic/BM25) */
|
|
44
|
+
literalOnly: boolean;
|
|
45
|
+
/** Match type of the best match */
|
|
46
|
+
bestMatchType?: LiteralMatchType;
|
|
47
|
+
/** Confidence of the best match */
|
|
48
|
+
bestConfidence?: LiteralConfidence;
|
|
49
|
+
/** Number of literal matches */
|
|
50
|
+
matchCount: number;
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Calculate the literal score contribution for a chunk.
|
|
54
|
+
*
|
|
55
|
+
* @param matches - Literal matches for the chunk (may be empty)
|
|
56
|
+
* @param hasSemanticOrBm25 - Whether the chunk was found by semantic or BM25 search
|
|
57
|
+
* @returns Score contribution details
|
|
58
|
+
*/
|
|
59
|
+
export declare function calculateLiteralContribution(matches: LiteralMatch[], hasSemanticOrBm25: boolean): LiteralScoreContribution;
|
|
60
|
+
/**
|
|
61
|
+
* Apply literal boosting to a base score.
|
|
62
|
+
*
|
|
63
|
+
* Scoring rules:
|
|
64
|
+
* - If chunk has both semantic/BM25 and literal match: multiply base by multiplier
|
|
65
|
+
* - If chunk has only literal match: use BASE_SCORE
|
|
66
|
+
* - If chunk has no literal match: use base score as-is
|
|
67
|
+
*
|
|
68
|
+
* @param baseScore - Score from semantic/BM25 search (0 if not found)
|
|
69
|
+
* @param matches - Literal matches for the chunk
|
|
70
|
+
* @param hasSemanticOrBm25 - Whether the chunk was found by semantic or BM25
|
|
71
|
+
* @returns Final score after literal boosting
|
|
72
|
+
*/
|
|
73
|
+
export declare function applyLiteralBoost(baseScore: number, matches: LiteralMatch[], hasSemanticOrBm25: boolean): number;
|
|
74
|
+
/**
|
|
75
|
+
* Merge results from three search sources with literal boosting.
|
|
76
|
+
*
|
|
77
|
+
* @param semanticBm25Results - Results from semantic and BM25 search
|
|
78
|
+
* @param literalMatchMap - Map from chunk ID to literal matches
|
|
79
|
+
* @returns Results with literal boosting applied
|
|
80
|
+
*/
|
|
81
|
+
export interface MergeInput {
|
|
82
|
+
/** Chunk ID */
|
|
83
|
+
chunkId: string;
|
|
84
|
+
/** Score from semantic/BM25 search */
|
|
85
|
+
baseScore: number;
|
|
86
|
+
}
|
|
87
|
+
export interface MergeOutput extends MergeInput {
|
|
88
|
+
/** Final score after literal boosting */
|
|
89
|
+
finalScore: number;
|
|
90
|
+
/** Literal contribution details */
|
|
91
|
+
literalContribution: LiteralScoreContribution;
|
|
92
|
+
}
|
|
93
|
+
export declare function mergeWithLiteralBoost(semanticBm25Results: MergeInput[], literalMatchMap: Map<string, LiteralMatch[]>): MergeOutput[];
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Query Literal Parser
|
|
3
|
+
*
|
|
4
|
+
* Parses search queries to extract literals for exact-match boosting.
|
|
5
|
+
* Supports explicit detection (backticks, quotes) and implicit detection (casing patterns).
|
|
6
|
+
*
|
|
7
|
+
* This is a pure domain service with no external dependencies.
|
|
8
|
+
*/
|
|
9
|
+
import type { QueryLiteralParseResult } from "../entities/literal";
|
|
10
|
+
/**
|
|
11
|
+
* Parse a search query to extract literals.
|
|
12
|
+
*
|
|
13
|
+
* Detects:
|
|
14
|
+
* - Explicit literals: `backticks` or "quotes"
|
|
15
|
+
* - Implicit literals: PascalCase, camelCase, SCREAMING_SNAKE, snake_case, kebab-case
|
|
16
|
+
*
|
|
17
|
+
* @param query - The search query to parse
|
|
18
|
+
* @returns Detected literals and remaining query for semantic search
|
|
19
|
+
*/
|
|
20
|
+
export declare function parseQueryLiterals(query: string): QueryLiteralParseResult;
|