raggrep 0.9.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -5
- package/dist/cli/main.js +6469 -671
- package/dist/cli/main.js.map +23 -14
- package/dist/domain/entities/introspection.d.ts +2 -0
- package/dist/domain/entities/literal.d.ts +4 -0
- package/dist/domain/ports/index.d.ts +1 -0
- package/dist/domain/ports/parser.d.ts +121 -0
- package/dist/domain/services/configValidator.d.ts +44 -0
- package/dist/domain/services/configValidator.test.d.ts +1 -0
- package/dist/domain/services/index.d.ts +4 -2
- package/dist/domain/services/introspection.d.ts +22 -2
- package/dist/domain/services/literalExtractor.d.ts +16 -0
- package/dist/domain/services/literalExtractor.test.d.ts +6 -0
- package/dist/domain/services/literalScorer.d.ts +35 -0
- package/dist/index.js +6397 -600
- package/dist/index.js.map +23 -14
- package/dist/infrastructure/index.d.ts +1 -0
- package/dist/infrastructure/introspection/IntrospectionIndex.d.ts +5 -1
- package/dist/infrastructure/parsing/grammarManager.d.ts +83 -0
- package/dist/infrastructure/parsing/index.d.ts +15 -0
- package/dist/infrastructure/parsing/parserFactory.d.ts +56 -0
- package/dist/infrastructure/parsing/parsing.test.d.ts +10 -0
- package/dist/infrastructure/parsing/treeSitterParser.d.ts +103 -0
- package/dist/infrastructure/parsing/typescriptParser.d.ts +43 -0
- package/dist/infrastructure/storage/literalIndex.d.ts +33 -1
- package/dist/modules/language/go/index.d.ts +58 -0
- package/dist/modules/language/go/index.test.d.ts +1 -0
- package/dist/modules/language/python/index.d.ts +59 -0
- package/dist/modules/language/rust/index.d.ts +58 -0
- package/dist/modules/language/rust/index.test.d.ts +1 -0
- package/dist/tests/vocabulary.test.d.ts +10 -0
- package/package.json +3 -2
|
@@ -46,6 +46,8 @@ export interface FileIntrospection {
|
|
|
46
46
|
depth: number;
|
|
47
47
|
/** Path segments for keyword matching */
|
|
48
48
|
pathSegments: string[];
|
|
49
|
+
/** Relative path to nearest README in directory hierarchy */
|
|
50
|
+
nearestReadme?: string;
|
|
49
51
|
}
|
|
50
52
|
/**
|
|
51
53
|
* Project structure metadata.
|
|
@@ -31,6 +31,8 @@ export interface ExtractedLiteral {
|
|
|
31
31
|
type: LiteralType;
|
|
32
32
|
/** How this chunk relates to the literal */
|
|
33
33
|
matchType: LiteralMatchType;
|
|
34
|
+
/** Vocabulary words extracted from the literal (e.g., getUserById → ["get", "user", "by", "id"]) */
|
|
35
|
+
vocabulary?: string[];
|
|
34
36
|
}
|
|
35
37
|
/**
|
|
36
38
|
* A literal detected in a search query.
|
|
@@ -80,6 +82,8 @@ export interface LiteralIndexEntry {
|
|
|
80
82
|
originalCasing: string;
|
|
81
83
|
type: LiteralType;
|
|
82
84
|
matchType: LiteralMatchType;
|
|
85
|
+
/** Vocabulary words for partial matching */
|
|
86
|
+
vocabulary?: string[];
|
|
83
87
|
}
|
|
84
88
|
/**
|
|
85
89
|
* Serialized literal index data for persistence.
|
|
@@ -8,3 +8,4 @@ export type { FileSystem, FileStats } from "./filesystem";
|
|
|
8
8
|
export type { EmbeddingProvider, EmbeddingConfig, EmbeddingModelName, } from "./embedding";
|
|
9
9
|
export type { IndexStorage } from "./storage";
|
|
10
10
|
export type { Logger, ProgressInfo, LoggerFactory } from "./logger";
|
|
11
|
+
export type { IParser, IGrammarManager, ParsedChunk, ParseResult, ParserConfig, ParserLanguage, GrammarStatus, } from "./parser";
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parser Port
|
|
3
|
+
*
|
|
4
|
+
* Defines the interface for code parsers that can extract semantic chunks from source files.
|
|
5
|
+
* This abstraction allows for different parsing implementations (TypeScript API, tree-sitter)
|
|
6
|
+
* while keeping the domain layer independent of specific parsing technologies.
|
|
7
|
+
*/
|
|
8
|
+
import type { ChunkType } from "../entities/chunk";
|
|
9
|
+
/**
|
|
10
|
+
* A parsed chunk of code with location and metadata.
|
|
11
|
+
* This is the output of any parser implementation.
|
|
12
|
+
*/
|
|
13
|
+
export interface ParsedChunk {
|
|
14
|
+
/** The source code content */
|
|
15
|
+
content: string;
|
|
16
|
+
/** 1-based start line number */
|
|
17
|
+
startLine: number;
|
|
18
|
+
/** 1-based end line number */
|
|
19
|
+
endLine: number;
|
|
20
|
+
/** The type of code construct */
|
|
21
|
+
type: ChunkType;
|
|
22
|
+
/** Name of the construct (function name, class name, etc.) */
|
|
23
|
+
name?: string;
|
|
24
|
+
/** Whether this is exported */
|
|
25
|
+
isExported?: boolean;
|
|
26
|
+
/** Documentation comment if present (JSDoc, docstring, etc.) */
|
|
27
|
+
docComment?: string;
|
|
28
|
+
/** Line comments associated with this chunk */
|
|
29
|
+
comments?: string[];
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Supported languages for parsing.
|
|
33
|
+
*/
|
|
34
|
+
export type ParserLanguage = "typescript" | "javascript" | "python" | "go" | "rust" | "java";
|
|
35
|
+
/**
|
|
36
|
+
* Parser configuration options.
|
|
37
|
+
*/
|
|
38
|
+
export interface ParserConfig {
|
|
39
|
+
/** Include full file chunk in output */
|
|
40
|
+
includeFullFileChunk?: boolean;
|
|
41
|
+
/** Associate comments with code chunks */
|
|
42
|
+
associateComments?: boolean;
|
|
43
|
+
/** Language-specific options */
|
|
44
|
+
languageOptions?: Record<string, unknown>;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Result of parsing a file.
|
|
48
|
+
*/
|
|
49
|
+
export interface ParseResult {
|
|
50
|
+
/** Parsed chunks */
|
|
51
|
+
chunks: ParsedChunk[];
|
|
52
|
+
/** Language detected/used */
|
|
53
|
+
language: ParserLanguage;
|
|
54
|
+
/** Whether parsing succeeded */
|
|
55
|
+
success: boolean;
|
|
56
|
+
/** Error message if parsing failed */
|
|
57
|
+
error?: string;
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Parser interface for extracting semantic chunks from source code.
|
|
61
|
+
*
|
|
62
|
+
* Implementations:
|
|
63
|
+
* - TypeScriptParser: Uses TypeScript Compiler API for TS/JS files
|
|
64
|
+
* - TreeSitterParser: Uses tree-sitter for Python, Go, Rust, etc.
|
|
65
|
+
*/
|
|
66
|
+
export interface IParser {
|
|
67
|
+
/**
|
|
68
|
+
* Languages this parser supports.
|
|
69
|
+
*/
|
|
70
|
+
readonly supportedLanguages: ParserLanguage[];
|
|
71
|
+
/**
|
|
72
|
+
* Parse source code into semantic chunks.
|
|
73
|
+
*
|
|
74
|
+
* @param content - The source code content
|
|
75
|
+
* @param filepath - The file path (used for language detection and context)
|
|
76
|
+
* @param config - Optional parser configuration
|
|
77
|
+
* @returns Parse result with chunks or error
|
|
78
|
+
*/
|
|
79
|
+
parse(content: string, filepath: string, config?: ParserConfig): Promise<ParseResult>;
|
|
80
|
+
/**
|
|
81
|
+
* Check if the parser can handle a specific file.
|
|
82
|
+
*
|
|
83
|
+
* @param filepath - The file path to check
|
|
84
|
+
* @returns True if the parser can handle this file
|
|
85
|
+
*/
|
|
86
|
+
canParse(filepath: string): boolean;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Grammar status for dynamic installation.
|
|
90
|
+
*/
|
|
91
|
+
export interface GrammarStatus {
|
|
92
|
+
/** Language identifier */
|
|
93
|
+
language: ParserLanguage;
|
|
94
|
+
/** Whether the grammar is installed */
|
|
95
|
+
installed: boolean;
|
|
96
|
+
/** Grammar package name if installed */
|
|
97
|
+
packageName?: string;
|
|
98
|
+
/** Error message if installation failed */
|
|
99
|
+
error?: string;
|
|
100
|
+
}
|
|
101
|
+
/**
|
|
102
|
+
* Grammar manager interface for dynamic grammar installation.
|
|
103
|
+
*/
|
|
104
|
+
export interface IGrammarManager {
|
|
105
|
+
/**
|
|
106
|
+
* Check if a grammar is installed.
|
|
107
|
+
*/
|
|
108
|
+
isInstalled(language: ParserLanguage): Promise<boolean>;
|
|
109
|
+
/**
|
|
110
|
+
* Install a grammar for a language.
|
|
111
|
+
*/
|
|
112
|
+
install(language: ParserLanguage): Promise<GrammarStatus>;
|
|
113
|
+
/**
|
|
114
|
+
* Get status of all grammars.
|
|
115
|
+
*/
|
|
116
|
+
getStatus(): Promise<GrammarStatus[]>;
|
|
117
|
+
/**
|
|
118
|
+
* Pre-install grammars for a batch of languages.
|
|
119
|
+
*/
|
|
120
|
+
preInstallBatch(languages: ParserLanguage[]): Promise<GrammarStatus[]>;
|
|
121
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Configuration Validator
|
|
3
|
+
*
|
|
4
|
+
* Validates RAGgrep configuration for correctness and provides
|
|
5
|
+
* helpful error messages for invalid configurations.
|
|
6
|
+
*/
|
|
7
|
+
import type { Config } from "../entities/config";
|
|
8
|
+
/**
|
|
9
|
+
* Validation result for a single field or section.
|
|
10
|
+
*/
|
|
11
|
+
export interface ValidationIssue {
|
|
12
|
+
/** The path to the invalid field (e.g., "modules[0].id") */
|
|
13
|
+
path: string;
|
|
14
|
+
/** The type of issue: error (invalid), warning (suboptimal), info (suggestion) */
|
|
15
|
+
severity: "error" | "warning" | "info";
|
|
16
|
+
/** Human-readable description of the issue */
|
|
17
|
+
message: string;
|
|
18
|
+
/** Suggested fix (optional) */
|
|
19
|
+
suggestion?: string;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Overall validation result.
|
|
23
|
+
*/
|
|
24
|
+
export interface ValidationResult {
|
|
25
|
+
/** Whether the configuration is valid (no errors) */
|
|
26
|
+
valid: boolean;
|
|
27
|
+
/** List of all issues found */
|
|
28
|
+
issues: ValidationIssue[];
|
|
29
|
+
/** Helper method to get issues by severity */
|
|
30
|
+
getErrors(): ValidationIssue[];
|
|
31
|
+
getWarnings(): ValidationIssue[];
|
|
32
|
+
getInfos(): ValidationIssue[];
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Validate a RAGgrep configuration.
|
|
36
|
+
*
|
|
37
|
+
* @param config - The configuration to validate
|
|
38
|
+
* @returns Validation result with any issues found
|
|
39
|
+
*/
|
|
40
|
+
export declare function validateConfig(config: Config): ValidationResult;
|
|
41
|
+
/**
|
|
42
|
+
* Format validation issues for display.
|
|
43
|
+
*/
|
|
44
|
+
export declare function formatValidationIssues(issues: ValidationIssue[]): string;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -10,7 +10,9 @@ export { cosineSimilarity, euclideanDistance } from "./similarity";
|
|
|
10
10
|
export { detectQueryIntent, extractQueryTerms, calculateFileTypeBoost, isSourceCodeFile, isDocFile, isDataFile, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS, DATA_EXTENSIONS, type QueryIntent, } from "./queryIntent";
|
|
11
11
|
export { createLineBasedChunks, createSingleChunk, generateChunkId, DEFAULT_CHUNK_SIZE, DEFAULT_OVERLAP, type TextChunk, type ChunkingOptions, } from "./chunking";
|
|
12
12
|
export { parseQueryLiterals } from "./queryLiteralParser";
|
|
13
|
-
export { extractLiterals, extractLiteralsWithReferences, } from "./literalExtractor";
|
|
14
|
-
export { calculateLiteralMultiplier, calculateMaxMultiplier, calculateLiteralContribution, applyLiteralBoost, mergeWithLiteralBoost, LITERAL_SCORING_CONSTANTS, type LiteralScoreContribution, type MergeInput, type MergeOutput, } from "./literalScorer";
|
|
13
|
+
export { extractLiterals, extractLiteralsWithReferences, extractVocabulary, } from "./literalExtractor";
|
|
14
|
+
export { calculateLiteralMultiplier, calculateMaxMultiplier, calculateLiteralContribution, calculateVocabularyMatch, applyLiteralBoost, mergeWithLiteralBoost, LITERAL_SCORING_CONSTANTS, type LiteralScoreContribution, type VocabularyMatchResult, type MergeInput, type MergeOutput, } from "./literalScorer";
|
|
15
15
|
export { getSynonyms, expandQuery, DEFAULT_LEXICON, EXPANSION_WEIGHTS, DEFAULT_EXPANSION_OPTIONS, } from "./lexicon";
|
|
16
16
|
export { extractJsonPaths, extractJsonKeywords } from "./jsonPathExtractor";
|
|
17
|
+
export { introspectFile, findNearestReadme, introspectionToKeywords, detectScopeFromName, findProjectForFile, calculateIntrospectionBoost, type IntrospectFileOptions, } from "./introspection";
|
|
18
|
+
export { validateConfig, formatValidationIssues, type ValidationIssue, type ValidationResult, } from "./configValidator";
|
|
@@ -5,14 +5,34 @@
|
|
|
5
5
|
* No I/O operations - all functions operate on provided data.
|
|
6
6
|
*/
|
|
7
7
|
import type { FileIntrospection, Project, ProjectStructure, Scope } from "../entities/introspection";
|
|
8
|
+
/**
|
|
9
|
+
* Options for introspecting a file.
|
|
10
|
+
*/
|
|
11
|
+
export interface IntrospectFileOptions {
|
|
12
|
+
/** File content for framework detection */
|
|
13
|
+
fileContent?: string;
|
|
14
|
+
/**
|
|
15
|
+
* Function to check if a file exists.
|
|
16
|
+
* Used for README discovery without I/O in the domain layer.
|
|
17
|
+
*/
|
|
18
|
+
fileExists?: (filepath: string) => boolean;
|
|
19
|
+
}
|
|
8
20
|
/**
|
|
9
21
|
* Extract introspection metadata for a file.
|
|
10
22
|
*
|
|
11
23
|
* @param filepath - Relative file path
|
|
12
24
|
* @param structure - Project structure (from detectProjectStructure)
|
|
13
|
-
* @param
|
|
25
|
+
* @param options - Optional configuration including file content and file existence checker
|
|
26
|
+
*/
|
|
27
|
+
export declare function introspectFile(filepath: string, structure: ProjectStructure, options?: IntrospectFileOptions | string): FileIntrospection;
|
|
28
|
+
/**
|
|
29
|
+
* Find the nearest README file by traversing up the directory hierarchy.
|
|
30
|
+
*
|
|
31
|
+
* @param filepath - Relative file path (normalized with forward slashes)
|
|
32
|
+
* @param fileExists - Function to check if a file exists
|
|
33
|
+
* @returns Relative path to the nearest README, or undefined if none found
|
|
14
34
|
*/
|
|
15
|
-
export declare function
|
|
35
|
+
export declare function findNearestReadme(filepath: string, fileExists: (filepath: string) => boolean): string | undefined;
|
|
16
36
|
/**
|
|
17
37
|
* Extract keywords from introspection for search boosting.
|
|
18
38
|
*/
|
|
@@ -8,6 +8,20 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import type { Chunk } from "../entities/chunk";
|
|
10
10
|
import type { ExtractedLiteral } from "../entities/literal";
|
|
11
|
+
/**
|
|
12
|
+
* Extract vocabulary words from a literal (identifier) name.
|
|
13
|
+
*
|
|
14
|
+
* Handles multiple naming conventions:
|
|
15
|
+
* - camelCase: getUserById → ["get", "user", "by", "id"]
|
|
16
|
+
* - PascalCase: AuthService → ["auth", "service"]
|
|
17
|
+
* - snake_case: get_user_by_id → ["get", "user", "by", "id"]
|
|
18
|
+
* - kebab-case: get-user-by-id → ["get", "user", "by", "id"]
|
|
19
|
+
* - SCREAMING_SNAKE_CASE: MAX_RETRY_COUNT → ["max", "retry", "count"]
|
|
20
|
+
*
|
|
21
|
+
* @param literal - The identifier name to extract vocabulary from
|
|
22
|
+
* @returns Array of unique vocabulary words (lowercase, length > 1)
|
|
23
|
+
*/
|
|
24
|
+
export declare function extractVocabulary(literal: string): string[];
|
|
11
25
|
/**
|
|
12
26
|
* Extract literals from a code chunk.
|
|
13
27
|
*
|
|
@@ -15,6 +29,8 @@ import type { ExtractedLiteral } from "../entities/literal";
|
|
|
15
29
|
* as a "definition" literal. The name comes from proper AST parsing,
|
|
16
30
|
* so it's accurate and reliable.
|
|
17
31
|
*
|
|
32
|
+
* Also extracts vocabulary words from the literal for partial matching.
|
|
33
|
+
*
|
|
18
34
|
* @param chunk - The code chunk to extract literals from
|
|
19
35
|
* @returns Array of extracted literals (typically just the definition)
|
|
20
36
|
*/
|
|
@@ -15,6 +15,17 @@ export declare const LITERAL_SCORING_CONSTANTS: {
|
|
|
15
15
|
BASE_SCORE: number;
|
|
16
16
|
/** Multipliers by match type and confidence */
|
|
17
17
|
MULTIPLIERS: Record<LiteralMatchType, Record<LiteralConfidence, number>>;
|
|
18
|
+
/** Vocabulary match scoring */
|
|
19
|
+
VOCABULARY: {
|
|
20
|
+
/** Base multiplier for vocabulary-only matches (no exact literal match) */
|
|
21
|
+
BASE_MULTIPLIER: number;
|
|
22
|
+
/** Bonus per additional vocabulary word matched (up to a limit) */
|
|
23
|
+
PER_WORD_BONUS: number;
|
|
24
|
+
/** Maximum vocabulary bonus */
|
|
25
|
+
MAX_VOCABULARY_BONUS: number;
|
|
26
|
+
/** Minimum vocabulary words required for a match to count */
|
|
27
|
+
MIN_WORDS_FOR_MATCH: number;
|
|
28
|
+
};
|
|
18
29
|
};
|
|
19
30
|
/**
|
|
20
31
|
* Calculate the literal multiplier for a given match type and confidence.
|
|
@@ -33,6 +44,30 @@ export declare function calculateLiteralMultiplier(matchType: LiteralMatchType,
|
|
|
33
44
|
* @returns The maximum multiplier, or 1.0 if no matches
|
|
34
45
|
*/
|
|
35
46
|
export declare function calculateMaxMultiplier(matches: LiteralMatch[]): number;
|
|
47
|
+
/**
|
|
48
|
+
* Result of vocabulary-based matching.
|
|
49
|
+
*/
|
|
50
|
+
export interface VocabularyMatchResult {
|
|
51
|
+
/** Number of vocabulary words that matched */
|
|
52
|
+
matchedWordCount: number;
|
|
53
|
+
/** The vocabulary words that matched */
|
|
54
|
+
matchedWords: string[];
|
|
55
|
+
/** Multiplier to apply based on vocabulary match */
|
|
56
|
+
multiplier: number;
|
|
57
|
+
/** Whether this is a meaningful match (above threshold) */
|
|
58
|
+
isSignificant: boolean;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Calculate vocabulary-based scoring for a chunk.
|
|
62
|
+
*
|
|
63
|
+
* This is used for partial matching when no exact literal match exists.
|
|
64
|
+
* E.g., query "user authentication" might match chunk with "getUserAuth" literal.
|
|
65
|
+
*
|
|
66
|
+
* @param queryVocabulary - Vocabulary words extracted from the query
|
|
67
|
+
* @param chunkVocabulary - Vocabulary words extracted from chunk literals
|
|
68
|
+
* @returns Vocabulary match result with multiplier
|
|
69
|
+
*/
|
|
70
|
+
export declare function calculateVocabularyMatch(queryVocabulary: string[], chunkVocabulary: string[]): VocabularyMatchResult;
|
|
36
71
|
/**
|
|
37
72
|
* Score contribution from literal matches.
|
|
38
73
|
* Used for debugging and explainability.
|