raggrep 0.15.0 → 0.17.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +112 -8
- package/dist/cli/main.js +827 -521
- package/dist/cli/main.js.map +26 -19
- package/dist/domain/ports/embedding.d.ts +10 -0
- package/dist/domain/ports/index.d.ts +1 -1
- package/dist/domain/services/chunkContext.d.ts +76 -0
- package/dist/domain/services/index.d.ts +1 -0
- package/dist/index.js +638 -390
- package/dist/index.js.map +25 -18
- package/dist/infrastructure/config/configLoader.d.ts +9 -11
- package/dist/infrastructure/config/index.d.ts +1 -1
- package/dist/infrastructure/embeddings/embeddingPaths.d.ts +6 -0
- package/dist/infrastructure/embeddings/embeddingProviderFactory.d.ts +9 -0
- package/dist/infrastructure/embeddings/globalEmbeddings.d.ts +28 -0
- package/dist/infrastructure/embeddings/huggingfaceEmbeddingProvider.d.ts +21 -0
- package/dist/infrastructure/embeddings/index.d.ts +9 -2
- package/dist/infrastructure/embeddings/modelCache.d.ts +10 -0
- package/dist/infrastructure/embeddings/modelCatalog.d.ts +23 -0
- package/dist/infrastructure/embeddings/xenovaEmbeddingProvider.d.ts +23 -0
- package/dist/infrastructure/index.d.ts +1 -1
- package/package.json +7 -3
- package/dist/infrastructure/embeddings/transformersEmbedding.d.ts +0 -52
|
@@ -10,12 +10,22 @@ import type { Logger } from "./logger";
|
|
|
10
10
|
* Available embedding model names
|
|
11
11
|
*/
|
|
12
12
|
export type EmbeddingModelName = "all-MiniLM-L6-v2" | "all-MiniLM-L12-v2" | "bge-small-en-v1.5" | "paraphrase-MiniLM-L3-v2" | "nomic-embed-text-v1.5";
|
|
13
|
+
/**
|
|
14
|
+
* Which Transformers.js distribution backs local embedding inference.
|
|
15
|
+
* Used by the composition root / factory to pick an adapter implementation.
|
|
16
|
+
*/
|
|
17
|
+
export type EmbeddingRuntime = "xenova" | "huggingface";
|
|
13
18
|
/**
|
|
14
19
|
* Configuration for embedding provider
|
|
15
20
|
*/
|
|
16
21
|
export interface EmbeddingConfig {
|
|
17
22
|
/** Model name to use */
|
|
18
23
|
model: EmbeddingModelName;
|
|
24
|
+
/**
|
|
25
|
+
* Embedding runtime (`@xenova/transformers` vs `@huggingface/transformers`).
|
|
26
|
+
* Defaults to `huggingface` when omitted.
|
|
27
|
+
*/
|
|
28
|
+
runtime?: EmbeddingRuntime;
|
|
19
29
|
/** Whether to show progress during model loading (deprecated, use logger instead) */
|
|
20
30
|
showProgress?: boolean;
|
|
21
31
|
/** Logger for reporting download progress */
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* These are implemented by infrastructure adapters.
|
|
6
6
|
*/
|
|
7
7
|
export type { FileSystem, FileStats } from "./filesystem";
|
|
8
|
-
export type { EmbeddingProvider, EmbeddingConfig, EmbeddingModelName, } from "./embedding";
|
|
8
|
+
export type { EmbeddingProvider, EmbeddingConfig, EmbeddingModelName, EmbeddingRuntime, } from "./embedding";
|
|
9
9
|
export type { IndexStorage } from "./storage";
|
|
10
10
|
export type { Logger, ProgressInfo, LoggerFactory } from "./logger";
|
|
11
11
|
export type { IParser, IGrammarManager, ParsedChunk, ParseResult, ParserConfig, ParserLanguage, GrammarStatus, } from "./parser";
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunk Context Preparation Service
|
|
3
|
+
*
|
|
4
|
+
* Provides a unified utility for preparing chunk content with path context
|
|
5
|
+
* for embedding. This ensures consistent behavior across all indexing modules.
|
|
6
|
+
*
|
|
7
|
+
* The path context helps embeddings understand the structural location of code,
|
|
8
|
+
* improving search relevance for queries that reference file paths or domains.
|
|
9
|
+
*/
|
|
10
|
+
/**
|
|
11
|
+
* Options for preparing a chunk for embedding.
|
|
12
|
+
*/
|
|
13
|
+
export interface ChunkContextOptions {
|
|
14
|
+
/** Relative file path (from project root) */
|
|
15
|
+
filepath: string;
|
|
16
|
+
/** The chunk content to embed */
|
|
17
|
+
content: string;
|
|
18
|
+
/** Optional name for the chunk (e.g., function name, heading) */
|
|
19
|
+
name?: string;
|
|
20
|
+
/** Optional documentation comment (e.g., JSDoc, docstring) */
|
|
21
|
+
docComment?: string;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Prepare chunk content for embedding by injecting path context.
|
|
25
|
+
*
|
|
26
|
+
* This function should be used by ALL indexing modules to ensure consistent
|
|
27
|
+
* path context injection. The path context is prepended to the content to
|
|
28
|
+
* give the embedding model awareness of the file's location in the project.
|
|
29
|
+
*
|
|
30
|
+
* Format: `[domain layer segment1 segment2] [name:] [docComment] content`
|
|
31
|
+
*
|
|
32
|
+
* @param options - Chunk context options
|
|
33
|
+
* @returns Content string ready for embedding
|
|
34
|
+
*
|
|
35
|
+
* @example
|
|
36
|
+
* ```typescript
|
|
37
|
+
* const embeddingContent = prepareChunkForEmbedding({
|
|
38
|
+
* filepath: 'services/dynamodb/streams/handler.ts',
|
|
39
|
+
* content: 'export function processStream() { ... }',
|
|
40
|
+
* name: 'processStream',
|
|
41
|
+
* });
|
|
42
|
+
* // Returns: "[dynamodb service streams] processStream: export function processStream() { ... }"
|
|
43
|
+
* ```
|
|
44
|
+
*/
|
|
45
|
+
export declare function prepareChunkForEmbedding(options: ChunkContextOptions): string;
|
|
46
|
+
/**
|
|
47
|
+
* Extract path keywords for inclusion in FileSummary.
|
|
48
|
+
*
|
|
49
|
+
* This provides a deduplicated list of keywords extracted from the file path,
|
|
50
|
+
* suitable for BM25 and other keyword-based search mechanisms.
|
|
51
|
+
*
|
|
52
|
+
* @param filepath - Relative file path
|
|
53
|
+
* @returns Array of deduplicated lowercase keywords
|
|
54
|
+
*
|
|
55
|
+
* @example
|
|
56
|
+
* ```typescript
|
|
57
|
+
* const keywords = extractPathKeywordsForFileSummary('services/dynamodb/streams/handler.ts');
|
|
58
|
+
* // Returns: ['services', 'dynamodb', 'streams', 'handler']
|
|
59
|
+
* ```
|
|
60
|
+
*/
|
|
61
|
+
export declare function extractPathKeywordsForFileSummary(filepath: string): string[];
|
|
62
|
+
/**
|
|
63
|
+
* Get path context data for inclusion in FileSummary.
|
|
64
|
+
*
|
|
65
|
+
* This returns the parsed path context that can be stored in FileSummary
|
|
66
|
+
* for later use during search scoring.
|
|
67
|
+
*
|
|
68
|
+
* @param filepath - Relative file path
|
|
69
|
+
* @returns Path context object
|
|
70
|
+
*/
|
|
71
|
+
export declare function getPathContextForFileSummary(filepath: string): {
|
|
72
|
+
segments: string[];
|
|
73
|
+
layer?: string;
|
|
74
|
+
domain?: string;
|
|
75
|
+
depth: number;
|
|
76
|
+
};
|
|
@@ -18,3 +18,4 @@ export { introspectFile, findNearestReadme, introspectionToKeywords, detectScope
|
|
|
18
18
|
export { validateConfig, formatValidationIssues, type ValidationIssue, type ValidationResult, } from "./configValidator";
|
|
19
19
|
export { calculatePhraseMatch, hasExactPhrase, calculateTokenCoverage, tokenizeForMatching, PHRASE_MATCH_CONSTANTS, type PhraseMatchResult, } from "./phraseMatch";
|
|
20
20
|
export { isIdentifierQuery, extractSearchLiteral, findOccurrences, searchFiles, extractIdentifiersFromContent, isSearchableContent, } from "./simpleSearch";
|
|
21
|
+
export { prepareChunkForEmbedding, extractPathKeywordsForFileSummary, getPathContextForFileSummary, type ChunkContextOptions, } from "./chunkContext";
|