raggrep 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +15 -0
- package/dist/application/index.d.ts +7 -0
- package/dist/application/usecases/cleanupIndex.d.ts +54 -0
- package/dist/application/usecases/index.d.ts +9 -0
- package/dist/application/usecases/indexDirectory.d.ts +54 -0
- package/dist/application/usecases/searchIndex.d.ts +48 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +1596 -0
- package/dist/cli/main.js.map +22 -0
- package/dist/composition.d.ts +52 -0
- package/dist/domain/entities/chunk.d.ts +41 -0
- package/dist/domain/entities/config.d.ts +43 -0
- package/dist/domain/entities/fileIndex.d.ts +58 -0
- package/dist/domain/entities/fileSummary.d.ts +61 -0
- package/dist/domain/entities/index.d.ts +14 -0
- package/dist/domain/entities/searchResult.d.ts +36 -0
- package/dist/domain/index.d.ts +11 -0
- package/dist/domain/ports/embedding.d.ts +60 -0
- package/dist/domain/ports/filesystem.d.ts +78 -0
- package/dist/domain/ports/index.d.ts +10 -0
- package/dist/domain/ports/storage.d.ts +79 -0
- package/dist/domain/services/bm25.d.ts +82 -0
- package/dist/domain/services/bm25.test.d.ts +4 -0
- package/dist/domain/services/index.d.ts +8 -0
- package/dist/domain/services/keywords.d.ts +27 -0
- package/dist/index.d.ts +98 -0
- package/dist/index.js +1378 -0
- package/dist/index.js.map +22 -0
- package/dist/indexer/index.d.ts +33 -0
- package/dist/infrastructure/embeddings/index.d.ts +4 -0
- package/dist/infrastructure/embeddings/transformersEmbedding.d.ts +34 -0
- package/dist/infrastructure/filesystem/index.d.ts +4 -0
- package/dist/infrastructure/filesystem/nodeFileSystem.d.ts +28 -0
- package/dist/infrastructure/index.d.ts +9 -0
- package/dist/infrastructure/storage/fileIndexStorage.d.ts +68 -0
- package/dist/infrastructure/storage/index.d.ts +4 -0
- package/dist/modules/registry.d.ts +3 -0
- package/dist/modules/semantic/index.d.ts +55 -0
- package/dist/modules/semantic/parseCode.d.ts +44 -0
- package/dist/modules/semantic/parseCode.test.d.ts +4 -0
- package/dist/search/index.d.ts +11 -0
- package/dist/types.d.ts +84 -0
- package/dist/utils/bm25.d.ts +9 -0
- package/dist/utils/config.d.ts +45 -0
- package/dist/utils/embeddings.d.ts +46 -0
- package/dist/utils/embeddings.test.d.ts +4 -0
- package/dist/utils/tieredIndex.d.ts +100 -0
- package/package.json +66 -0
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for the RAG system
|
|
3
|
+
*
|
|
4
|
+
* This file re-exports domain entities and defines module interfaces.
|
|
5
|
+
* For new code, prefer importing directly from 'domain/entities'.
|
|
6
|
+
*/
|
|
7
|
+
export type { Chunk, ChunkType, FileIndex, FileManifestEntry, ModuleManifest, GlobalManifest, FileSummary, Tier1Manifest, SearchResult, SearchOptions, Config, ModuleConfig, } from './domain/entities';
|
|
8
|
+
export { createChunkId, DEFAULT_SEARCH_OPTIONS, DEFAULT_IGNORE_PATHS, DEFAULT_EXTENSIONS, createDefaultConfig, } from './domain/entities';
|
|
9
|
+
import type { Config, FileIndex, SearchResult, SearchOptions, ModuleConfig } from './domain/entities';
|
|
10
|
+
/**
|
|
11
|
+
* Context provided to modules during indexing
|
|
12
|
+
*/
|
|
13
|
+
export interface IndexContext {
|
|
14
|
+
rootDir: string;
|
|
15
|
+
config: Config;
|
|
16
|
+
/** Get the content of a file */
|
|
17
|
+
readFile: (filepath: string) => Promise<string>;
|
|
18
|
+
/** Get file stats */
|
|
19
|
+
getFileStats: (filepath: string) => Promise<{
|
|
20
|
+
lastModified: string;
|
|
21
|
+
}>;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Context provided to modules during search
|
|
25
|
+
*/
|
|
26
|
+
export interface SearchContext {
|
|
27
|
+
rootDir: string;
|
|
28
|
+
config: Config;
|
|
29
|
+
/** Load index data for a specific file */
|
|
30
|
+
loadFileIndex: (filepath: string) => Promise<FileIndex | null>;
|
|
31
|
+
/** List all indexed files */
|
|
32
|
+
listIndexedFiles: () => Promise<string[]>;
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Base interface for index modules
|
|
36
|
+
*
|
|
37
|
+
* Modules provide different strategies for indexing and retrieving code.
|
|
38
|
+
* Examples:
|
|
39
|
+
* - SemanticModule: Uses text embeddings for natural language search
|
|
40
|
+
* - SymbolModule: Uses TypeScript/LSP symbol information
|
|
41
|
+
* - ASTModule: Uses AST-based code structure analysis
|
|
42
|
+
*/
|
|
43
|
+
export interface IndexModule {
|
|
44
|
+
/** Unique identifier for this module */
|
|
45
|
+
readonly id: string;
|
|
46
|
+
/** Human-readable name */
|
|
47
|
+
readonly name: string;
|
|
48
|
+
/** Description of what this module indexes */
|
|
49
|
+
readonly description: string;
|
|
50
|
+
/** Version of the module (for index compatibility) */
|
|
51
|
+
readonly version: string;
|
|
52
|
+
/**
|
|
53
|
+
* Index a single file
|
|
54
|
+
* @returns FileIndex with module-specific data, or null if file should be skipped
|
|
55
|
+
*/
|
|
56
|
+
indexFile(filepath: string, content: string, ctx: IndexContext): Promise<FileIndex | null>;
|
|
57
|
+
/**
|
|
58
|
+
* Search the index with a query
|
|
59
|
+
* @returns Ranked search results
|
|
60
|
+
*/
|
|
61
|
+
search(query: string, ctx: SearchContext, options?: SearchOptions): Promise<SearchResult[]>;
|
|
62
|
+
/**
|
|
63
|
+
* Optional: Initialize the module (e.g., load models, connect to services)
|
|
64
|
+
*/
|
|
65
|
+
initialize?(config: ModuleConfig): Promise<void>;
|
|
66
|
+
/**
|
|
67
|
+
* Optional: Called after all files have been indexed.
|
|
68
|
+
* Use for building secondary indexes (e.g., Tier 1 summaries, BM25 index).
|
|
69
|
+
*/
|
|
70
|
+
finalize?(ctx: IndexContext): Promise<void>;
|
|
71
|
+
/**
|
|
72
|
+
* Optional: Cleanup resources
|
|
73
|
+
*/
|
|
74
|
+
dispose?(): Promise<void>;
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Registry for managing available modules
|
|
78
|
+
*/
|
|
79
|
+
export interface ModuleRegistry {
|
|
80
|
+
register(module: IndexModule): void;
|
|
81
|
+
get(id: string): IndexModule | undefined;
|
|
82
|
+
list(): IndexModule[];
|
|
83
|
+
getEnabled(config: Config): IndexModule[];
|
|
84
|
+
}
|
package/dist/utils/bm25.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 Search Utilities
|
|
3
|
+
*
|
|
4
|
+
* Re-exports BM25 functionality from the domain layer.
|
|
5
|
+
* This file exists for backwards compatibility with existing code.
|
|
6
|
+
*
|
|
7
|
+
* For new code, import directly from 'domain/services'.
|
|
8
|
+
*/
|
|
9
|
+
export { BM25Index, tokenize, normalizeScore, type BM25Document, type BM25Result, } from '../domain/services/bm25';
|
package/dist/utils/config.d.ts
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Configuration utilities
|
|
3
|
+
*
|
|
4
|
+
* Provides functions for loading, saving, and managing RAGgrep configuration.
|
|
5
|
+
*/
|
|
6
|
+
import type { Config, ModuleConfig } from '../domain/entities';
|
|
7
|
+
import { EmbeddingConfig } from './embeddings';
|
|
8
|
+
/** Default configuration instance */
|
|
9
|
+
export declare const DEFAULT_CONFIG: Config;
|
|
10
|
+
/**
|
|
11
|
+
* Get the root .raggrep directory path
|
|
12
|
+
*/
|
|
13
|
+
export declare function getRaggrepDir(rootDir: string, config?: Config): string;
|
|
14
|
+
/**
|
|
15
|
+
* Get the index data directory for a specific module
|
|
16
|
+
*/
|
|
17
|
+
export declare function getModuleIndexPath(rootDir: string, moduleId: string, config?: Config): string;
|
|
18
|
+
/**
|
|
19
|
+
* Get the manifest path for a specific module
|
|
20
|
+
*/
|
|
21
|
+
export declare function getModuleManifestPath(rootDir: string, moduleId: string, config?: Config): string;
|
|
22
|
+
/**
|
|
23
|
+
* Get the global manifest path
|
|
24
|
+
*/
|
|
25
|
+
export declare function getGlobalManifestPath(rootDir: string, config?: Config): string;
|
|
26
|
+
/**
|
|
27
|
+
* Get the config file path
|
|
28
|
+
*/
|
|
29
|
+
export declare function getConfigPath(rootDir: string, config?: Config): string;
|
|
30
|
+
/**
|
|
31
|
+
* Load config from file or return default
|
|
32
|
+
*/
|
|
33
|
+
export declare function loadConfig(rootDir: string): Promise<Config>;
|
|
34
|
+
/**
|
|
35
|
+
* Save config to file
|
|
36
|
+
*/
|
|
37
|
+
export declare function saveConfig(rootDir: string, config: Config): Promise<void>;
|
|
38
|
+
/**
|
|
39
|
+
* Get module config by ID
|
|
40
|
+
*/
|
|
41
|
+
export declare function getModuleConfig(config: Config, moduleId: string): ModuleConfig | undefined;
|
|
42
|
+
/**
|
|
43
|
+
* Extract embedding config from module options
|
|
44
|
+
*/
|
|
45
|
+
export declare function getEmbeddingConfigFromModule(moduleConfig: ModuleConfig): EmbeddingConfig;
|
package/dist/utils/embeddings.d.ts
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
export declare const EMBEDDING_MODELS: {
|
|
2
|
+
readonly 'all-MiniLM-L6-v2': "Xenova/all-MiniLM-L6-v2";
|
|
3
|
+
readonly 'all-MiniLM-L12-v2': "Xenova/all-MiniLM-L12-v2";
|
|
4
|
+
readonly 'bge-small-en-v1.5': "Xenova/bge-small-en-v1.5";
|
|
5
|
+
readonly 'paraphrase-MiniLM-L3-v2': "Xenova/paraphrase-MiniLM-L3-v2";
|
|
6
|
+
};
|
|
7
|
+
export type EmbeddingModelName = keyof typeof EMBEDDING_MODELS;
|
|
8
|
+
export interface EmbeddingConfig {
|
|
9
|
+
model: EmbeddingModelName;
|
|
10
|
+
/** Show progress during model download */
|
|
11
|
+
showProgress?: boolean;
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
* Configure the embedding model
|
|
15
|
+
*/
|
|
16
|
+
export declare function configureEmbeddings(config: Partial<EmbeddingConfig>): void;
|
|
17
|
+
/**
|
|
18
|
+
* Get embedding for a single text
|
|
19
|
+
*/
|
|
20
|
+
export declare function getEmbedding(text: string): Promise<number[]>;
|
|
21
|
+
/**
|
|
22
|
+
* Get embeddings for multiple texts (batched for efficiency)
|
|
23
|
+
*
|
|
24
|
+
* Processes texts in batches of BATCH_SIZE for better performance
|
|
25
|
+
* while avoiding memory issues with very large batches.
|
|
26
|
+
*
|
|
27
|
+
* @param texts - Array of texts to embed
|
|
28
|
+
* @returns Array of embedding vectors
|
|
29
|
+
*/
|
|
30
|
+
export declare function getEmbeddings(texts: string[]): Promise<number[][]>;
|
|
31
|
+
/**
|
|
32
|
+
* Calculate cosine similarity between two vectors
|
|
33
|
+
*/
|
|
34
|
+
export declare function cosineSimilarity(a: number[], b: number[]): number;
|
|
35
|
+
/**
|
|
36
|
+
* Get current embedding configuration
|
|
37
|
+
*/
|
|
38
|
+
export declare function getEmbeddingConfig(): EmbeddingConfig;
|
|
39
|
+
/**
|
|
40
|
+
* Get the cache directory path
|
|
41
|
+
*/
|
|
42
|
+
export declare function getCacheDir(): string;
|
|
43
|
+
/**
|
|
44
|
+
* Check if a model is already cached
|
|
45
|
+
*/
|
|
46
|
+
export declare function isModelCached(model?: EmbeddingModelName): Promise<boolean>;
|
package/dist/utils/tieredIndex.d.ts
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Symbolic Index System
|
|
3
|
+
*
|
|
4
|
+
* Provides fast keyword-based filtering using BM25 before semantic search.
|
|
5
|
+
*
|
|
6
|
+
* Structure:
|
|
7
|
+
* .raggrep/index/<module>/symbolic/
|
|
8
|
+
* ├── _meta.json (BM25 statistics)
|
|
9
|
+
* └── <filepath>.json (per-file summaries)
|
|
10
|
+
*
|
|
11
|
+
* This approach scales well because:
|
|
12
|
+
* - Each file summary is stored separately
|
|
13
|
+
* - BM25 metadata is small and loads quickly
|
|
14
|
+
* - Summaries are loaded on-demand during search
|
|
15
|
+
*/
|
|
16
|
+
import type { FileSummary, SymbolicIndexMeta } from '../domain/entities';
|
|
17
|
+
export type { FileSummary, SymbolicIndexMeta } from '../domain/entities';
|
|
18
|
+
export { extractKeywords } from '../domain/services/keywords';
|
|
19
|
+
/** @deprecated Use SymbolicIndexMeta */
|
|
20
|
+
export type Tier1Manifest = SymbolicIndexMeta & {
|
|
21
|
+
files: Record<string, FileSummary>;
|
|
22
|
+
};
|
|
23
|
+
/**
|
|
24
|
+
* Symbolic Index Manager
|
|
25
|
+
*
|
|
26
|
+
* Manages the keyword-based index for fast file filtering.
|
|
27
|
+
*/
|
|
28
|
+
export declare class SymbolicIndex {
|
|
29
|
+
private meta;
|
|
30
|
+
private fileSummaries;
|
|
31
|
+
private bm25Index;
|
|
32
|
+
private symbolicPath;
|
|
33
|
+
private moduleId;
|
|
34
|
+
constructor(indexDir: string, moduleId: string);
|
|
35
|
+
/**
|
|
36
|
+
* Initialize or load the symbolic index
|
|
37
|
+
*/
|
|
38
|
+
initialize(): Promise<void>;
|
|
39
|
+
/**
|
|
40
|
+
* Add or update a file summary
|
|
41
|
+
*/
|
|
42
|
+
addFile(summary: FileSummary): void;
|
|
43
|
+
/**
|
|
44
|
+
* Remove a file from the index
|
|
45
|
+
*/
|
|
46
|
+
removeFile(filepath: string): boolean;
|
|
47
|
+
/**
|
|
48
|
+
* Build BM25 index from file summaries
|
|
49
|
+
*/
|
|
50
|
+
buildBM25Index(): void;
|
|
51
|
+
/**
|
|
52
|
+
* Find candidate files using BM25 keyword search
|
|
53
|
+
*/
|
|
54
|
+
findCandidates(query: string, maxCandidates?: number): string[];
|
|
55
|
+
/**
|
|
56
|
+
* Get all file paths in the index
|
|
57
|
+
*/
|
|
58
|
+
getAllFiles(): string[];
|
|
59
|
+
/**
|
|
60
|
+
* Get summary for a specific file
|
|
61
|
+
*/
|
|
62
|
+
getFileSummary(filepath: string): FileSummary | undefined;
|
|
63
|
+
/**
|
|
64
|
+
* Save the index to disk (per-file structure)
|
|
65
|
+
*/
|
|
66
|
+
save(): Promise<void>;
|
|
67
|
+
/**
|
|
68
|
+
* Load the index from disk
|
|
69
|
+
*/
|
|
70
|
+
load(): Promise<void>;
|
|
71
|
+
/**
|
|
72
|
+
* Recursively load file summaries from the symbolic directory
|
|
73
|
+
*/
|
|
74
|
+
private loadFileSummariesRecursive;
|
|
75
|
+
/**
|
|
76
|
+
* Get the path for a file summary
|
|
77
|
+
*/
|
|
78
|
+
private getFileSummaryPath;
|
|
79
|
+
/**
|
|
80
|
+
* Delete a file summary from disk
|
|
81
|
+
*/
|
|
82
|
+
deleteFileSummary(filepath: string): Promise<void>;
|
|
83
|
+
/**
|
|
84
|
+
* Check if the index exists on disk
|
|
85
|
+
*/
|
|
86
|
+
exists(): Promise<boolean>;
|
|
87
|
+
/**
|
|
88
|
+
* Get the number of indexed files
|
|
89
|
+
*/
|
|
90
|
+
get size(): number;
|
|
91
|
+
/**
|
|
92
|
+
* Clear the index
|
|
93
|
+
*/
|
|
94
|
+
clear(): void;
|
|
95
|
+
}
|
|
96
|
+
/** @deprecated Use SymbolicIndex instead */
|
|
97
|
+
export declare const Tier1Index: typeof SymbolicIndex;
|
|
98
|
+
/** @deprecated Use SymbolicIndex instead */
|
|
99
|
+
export declare function getTier1Path(rootDir: string, moduleId: string, indexDir?: string): string;
|
|
100
|
+
export declare function getSymbolicPath(rootDir: string, moduleId: string, indexDir?: string): string;
|
package/package.json
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "raggrep",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"import": "./dist/index.js",
|
|
11
|
+
"types": "./dist/index.d.ts"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"bin": {
|
|
15
|
+
"raggrep": "./dist/cli/main.js"
|
|
16
|
+
},
|
|
17
|
+
"files": [
|
|
18
|
+
"dist",
|
|
19
|
+
"README.md",
|
|
20
|
+
"LICENSE"
|
|
21
|
+
],
|
|
22
|
+
"scripts": {
|
|
23
|
+
"build": "bun run build:clean && bun run build:bundle && bun run build:types && bun run build:shebang",
|
|
24
|
+
"build:clean": "rm -rf dist",
|
|
25
|
+
"build:bundle": "bun build src/index.ts --outdir dist --target node --sourcemap=external --external '@xenova/transformers' --external 'glob' --external 'typescript' && bun build src/cli/main.ts --outdir dist/cli --target node --sourcemap=external --external '@xenova/transformers' --external 'glob' --external 'typescript'",
|
|
26
|
+
"build:types": "tsc --emitDeclarationOnly --outDir dist",
|
|
27
|
+
"build:shebang": "echo '#!/usr/bin/env node' | cat - dist/cli/main.js > temp && mv temp dist/cli/main.js",
|
|
28
|
+
"prepublishOnly": "bun run build",
|
|
29
|
+
"raggrep": "bun run src/cli/main.ts",
|
|
30
|
+
"test": "bun test",
|
|
31
|
+
"dev": "bun run src/cli/main.ts"
|
|
32
|
+
},
|
|
33
|
+
"keywords": [
|
|
34
|
+
"rag",
|
|
35
|
+
"search",
|
|
36
|
+
"semantic-search",
|
|
37
|
+
"embeddings",
|
|
38
|
+
"codebase",
|
|
39
|
+
"local",
|
|
40
|
+
"ai",
|
|
41
|
+
"code-search",
|
|
42
|
+
"transformers"
|
|
43
|
+
],
|
|
44
|
+
"author": "",
|
|
45
|
+
"license": "MIT",
|
|
46
|
+
"repository": {
|
|
47
|
+
"type": "git",
|
|
48
|
+
"url": "git+https://github.com/conradkoh/raggrep.git"
|
|
49
|
+
},
|
|
50
|
+
"bugs": {
|
|
51
|
+
"url": "https://github.com/conradkoh/raggrep/issues"
|
|
52
|
+
},
|
|
53
|
+
"homepage": "https://github.com/conradkoh/raggrep#readme",
|
|
54
|
+
"engines": {
|
|
55
|
+
"node": ">=18.0.0"
|
|
56
|
+
},
|
|
57
|
+
"dependencies": {
|
|
58
|
+
"@xenova/transformers": "^2.17.0",
|
|
59
|
+
"glob": "^10.0.0",
|
|
60
|
+
"typescript": "^5.0.0"
|
|
61
|
+
},
|
|
62
|
+
"devDependencies": {
|
|
63
|
+
"@types/bun": "latest",
|
|
64
|
+
"@types/node": "^20.0.0"
|
|
65
|
+
}
|
|
66
|
+
}
|