raggrep 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/LICENSE +22 -0
  2. package/README.md +15 -0
  3. package/dist/application/index.d.ts +7 -0
  4. package/dist/application/usecases/cleanupIndex.d.ts +54 -0
  5. package/dist/application/usecases/index.d.ts +9 -0
  6. package/dist/application/usecases/indexDirectory.d.ts +54 -0
  7. package/dist/application/usecases/searchIndex.d.ts +48 -0
  8. package/dist/cli/main.d.ts +1 -0
  9. package/dist/cli/main.js +1596 -0
  10. package/dist/cli/main.js.map +22 -0
  11. package/dist/composition.d.ts +52 -0
  12. package/dist/domain/entities/chunk.d.ts +41 -0
  13. package/dist/domain/entities/config.d.ts +43 -0
  14. package/dist/domain/entities/fileIndex.d.ts +58 -0
  15. package/dist/domain/entities/fileSummary.d.ts +61 -0
  16. package/dist/domain/entities/index.d.ts +14 -0
  17. package/dist/domain/entities/searchResult.d.ts +36 -0
  18. package/dist/domain/index.d.ts +11 -0
  19. package/dist/domain/ports/embedding.d.ts +60 -0
  20. package/dist/domain/ports/filesystem.d.ts +78 -0
  21. package/dist/domain/ports/index.d.ts +10 -0
  22. package/dist/domain/ports/storage.d.ts +79 -0
  23. package/dist/domain/services/bm25.d.ts +82 -0
  24. package/dist/domain/services/bm25.test.d.ts +4 -0
  25. package/dist/domain/services/index.d.ts +8 -0
  26. package/dist/domain/services/keywords.d.ts +27 -0
  27. package/dist/index.d.ts +98 -0
  28. package/dist/index.js +1378 -0
  29. package/dist/index.js.map +22 -0
  30. package/dist/indexer/index.d.ts +33 -0
  31. package/dist/infrastructure/embeddings/index.d.ts +4 -0
  32. package/dist/infrastructure/embeddings/transformersEmbedding.d.ts +34 -0
  33. package/dist/infrastructure/filesystem/index.d.ts +4 -0
  34. package/dist/infrastructure/filesystem/nodeFileSystem.d.ts +28 -0
  35. package/dist/infrastructure/index.d.ts +9 -0
  36. package/dist/infrastructure/storage/fileIndexStorage.d.ts +68 -0
  37. package/dist/infrastructure/storage/index.d.ts +4 -0
  38. package/dist/modules/registry.d.ts +3 -0
  39. package/dist/modules/semantic/index.d.ts +55 -0
  40. package/dist/modules/semantic/parseCode.d.ts +44 -0
  41. package/dist/modules/semantic/parseCode.test.d.ts +4 -0
  42. package/dist/search/index.d.ts +11 -0
  43. package/dist/types.d.ts +84 -0
  44. package/dist/utils/bm25.d.ts +9 -0
  45. package/dist/utils/config.d.ts +45 -0
  46. package/dist/utils/embeddings.d.ts +46 -0
  47. package/dist/utils/embeddings.test.d.ts +4 -0
  48. package/dist/utils/tieredIndex.d.ts +100 -0
  49. package/package.json +66 -0
@@ -0,0 +1,84 @@
1
+ /**
2
+ * Type definitions for the RAG system
3
+ *
4
+ * This file re-exports domain entities and defines module interfaces.
5
+ * For new code, prefer importing directly from 'domain/entities'.
6
+ */
7
+ export type { Chunk, ChunkType, FileIndex, FileManifestEntry, ModuleManifest, GlobalManifest, FileSummary, Tier1Manifest, SearchResult, SearchOptions, Config, ModuleConfig, } from './domain/entities';
8
+ export { createChunkId, DEFAULT_SEARCH_OPTIONS, DEFAULT_IGNORE_PATHS, DEFAULT_EXTENSIONS, createDefaultConfig, } from './domain/entities';
9
+ import type { Config, FileIndex, SearchResult, SearchOptions, ModuleConfig } from './domain/entities';
10
+ /**
11
+ * Context provided to modules during indexing
12
+ */
13
+ export interface IndexContext {
14
+ rootDir: string;
15
+ config: Config;
16
+ /** Get the content of a file */
17
+ readFile: (filepath: string) => Promise<string>;
18
+ /** Get file stats */
19
+ getFileStats: (filepath: string) => Promise<{
20
+ lastModified: string;
21
+ }>;
22
+ }
23
+ /**
24
+ * Context provided to modules during search
25
+ */
26
+ export interface SearchContext {
27
+ rootDir: string;
28
+ config: Config;
29
+ /** Load index data for a specific file */
30
+ loadFileIndex: (filepath: string) => Promise<FileIndex | null>;
31
+ /** List all indexed files */
32
+ listIndexedFiles: () => Promise<string[]>;
33
+ }
34
+ /**
35
+ * Base interface for index modules
36
+ *
37
+ * Modules provide different strategies for indexing and retrieving code.
38
+ * Examples:
39
+ * - SemanticModule: Uses text embeddings for natural language search
40
+ * - SymbolModule: Uses TypeScript/LSP symbol information
41
+ * - ASTModule: Uses AST-based code structure analysis
42
+ */
43
+ export interface IndexModule {
44
+ /** Unique identifier for this module */
45
+ readonly id: string;
46
+ /** Human-readable name */
47
+ readonly name: string;
48
+ /** Description of what this module indexes */
49
+ readonly description: string;
50
+ /** Version of the module (for index compatibility) */
51
+ readonly version: string;
52
+ /**
53
+ * Index a single file
54
+ * @returns FileIndex with module-specific data, or null if file should be skipped
55
+ */
56
+ indexFile(filepath: string, content: string, ctx: IndexContext): Promise<FileIndex | null>;
57
+ /**
58
+ * Search the index with a query
59
+ * @returns Ranked search results
60
+ */
61
+ search(query: string, ctx: SearchContext, options?: SearchOptions): Promise<SearchResult[]>;
62
+ /**
63
+ * Optional: Initialize the module (e.g., load models, connect to services)
64
+ */
65
+ initialize?(config: ModuleConfig): Promise<void>;
66
+ /**
67
+ * Optional: Called after all files have been indexed.
68
+ * Use for building secondary indexes (e.g., Tier 1 summaries, BM25 index).
69
+ */
70
+ finalize?(ctx: IndexContext): Promise<void>;
71
+ /**
72
+ * Optional: Cleanup resources
73
+ */
74
+ dispose?(): Promise<void>;
75
+ }
76
+ /**
77
+ * Registry for managing available modules
78
+ */
79
+ export interface ModuleRegistry {
80
+ register(module: IndexModule): void;
81
+ get(id: string): IndexModule | undefined;
82
+ list(): IndexModule[];
83
+ getEnabled(config: Config): IndexModule[];
84
+ }
@@ -0,0 +1,9 @@
1
+ /**
2
+ * BM25 Search Utilities
3
+ *
4
+ * Re-exports BM25 functionality from the domain layer.
5
+ * This file exists for backwards compatibility with existing code.
6
+ *
7
+ * For new code, import directly from 'domain/services'.
8
+ */
9
+ export { BM25Index, tokenize, normalizeScore, type BM25Document, type BM25Result, } from '../domain/services/bm25';
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Configuration utilities
3
+ *
4
+ * Provides functions for loading, saving, and managing RAGgrep configuration.
5
+ */
6
+ import type { Config, ModuleConfig } from '../domain/entities';
7
+ import { EmbeddingConfig } from './embeddings';
8
+ /** Default configuration instance */
9
+ export declare const DEFAULT_CONFIG: Config;
10
+ /**
11
+ * Get the root .raggrep directory path
12
+ */
13
+ export declare function getRaggrepDir(rootDir: string, config?: Config): string;
14
+ /**
15
+ * Get the index data directory for a specific module
16
+ */
17
+ export declare function getModuleIndexPath(rootDir: string, moduleId: string, config?: Config): string;
18
+ /**
19
+ * Get the manifest path for a specific module
20
+ */
21
+ export declare function getModuleManifestPath(rootDir: string, moduleId: string, config?: Config): string;
22
+ /**
23
+ * Get the global manifest path
24
+ */
25
+ export declare function getGlobalManifestPath(rootDir: string, config?: Config): string;
26
+ /**
27
+ * Get the config file path
28
+ */
29
+ export declare function getConfigPath(rootDir: string, config?: Config): string;
30
+ /**
31
+ * Load config from file or return default
32
+ */
33
+ export declare function loadConfig(rootDir: string): Promise<Config>;
34
+ /**
35
+ * Save config to file
36
+ */
37
+ export declare function saveConfig(rootDir: string, config: Config): Promise<void>;
38
+ /**
39
+ * Get module config by ID
40
+ */
41
+ export declare function getModuleConfig(config: Config, moduleId: string): ModuleConfig | undefined;
42
+ /**
43
+ * Extract embedding config from module options
44
+ */
45
+ export declare function getEmbeddingConfigFromModule(moduleConfig: ModuleConfig): EmbeddingConfig;
@@ -0,0 +1,46 @@
1
+ export declare const EMBEDDING_MODELS: {
2
+ readonly 'all-MiniLM-L6-v2': "Xenova/all-MiniLM-L6-v2";
3
+ readonly 'all-MiniLM-L12-v2': "Xenova/all-MiniLM-L12-v2";
4
+ readonly 'bge-small-en-v1.5': "Xenova/bge-small-en-v1.5";
5
+ readonly 'paraphrase-MiniLM-L3-v2': "Xenova/paraphrase-MiniLM-L3-v2";
6
+ };
7
+ export type EmbeddingModelName = keyof typeof EMBEDDING_MODELS;
8
+ export interface EmbeddingConfig {
9
+ model: EmbeddingModelName;
10
+ /** Show progress during model download */
11
+ showProgress?: boolean;
12
+ }
13
+ /**
14
+ * Configure the embedding model
15
+ */
16
+ export declare function configureEmbeddings(config: Partial<EmbeddingConfig>): void;
17
+ /**
18
+ * Get embedding for a single text
19
+ */
20
+ export declare function getEmbedding(text: string): Promise<number[]>;
21
+ /**
22
+ * Get embeddings for multiple texts (batched for efficiency)
23
+ *
24
+ * Processes texts in batches of BATCH_SIZE for better performance
25
+ * while avoiding memory issues with very large batches.
26
+ *
27
+ * @param texts - Array of texts to embed
28
+ * @returns Array of embedding vectors
29
+ */
30
+ export declare function getEmbeddings(texts: string[]): Promise<number[][]>;
31
+ /**
32
+ * Calculate cosine similarity between two vectors
33
+ */
34
+ export declare function cosineSimilarity(a: number[], b: number[]): number;
35
+ /**
36
+ * Get current embedding configuration
37
+ */
38
+ export declare function getEmbeddingConfig(): EmbeddingConfig;
39
+ /**
40
+ * Get the cache directory path
41
+ */
42
+ export declare function getCacheDir(): string;
43
+ /**
44
+ * Check if a model is already cached
45
+ */
46
+ export declare function isModelCached(model?: EmbeddingModelName): Promise<boolean>;
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Tests for embedding utilities
3
+ */
4
+ export {};
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Symbolic Index System
3
+ *
4
+ * Provides fast keyword-based filtering using BM25 before semantic search.
5
+ *
6
+ * Structure:
7
+ * .raggrep/index/<module>/symbolic/
8
+ * ├── _meta.json (BM25 statistics)
9
+ * └── <filepath>.json (per-file summaries)
10
+ *
11
+ * This approach scales well because:
12
+ * - Each file summary is stored separately
13
+ * - BM25 metadata is small and loads quickly
14
+ * - Summaries are loaded on-demand during search
15
+ */
16
+ import type { FileSummary, SymbolicIndexMeta } from '../domain/entities';
17
+ export type { FileSummary, SymbolicIndexMeta } from '../domain/entities';
18
+ export { extractKeywords } from '../domain/services/keywords';
19
+ /** @deprecated Use SymbolicIndexMeta */
20
+ export type Tier1Manifest = SymbolicIndexMeta & {
21
+ files: Record<string, FileSummary>;
22
+ };
23
+ /**
24
+ * Symbolic Index Manager
25
+ *
26
+ * Manages the keyword-based index for fast file filtering.
27
+ */
28
+ export declare class SymbolicIndex {
29
+ private meta;
30
+ private fileSummaries;
31
+ private bm25Index;
32
+ private symbolicPath;
33
+ private moduleId;
34
+ constructor(indexDir: string, moduleId: string);
35
+ /**
36
+ * Initialize or load the symbolic index
37
+ */
38
+ initialize(): Promise<void>;
39
+ /**
40
+ * Add or update a file summary
41
+ */
42
+ addFile(summary: FileSummary): void;
43
+ /**
44
+ * Remove a file from the index
45
+ */
46
+ removeFile(filepath: string): boolean;
47
+ /**
48
+ * Build BM25 index from file summaries
49
+ */
50
+ buildBM25Index(): void;
51
+ /**
52
+ * Find candidate files using BM25 keyword search
53
+ */
54
+ findCandidates(query: string, maxCandidates?: number): string[];
55
+ /**
56
+ * Get all file paths in the index
57
+ */
58
+ getAllFiles(): string[];
59
+ /**
60
+ * Get summary for a specific file
61
+ */
62
+ getFileSummary(filepath: string): FileSummary | undefined;
63
+ /**
64
+ * Save the index to disk (per-file structure)
65
+ */
66
+ save(): Promise<void>;
67
+ /**
68
+ * Load the index from disk
69
+ */
70
+ load(): Promise<void>;
71
+ /**
72
+ * Recursively load file summaries from the symbolic directory
73
+ */
74
+ private loadFileSummariesRecursive;
75
+ /**
76
+ * Get the path for a file summary
77
+ */
78
+ private getFileSummaryPath;
79
+ /**
80
+ * Delete a file summary from disk
81
+ */
82
+ deleteFileSummary(filepath: string): Promise<void>;
83
+ /**
84
+ * Check if the index exists on disk
85
+ */
86
+ exists(): Promise<boolean>;
87
+ /**
88
+ * Get the number of indexed files
89
+ */
90
+ get size(): number;
91
+ /**
92
+ * Clear the index
93
+ */
94
+ clear(): void;
95
+ }
96
+ /** @deprecated Use SymbolicIndex instead */
97
+ export declare const Tier1Index: typeof SymbolicIndex;
98
+ /** @deprecated Use getSymbolicPath instead */
99
+ export declare function getTier1Path(rootDir: string, moduleId: string, indexDir?: string): string;
100
+ export declare function getSymbolicPath(rootDir: string, moduleId: string, indexDir?: string): string;
package/package.json ADDED
@@ -0,0 +1,66 @@
1
+ {
2
+ "name": "raggrep",
3
+ "version": "0.1.0",
4
+ "description": "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
5
+ "type": "module",
6
+ "main": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "import": "./dist/index.js",
11
+ "types": "./dist/index.d.ts"
12
+ }
13
+ },
14
+ "bin": {
15
+ "raggrep": "./dist/cli/main.js"
16
+ },
17
+ "files": [
18
+ "dist",
19
+ "README.md",
20
+ "LICENSE"
21
+ ],
22
+ "scripts": {
23
+ "build": "bun run build:clean && bun run build:bundle && bun run build:types && bun run build:shebang",
24
+ "build:clean": "rm -rf dist",
25
+ "build:bundle": "bun build src/index.ts --outdir dist --target node --sourcemap=external --external '@xenova/transformers' --external 'glob' --external 'typescript' && bun build src/cli/main.ts --outdir dist/cli --target node --sourcemap=external --external '@xenova/transformers' --external 'glob' --external 'typescript'",
26
+ "build:types": "tsc --emitDeclarationOnly --outDir dist",
27
+ "build:shebang": "echo '#!/usr/bin/env node' | cat - dist/cli/main.js > temp && mv temp dist/cli/main.js",
28
+ "prepublishOnly": "bun run build",
29
+ "raggrep": "bun run src/cli/main.ts",
30
+ "test": "bun test",
31
+ "dev": "bun run src/cli/main.ts"
32
+ },
33
+ "keywords": [
34
+ "rag",
35
+ "search",
36
+ "semantic-search",
37
+ "embeddings",
38
+ "codebase",
39
+ "local",
40
+ "ai",
41
+ "code-search",
42
+ "transformers"
43
+ ],
44
+ "author": "",
45
+ "license": "MIT",
46
+ "repository": {
47
+ "type": "git",
48
+ "url": "git+https://github.com/conradkoh/raggrep.git"
49
+ },
50
+ "bugs": {
51
+ "url": "https://github.com/conradkoh/raggrep/issues"
52
+ },
53
+ "homepage": "https://github.com/conradkoh/raggrep#readme",
54
+ "engines": {
55
+ "node": ">=18.0.0"
56
+ },
57
+ "dependencies": {
58
+ "@xenova/transformers": "^2.17.0",
59
+ "glob": "^10.0.0",
60
+ "typescript": "^5.0.0"
61
+ },
62
+ "devDependencies": {
63
+ "@types/bun": "latest",
64
+ "@types/node": "^20.0.0"
65
+ }
66
+ }